Compare commits

...

3 commits

Author SHA1 Message Date
rswier
558e70d96c Include headers 2016-02-26 01:36:07 -05:00
rswier
05cb8b3b58 Added structures 2016-02-26 01:27:34 -05:00
rswier
dbb64bf54f New branch with struct support
This change adds structures (struct) along with the dot (.) and arrow
(->) operators.  It is very silly to add this level of complexity while
keeping it just four functions.  But it had to be done :-)
2016-02-26 01:23:08 -05:00
2 changed files with 375 additions and 217 deletions

View file

@ -3,6 +3,10 @@ c4 - C in four functions
An exercise in minimalism.
This branch adds structures (struct) along with the dot (.) and arrow (->) operators.
It is very silly to add this level of complexity while keeping it just four functions.
But it had to be done :-)
Try the following:
gcc -o c4 c4.c (you may need the -m32 option on 64bit machines)

588
c4.c
View file

@ -1,23 +1,27 @@
// c4.c - C in four functions
// char, int, and pointer types
// if, while, return, and expression statements
// char, int, structs, and pointer types
// if, while, return, switch, and expression statements
// just enough features to allow self-compilation and a bit more
// Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
#include <fcntl.h>
char *p, *lp, // current position in source code
*data; // data/bss pointer
*data, // data/bss pointer
*ops; // opcodes
int *e, *le, // current position in emitted code
*id, // currently parsed identifier
*sym, // symbol table (simple list of identifiers)
*cas, // case statement patch-up pointer
*brk, // break statement patch-up pointer
*def, // default statement patch-up pointer
*tsize, // array (indexed by type) of type sizes
tnew, // next available type
tk, // current token
ival, // current token value
ty, // current expression type
@ -26,11 +30,33 @@ int *e, *le, // current position in emitted code
src, // print source and assembly flag
debug; // print executed instructions
// identifier
struct ident_s {
int tk;
int hash;
char *name;
int class;
int type;
int val;
int stype;
int hclass;
int htype;
int hval;
} *id, // currently parsed identifier
*sym; // symbol table (simple list of identifiers)
struct member_s {
struct ident_s *id;
int offset;
int type;
struct member_s *next;
} **members; // array (indexed by type) of struct member lists
// tokens and classes (operators last and in precedence order)
enum {
Num = 128, Fun, Sys, Glo, Loc, Id,
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Struct, Switch, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Dot, Arrow, Brak
};
// opcodes
@ -39,46 +65,27 @@ enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
// types
enum { CHAR, INT, PTR };
// identifier offsets (since we can't create an ident struct)
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
enum { CHAR, INT, PTR = 256, PTR2 = 512 };
void next()
{
char *pp;
while (tk = *p) {
++p;
if (tk == '\n') {
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
++line;
}
else if (tk == '#') {
while (*p != 0 && *p != '\n') ++p;
}
else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
pp = p - 1;
while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
tk = tk * 147 + *p++;
tk = (tk << 6) + (p - pp);
id = sym;
while (id[Tk]) {
if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
id = id + Idsz;
while (id->tk) {
if (tk == id->hash && !memcmp(id->name, pp, p - pp)) { tk = id->tk; return; }
id = id + 1;
}
id[Name] = (int)pp;
id[Hash] = tk;
tk = id[Tk] = Id;
id->name = pp;
id->hash = tk;
tk = id->tk = Id;
return;
}
else if (tk >= '0' && tk <= '9') {
@ -91,89 +98,114 @@ void next()
tk = Num;
return;
}
else if (tk == '/') {
if (*p == '/') {
++p;
while (*p != 0 && *p != '\n') ++p;
switch (tk) {
case '\n':
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &ops[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
else {
++line;
case ' ': case '\t': case '\v': case '\f': case '\r':
break;
case '/':
if (*p == '/') {
case '#':
while (*p != 0 && *p != '\n') ++p;
} else {
tk = Div;
return;
}
}
else if (tk == '\'' || tk == '"') {
break;
case '\'':
case '"':
pp = data;
while (*p != 0 && *p != tk) {
if ((ival = *p++) == '\\') {
if ((ival = *p++) == 'n') ival = '\n';
switch (ival = *p++) {
case 'n': ival = '\n'; break;
case 't': ival = '\t'; break;
case 'v': ival = '\v'; break;
case 'f': ival = '\f'; break;
case 'r': ival = '\r';
}
}
if (tk == '"') *data++ = ival;
}
++p;
if (tk == '"') ival = (int)pp; else tk = Num;
return;
case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return;
case '+': if (*p == '+') { ++p; tk = Inc; } else tk = Add; return;
case '-': if (*p == '-') { ++p; tk = Dec; } else if (*p == '>') { ++p; tk = Arrow; } else tk = Sub; return;
case '!': if (*p == '=') { ++p; tk = Ne; } return;
case '<': if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return;
case '>': if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return;
case '|': if (*p == '|') { ++p; tk = Lor; } else tk = Or; return;
case '&': if (*p == '&') { ++p; tk = Lan; } else tk = And; return;
case '^': tk = Xor; return;
case '%': tk = Mod; return;
case '*': tk = Mul; return;
case '[': tk = Brak; return;
case '?': tk = Cond; return;
case '.': tk = Dot; return;
default: return;
}
else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }
else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
else if (tk == '^') { tk = Xor; return; }
else if (tk == '%') { tk = Mod; return; }
else if (tk == '*') { tk = Mul; return; }
else if (tk == '[') { tk = Brak; return; }
else if (tk == '?') { tk = Cond; return; }
else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
}
}
void expr(int lev)
{
int t, *d;
int t, *b, sz;
struct ident_s *d;
struct member_s *m;
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; }
else if (tk == '"') {
switch (tk) {
case 0: printf("%d: unexpected eof in expression\n", line); exit(-1);
case Num: *++e = IMM; *++e = ival; next(); ty = INT; break;
case '"':
*++e = IMM; *++e = ival; next();
while (tk == '"') next();
data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
}
else if (tk == Sizeof) {
break;
case Sizeof:
next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
else if (tk == Struct) { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } ty = id->stype; next(); }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);
*++e = IMM; *++e = ty >= PTR ? sizeof(int) : tsize[ty];
ty = INT;
}
else if (tk == Id) {
break;
case Id:
d = id; next();
if (tk == '(') {
next();
t = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
next();
if (d[Class] == Sys) *++e = d[Val];
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
if (d->class == Sys) *++e = d->val;
else if (d->class == Fun) { *++e = JSR; *++e = d->val; }
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }
ty = d[Type];
ty = d->type;
}
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }
else if (d->class == Num) { *++e = IMM; *++e = d->val; ty = INT; }
else {
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }
if (d->class == Loc) { *++e = LEA; *++e = loc - d->val; }
else if (d->class == Glo) { *++e = IMM; *++e = d->val; }
else { printf("%d: undefined variable\n", line); exit(-1); }
*++e = ((ty = d[Type]) == CHAR) ? LC : LI;
if ((ty = d->type) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
}
}
else if (tk == '(') {
break;
case '(':
next();
if (tk == Int || tk == Char) {
t = (tk == Int) ? INT : CHAR; next();
if (tk == Int || tk == Char || tk == Struct) {
if (tk == Int) { next(); t = INT; } else if (tk == Char) { next(); t = CHAR; }
else { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } t = id->stype; next(); }
while (tk == Mul) { next(); t = t + PTR; }
if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
expr(Inc);
@ -183,108 +215,130 @@ void expr(int lev)
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
}
}
else if (tk == Mul) {
break;
case Mul:
next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
*++e = (ty == CHAR) ? LC : LI;
}
else if (tk == And) {
if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
case And:
next(); expr(Inc);
if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); }
if (*e == LC || *e == LI) --e; // XXX else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR;
}
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }
else if (tk == Add) { next(); expr(Inc); ty = INT; }
else if (tk == Sub) {
break;
case '!': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; break;
case '~': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; break;
case Add: next(); expr(Inc); ty = INT; break;
case Sub:
next(); *++e = IMM;
if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }
ty = INT;
}
else if (tk == Inc || tk == Dec) {
break;
case Inc: case Dec:
t = tk; next(); expr(Inc);
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = IMM; *++e = ty >= PTR2 ? sizeof(int) : (ty >= PTR) ? tsize[ty - PTR] : 1;
*++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
break;
default: printf("%d: bad expression\n", line); exit(-1);
}
else { printf("%d: bad expression\n", line); exit(-1); }
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty;
if (tk == Assign) {
switch (tk) {
case Assign:
next();
if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;
}
else if (tk == Cond) {
break;
case Cond:
next();
*++e = BZ; d = ++e;
*++e = BZ; b = ++e;
expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*d = (int)(e + 3); *++e = JMP; d = ++e;
*b = (int)(e + 3); *++e = JMP; b = ++e;
expr(Cond);
*d = (int)(e + 1);
}
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; }
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; }
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; }
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; }
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; }
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; }
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; }
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; }
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; }
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }
else if (tk == Add) {
*b = (int)(e + 1);
break;
case Lor: next(); *++e = BNZ; b = ++e; expr(Lan); *b = (int)(e + 1); ty = INT; break;
case Lan: next(); *++e = BZ; b = ++e; expr(Or); *b = (int)(e + 1); ty = INT; break;
case Or: next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; break;
case Xor: next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; break;
case And: next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; break;
case Eq: next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; break;
case Ne: next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; break;
case Lt: next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; break;
case Gt: next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; break;
case Le: next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; break;
case Ge: next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; break;
case Shl: next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; break;
case Shr: next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; break;
case Add:
next(); *++e = PSH; expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
sz = (ty = t) >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
*++e = ADD;
}
else if (tk == Sub) {
break;
case Sub:
next(); *++e = PSH; expr(Mul);
if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }
else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }
sz = t >= PTR2 ? sizeof(int) : t >= PTR ? tsize[t - PTR] : 1;
if (t == ty && sz > 1) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sz; *++e = DIV; ty = INT; }
else if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; *++e = SUB; }
else *++e = SUB;
}
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }
else if (tk == Inc || tk == Dec) {
ty = t;
break;
case Mul: next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; break;
case Div: next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; break;
case Mod: next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; break;
case Inc: case Dec:
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
*++e = PSH; *++e = IMM; *++e = sz;
*++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = PSH; *++e = IMM; *++e = sz;
*++e = (tk == Inc) ? SUB : ADD;
next();
}
else if (tk == Brak) {
break;
case Dot:
ty = ty + PTR;
case Arrow:
if (ty <= PTR+INT || ty >= PTR2) { printf("%d: structure expected\n", line); exit(-1); }
next();
if (tk != Id) { printf("%d: structure member expected\n", line); exit(-1); }
m = members[ty - PTR]; while (m && m->id != id) m = m->next;
if (!m) { printf("%d: structure member not found\n", line); exit(-1); }
if (m->offset) { *++e = PSH; *++e = IMM; *++e = m->offset; *++e = ADD; }
ty = m->type;
if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
next();
break;
case Brak:
next(); *++e = PSH; expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t];
if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
*++e = ADD;
*++e = ((ty = t - PTR) == CHAR) ? LC : LI;
if ((ty = t) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
default: printf("%d: compiler error tk=%d\n", line, tk); exit(-1);
}
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
}
}
void stmt()
{
int *a, *b;
int *a, *b, *d, i;
if (tk == If) {
switch (tk) {
case If:
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
@ -297,8 +351,8 @@ void stmt()
stmt();
}
*b = (int)(e + 1);
}
else if (tk == While) {
return;
case While:
next();
a = e + 1;
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
@ -308,22 +362,54 @@ void stmt()
stmt();
*++e = JMP; *++e = (int)a;
*b = (int)(e + 1);
}
else if (tk == Return) {
return;
case Switch:
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
a = cas; *++e = JMP; cas = ++e;
b = brk; d = def; brk = def = 0;
stmt();
*cas = def ? (int)def : (int)(e + 1); cas = a;
while (brk) { a = (int *)*brk; *brk = (int)(e + 1); brk = a; }
brk = b; def = d;
return;
case Case:
*++e = JMP; ++e; *e = (int)(e + 7); *++e = PSH; i = *cas; *cas = (int)e;
next();
expr(Or);
if (e[-1] != IMM) { printf("%d: bad case immediate\n", line); exit(-1); }
*e = *e - i; *++e = SUB; *++e = BNZ; cas = ++e; *e = i + e[-3];
if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); }
stmt();
return;
case Break:
next();
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
*++e = JMP; *++e = (int)brk; brk = e;
return;
case Default:
next();
if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); }
def = e + 1;
stmt();
return;
case Return:
next();
if (tk != ';') expr(Assign);
*++e = LEV;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
else if (tk == '{') {
return;
case '{':
next();
while (tk != '}') stmt();
next();
}
else if (tk == ';') {
return;
case ';':
next();
}
else {
return;
default:
expr(Assign);
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
@ -331,7 +417,9 @@ void stmt()
int main(int argc, char **argv)
{
int fd, bt, ty, poolsz, *idmain;
int fd, bt, mbt, ty, poolsz;
struct ident_s *idmain, *d;
struct member_s *m;
int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t; // temps
@ -347,16 +435,24 @@ int main(int argc, char **argv)
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
if (!(tsize = malloc(PTR * sizeof(int)))) { printf("could not malloc() tsize area\n"); return -1; }
if (!(members = malloc(PTR * sizeof(struct member_s *)))) { printf("could not malloc() members area\n"); return -1; }
memset(sym, 0, poolsz);
memset(e, 0, poolsz);
memset(data, 0, poolsz);
p = "char else enum if int return sizeof while "
memset(tsize, 0, PTR * sizeof(int));
memset(members, 0, PTR * sizeof(struct member_s *));
ops = "LEA IMM JMP JSR BZ BNZ ENT ADJ LEV LI LC SI SC PSH "
"OR XOR AND EQ NE LT GT LE GE SHL SHR ADD SUB MUL DIV MOD "
"OPEN READ CLOS PRTF MALC MSET MCMP EXIT ";
p = "break case char default else enum if int return sizeof struct switch while "
"open read close printf malloc memset memcmp exit void main";
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
next(); id[Tk] = Char; // handle void type
i = Break; while (i <= While) { next(); id->tk = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id->class = Sys; id->type = INT; id->val = i++; } // add library to symbol table
next(); id->tk = Char; // handle void type
next(); idmain = id; // keep track of main
if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
@ -364,6 +460,10 @@ int main(int argc, char **argv)
p[i] = 0;
close(fd);
// add primitive types
tsize[tnew++] = sizeof(char);
tsize[tnew++] = sizeof(int);
// parse declarations
line = 1;
next();
@ -386,33 +486,83 @@ int main(int argc, char **argv)
i = ival;
next();
}
id[Class] = Num; id[Type] = INT; id[Val] = i++;
id->class = Num; id->type = INT; id->val = i++;
if (tk == ',') next();
}
next();
}
}
else if (tk == Struct) {
next();
if (tk == Id) {
if (!id->stype) id->stype = tnew++;
bt = id->stype;
next();
} else {
bt = tnew++;
}
if (tk == '{') {
next();
if (members[bt]) { printf("%d: duplicate structure definition\n", line); return -1; }
i = 0;
while (tk != '}') {
mbt = INT;
if (tk == Int) next();
else if (tk == Char) { next(); mbt = CHAR; }
else if (tk == Struct) {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
mbt = id->stype;
next();
}
while (tk != ';') {
ty = mbt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad struct member definition\n", line); return -1; }
m = malloc(sizeof(struct member_s));
m->id = id;
m->offset = i;
m->type = ty;
m->next = members[bt];
members[bt] = m;
i = i + (ty >= PTR ? sizeof(int) : tsize[ty]);
i = (i + 3) & -4;
next();
if (tk == ',') next();
}
next();
}
next();
tsize[bt] = i;
}
}
while (tk != ';' && tk != '}') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
if (id->class) { printf("%d: duplicate global definition\n", line); return -1; }
next();
id[Type] = ty;
id->type = ty;
if (tk == '(') { // function
id[Class] = Fun;
id[Val] = (int)(e + 1);
id->class = Fun;
id->val = (int)(e + 1);
next(); i = 0;
while (tk != ')') {
ty = INT;
if (tk == Int) next();
else if (tk == Char) { next(); ty = CHAR; }
else if (tk == Struct) {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
ty = id->stype;
next();
}
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = i++;
if (id->class == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
id->hclass = id->class; id->class = Loc;
id->htype = id->type; id->type = ty;
id->hval = id->val; id->val = i++;
next();
if (tk == ',') next();
}
@ -420,17 +570,21 @@ int main(int argc, char **argv)
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i;
next();
while (tk == Int || tk == Char) {
bt = (tk == Int) ? INT : CHAR;
while (tk == Int || tk == Char || tk == Struct) {
if (tk == Int) bt = INT; else if (tk == Char) bt = CHAR; else {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
bt = id->stype;
}
next();
while (tk != ';') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = ++i;
if (id->class == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id->hclass = id->class; id->class = Loc;
id->htype = id->type; id->type = ty;
id->hval = id->val; id->val = ++i;
next();
if (tk == ',') next();
}
@ -440,18 +594,18 @@ int main(int argc, char **argv)
while (tk != '}') stmt();
*++e = LEV;
id = sym; // unwind symbol table locals
while (id[Tk]) {
if (id[Class] == Loc) {
id[Class] = id[HClass];
id[Type] = id[HType];
id[Val] = id[HVal];
while (id->tk) {
if (id->class == Loc) {
id->class = id->hclass;
id->type = id->htype;
id->val = id->hval;
}
id = id + Idsz;
id = id + 1;
}
}
else {
id[Class] = Glo;
id[Val] = (int)data;
id->class = Glo;
id->val = (int)data;
data = data + sizeof(int);
}
if (tk == ',') next();
@ -459,7 +613,7 @@ int main(int argc, char **argv)
next();
}
if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; }
if (!(pc = (int *)idmain->val)) { printf("main() not defined\n"); return -1; }
if (src) return 0;
// setup stack
@ -476,51 +630,51 @@ int main(int argc, char **argv)
i = *pc++; ++cycle;
if (debug) {
printf("%d> %.4s", cycle,
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]);
&ops[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
}
if (i == LEA) a = (int)(bp + *pc++); // load local address
else if (i == IMM) a = *pc++; // load global address or immediate
else if (i == JMP) pc = (int *)*pc; // jump
else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine
else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero
else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero
else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine
else if (i == ADJ) sp = sp + *pc++; // stack adjust
else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine
else if (i == LI) a = *(int *)a; // load int
else if (i == LC) a = *(char *)a; // load char
else if (i == SI) *(int *)*sp++ = a; // store int
else if (i == SC) a = *(char *)*sp++ = a; // store char
else if (i == PSH) *--sp = a; // push
switch (i) {
case LEA: a = (int)(bp + *pc++); break; // load local address
case IMM: a = *pc++; break; // load global address or immediate
case JMP: pc = (int *)*pc; break; // jump
case JSR: *--sp = (int)(pc + 1); pc = (int *)*pc; break; // jump to subroutine
case BZ: pc = a ? pc + 1 : (int *)*pc; break; // branch if zero
case BNZ: pc = a ? (int *)*pc : pc + 1; break; // branch if not zero
case ENT: *--sp = (int)bp; bp = sp; sp = sp - *pc++; break; // enter subroutine
case ADJ: sp = sp + *pc++; break; // stack adjust
case LEV: sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; break; // leave subroutine
case LI: a = *(int *)a; break; // load int
case LC: a = *(char *)a; break; // load char
case SI: *(int *)*sp++ = a; break; // store int
case SC: a = *(char *)*sp++ = a; break; // store char
case PSH: *--sp = a; break; // push
else if (i == OR) a = *sp++ | a;
else if (i == XOR) a = *sp++ ^ a;
else if (i == AND) a = *sp++ & a;
else if (i == EQ) a = *sp++ == a;
else if (i == NE) a = *sp++ != a;
else if (i == LT) a = *sp++ < a;
else if (i == GT) a = *sp++ > a;
else if (i == LE) a = *sp++ <= a;
else if (i == GE) a = *sp++ >= a;
else if (i == SHL) a = *sp++ << a;
else if (i == SHR) a = *sp++ >> a;
else if (i == ADD) a = *sp++ + a;
else if (i == SUB) a = *sp++ - a;
else if (i == MUL) a = *sp++ * a;
else if (i == DIV) a = *sp++ / a;
else if (i == MOD) a = *sp++ % a;
case OR: a = *sp++ | a; break;
case XOR: a = *sp++ ^ a; break;
case AND: a = *sp++ & a; break;
case EQ: a = *sp++ == a; break;
case NE: a = *sp++ != a; break;
case LT: a = *sp++ < a; break;
case GT: a = *sp++ > a; break;
case LE: a = *sp++ <= a; break;
case GE: a = *sp++ >= a; break;
case SHL: a = *sp++ << a; break;
case SHR: a = *sp++ >> a; break;
case ADD: a = *sp++ + a; break;
case SUB: a = *sp++ - a; break;
case MUL: a = *sp++ * a; break;
case DIV: a = *sp++ / a; break;
case MOD: a = *sp++ % a; break;
else if (i == OPEN) a = open((char *)sp[1], *sp);
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp);
else if (i == CLOS) a = close(*sp);
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); }
else if (i == MALC) a = (int)malloc(*sp);
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp);
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp);
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
case OPEN: a = open((char *)sp[1], *sp); break;
case READ: a = read(sp[2], (char *)sp[1], *sp); break;
case CLOS: a = close(*sp); break;
case PRTF: t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); break;
case MALC: a = (int)malloc(*sp); break;
case MSET: a = (int)memset((char *)sp[2], sp[1], *sp); break;
case MCMP: a = memcmp((char *)sp[2], (char *)sp[1], *sp); break;
case EXIT: printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp;
default: printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1;
}
}
}
}