diff --git a/c4.c b/c4.c index 95d3905..8642646 100644 --- a/c4.c +++ b/c4.c @@ -1,27 +1,23 @@ // c4.c - C in four functions -// char, int, structs, and pointer types -// if, while, return, switch, and expression statements +// char, int, and pointer types +// if, while, return, and expression statements // just enough features to allow self-compilation and a bit more // Written by Robert Swierczek -//#include -#include #include #include #include +#include +#include char *p, *lp, // current position in source code - *data, // data/bss pointer - *ops; // opcodes + *data; // data/bss pointer int *e, *le, // current position in emitted code - *cas, // case statement patch-up pointer - *brk, // break statement patch-up pointer - *def, // default statement patch-up pointer - *tsize, // array (indexed by type) of type sizes - tnew, // next available type + *id, // currently parsed identifier + *sym, // symbol table (simple list of identifiers) tk, // current token ival, // current token value ty, // current expression type @@ -30,33 +26,11 @@ int *e, *le, // current position in emitted code src, // print source and assembly flag debug; // print executed instructions -// identifier -struct ident_s { - int tk; - int hash; - char *name; - int class; - int type; - int val; - int stype; - int hclass; - int htype; - int hval; -} *id, // currently parsed identifier - *sym; // symbol table (simple list of identifiers) - -struct member_s { - struct ident_s *id; - int offset; - int type; - struct member_s *next; -} **members; // array (indexed by type) of struct member lists - // tokens and classes (operators last and in precedence order) enum { Num = 128, Fun, Sys, Glo, Loc, Id, - Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Struct, Switch, While, - Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Dot, Arrow, Brak + Char, Else, Enum, If, Int, Return, Sizeof, While, + Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak }; // opcodes @@ -65,27 +39,46 @@ enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT }; // types -enum { CHAR, INT, PTR = 256, PTR2 = 512 }; - +enum { CHAR, INT, PTR }; + +// identifier offsets (since we can't create an ident struct) +enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; + void next() { char *pp; while (tk = *p) { ++p; - if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { + if (tk == '\n') { + if (src) { + printf("%d: %.*s", line, p - lp, lp); + lp = p; + while (le < e) { + printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]); + if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); + } + } + ++line; + } + else if (tk == '#') { + while (*p != 0 && *p != '\n') ++p; + } + else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { pp = p - 1; while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') tk = tk * 147 + *p++; tk = (tk << 6) + (p - pp); id = sym; - while (id->tk) { - if (tk == id->hash && !memcmp(id->name, pp, p - pp)) { tk = id->tk; return; } - id = id + 1; + while (id[Tk]) { + if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } + id = id + Idsz; } - id->name = pp; - id->hash = tk; - tk = id->tk = Id; + id[Name] = (int)pp; + id[Hash] = tk; + tk = id[Tk] = Id; return; } else if (tk >= '0' && tk <= '9') { @@ -98,114 +91,89 @@ void next() tk = Num; return; } - switch (tk) { - case '\n': - if (src) { - printf("%d: %.*s", line, p - lp, lp); - lp = p; - while (le < e) { - printf("%8.4s", &ops[*++le * 5]); - if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); - } - } - ++line; - case ' ': case '\t': case '\v': case '\f': case '\r': - break; - case '/': + else if (tk == '/') { if (*p == '/') { - case '#': + ++p; while (*p != 0 && *p != '\n') ++p; - } else { + } + else { tk = Div; return; } - break; - case '\'': - case '"': + } + else if (tk == '\'' || tk == '"') { pp = data; while (*p != 0 && *p != tk) { if ((ival = *p++) == '\\') { - switch (ival = *p++) { - case 'n': ival = '\n'; break; - case 't': ival = '\t'; break; - case 'v': ival = '\v'; break; - case 'f': ival = '\f'; break; - case 'r': ival = '\r'; - } + if ((ival = *p++) == 'n') ival = '\n'; } if (tk == '"') *data++ = ival; } ++p; if (tk == '"') ival = (int)pp; else tk = Num; return; - case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; - case '+': if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; - case '-': if (*p == '-') { ++p; tk = Dec; } else if (*p == '>') { ++p; tk = Arrow; } else tk = Sub; return; - case '!': if (*p == '=') { ++p; tk = Ne; } return; - case '<': if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; - case '>': if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; - case '|': if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; - case '&': if (*p == '&') { ++p; tk = Lan; } else tk = And; return; - case '^': tk = Xor; return; - case '%': tk = Mod; return; - case '*': tk = Mul; return; - case '[': tk = Brak; return; - case '?': tk = Cond; return; - case '.': tk = Dot; return; - default: return; } + else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } + else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } + else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } + else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } + else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } + else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } + else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } + else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } + else if (tk == '^') { tk = Xor; return; } + else if (tk == '%') { tk = Mod; return; } + else if (tk == '*') { tk = Mul; return; } + else if (tk == '[') { tk = Brak; return; } + else if (tk == '?') { tk = Cond; return; } + else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; } } void expr(int lev) { - int t, *b, sz; - struct ident_s *d; - struct member_s *m; + int t, *d; - switch (tk) { - case 0: printf("%d: unexpected eof in expression\n", line); exit(-1); - case Num: *++e = IMM; *++e = ival; next(); ty = INT; break; - case '"': + if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } + else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } + else if (tk == '"') { *++e = IMM; *++e = ival; next(); while (tk == '"') next(); data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; - break; - case Sizeof: + } + else if (tk == Sizeof) { next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } - else if (tk == Struct) { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } ty = id->stype; next(); } while (tk == Mul) { next(); ty = ty + PTR; } if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } - *++e = IMM; *++e = ty >= PTR ? sizeof(int) : tsize[ty]; + *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); ty = INT; - break; - case Id: + } + else if (tk == Id) { d = id; next(); if (tk == '(') { next(); t = 0; while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } next(); - if (d->class == Sys) *++e = d->val; - else if (d->class == Fun) { *++e = JSR; *++e = d->val; } + if (d[Class] == Sys) *++e = d[Val]; + else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } else { printf("%d: bad function call\n", line); exit(-1); } if (t) { *++e = ADJ; *++e = t; } - ty = d->type; + ty = d[Type]; } - else if (d->class == Num) { *++e = IMM; *++e = d->val; ty = INT; } + else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } else { - if (d->class == Loc) { *++e = LEA; *++e = loc - d->val; } - else if (d->class == Glo) { *++e = IMM; *++e = d->val; } + if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } + else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } else { printf("%d: undefined variable\n", line); exit(-1); } - if ((ty = d->type) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; + *++e = ((ty = d[Type]) == CHAR) ? LC : LI; } - break; - case '(': + } + else if (tk == '(') { next(); - if (tk == Int || tk == Char || tk == Struct) { - if (tk == Int) { next(); t = INT; } else if (tk == Char) { next(); t = CHAR; } - else { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } t = id->stype; next(); } + if (tk == Int || tk == Char) { + t = (tk == Int) ? INT : CHAR; next(); while (tk == Mul) { next(); t = t + PTR; } if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } expr(Inc); @@ -215,130 +183,108 @@ void expr(int lev) expr(Assign); if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } } - break; - case Mul: + } + else if (tk == Mul) { next(); expr(Inc); if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } - if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; - break; - case And: + *++e = (ty == CHAR) ? LC : LI; + } + else if (tk == And) { next(); expr(Inc); - if (*e == LC || *e == LI) --e; // XXX else { printf("%d: bad address-of\n", line); exit(-1); } + if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } ty = ty + PTR; - break; - case '!': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; break; - case '~': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; break; - case Add: next(); expr(Inc); ty = INT; break; - case Sub: + } + else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } + else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } + else if (tk == Add) { next(); expr(Inc); ty = INT; } + else if (tk == Sub) { next(); *++e = IMM; if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } ty = INT; - break; - case Inc: case Dec: + } + else if (tk == Inc || tk == Dec) { t = tk; next(); expr(Inc); if (*e == LC) { *e = PSH; *++e = LC; } else if (*e == LI) { *e = PSH; *++e = LI; } else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } *++e = PSH; - *++e = IMM; *++e = ty >= PTR2 ? sizeof(int) : (ty >= PTR) ? tsize[ty - PTR] : 1; + *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = (t == Inc) ? ADD : SUB; *++e = (ty == CHAR) ? SC : SI; - break; - default: printf("%d: bad expression\n", line); exit(-1); } + else { printf("%d: bad expression\n", line); exit(-1); } while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method t = ty; - switch (tk) { - case Assign: + if (tk == Assign) { next(); if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; - break; - case Cond: + } + else if (tk == Cond) { next(); - *++e = BZ; b = ++e; + *++e = BZ; d = ++e; expr(Assign); if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } - *b = (int)(e + 3); *++e = JMP; b = ++e; + *d = (int)(e + 3); *++e = JMP; d = ++e; expr(Cond); - *b = (int)(e + 1); - break; - case Lor: next(); *++e = BNZ; b = ++e; expr(Lan); *b = (int)(e + 1); ty = INT; break; - case Lan: next(); *++e = BZ; b = ++e; expr(Or); *b = (int)(e + 1); ty = INT; break; - case Or: next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; break; - case Xor: next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; break; - case And: next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; break; - case Eq: next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; break; - case Ne: next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; break; - case Lt: next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; break; - case Gt: next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; break; - case Le: next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; break; - case Ge: next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; break; - case Shl: next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; break; - case Shr: next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; break; - case Add: + *d = (int)(e + 1); + } + else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } + else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } + else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } + else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } + else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } + else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } + else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } + else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } + else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } + else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } + else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } + else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } + else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } + else if (tk == Add) { next(); *++e = PSH; expr(Mul); - sz = (ty = t) >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1; - if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; } + if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } *++e = ADD; - break; - case Sub: + } + else if (tk == Sub) { next(); *++e = PSH; expr(Mul); - sz = t >= PTR2 ? sizeof(int) : t >= PTR ? tsize[t - PTR] : 1; - if (t == ty && sz > 1) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sz; *++e = DIV; ty = INT; } - else if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; *++e = SUB; } + if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } + else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } else *++e = SUB; - ty = t; - break; - case Mul: next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; break; - case Div: next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; break; - case Mod: next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; break; - case Inc: case Dec: + } + else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } + else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } + else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } + else if (tk == Inc || tk == Dec) { if (*e == LC) { *e = PSH; *++e = LC; } else if (*e == LI) { *e = PSH; *++e = LI; } else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } - sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1; - *++e = PSH; *++e = IMM; *++e = sz; + *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = (tk == Inc) ? ADD : SUB; *++e = (ty == CHAR) ? SC : SI; - *++e = PSH; *++e = IMM; *++e = sz; + *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = (tk == Inc) ? SUB : ADD; next(); - break; - case Dot: - ty = ty + PTR; - case Arrow: - if (ty <= PTR+INT || ty >= PTR2) { printf("%d: structure expected\n", line); exit(-1); } - next(); - if (tk != Id) { printf("%d: structure member expected\n", line); exit(-1); } - m = members[ty - PTR]; while (m && m->id != id) m = m->next; - if (!m) { printf("%d: structure member not found\n", line); exit(-1); } - if (m->offset) { *++e = PSH; *++e = IMM; *++e = m->offset; *++e = ADD; } - ty = m->type; - if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; - next(); - break; - case Brak: + } + else if (tk == Brak) { next(); *++e = PSH; expr(Assign); if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } - if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } - sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t]; - if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; } + if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } + else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } *++e = ADD; - if ((ty = t) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI; - break; - default: printf("%d: compiler error tk=%d\n", line, tk); exit(-1); + *++e = ((ty = t - PTR) == CHAR) ? LC : LI; } + else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } } } void stmt() { - int *a, *b, *d, i; + int *a, *b; - switch (tk) { - case If: + if (tk == If) { next(); if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } expr(Assign); @@ -351,8 +297,8 @@ void stmt() stmt(); } *b = (int)(e + 1); - return; - case While: + } + else if (tk == While) { next(); a = e + 1; if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } @@ -362,54 +308,22 @@ void stmt() stmt(); *++e = JMP; *++e = (int)a; *b = (int)(e + 1); - return; - case Switch: - next(); - if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } - expr(Assign); - if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } - a = cas; *++e = JMP; cas = ++e; - b = brk; d = def; brk = def = 0; - stmt(); - *cas = def ? (int)def : (int)(e + 1); cas = a; - while (brk) { a = (int *)*brk; *brk = (int)(e + 1); brk = a; } - brk = b; def = d; - return; - case Case: - *++e = JMP; ++e; *e = (int)(e + 7); *++e = PSH; i = *cas; *cas = (int)e; - next(); - expr(Or); - if (e[-1] != IMM) { printf("%d: bad case immediate\n", line); exit(-1); } - *e = *e - i; *++e = SUB; *++e = BNZ; cas = ++e; *e = i + e[-3]; - if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); } - stmt(); - return; - case Break: - next(); - if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } - *++e = JMP; *++e = (int)brk; brk = e; - return; - case Default: - next(); - if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); } - def = e + 1; - stmt(); - return; - case Return: + } + else if (tk == Return) { next(); if (tk != ';') expr(Assign); *++e = LEV; if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } - return; - case '{': + } + else if (tk == '{') { next(); while (tk != '}') stmt(); next(); - return; - case ';': + } + else if (tk == ';') { next(); - return; - default: + } + else { expr(Assign); if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } } @@ -417,9 +331,7 @@ void stmt() int main(int argc, char **argv) { - int fd, bt, mbt, ty, poolsz; - struct ident_s *idmain, *d; - struct member_s *m; + int fd, bt, ty, poolsz, *idmain; int *pc, *sp, *bp, a, cycle; // vm registers int i, *t; // temps @@ -435,24 +347,16 @@ int main(int argc, char **argv) if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } - if (!(tsize = malloc(PTR * sizeof(int)))) { printf("could not malloc() tsize area\n"); return -1; } - if (!(members = malloc(PTR * sizeof(struct member_s *)))) { printf("could not malloc() members area\n"); return -1; } memset(sym, 0, poolsz); memset(e, 0, poolsz); memset(data, 0, poolsz); - memset(tsize, 0, PTR * sizeof(int)); - memset(members, 0, PTR * sizeof(struct member_s *)); - - ops = "LEA IMM JMP JSR BZ BNZ ENT ADJ LEV LI LC SI SC PSH " - "OR XOR AND EQ NE LT GT LE GE SHL SHR ADD SUB MUL DIV MOD " - "OPEN READ CLOS PRTF MALC MSET MCMP EXIT "; - - p = "break case char default else enum if int return sizeof struct switch while " + + p = "char else enum if int return sizeof while " "open read close printf malloc memset memcmp exit void main"; - i = Break; while (i <= While) { next(); id->tk = i++; } // add keywords to symbol table - i = OPEN; while (i <= EXIT) { next(); id->class = Sys; id->type = INT; id->val = i++; } // add library to symbol table - next(); id->tk = Char; // handle void type + i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table + i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table + next(); id[Tk] = Char; // handle void type next(); idmain = id; // keep track of main if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } @@ -460,10 +364,6 @@ int main(int argc, char **argv) p[i] = 0; close(fd); - // add primitive types - tsize[tnew++] = sizeof(char); - tsize[tnew++] = sizeof(int); - // parse declarations line = 1; next(); @@ -486,83 +386,33 @@ int main(int argc, char **argv) i = ival; next(); } - id->class = Num; id->type = INT; id->val = i++; + id[Class] = Num; id[Type] = INT; id[Val] = i++; if (tk == ',') next(); } next(); } } - else if (tk == Struct) { - next(); - if (tk == Id) { - if (!id->stype) id->stype = tnew++; - bt = id->stype; - next(); - } else { - bt = tnew++; - } - if (tk == '{') { - next(); - if (members[bt]) { printf("%d: duplicate structure definition\n", line); return -1; } - i = 0; - while (tk != '}') { - mbt = INT; - if (tk == Int) next(); - else if (tk == Char) { next(); mbt = CHAR; } - else if (tk == Struct) { - next(); - if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; } - mbt = id->stype; - next(); - } - while (tk != ';') { - ty = mbt; - while (tk == Mul) { next(); ty = ty + PTR; } - if (tk != Id) { printf("%d: bad struct member definition\n", line); return -1; } - m = malloc(sizeof(struct member_s)); - m->id = id; - m->offset = i; - m->type = ty; - m->next = members[bt]; - members[bt] = m; - i = i + (ty >= PTR ? sizeof(int) : tsize[ty]); - i = (i + 3) & -4; - next(); - if (tk == ',') next(); - } - next(); - } - next(); - tsize[bt] = i; - } - } while (tk != ';' && tk != '}') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } - if (id->class) { printf("%d: duplicate global definition\n", line); return -1; } + if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } next(); - id->type = ty; + id[Type] = ty; if (tk == '(') { // function - id->class = Fun; - id->val = (int)(e + 1); + id[Class] = Fun; + id[Val] = (int)(e + 1); next(); i = 0; while (tk != ')') { ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } - else if (tk == Struct) { - next(); - if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; } - ty = id->stype; - next(); - } while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } - if (id->class == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } - id->hclass = id->class; id->class = Loc; - id->htype = id->type; id->type = ty; - id->hval = id->val; id->val = i++; + if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = i++; next(); if (tk == ',') next(); } @@ -570,21 +420,17 @@ int main(int argc, char **argv) if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } loc = ++i; next(); - while (tk == Int || tk == Char || tk == Struct) { - if (tk == Int) bt = INT; else if (tk == Char) bt = CHAR; else { - next(); - if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; } - bt = id->stype; - } + while (tk == Int || tk == Char) { + bt = (tk == Int) ? INT : CHAR; next(); while (tk != ';') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } - if (id->class == Loc) { printf("%d: duplicate local definition\n", line); return -1; } - id->hclass = id->class; id->class = Loc; - id->htype = id->type; id->type = ty; - id->hval = id->val; id->val = ++i; + if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } + id[HClass] = id[Class]; id[Class] = Loc; + id[HType] = id[Type]; id[Type] = ty; + id[HVal] = id[Val]; id[Val] = ++i; next(); if (tk == ',') next(); } @@ -594,18 +440,18 @@ int main(int argc, char **argv) while (tk != '}') stmt(); *++e = LEV; id = sym; // unwind symbol table locals - while (id->tk) { - if (id->class == Loc) { - id->class = id->hclass; - id->type = id->htype; - id->val = id->hval; + while (id[Tk]) { + if (id[Class] == Loc) { + id[Class] = id[HClass]; + id[Type] = id[HType]; + id[Val] = id[HVal]; } - id = id + 1; + id = id + Idsz; } } else { - id->class = Glo; - id->val = (int)data; + id[Class] = Glo; + id[Val] = (int)data; data = data + sizeof(int); } if (tk == ',') next(); @@ -613,7 +459,7 @@ int main(int argc, char **argv) next(); } - if (!(pc = (int *)idmain->val)) { printf("main() not defined\n"); return -1; } + if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } if (src) return 0; // setup stack @@ -630,51 +476,51 @@ int main(int argc, char **argv) i = *pc++; ++cycle; if (debug) { printf("%d> %.4s", cycle, - &ops[i * 5]); + &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," + "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," + "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]); if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); } - switch (i) { - case LEA: a = (int)(bp + *pc++); break; // load local address - case IMM: a = *pc++; break; // load global address or immediate - case JMP: pc = (int *)*pc; break; // jump - case JSR: *--sp = (int)(pc + 1); pc = (int *)*pc; break; // jump to subroutine - case BZ: pc = a ? pc + 1 : (int *)*pc; break; // branch if zero - case BNZ: pc = a ? (int *)*pc : pc + 1; break; // branch if not zero - case ENT: *--sp = (int)bp; bp = sp; sp = sp - *pc++; break; // enter subroutine - case ADJ: sp = sp + *pc++; break; // stack adjust - case LEV: sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; break; // leave subroutine - case LI: a = *(int *)a; break; // load int - case LC: a = *(char *)a; break; // load char - case SI: *(int *)*sp++ = a; break; // store int - case SC: a = *(char *)*sp++ = a; break; // store char - case PSH: *--sp = a; break; // push + if (i == LEA) a = (int)(bp + *pc++); // load local address + else if (i == IMM) a = *pc++; // load global address or immediate + else if (i == JMP) pc = (int *)*pc; // jump + else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine + else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero + else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero + else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine + else if (i == ADJ) sp = sp + *pc++; // stack adjust + else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine + else if (i == LI) a = *(int *)a; // load int + else if (i == LC) a = *(char *)a; // load char + else if (i == SI) *(int *)*sp++ = a; // store int + else if (i == SC) a = *(char *)*sp++ = a; // store char + else if (i == PSH) *--sp = a; // push - case OR: a = *sp++ | a; break; - case XOR: a = *sp++ ^ a; break; - case AND: a = *sp++ & a; break; - case EQ: a = *sp++ == a; break; - case NE: a = *sp++ != a; break; - case LT: a = *sp++ < a; break; - case GT: a = *sp++ > a; break; - case LE: a = *sp++ <= a; break; - case GE: a = *sp++ >= a; break; - case SHL: a = *sp++ << a; break; - case SHR: a = *sp++ >> a; break; - case ADD: a = *sp++ + a; break; - case SUB: a = *sp++ - a; break; - case MUL: a = *sp++ * a; break; - case DIV: a = *sp++ / a; break; - case MOD: a = *sp++ % a; break; + else if (i == OR) a = *sp++ | a; + else if (i == XOR) a = *sp++ ^ a; + else if (i == AND) a = *sp++ & a; + else if (i == EQ) a = *sp++ == a; + else if (i == NE) a = *sp++ != a; + else if (i == LT) a = *sp++ < a; + else if (i == GT) a = *sp++ > a; + else if (i == LE) a = *sp++ <= a; + else if (i == GE) a = *sp++ >= a; + else if (i == SHL) a = *sp++ << a; + else if (i == SHR) a = *sp++ >> a; + else if (i == ADD) a = *sp++ + a; + else if (i == SUB) a = *sp++ - a; + else if (i == MUL) a = *sp++ * a; + else if (i == DIV) a = *sp++ / a; + else if (i == MOD) a = *sp++ % a; - case OPEN: a = open((char *)sp[1], *sp); break; - case READ: a = read(sp[2], (char *)sp[1], *sp); break; - case CLOS: a = close(*sp); break; - case PRTF: t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); break; - case MALC: a = (int)malloc(*sp); break; - case MSET: a = (int)memset((char *)sp[2], sp[1], *sp); break; - case MCMP: a = memcmp((char *)sp[2], (char *)sp[1], *sp); break; - case EXIT: printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; - default: printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; - } + else if (i == OPEN) a = open((char *)sp[1], *sp); + else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); + else if (i == CLOS) a = close(*sp); + else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } + else if (i == MALC) a = (int)malloc(*sp); + else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); + else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); + else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } + else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } } -} \ No newline at end of file +}