Compare commits

..

3 commits

Author SHA1 Message Date
rswier
558e70d96c Include headers 2016-02-26 01:36:07 -05:00
rswier
05cb8b3b58 Added structures 2016-02-26 01:27:34 -05:00
rswier
dbb64bf54f New branch with struct support
This change adds structures (struct) along with the dot (.) and arrow
(->) operators.  It is very silly to add this level of complexity while
keeping it just four functions.  But it had to be done :-)
2016-02-26 01:23:08 -05:00
2 changed files with 378 additions and 221 deletions

View file

@ -3,6 +3,10 @@ c4 - C in four functions
An exercise in minimalism.
This branch adds structures (struct) along with the dot (.) and arrow (->) operators.
It is very silly to add this level of complexity while keeping it just four functions.
But it had to be done :-)
Try the following:
gcc -o c4 c4.c (you may need the -m32 option on 64bit machines)

595
c4.c
View file

@ -1,23 +1,27 @@
// c4.c - C in four functions // c4.c - C in four functions
// char, int, and pointer types // char, int, structs, and pointer types
// if, while, return, and expression statements // if, while, return, switch, and expression statements
// just enough features to allow self-compilation and a bit more // just enough features to allow self-compilation and a bit more
// Written by Robert Swierczek // Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <memory.h> #include <memory.h>
#include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
char *p, *lp, // current position in source code char *p, *lp, // current position in source code
*data; // data/bss pointer *data, // data/bss pointer
*ops; // opcodes
int *e, *le, // current position in emitted code int *e, *le, // current position in emitted code
*id, // currently parsed identifier *cas, // case statement patch-up pointer
*sym, // symbol table (simple list of identifiers) *brk, // break statement patch-up pointer
*def, // default statement patch-up pointer
*tsize, // array (indexed by type) of type sizes
tnew, // next available type
tk, // current token tk, // current token
ival, // current token value ival, // current token value
ty, // current expression type ty, // current expression type
@ -26,59 +30,62 @@ int *e, *le, // current position in emitted code
src, // print source and assembly flag src, // print source and assembly flag
debug; // print executed instructions debug; // print executed instructions
// identifier
struct ident_s {
int tk;
int hash;
char *name;
int class;
int type;
int val;
int stype;
int hclass;
int htype;
int hval;
} *id, // currently parsed identifier
*sym; // symbol table (simple list of identifiers)
struct member_s {
struct ident_s *id;
int offset;
int type;
struct member_s *next;
} **members; // array (indexed by type) of struct member lists
// tokens and classes (operators last and in precedence order) // tokens and classes (operators last and in precedence order)
enum { enum {
Num = 128, Fun, Sys, Glo, Loc, Id, Num = 128, Fun, Sys, Glo, Loc, Id,
Char, Else, Enum, If, Int, Return, Sizeof, While, Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Struct, Switch, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Dot, Arrow, Brak
}; };
// opcodes // opcodes
enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT }; OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
// types // types
enum { CHAR, INT, PTR }; enum { CHAR, INT, PTR = 256, PTR2 = 512 };
// identifier offsets (since we can't create an ident struct)
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
void next() void next()
{ {
char *pp; char *pp;
while (tk = *p) { while (tk = *p) {
++p; ++p;
if (tk == '\n') { if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
++line;
}
else if (tk == '#') {
while (*p != 0 && *p != '\n') ++p;
}
else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
pp = p - 1; pp = p - 1;
while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
tk = tk * 147 + *p++; tk = tk * 147 + *p++;
tk = (tk << 6) + (p - pp); tk = (tk << 6) + (p - pp);
id = sym; id = sym;
while (id[Tk]) { while (id->tk) {
if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } if (tk == id->hash && !memcmp(id->name, pp, p - pp)) { tk = id->tk; return; }
id = id + Idsz; id = id + 1;
} }
id[Name] = (int)pp; id->name = pp;
id[Hash] = tk; id->hash = tk;
tk = id[Tk] = Id; tk = id->tk = Id;
return; return;
} }
else if (tk >= '0' && tk <= '9') { else if (tk >= '0' && tk <= '9') {
@ -91,89 +98,114 @@ void next()
tk = Num; tk = Num;
return; return;
} }
else if (tk == '/') { switch (tk) {
if (*p == '/') { case '\n':
++p; if (src) {
while (*p != 0 && *p != '\n') ++p; printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &ops[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
} }
else { ++line;
case ' ': case '\t': case '\v': case '\f': case '\r':
break;
case '/':
if (*p == '/') {
case '#':
while (*p != 0 && *p != '\n') ++p;
} else {
tk = Div; tk = Div;
return; return;
} }
} break;
else if (tk == '\'' || tk == '"') { case '\'':
case '"':
pp = data; pp = data;
while (*p != 0 && *p != tk) { while (*p != 0 && *p != tk) {
if ((ival = *p++) == '\\') { if ((ival = *p++) == '\\') {
if ((ival = *p++) == 'n') ival = '\n'; switch (ival = *p++) {
case 'n': ival = '\n'; break;
case 't': ival = '\t'; break;
case 'v': ival = '\v'; break;
case 'f': ival = '\f'; break;
case 'r': ival = '\r';
}
} }
if (tk == '"') *data++ = ival; if (tk == '"') *data++ = ival;
} }
++p; ++p;
if (tk == '"') ival = (int)pp; else tk = Num; if (tk == '"') ival = (int)pp; else tk = Num;
return; return;
case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return;
case '+': if (*p == '+') { ++p; tk = Inc; } else tk = Add; return;
case '-': if (*p == '-') { ++p; tk = Dec; } else if (*p == '>') { ++p; tk = Arrow; } else tk = Sub; return;
case '!': if (*p == '=') { ++p; tk = Ne; } return;
case '<': if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return;
case '>': if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return;
case '|': if (*p == '|') { ++p; tk = Lor; } else tk = Or; return;
case '&': if (*p == '&') { ++p; tk = Lan; } else tk = And; return;
case '^': tk = Xor; return;
case '%': tk = Mod; return;
case '*': tk = Mul; return;
case '[': tk = Brak; return;
case '?': tk = Cond; return;
case '.': tk = Dot; return;
default: return;
} }
else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }
else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
else if (tk == '^') { tk = Xor; return; }
else if (tk == '%') { tk = Mod; return; }
else if (tk == '*') { tk = Mul; return; }
else if (tk == '[') { tk = Brak; return; }
else if (tk == '?') { tk = Cond; return; }
else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
} }
} }
void expr(int lev) void expr(int lev)
{ {
int t, *d; int t, *b, sz;
struct ident_s *d;
struct member_s *m;
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } switch (tk) {
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } case 0: printf("%d: unexpected eof in expression\n", line); exit(-1);
else if (tk == '"') { case Num: *++e = IMM; *++e = ival; next(); ty = INT; break;
case '"':
*++e = IMM; *++e = ival; next(); *++e = IMM; *++e = ival; next();
while (tk == '"') next(); while (tk == '"') next();
data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
} break;
else if (tk == Sizeof) { case Sizeof:
next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
else if (tk == Struct) { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } ty = id->stype; next(); }
while (tk == Mul) { next(); ty = ty + PTR; } while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); *++e = IMM; *++e = ty >= PTR ? sizeof(int) : tsize[ty];
ty = INT; ty = INT;
} break;
else if (tk == Id) { case Id:
d = id; next(); d = id; next();
if (tk == '(') { if (tk == '(') {
next(); next();
t = 0; t = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
next(); next();
if (d[Class] == Sys) *++e = d[Val]; if (d->class == Sys) *++e = d->val;
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } else if (d->class == Fun) { *++e = JSR; *++e = d->val; }
else { printf("%d: bad function call\n", line); exit(-1); } else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; } if (t) { *++e = ADJ; *++e = t; }
ty = d[Type]; ty = d->type;
} }
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } else if (d->class == Num) { *++e = IMM; *++e = d->val; ty = INT; }
else { else {
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } if (d->class == Loc) { *++e = LEA; *++e = loc - d->val; }
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } else if (d->class == Glo) { *++e = IMM; *++e = d->val; }
else { printf("%d: undefined variable\n", line); exit(-1); } else { printf("%d: undefined variable\n", line); exit(-1); }
*++e = ((ty = d[Type]) == CHAR) ? LC : LI; if ((ty = d->type) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
} }
} break;
else if (tk == '(') { case '(':
next(); next();
if (tk == Int || tk == Char) { if (tk == Int || tk == Char || tk == Struct) {
t = (tk == Int) ? INT : CHAR; next(); if (tk == Int) { next(); t = INT; } else if (tk == Char) { next(); t = CHAR; }
else { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } t = id->stype; next(); }
while (tk == Mul) { next(); t = t + PTR; } while (tk == Mul) { next(); t = t + PTR; }
if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
expr(Inc); expr(Inc);
@ -183,108 +215,130 @@ void expr(int lev)
expr(Assign); expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
} }
} break;
else if (tk == Mul) { case Mul:
next(); expr(Inc); next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
*++e = (ty == CHAR) ? LC : LI; if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
} break;
else if (tk == And) { case And:
next(); expr(Inc); next(); expr(Inc);
if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } if (*e == LC || *e == LI) --e; // XXX else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR; ty = ty + PTR;
} break;
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } case '!': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; break;
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } case '~': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; break;
else if (tk == Add) { next(); expr(Inc); ty = INT; } case Add: next(); expr(Inc); ty = INT; break;
else if (tk == Sub) { case Sub:
next(); *++e = IMM; next(); *++e = IMM;
if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }
ty = INT; ty = INT;
} break;
else if (tk == Inc || tk == Dec) { case Inc: case Dec:
t = tk; next(); expr(Inc); t = tk; next(); expr(Inc);
if (*e == LC) { *e = PSH; *++e = LC; } if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; } else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH; *++e = PSH;
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = IMM; *++e = ty >= PTR2 ? sizeof(int) : (ty >= PTR) ? tsize[ty - PTR] : 1;
*++e = (t == Inc) ? ADD : SUB; *++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI; *++e = (ty == CHAR) ? SC : SI;
break;
default: printf("%d: bad expression\n", line); exit(-1);
} }
else { printf("%d: bad expression\n", line); exit(-1); }
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty; t = ty;
if (tk == Assign) { switch (tk) {
case Assign:
next(); next();
if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;
} break;
else if (tk == Cond) { case Cond:
next(); next();
*++e = BZ; d = ++e; *++e = BZ; b = ++e;
expr(Assign); expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*d = (int)(e + 3); *++e = JMP; d = ++e; *b = (int)(e + 3); *++e = JMP; b = ++e;
expr(Cond); expr(Cond);
*d = (int)(e + 1); *b = (int)(e + 1);
} break;
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } case Lor: next(); *++e = BNZ; b = ++e; expr(Lan); *b = (int)(e + 1); ty = INT; break;
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } case Lan: next(); *++e = BZ; b = ++e; expr(Or); *b = (int)(e + 1); ty = INT; break;
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } case Or: next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; break;
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } case Xor: next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; break;
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } case And: next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; break;
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } case Eq: next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; break;
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } case Ne: next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; break;
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } case Lt: next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; break;
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } case Gt: next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; break;
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } case Le: next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; break;
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } case Ge: next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; break;
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } case Shl: next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; break;
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } case Shr: next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; break;
else if (tk == Add) { case Add:
next(); *++e = PSH; expr(Mul); next(); *++e = PSH; expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } sz = (ty = t) >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
*++e = ADD; *++e = ADD;
} break;
else if (tk == Sub) { case Sub:
next(); *++e = PSH; expr(Mul); next(); *++e = PSH; expr(Mul);
if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } sz = t >= PTR2 ? sizeof(int) : t >= PTR ? tsize[t - PTR] : 1;
else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } if (t == ty && sz > 1) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sz; *++e = DIV; ty = INT; }
else if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; *++e = SUB; }
else *++e = SUB; else *++e = SUB;
} ty = t;
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } break;
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } case Mul: next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; break;
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } case Div: next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; break;
else if (tk == Inc || tk == Dec) { case Mod: next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; break;
case Inc: case Dec:
if (*e == LC) { *e = PSH; *++e = LC; } if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; } else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
*++e = PSH; *++e = IMM; *++e = sz;
*++e = (tk == Inc) ? ADD : SUB; *++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI; *++e = (ty == CHAR) ? SC : SI;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = PSH; *++e = IMM; *++e = sz;
*++e = (tk == Inc) ? SUB : ADD; *++e = (tk == Inc) ? SUB : ADD;
next(); next();
} break;
else if (tk == Brak) { case Dot:
ty = ty + PTR;
case Arrow:
if (ty <= PTR+INT || ty >= PTR2) { printf("%d: structure expected\n", line); exit(-1); }
next();
if (tk != Id) { printf("%d: structure member expected\n", line); exit(-1); }
m = members[ty - PTR]; while (m && m->id != id) m = m->next;
if (!m) { printf("%d: structure member not found\n", line); exit(-1); }
if (m->offset) { *++e = PSH; *++e = IMM; *++e = m->offset; *++e = ADD; }
ty = m->type;
if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
next();
break;
case Brak:
next(); *++e = PSH; expr(Assign); next(); *++e = PSH; expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t];
if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
*++e = ADD; *++e = ADD;
*++e = ((ty = t - PTR) == CHAR) ? LC : LI; if ((ty = t) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
default: printf("%d: compiler error tk=%d\n", line, tk); exit(-1);
} }
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
} }
} }
void stmt() void stmt()
{ {
int *a, *b; int *a, *b, *d, i;
if (tk == If) { switch (tk) {
case If:
next(); next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign); expr(Assign);
@ -297,8 +351,8 @@ void stmt()
stmt(); stmt();
} }
*b = (int)(e + 1); *b = (int)(e + 1);
} return;
else if (tk == While) { case While:
next(); next();
a = e + 1; a = e + 1;
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
@ -308,22 +362,54 @@ void stmt()
stmt(); stmt();
*++e = JMP; *++e = (int)a; *++e = JMP; *++e = (int)a;
*b = (int)(e + 1); *b = (int)(e + 1);
} return;
else if (tk == Return) { case Switch:
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
a = cas; *++e = JMP; cas = ++e;
b = brk; d = def; brk = def = 0;
stmt();
*cas = def ? (int)def : (int)(e + 1); cas = a;
while (brk) { a = (int *)*brk; *brk = (int)(e + 1); brk = a; }
brk = b; def = d;
return;
case Case:
*++e = JMP; ++e; *e = (int)(e + 7); *++e = PSH; i = *cas; *cas = (int)e;
next();
expr(Or);
if (e[-1] != IMM) { printf("%d: bad case immediate\n", line); exit(-1); }
*e = *e - i; *++e = SUB; *++e = BNZ; cas = ++e; *e = i + e[-3];
if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); }
stmt();
return;
case Break:
next();
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
*++e = JMP; *++e = (int)brk; brk = e;
return;
case Default:
next();
if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); }
def = e + 1;
stmt();
return;
case Return:
next(); next();
if (tk != ';') expr(Assign); if (tk != ';') expr(Assign);
*++e = LEV; *++e = LEV;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
} return;
else if (tk == '{') { case '{':
next(); next();
while (tk != '}') stmt(); while (tk != '}') stmt();
next(); next();
} return;
else if (tk == ';') { case ';':
next(); next();
} return;
else { default:
expr(Assign); expr(Assign);
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
} }
@ -331,7 +417,9 @@ void stmt()
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int fd, bt, ty, poolsz, *idmain; int fd, bt, mbt, ty, poolsz;
struct ident_s *idmain, *d;
struct member_s *m;
int *pc, *sp, *bp, a, cycle; // vm registers int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t; // temps int i, *t; // temps
@ -347,16 +435,24 @@ int main(int argc, char **argv)
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
if (!(tsize = malloc(PTR * sizeof(int)))) { printf("could not malloc() tsize area\n"); return -1; }
if (!(members = malloc(PTR * sizeof(struct member_s *)))) { printf("could not malloc() members area\n"); return -1; }
memset(sym, 0, poolsz); memset(sym, 0, poolsz);
memset(e, 0, poolsz); memset(e, 0, poolsz);
memset(data, 0, poolsz); memset(data, 0, poolsz);
memset(tsize, 0, PTR * sizeof(int));
p = "char else enum if int return sizeof while " memset(members, 0, PTR * sizeof(struct member_s *));
"open read close printf malloc free memset memcmp exit void main";
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table ops = "LEA IMM JMP JSR BZ BNZ ENT ADJ LEV LI LC SI SC PSH "
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table "OR XOR AND EQ NE LT GT LE GE SHL SHR ADD SUB MUL DIV MOD "
next(); id[Tk] = Char; // handle void type "OPEN READ CLOS PRTF MALC MSET MCMP EXIT ";
p = "break case char default else enum if int return sizeof struct switch while "
"open read close printf malloc memset memcmp exit void main";
i = Break; while (i <= While) { next(); id->tk = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id->class = Sys; id->type = INT; id->val = i++; } // add library to symbol table
next(); id->tk = Char; // handle void type
next(); idmain = id; // keep track of main next(); idmain = id; // keep track of main
if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
@ -364,6 +460,10 @@ int main(int argc, char **argv)
p[i] = 0; p[i] = 0;
close(fd); close(fd);
// add primitive types
tsize[tnew++] = sizeof(char);
tsize[tnew++] = sizeof(int);
// parse declarations // parse declarations
line = 1; line = 1;
next(); next();
@ -386,33 +486,83 @@ int main(int argc, char **argv)
i = ival; i = ival;
next(); next();
} }
id[Class] = Num; id[Type] = INT; id[Val] = i++; id->class = Num; id->type = INT; id->val = i++;
if (tk == ',') next(); if (tk == ',') next();
} }
next(); next();
} }
} }
else if (tk == Struct) {
next();
if (tk == Id) {
if (!id->stype) id->stype = tnew++;
bt = id->stype;
next();
} else {
bt = tnew++;
}
if (tk == '{') {
next();
if (members[bt]) { printf("%d: duplicate structure definition\n", line); return -1; }
i = 0;
while (tk != '}') {
mbt = INT;
if (tk == Int) next();
else if (tk == Char) { next(); mbt = CHAR; }
else if (tk == Struct) {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
mbt = id->stype;
next();
}
while (tk != ';') {
ty = mbt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad struct member definition\n", line); return -1; }
m = malloc(sizeof(struct member_s));
m->id = id;
m->offset = i;
m->type = ty;
m->next = members[bt];
members[bt] = m;
i = i + (ty >= PTR ? sizeof(int) : tsize[ty]);
i = (i + 3) & -4;
next();
if (tk == ',') next();
}
next();
}
next();
tsize[bt] = i;
}
}
while (tk != ';' && tk != '}') { while (tk != ';' && tk != '}') {
ty = bt; ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; } while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } if (id->class) { printf("%d: duplicate global definition\n", line); return -1; }
next(); next();
id[Type] = ty; id->type = ty;
if (tk == '(') { // function if (tk == '(') { // function
id[Class] = Fun; id->class = Fun;
id[Val] = (int)(e + 1); id->val = (int)(e + 1);
next(); i = 0; next(); i = 0;
while (tk != ')') { while (tk != ')') {
ty = INT; ty = INT;
if (tk == Int) next(); if (tk == Int) next();
else if (tk == Char) { next(); ty = CHAR; } else if (tk == Char) { next(); ty = CHAR; }
else if (tk == Struct) {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
ty = id->stype;
next();
}
while (tk == Mul) { next(); ty = ty + PTR; } while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } if (id->class == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc; id->hclass = id->class; id->class = Loc;
id[HType] = id[Type]; id[Type] = ty; id->htype = id->type; id->type = ty;
id[HVal] = id[Val]; id[Val] = i++; id->hval = id->val; id->val = i++;
next(); next();
if (tk == ',') next(); if (tk == ',') next();
} }
@ -420,17 +570,21 @@ int main(int argc, char **argv)
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i; loc = ++i;
next(); next();
while (tk == Int || tk == Char) { while (tk == Int || tk == Char || tk == Struct) {
bt = (tk == Int) ? INT : CHAR; if (tk == Int) bt = INT; else if (tk == Char) bt = CHAR; else {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
bt = id->stype;
}
next(); next();
while (tk != ';') { while (tk != ';') {
ty = bt; ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; } while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } if (id->class == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc; id->hclass = id->class; id->class = Loc;
id[HType] = id[Type]; id[Type] = ty; id->htype = id->type; id->type = ty;
id[HVal] = id[Val]; id[Val] = ++i; id->hval = id->val; id->val = ++i;
next(); next();
if (tk == ',') next(); if (tk == ',') next();
} }
@ -440,18 +594,18 @@ int main(int argc, char **argv)
while (tk != '}') stmt(); while (tk != '}') stmt();
*++e = LEV; *++e = LEV;
id = sym; // unwind symbol table locals id = sym; // unwind symbol table locals
while (id[Tk]) { while (id->tk) {
if (id[Class] == Loc) { if (id->class == Loc) {
id[Class] = id[HClass]; id->class = id->hclass;
id[Type] = id[HType]; id->type = id->htype;
id[Val] = id[HVal]; id->val = id->hval;
} }
id = id + Idsz; id = id + 1;
} }
} }
else { else {
id[Class] = Glo; id->class = Glo;
id[Val] = (int)data; id->val = (int)data;
data = data + sizeof(int); data = data + sizeof(int);
} }
if (tk == ',') next(); if (tk == ',') next();
@ -459,11 +613,11 @@ int main(int argc, char **argv)
next(); next();
} }
if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } if (!(pc = (int *)idmain->val)) { printf("main() not defined\n"); return -1; }
if (src) return 0; if (src) return 0;
// setup stack // setup stack
bp = sp = (int *)((int)sp + poolsz); sp = (int *)((int)sp + poolsz);
*--sp = EXIT; // call exit if main returns *--sp = EXIT; // call exit if main returns
*--sp = PSH; t = sp; *--sp = PSH; t = sp;
*--sp = argc; *--sp = argc;
@ -476,52 +630,51 @@ int main(int argc, char **argv)
i = *pc++; ++cycle; i = *pc++; ++cycle;
if (debug) { if (debug) {
printf("%d> %.4s", cycle, printf("%d> %.4s", cycle,
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," &ops[i * 5]);
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
} }
if (i == LEA) a = (int)(bp + *pc++); // load local address switch (i) {
else if (i == IMM) a = *pc++; // load global address or immediate case LEA: a = (int)(bp + *pc++); break; // load local address
else if (i == JMP) pc = (int *)*pc; // jump case IMM: a = *pc++; break; // load global address or immediate
else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine case JMP: pc = (int *)*pc; break; // jump
else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero case JSR: *--sp = (int)(pc + 1); pc = (int *)*pc; break; // jump to subroutine
else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero case BZ: pc = a ? pc + 1 : (int *)*pc; break; // branch if zero
else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine case BNZ: pc = a ? (int *)*pc : pc + 1; break; // branch if not zero
else if (i == ADJ) sp = sp + *pc++; // stack adjust case ENT: *--sp = (int)bp; bp = sp; sp = sp - *pc++; break; // enter subroutine
else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine case ADJ: sp = sp + *pc++; break; // stack adjust
else if (i == LI) a = *(int *)a; // load int case LEV: sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; break; // leave subroutine
else if (i == LC) a = *(char *)a; // load char case LI: a = *(int *)a; break; // load int
else if (i == SI) *(int *)*sp++ = a; // store int case LC: a = *(char *)a; break; // load char
else if (i == SC) a = *(char *)*sp++ = a; // store char case SI: *(int *)*sp++ = a; break; // store int
else if (i == PSH) *--sp = a; // push case SC: a = *(char *)*sp++ = a; break; // store char
case PSH: *--sp = a; break; // push
else if (i == OR) a = *sp++ | a; case OR: a = *sp++ | a; break;
else if (i == XOR) a = *sp++ ^ a; case XOR: a = *sp++ ^ a; break;
else if (i == AND) a = *sp++ & a; case AND: a = *sp++ & a; break;
else if (i == EQ) a = *sp++ == a; case EQ: a = *sp++ == a; break;
else if (i == NE) a = *sp++ != a; case NE: a = *sp++ != a; break;
else if (i == LT) a = *sp++ < a; case LT: a = *sp++ < a; break;
else if (i == GT) a = *sp++ > a; case GT: a = *sp++ > a; break;
else if (i == LE) a = *sp++ <= a; case LE: a = *sp++ <= a; break;
else if (i == GE) a = *sp++ >= a; case GE: a = *sp++ >= a; break;
else if (i == SHL) a = *sp++ << a; case SHL: a = *sp++ << a; break;
else if (i == SHR) a = *sp++ >> a; case SHR: a = *sp++ >> a; break;
else if (i == ADD) a = *sp++ + a; case ADD: a = *sp++ + a; break;
else if (i == SUB) a = *sp++ - a; case SUB: a = *sp++ - a; break;
else if (i == MUL) a = *sp++ * a; case MUL: a = *sp++ * a; break;
else if (i == DIV) a = *sp++ / a; case DIV: a = *sp++ / a; break;
else if (i == MOD) a = *sp++ % a; case MOD: a = *sp++ % a; break;
else if (i == OPEN) a = open((char *)sp[1], *sp); case OPEN: a = open((char *)sp[1], *sp); break;
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); case READ: a = read(sp[2], (char *)sp[1], *sp); break;
else if (i == CLOS) a = close(*sp); case CLOS: a = close(*sp); break;
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } case PRTF: t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); break;
else if (i == MALC) a = (int)malloc(*sp); case MALC: a = (int)malloc(*sp); break;
else if (i == FREE) free((void *)*sp); case MSET: a = (int)memset((char *)sp[2], sp[1], *sp); break;
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); case MCMP: a = memcmp((char *)sp[2], (char *)sp[1], *sp); break;
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); case EXIT: printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp;
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } default: printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1;
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } }
} }
} }