Compare commits

..

6 commits

Author SHA1 Message Date
Robert Swierczek
3f098f3ea8 Merge pull request #32 from nubok/patch-2
Avoid undefined behavior - bp is initialized when setting up the stack
2017-08-21 17:39:47 -04:00
Wolfgang Keller
6884cb4de9 Avoid undefined behavior - bp is initialized when setting up the stack 2017-08-21 23:30:09 +02:00
Robert Swierczek
567db57ef2 Merge pull request #27 from chainhelen/master
add a FREE opcode
2017-02-24 06:21:56 -05:00
chainhelen
461f117cda add a FREE opcode 2017-02-24 17:28:35 +08:00
rswier
80db11debf Revert "Added structures"
This reverts commit 32b33ff4f6.
2016-02-26 01:17:04 -05:00
rswier
32b33ff4f6 Added structures
This change adds structures (struct) along with the dot (.) and arrow (->) operators.  It is very silly to add this level of complexity while keeping it just four functions.  But it had to be done :-)
2016-02-25 20:59:44 -05:00
2 changed files with 220 additions and 377 deletions

View file

@ -3,10 +3,6 @@ c4 - C in four functions
An exercise in minimalism.
This branch adds structures (struct) along with the dot (.) and arrow (->) operators.
It is very silly to add this level of complexity while keeping it just four functions.
But it had to be done :-)
Try the following:
gcc -o c4 c4.c (you may need the -m32 option on 64bit machines)

593
c4.c
View file

@ -1,27 +1,23 @@
// c4.c - C in four functions
// char, int, structs, and pointer types
// if, while, return, switch, and expression statements
// char, int, and pointer types
// if, while, return, and expression statements
// just enough features to allow self-compilation and a bit more
// Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
#include <fcntl.h>
char *p, *lp, // current position in source code
*data, // data/bss pointer
*ops; // opcodes
*data; // data/bss pointer
int *e, *le, // current position in emitted code
*cas, // case statement patch-up pointer
*brk, // break statement patch-up pointer
*def, // default statement patch-up pointer
*tsize, // array (indexed by type) of type sizes
tnew, // next available type
*id, // currently parsed identifier
*sym, // symbol table (simple list of identifiers)
tk, // current token
ival, // current token value
ty, // current expression type
@ -30,62 +26,59 @@ int *e, *le, // current position in emitted code
src, // print source and assembly flag
debug; // print executed instructions
// identifier
struct ident_s {
int tk;
int hash;
char *name;
int class;
int type;
int val;
int stype;
int hclass;
int htype;
int hval;
} *id, // currently parsed identifier
*sym; // symbol table (simple list of identifiers)
struct member_s {
struct ident_s *id;
int offset;
int type;
struct member_s *next;
} **members; // array (indexed by type) of struct member lists
// tokens and classes (operators last and in precedence order)
enum {
Num = 128, Fun, Sys, Glo, Loc, Id,
Break, Case, Char, Default, Else, Enum, If, Int, Return, Sizeof, Struct, Switch, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Dot, Arrow, Brak
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};
// opcodes
enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT };
// types
enum { CHAR, INT, PTR = 256, PTR2 = 512 };
enum { CHAR, INT, PTR };
// identifier offsets (since we can't create an ident struct)
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
void next()
{
char *pp;
while (tk = *p) {
++p;
if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
if (tk == '\n') {
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
++line;
}
else if (tk == '#') {
while (*p != 0 && *p != '\n') ++p;
}
else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
pp = p - 1;
while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
tk = tk * 147 + *p++;
tk = (tk << 6) + (p - pp);
id = sym;
while (id->tk) {
if (tk == id->hash && !memcmp(id->name, pp, p - pp)) { tk = id->tk; return; }
id = id + 1;
while (id[Tk]) {
if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
id = id + Idsz;
}
id->name = pp;
id->hash = tk;
tk = id->tk = Id;
id[Name] = (int)pp;
id[Hash] = tk;
tk = id[Tk] = Id;
return;
}
else if (tk >= '0' && tk <= '9') {
@ -98,114 +91,89 @@ void next()
tk = Num;
return;
}
switch (tk) {
case '\n':
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
while (le < e) {
printf("%8.4s", &ops[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
++line;
case ' ': case '\t': case '\v': case '\f': case '\r':
break;
case '/':
else if (tk == '/') {
if (*p == '/') {
case '#':
++p;
while (*p != 0 && *p != '\n') ++p;
} else {
}
else {
tk = Div;
return;
}
break;
case '\'':
case '"':
}
else if (tk == '\'' || tk == '"') {
pp = data;
while (*p != 0 && *p != tk) {
if ((ival = *p++) == '\\') {
switch (ival = *p++) {
case 'n': ival = '\n'; break;
case 't': ival = '\t'; break;
case 'v': ival = '\v'; break;
case 'f': ival = '\f'; break;
case 'r': ival = '\r';
}
if ((ival = *p++) == 'n') ival = '\n';
}
if (tk == '"') *data++ = ival;
}
++p;
if (tk == '"') ival = (int)pp; else tk = Num;
return;
case '=': if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return;
case '+': if (*p == '+') { ++p; tk = Inc; } else tk = Add; return;
case '-': if (*p == '-') { ++p; tk = Dec; } else if (*p == '>') { ++p; tk = Arrow; } else tk = Sub; return;
case '!': if (*p == '=') { ++p; tk = Ne; } return;
case '<': if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return;
case '>': if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return;
case '|': if (*p == '|') { ++p; tk = Lor; } else tk = Or; return;
case '&': if (*p == '&') { ++p; tk = Lan; } else tk = And; return;
case '^': tk = Xor; return;
case '%': tk = Mod; return;
case '*': tk = Mul; return;
case '[': tk = Brak; return;
case '?': tk = Cond; return;
case '.': tk = Dot; return;
default: return;
}
else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }
else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
else if (tk == '^') { tk = Xor; return; }
else if (tk == '%') { tk = Mod; return; }
else if (tk == '*') { tk = Mul; return; }
else if (tk == '[') { tk = Brak; return; }
else if (tk == '?') { tk = Cond; return; }
else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
}
}
void expr(int lev)
{
int t, *b, sz;
struct ident_s *d;
struct member_s *m;
int t, *d;
switch (tk) {
case 0: printf("%d: unexpected eof in expression\n", line); exit(-1);
case Num: *++e = IMM; *++e = ival; next(); ty = INT; break;
case '"':
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; }
else if (tk == '"') {
*++e = IMM; *++e = ival; next();
while (tk == '"') next();
data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
break;
case Sizeof:
}
else if (tk == Sizeof) {
next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
else if (tk == Struct) { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } ty = id->stype; next(); }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = ty >= PTR ? sizeof(int) : tsize[ty];
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);
ty = INT;
break;
case Id:
}
else if (tk == Id) {
d = id; next();
if (tk == '(') {
next();
t = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
next();
if (d->class == Sys) *++e = d->val;
else if (d->class == Fun) { *++e = JSR; *++e = d->val; }
if (d[Class] == Sys) *++e = d[Val];
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }
ty = d->type;
ty = d[Type];
}
else if (d->class == Num) { *++e = IMM; *++e = d->val; ty = INT; }
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }
else {
if (d->class == Loc) { *++e = LEA; *++e = loc - d->val; }
else if (d->class == Glo) { *++e = IMM; *++e = d->val; }
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }
else { printf("%d: undefined variable\n", line); exit(-1); }
if ((ty = d->type) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
*++e = ((ty = d[Type]) == CHAR) ? LC : LI;
}
break;
case '(':
}
else if (tk == '(') {
next();
if (tk == Int || tk == Char || tk == Struct) {
if (tk == Int) { next(); t = INT; } else if (tk == Char) { next(); t = CHAR; }
else { next(); if (tk != Id) { printf("%d: bad struct type\n", line); exit(-1); } t = id->stype; next(); }
if (tk == Int || tk == Char) {
t = (tk == Int) ? INT : CHAR; next();
while (tk == Mul) { next(); t = t + PTR; }
if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
expr(Inc);
@ -215,130 +183,108 @@ void expr(int lev)
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
}
break;
case Mul:
}
else if (tk == Mul) {
next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
case And:
*++e = (ty == CHAR) ? LC : LI;
}
else if (tk == And) {
next(); expr(Inc);
if (*e == LC || *e == LI) --e; // XXX else { printf("%d: bad address-of\n", line); exit(-1); }
if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR;
break;
case '!': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; break;
case '~': next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; break;
case Add: next(); expr(Inc); ty = INT; break;
case Sub:
}
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }
else if (tk == Add) { next(); expr(Inc); ty = INT; }
else if (tk == Sub) {
next(); *++e = IMM;
if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }
ty = INT;
break;
case Inc: case Dec:
}
else if (tk == Inc || tk == Dec) {
t = tk; next(); expr(Inc);
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;
*++e = IMM; *++e = ty >= PTR2 ? sizeof(int) : (ty >= PTR) ? tsize[ty - PTR] : 1;
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
break;
default: printf("%d: bad expression\n", line); exit(-1);
}
else { printf("%d: bad expression\n", line); exit(-1); }
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty;
switch (tk) {
case Assign:
if (tk == Assign) {
next();
if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;
break;
case Cond:
}
else if (tk == Cond) {
next();
*++e = BZ; b = ++e;
*++e = BZ; d = ++e;
expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*b = (int)(e + 3); *++e = JMP; b = ++e;
*d = (int)(e + 3); *++e = JMP; d = ++e;
expr(Cond);
*b = (int)(e + 1);
break;
case Lor: next(); *++e = BNZ; b = ++e; expr(Lan); *b = (int)(e + 1); ty = INT; break;
case Lan: next(); *++e = BZ; b = ++e; expr(Or); *b = (int)(e + 1); ty = INT; break;
case Or: next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; break;
case Xor: next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; break;
case And: next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; break;
case Eq: next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; break;
case Ne: next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; break;
case Lt: next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; break;
case Gt: next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; break;
case Le: next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; break;
case Ge: next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; break;
case Shl: next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; break;
case Shr: next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; break;
case Add:
*d = (int)(e + 1);
}
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; }
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; }
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; }
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; }
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; }
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; }
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; }
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; }
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; }
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }
else if (tk == Add) {
next(); *++e = PSH; expr(Mul);
sz = (ty = t) >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
*++e = ADD;
break;
case Sub:
}
else if (tk == Sub) {
next(); *++e = PSH; expr(Mul);
sz = t >= PTR2 ? sizeof(int) : t >= PTR ? tsize[t - PTR] : 1;
if (t == ty && sz > 1) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sz; *++e = DIV; ty = INT; }
else if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; *++e = SUB; }
if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }
else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }
else *++e = SUB;
ty = t;
break;
case Mul: next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; break;
case Div: next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; break;
case Mod: next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; break;
case Inc: case Dec:
}
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }
else if (tk == Inc || tk == Dec) {
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
sz = ty >= PTR2 ? sizeof(int) : ty >= PTR ? tsize[ty - PTR] : 1;
*++e = PSH; *++e = IMM; *++e = sz;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
*++e = PSH; *++e = IMM; *++e = sz;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? SUB : ADD;
next();
break;
case Dot:
ty = ty + PTR;
case Arrow:
if (ty <= PTR+INT || ty >= PTR2) { printf("%d: structure expected\n", line); exit(-1); }
next();
if (tk != Id) { printf("%d: structure member expected\n", line); exit(-1); }
m = members[ty - PTR]; while (m && m->id != id) m = m->next;
if (!m) { printf("%d: structure member not found\n", line); exit(-1); }
if (m->offset) { *++e = PSH; *++e = IMM; *++e = m->offset; *++e = ADD; }
ty = m->type;
if (ty <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
next();
break;
case Brak:
}
else if (tk == Brak) {
next(); *++e = PSH; expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
sz = (t = t - PTR) >= PTR ? sizeof(int) : tsize[t];
if (sz > 1) { *++e = PSH; *++e = IMM; *++e = sz; *++e = MUL; }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
*++e = ADD;
if ((ty = t) <= INT || ty >= PTR) *++e = (ty == CHAR) ? LC : LI;
break;
default: printf("%d: compiler error tk=%d\n", line, tk); exit(-1);
*++e = ((ty = t - PTR) == CHAR) ? LC : LI;
}
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
}
}
void stmt()
{
int *a, *b, *d, i;
int *a, *b;
switch (tk) {
case If:
if (tk == If) {
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
@ -351,8 +297,8 @@ void stmt()
stmt();
}
*b = (int)(e + 1);
return;
case While:
}
else if (tk == While) {
next();
a = e + 1;
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
@ -362,54 +308,22 @@ void stmt()
stmt();
*++e = JMP; *++e = (int)a;
*b = (int)(e + 1);
return;
case Switch:
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
a = cas; *++e = JMP; cas = ++e;
b = brk; d = def; brk = def = 0;
stmt();
*cas = def ? (int)def : (int)(e + 1); cas = a;
while (brk) { a = (int *)*brk; *brk = (int)(e + 1); brk = a; }
brk = b; def = d;
return;
case Case:
*++e = JMP; ++e; *e = (int)(e + 7); *++e = PSH; i = *cas; *cas = (int)e;
next();
expr(Or);
if (e[-1] != IMM) { printf("%d: bad case immediate\n", line); exit(-1); }
*e = *e - i; *++e = SUB; *++e = BNZ; cas = ++e; *e = i + e[-3];
if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); }
stmt();
return;
case Break:
next();
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
*++e = JMP; *++e = (int)brk; brk = e;
return;
case Default:
next();
if (tk == ':') next(); else { printf("%d: colon expected\n", line); exit(-1); }
def = e + 1;
stmt();
return;
case Return:
}
else if (tk == Return) {
next();
if (tk != ';') expr(Assign);
*++e = LEV;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
return;
case '{':
}
else if (tk == '{') {
next();
while (tk != '}') stmt();
next();
return;
case ';':
}
else if (tk == ';') {
next();
return;
default:
}
else {
expr(Assign);
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
@ -417,9 +331,7 @@ void stmt()
int main(int argc, char **argv)
{
int fd, bt, mbt, ty, poolsz;
struct ident_s *idmain, *d;
struct member_s *m;
int fd, bt, ty, poolsz, *idmain;
int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t; // temps
@ -435,24 +347,16 @@ int main(int argc, char **argv)
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
if (!(tsize = malloc(PTR * sizeof(int)))) { printf("could not malloc() tsize area\n"); return -1; }
if (!(members = malloc(PTR * sizeof(struct member_s *)))) { printf("could not malloc() members area\n"); return -1; }
memset(sym, 0, poolsz);
memset(e, 0, poolsz);
memset(data, 0, poolsz);
memset(tsize, 0, PTR * sizeof(int));
memset(members, 0, PTR * sizeof(struct member_s *));
ops = "LEA IMM JMP JSR BZ BNZ ENT ADJ LEV LI LC SI SC PSH "
"OR XOR AND EQ NE LT GT LE GE SHL SHR ADD SUB MUL DIV MOD "
"OPEN READ CLOS PRTF MALC MSET MCMP EXIT ";
p = "break case char default else enum if int return sizeof struct switch while "
"open read close printf malloc memset memcmp exit void main";
i = Break; while (i <= While) { next(); id->tk = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id->class = Sys; id->type = INT; id->val = i++; } // add library to symbol table
next(); id->tk = Char; // handle void type
p = "char else enum if int return sizeof while "
"open read close printf malloc free memset memcmp exit void main";
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
next(); id[Tk] = Char; // handle void type
next(); idmain = id; // keep track of main
if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
@ -460,10 +364,6 @@ int main(int argc, char **argv)
p[i] = 0;
close(fd);
// add primitive types
tsize[tnew++] = sizeof(char);
tsize[tnew++] = sizeof(int);
// parse declarations
line = 1;
next();
@ -486,83 +386,33 @@ int main(int argc, char **argv)
i = ival;
next();
}
id->class = Num; id->type = INT; id->val = i++;
id[Class] = Num; id[Type] = INT; id[Val] = i++;
if (tk == ',') next();
}
next();
}
}
else if (tk == Struct) {
next();
if (tk == Id) {
if (!id->stype) id->stype = tnew++;
bt = id->stype;
next();
} else {
bt = tnew++;
}
if (tk == '{') {
next();
if (members[bt]) { printf("%d: duplicate structure definition\n", line); return -1; }
i = 0;
while (tk != '}') {
mbt = INT;
if (tk == Int) next();
else if (tk == Char) { next(); mbt = CHAR; }
else if (tk == Struct) {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
mbt = id->stype;
next();
}
while (tk != ';') {
ty = mbt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad struct member definition\n", line); return -1; }
m = malloc(sizeof(struct member_s));
m->id = id;
m->offset = i;
m->type = ty;
m->next = members[bt];
members[bt] = m;
i = i + (ty >= PTR ? sizeof(int) : tsize[ty]);
i = (i + 3) & -4;
next();
if (tk == ',') next();
}
next();
}
next();
tsize[bt] = i;
}
}
while (tk != ';' && tk != '}') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
if (id->class) { printf("%d: duplicate global definition\n", line); return -1; }
if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
next();
id->type = ty;
id[Type] = ty;
if (tk == '(') { // function
id->class = Fun;
id->val = (int)(e + 1);
id[Class] = Fun;
id[Val] = (int)(e + 1);
next(); i = 0;
while (tk != ')') {
ty = INT;
if (tk == Int) next();
else if (tk == Char) { next(); ty = CHAR; }
else if (tk == Struct) {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
ty = id->stype;
next();
}
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
if (id->class == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
id->hclass = id->class; id->class = Loc;
id->htype = id->type; id->type = ty;
id->hval = id->val; id->val = i++;
if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = i++;
next();
if (tk == ',') next();
}
@ -570,21 +420,17 @@ int main(int argc, char **argv)
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i;
next();
while (tk == Int || tk == Char || tk == Struct) {
if (tk == Int) bt = INT; else if (tk == Char) bt = CHAR; else {
next();
if (tk != Id) { printf("%d: bad struct declaration\n", line); return -1; }
bt = id->stype;
}
while (tk == Int || tk == Char) {
bt = (tk == Int) ? INT : CHAR;
next();
while (tk != ';') {
ty = bt;
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
if (id->class == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id->hclass = id->class; id->class = Loc;
id->htype = id->type; id->type = ty;
id->hval = id->val; id->val = ++i;
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = ++i;
next();
if (tk == ',') next();
}
@ -594,18 +440,18 @@ int main(int argc, char **argv)
while (tk != '}') stmt();
*++e = LEV;
id = sym; // unwind symbol table locals
while (id->tk) {
if (id->class == Loc) {
id->class = id->hclass;
id->type = id->htype;
id->val = id->hval;
while (id[Tk]) {
if (id[Class] == Loc) {
id[Class] = id[HClass];
id[Type] = id[HType];
id[Val] = id[HVal];
}
id = id + 1;
id = id + Idsz;
}
}
else {
id->class = Glo;
id->val = (int)data;
id[Class] = Glo;
id[Val] = (int)data;
data = data + sizeof(int);
}
if (tk == ',') next();
@ -613,11 +459,11 @@ int main(int argc, char **argv)
next();
}
if (!(pc = (int *)idmain->val)) { printf("main() not defined\n"); return -1; }
if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; }
if (src) return 0;
// setup stack
sp = (int *)((int)sp + poolsz);
bp = sp = (int *)((int)sp + poolsz);
*--sp = EXIT; // call exit if main returns
*--sp = PSH; t = sp;
*--sp = argc;
@ -630,51 +476,52 @@ int main(int argc, char **argv)
i = *pc++; ++cycle;
if (debug) {
printf("%d> %.4s", cycle,
&ops[i * 5]);
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,FREE,MSET,MCMP,EXIT,"[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
}
switch (i) {
case LEA: a = (int)(bp + *pc++); break; // load local address
case IMM: a = *pc++; break; // load global address or immediate
case JMP: pc = (int *)*pc; break; // jump
case JSR: *--sp = (int)(pc + 1); pc = (int *)*pc; break; // jump to subroutine
case BZ: pc = a ? pc + 1 : (int *)*pc; break; // branch if zero
case BNZ: pc = a ? (int *)*pc : pc + 1; break; // branch if not zero
case ENT: *--sp = (int)bp; bp = sp; sp = sp - *pc++; break; // enter subroutine
case ADJ: sp = sp + *pc++; break; // stack adjust
case LEV: sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; break; // leave subroutine
case LI: a = *(int *)a; break; // load int
case LC: a = *(char *)a; break; // load char
case SI: *(int *)*sp++ = a; break; // store int
case SC: a = *(char *)*sp++ = a; break; // store char
case PSH: *--sp = a; break; // push
if (i == LEA) a = (int)(bp + *pc++); // load local address
else if (i == IMM) a = *pc++; // load global address or immediate
else if (i == JMP) pc = (int *)*pc; // jump
else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine
else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero
else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero
else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine
else if (i == ADJ) sp = sp + *pc++; // stack adjust
else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine
else if (i == LI) a = *(int *)a; // load int
else if (i == LC) a = *(char *)a; // load char
else if (i == SI) *(int *)*sp++ = a; // store int
else if (i == SC) a = *(char *)*sp++ = a; // store char
else if (i == PSH) *--sp = a; // push
case OR: a = *sp++ | a; break;
case XOR: a = *sp++ ^ a; break;
case AND: a = *sp++ & a; break;
case EQ: a = *sp++ == a; break;
case NE: a = *sp++ != a; break;
case LT: a = *sp++ < a; break;
case GT: a = *sp++ > a; break;
case LE: a = *sp++ <= a; break;
case GE: a = *sp++ >= a; break;
case SHL: a = *sp++ << a; break;
case SHR: a = *sp++ >> a; break;
case ADD: a = *sp++ + a; break;
case SUB: a = *sp++ - a; break;
case MUL: a = *sp++ * a; break;
case DIV: a = *sp++ / a; break;
case MOD: a = *sp++ % a; break;
else if (i == OR) a = *sp++ | a;
else if (i == XOR) a = *sp++ ^ a;
else if (i == AND) a = *sp++ & a;
else if (i == EQ) a = *sp++ == a;
else if (i == NE) a = *sp++ != a;
else if (i == LT) a = *sp++ < a;
else if (i == GT) a = *sp++ > a;
else if (i == LE) a = *sp++ <= a;
else if (i == GE) a = *sp++ >= a;
else if (i == SHL) a = *sp++ << a;
else if (i == SHR) a = *sp++ >> a;
else if (i == ADD) a = *sp++ + a;
else if (i == SUB) a = *sp++ - a;
else if (i == MUL) a = *sp++ * a;
else if (i == DIV) a = *sp++ / a;
else if (i == MOD) a = *sp++ % a;
case OPEN: a = open((char *)sp[1], *sp); break;
case READ: a = read(sp[2], (char *)sp[1], *sp); break;
case CLOS: a = close(*sp); break;
case PRTF: t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); break;
case MALC: a = (int)malloc(*sp); break;
case MSET: a = (int)memset((char *)sp[2], sp[1], *sp); break;
case MCMP: a = memcmp((char *)sp[2], (char *)sp[1], *sp); break;
case EXIT: printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp;
default: printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1;
}
else if (i == OPEN) a = open((char *)sp[1], *sp);
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp);
else if (i == CLOS) a = close(*sp);
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); }
else if (i == MALC) a = (int)malloc(*sp);
else if (i == FREE) free((void *)*sp);
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp);
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp);
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
}
}
}