Compare commits

...

2 commits

Author SHA1 Message Date
rswier
13835a4b41 Include headers 2016-02-26 01:40:06 -05:00
rswier
d8e61a829c AST + Code Generator
Extends c4 by adding Abstract Syntax Tree creation and back-end code
generation
2016-01-27 02:11:00 -05:00
4 changed files with 798 additions and 124 deletions

View file

@ -1,14 +1,20 @@
c4 - C in four functions c5 - C in four functions + AST + back-end code generator
======================== ========================================================
An exercise in minimalism. An exercise in minimalism.
This branch extends **c4.c** by adding:
* Abstract Syntax Tree creation
* Back-end code generator function: **gen()**
* Standard ordering of function parameters on stack
* Native x86 version: **c5x86.c**
* Various optimizations
Try the following: Try the following:
gcc -o c4 c4.c (you may need the -m32 option on 64bit machines) gcc -o c5 c5.c (you may need the -m32 option on 64bit machines)
./c4 hello.c ./c5 hello.c
./c4 -s hello.c ./c5 -s hello.c
./c4 c4.c hello.c ./c5 c5.c hello.c
./c4 c4.c c4.c hello.c ./c5 c5.c c5.c hello.c

View file

@ -1,33 +1,39 @@
// c4.c - C in four functions // c5.c - C in five functions
// char, int, and pointer types // c4.c plus
// if, while, return, and expression statements // abstract syntax tree creation
// just enough features to allow self-compilation and a bit more // back-end code generator
// parameters passed in correct order
// various optimizations
// Written by Robert Swierczek // Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <memory.h> #include <memory.h>
#include <unistd.h> #include <fcntl.h>
#ifdef _WIN32
#include "w32.h"
#endif
char *p, *lp, // current position in source code char *p, *lp, // current position in source code
*data; // data/bss pointer *data; // data/bss pointer
int *e, *le, // current position in emitted code int *e, *le, // current position in emitted code
*id, // currently parsed identifier *id, // currently parsed identifier
*n, // current node in abstract syntax tree
*sym, // symbol table (simple list of identifiers) *sym, // symbol table (simple list of identifiers)
tk, // current token tk, // current token
ival, // current token value ival, // current token value
ty, // current expression type ty, // current expression type
loc, // local variable offset
line, // current line number line, // current line number
src, // print source and assembly flag src, // print source and assembly flag
debug; // print executed instructions debug; // print executed instructions
// tokens and classes (operators last and in precedence order) // tokens and classes (operators last and in precedence order)
enum { enum {
Num = 128, Fun, Sys, Glo, Loc, Id, Num = 128, Fun, Sys, Glo, Loc, Id, Load, Enter,
Char, Else, Enum, If, Int, Return, Sizeof, While, Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
}; };
@ -35,7 +41,7 @@ enum {
// opcodes // opcodes
enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH , enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD , OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT }; OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT };
// types // types
enum { CHAR, INT, PTR }; enum { CHAR, INT, PTR };
@ -56,7 +62,7 @@ void next()
while (le < e) { while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]); "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n"); if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
} }
} }
@ -131,12 +137,12 @@ void next()
void expr(int lev) void expr(int lev)
{ {
int t, *d; int t, *d, *b;
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; }
else if (tk == '"') { else if (tk == '"') {
*++e = IMM; *++e = ival; next(); *--n = ival; *--n = Num; next();
while (tk == '"') next(); while (tk == '"') next();
data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
} }
@ -145,28 +151,26 @@ void expr(int lev)
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
while (tk == Mul) { next(); ty = ty + PTR; } while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num;
ty = INT; ty = INT;
} }
else if (tk == Id) { else if (tk == Id) {
d = id; next(); d = id; next();
if (tk == '(') { if (tk == '(') {
if (d[Class] != Sys && d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); }
next(); next();
t = 0; t = 0; b = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); }
next(); next();
if (d[Class] == Sys) *++e = d[Val]; *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class];
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }
ty = d[Type]; ty = d[Type];
} }
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; }
else { else {
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; }
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; }
else { printf("%d: undefined variable\n", line); exit(-1); } else { printf("%d: undefined variable\n", line); exit(-1); }
*++e = ((ty = d[Type]) == CHAR) ? LC : LI; *--n = ty = d[Type]; *--n = Load;
} }
} }
else if (tk == '(') { else if (tk == '(') {
@ -186,94 +190,89 @@ void expr(int lev)
else if (tk == Mul) { else if (tk == Mul) {
next(); expr(Inc); next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
*++e = (ty == CHAR) ? LC : LI; *--n = ty; *--n = Load;
} }
else if (tk == And) { else if (tk == And) {
next(); expr(Inc); next(); expr(Inc);
if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR; ty = ty + PTR;
} }
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } else if (tk == '!') {
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } next(); expr(Inc);
if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; }
ty = INT;
}
else if (tk == '~') {
next(); expr(Inc);
if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; }
ty = INT;
}
else if (tk == Add) { next(); expr(Inc); ty = INT; } else if (tk == Add) { next(); expr(Inc); ty = INT; }
else if (tk == Sub) { else if (tk == Sub) {
next(); *++e = IMM; next(); expr(Inc);
if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; }
ty = INT; ty = INT;
} }
else if (tk == Inc || tk == Dec) { else if (tk == Inc || tk == Dec) {
t = tk; next(); expr(Inc); t = tk; next(); expr(Inc);
if (*e == LC) { *e = PSH; *++e = LC; } if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
} }
else { printf("%d: bad expression\n", line); exit(-1); } else { printf("%d: bad expression\n", line); exit(-1); }
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty; t = ty; b = n;
if (tk == Assign) { if (tk == Assign) {
next(); next();
if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign;
} }
else if (tk == Cond) { else if (tk == Cond) {
next(); next();
*++e = BZ; d = ++e;
expr(Assign); expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*d = (int)(e + 3); *++e = JMP; d = ++e; d = n;
expr(Cond); expr(Cond);
*d = (int)(e + 1); --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond;
} }
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = Lor; } ty = INT; }
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; }
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; }
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; }
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; }
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; }
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; }
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; }
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; }
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; }
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; }
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; }
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; }
else if (tk == Add) { else if (tk == Add) {
next(); *++e = PSH; expr(Mul); next(); expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
*++e = ADD; if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
} }
else if (tk == Sub) { else if (tk == Sub) {
next(); *++e = PSH; expr(Mul); next(); expr(Mul);
if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; }
else *++e = SUB;
} }
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; }
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; }
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; }
else if (tk == Inc || tk == Dec) { else if (tk == Inc || tk == Dec) {
if (*e == LC) { *e = PSH; *++e = LC; } if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
else if (*e == LI) { *e = PSH; *++e = LI; } *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num;
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } *--n = (int)b; *--n = (tk == Inc) ? Sub : Add;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? SUB : ADD;
next(); next();
} }
else if (tk == Brak) { else if (tk == Brak) {
next(); *++e = PSH; expr(Assign); next(); expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
*++e = ADD; if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
*++e = ((ty = t - PTR) == CHAR) ? LC : LI; *--n = ty = t - PTR; *--n = Load;
} }
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
} }
@ -281,46 +280,39 @@ void expr(int lev)
void stmt() void stmt()
{ {
int *a, *b; int *a, *b, *c;
if (tk == If) { if (tk == If) {
next(); next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign); expr(Assign); a = n;
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e; stmt(); b = n;
stmt(); if (tk == Else) { next(); stmt(); c = n; } else c = 0;
if (tk == Else) { *--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond;
*b = (int)(e + 3); *++e = JMP; b = ++e;
next();
stmt();
}
*b = (int)(e + 1);
} }
else if (tk == While) { else if (tk == While) {
next(); next();
a = e + 1;
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign); expr(Assign); a = n;
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e;
stmt(); stmt();
*++e = JMP; *++e = (int)a; *--n = (int)a; *--n = While;
*b = (int)(e + 1);
} }
else if (tk == Return) { else if (tk == Return) {
next(); next();
if (tk != ';') expr(Assign); if (tk != ';') { expr(Assign); a = n; } else a = 0;
*++e = LEV;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
*--n = (int)a; *--n = Return;
} }
else if (tk == '{') { else if (tk == '{') {
next(); next();
while (tk != '}') stmt(); *--n = ';';
while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; }
next(); next();
} }
else if (tk == ';') { else if (tk == ';') {
next(); next(); *--n = ';';
} }
else { else {
expr(Assign); expr(Assign);
@ -328,16 +320,74 @@ void stmt()
} }
} }
void gen(int *n)
{
int i, *a, *b;
i = *n;
if (i == Num) { *++e = IMM; *++e = n[1]; }
else if (i == Loc) { *++e = LEA; *++e = n[1]; }
else if (i == Load) { gen(n+2); *++e = (n[1] == CHAR) ? LC : LI; }
else if (i == Assign) { gen((int *)n[2]); *++e = PSH; gen(n+3); *++e = (n[1] == CHAR) ? SC : SI; }
else if (i == Inc || i == Dec) {
gen(n+2);
*++e = PSH; *++e = (n[1] == CHAR) ? LC : LI; *++e = PSH;
*++e = IMM; *++e = (n[1] > PTR) ? sizeof(int) : sizeof(char);
*++e = (i == Inc) ? ADD : SUB;
*++e = (n[1] == CHAR) ? SC : SI;
}
else if (i == Cond) {
gen((int *)n[1]);
*++e = BZ; b = ++e;
gen((int *)n[2]);
if (n[3]) { *b = (int)(e + 3); *++e = JMP; b = ++e; gen((int *)n[3]); }
*b = (int)(e + 1);
}
else if (i == Lor) { gen((int *)n[1]); *++e = BNZ; b = ++e; gen(n+2); *b = (int)(e + 1); }
else if (i == Lan) { gen((int *)n[1]); *++e = BZ; b = ++e; gen(n+2); *b = (int)(e + 1); }
else if (i == Or) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = OR; }
else if (i == Xor) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = XOR; }
else if (i == And) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = AND; }
else if (i == Eq) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = EQ; }
else if (i == Ne) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = NE; }
else if (i == Lt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LT; }
else if (i == Gt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GT; }
else if (i == Le) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LE; }
else if (i == Ge) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GE; }
else if (i == Shl) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHL; }
else if (i == Shr) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHR; }
else if (i == Add) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = ADD; }
else if (i == Sub) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SUB; }
else if (i == Mul) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MUL; }
else if (i == Div) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = DIV; }
else if (i == Mod) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MOD; }
else if (i == Sys || i == Fun) {
b = (int *)n[1];
while (b) { gen(b+1); *++e = PSH; b = (int *)*b; }
if (i == Fun) *++e = JSR; *++e = n[2];
if (n[3]) { *++e = ADJ; *++e = n[3]; }
}
else if (i == While) {
*++e = JMP; b = ++e; gen(n+2); *b = (int)(e + 1);
gen((int *)n[1]);
*++e = BNZ; *++e = (int)(b + 1);
}
else if (i == Return) { if (n[1]) gen((int *)n[1]); *++e = LEV; }
else if (i == '{') { gen((int *)n[1]); gen(n+2); }
else if (i == Enter) { *++e = ENT; *++e = n[1]; gen(n+2); *++e = LEV; }
else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); }
}
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
int fd, bt, ty, poolsz, *idmain; int fd, bt, ty, poolsz, *idmain, *ast;
int *pc, *sp, *bp, a, cycle; // vm registers int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t; // temps int i, *t; // temps
--argc; ++argv; --argc; ++argv;
if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; } if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; } if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; }
if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; } if (argc < 1) { printf("usage: c5 [-s] [-d] file ...\n"); return -1; }
if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; } if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }
@ -346,13 +396,15 @@ int main(int argc, char **argv)
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; }
ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack
memset(sym, 0, poolsz); memset(sym, 0, poolsz);
memset(e, 0, poolsz); memset(e, 0, poolsz);
memset(data, 0, poolsz); memset(data, 0, poolsz);
p = "char else enum if int return sizeof while " p = "char else enum if int return sizeof while "
"open read close printf malloc memset memcmp exit void main"; "open read close printf malloc memset memcmp memcpy mmap dlsym qsort exit void main";
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
next(); id[Tk] = Char; // handle void type next(); id[Tk] = Char; // handle void type
@ -381,9 +433,9 @@ int main(int argc, char **argv)
next(); next();
if (tk == Assign) { if (tk == Assign) {
next(); next();
if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } n = ast; expr(Cond);
i = ival; if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; }
next(); i = n[1];
} }
id[Class] = Num; id[Type] = INT; id[Val] = i++; id[Class] = Num; id[Type] = INT; id[Val] = i++;
if (tk == ',') next(); if (tk == ',') next();
@ -401,7 +453,7 @@ int main(int argc, char **argv)
if (tk == '(') { // function if (tk == '(') { // function
id[Class] = Fun; id[Class] = Fun;
id[Val] = (int)(e + 1); id[Val] = (int)(e + 1);
next(); i = 0; next(); i = 2;
while (tk != ')') { while (tk != ')') {
ty = INT; ty = INT;
if (tk == Int) next(); if (tk == Int) next();
@ -417,7 +469,7 @@ int main(int argc, char **argv)
} }
next(); next();
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i; i = 0;
next(); next();
while (tk == Int || tk == Char) { while (tk == Int || tk == Char) {
bt = (tk == Int) ? INT : CHAR; bt = (tk == Int) ? INT : CHAR;
@ -429,15 +481,16 @@ int main(int argc, char **argv)
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc; id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty; id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = ++i; id[HVal] = id[Val]; id[Val] = --i;
next(); next();
if (tk == ',') next(); if (tk == ',') next();
} }
next(); next();
} }
*++e = ENT; *++e = i - loc; n = ast;
while (tk != '}') stmt(); *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; }
*++e = LEV; *--n = -i; *--n = Enter;
gen(n);
id = sym; // unwind symbol table locals id = sym; // unwind symbol table locals
while (id[Tk]) { while (id[Tk]) {
if (id[Class] == Loc) { if (id[Class] == Loc) {
@ -465,8 +518,8 @@ int main(int argc, char **argv)
sp = (int *)((int)sp + poolsz); sp = (int *)((int)sp + poolsz);
*--sp = EXIT; // call exit if main returns *--sp = EXIT; // call exit if main returns
*--sp = PSH; t = sp; *--sp = PSH; t = sp;
*--sp = argc;
*--sp = (int)argv; *--sp = (int)argv;
*--sp = argc;
*--sp = (int)t; *--sp = (int)t;
// run... // run...
@ -477,7 +530,7 @@ int main(int argc, char **argv)
printf("%d> %.4s", cycle, printf("%d> %.4s", cycle,
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ," &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ," "OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]); "OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n"); if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
} }
if (i == LEA) a = (int)(bp + *pc++); // load local address if (i == LEA) a = (int)(bp + *pc++); // load local address
@ -512,13 +565,17 @@ int main(int argc, char **argv)
else if (i == DIV) a = *sp++ / a; else if (i == DIV) a = *sp++ / a;
else if (i == MOD) a = *sp++ % a; else if (i == MOD) a = *sp++ % a;
else if (i == OPEN) a = open((char *)sp[1], *sp); else if (i == OPEN) a = open((char *)*sp, sp[1]);
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp); else if (i == READ) a = read(*sp, (char *)sp[1], sp[2]);
else if (i == CLOS) a = close(*sp); else if (i == CLOS) a = close(*sp);
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } else if (i == PRTF) a = printf((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]);
else if (i == MALC) a = (int)malloc(*sp); else if (i == MALC) a = (int)malloc(*sp);
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); else if (i == MSET) a = (int)memset((char *)*sp, sp[1], sp[2]);
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); else if (i == MCMP) a = memcmp((char *)*sp, (char *)sp[1], sp[2]);
else if (i == MCPY) a = (int)memcpy((char *)*sp, (char *)sp[1], sp[2]);
else if (i == MMAP) a = (int)mmap((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]);
else if (i == DSYM) a = (int)dlsym((char *)*sp, (char *)sp[1]);
else if (i == QSRT) qsort((char *)sp, sp[1], sp[2], (void *)sp[3]);
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
} }

579
c5x86.c Normal file
View file

@ -0,0 +1,579 @@
// c5x86.c - C in five functions (native x86 version)
// c4.c plus
// abstract syntax tree creation
// back-end code generator
// parameters passed in correct order
// various optimizations
// Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <fcntl.h>
#ifdef _WIN32
#include "w32.h"
#else
#include <sys/mman.h>
#endif
// Global compiler state shared by the lexer (next) and the parser (expr).
// Everything is a plain char/int/pointer global, matching the rest of the file.
char *p, *lp, // current position in source code
// (lp marks the start of the text not yet echoed when src is set; see next())
*e, // current position in emitted code
*data, // data/bss pointer
// (next() appends string-literal bytes here and advances it)
*dsym; // external function lookup name
// (expr() copies an identifier's name into this buffer, NUL-terminates it and hands it to dlsym)
int *id, // currently parsed identifier
// (points at an Idsz-int record inside sym; set by next() on every identifier)
*n, // current node in abstract syntax tree
// (expr() builds nodes with *--n, i.e. the tree grows towards lower addresses)
*sym, // symbol table (simple list of identifiers)
tk, // current token
ival, // current token value
ty, // current expression type
line, // current line number
src; // print source and assembly flag
// tokens and classes (operators last and in precedence order)
// Values start at 128; next() returns punctuation such as ';' or '(' as its
// own character code, so starting at 128 presumably keeps named tokens clear of
// those plain-character tokens.
// The operators from Assign onward are ordered by increasing precedence; expr()
// is called with one of them as its minimum level (e.g. expr(Assign), expr(Inc)),
// so reordering that last line changes how expressions parse.
// Load is never produced by next(); expr() uses it as an AST node tag.
// NOTE(review): Enter is not used in the visible part of this file - presumably
// also an AST-only tag; confirm against the code generator.
enum {
Num = 128, Fun, Glo, Loc, Id, Load, Enter,
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};
// types
// expr() encodes one level of pointer indirection by adding PTR to a type and
// removes one by subtracting PTR, so any value above INT is a pointer type.
enum { CHAR, INT, PTR };
// identifier offsets (since we can't create an ident struct)
// Each symbol-table entry is Idsz consecutive ints, indexed by these names:
//   Tk    token returned for this name (a keyword token or Id); 0 marks the
//         first unused entry and ends the search in next()
//   Hash  hash of the name, with the name length added into the low 6 bits
//   Name  address of the name inside the source buffer, stored as an int
//         (the name is not copied and not NUL-terminated)
//   Class, Type, Val   what the identifier currently denotes
//   HClass, HType, HVal  NOTE(review): not used in the visible part of this
//         file - presumably saved copies of Class/Type/Val while a local
//         shadows a global; confirm against main().
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
// next - lexer: read the next token from the source text at p.
//
// Results are returned through globals:
//   tk    the token: one of the enum tokens, a raw character code for plain
//         punctuation, '"' for a string literal, or 0 at end of input
//   ival  value of a Num token, or the address of the string data for '"'
//   id    symbol-table entry of the identifier or keyword just read
// White space and any unrecognised character are skipped by looping again.
// Only // comments are recognised; there is no handling of /* */ here.
void next()
{
char *pp;
// the loop ends, leaving tk == 0, when the terminating NUL of the source is reached
while (tk = *p) {
++p;
if (tk == '\n') {
// with the src flag set, echo the source text consumed since the last echo
if (src) {
printf("%d: %.*s", line, p - lp, lp);
lp = p;
}
++line;
}
else if (tk == '#') {
// lines starting with '#' (preprocessor directives) are ignored up to end of line
while (*p != 0 && *p != '\n') ++p;
}
else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
// identifier or keyword: pp marks its first character
pp = p - 1;
// fold every further character into a running hash kept in tk
while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
tk = tk * 147 + *p++;
// add the length into the low bits; expr() later recovers it with (Hash & 63),
// which is only exact for names shorter than 64 characters
tk = (tk << 6) + (p - pp);
// linear search of the symbol table; an entry with Tk == 0 ends the list
id = sym;
while (id[Tk]) {
// hash match first, then confirm by comparing the actual characters
if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
id = id + Idsz;
}
// not found: claim the first free entry; id is left pointing at it
id[Name] = (int)pp;
id[Hash] = tk;
tk = id[Tk] = Id;
return;
}
else if (tk >= '0' && tk <= '9') {
// a non-zero leading digit means decimal
if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; }
else if (*p == 'x' || *p == 'X') {
// hex: (tk & 15) is the digit value for '0'-'9'; for letters it is 1..6,
// and the extra 9 (added for any character >= 'A', either case) makes 10..15
while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F')))
ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0);
}
// a leading 0 without x/X means octal (a lone 0 falls through here with ival == 0)
else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; }
tk = Num;
return;
}
else if (tk == '/') {
if (*p == '/') {
// line comment: skip to end of line, then keep scanning
++p;
while (*p != 0 && *p != '\n') ++p;
}
else {
tk = Div;
return;
}
}
else if (tk == '\'' || tk == '"') {
// character or string literal; tk holds the quote character that closes it
pp = data;
while (*p != 0 && *p != tk) {
// only the \n escape is translated; any other escaped character stands for itself
if ((ival = *p++) == '\\') {
if ((ival = *p++) == 'n') ival = '\n';
}
// string bytes are appended to the data area; for a character literal
// ival simply ends up holding the last character read
if (tk == '"') *data++ = ival;
}
// step over the closing quote
++p;
// string: tk stays '"' and ival is the address where its bytes start
// (no terminator is written here - presumably the data area is pre-zeroed; confirm in main)
// character: reported as a Num token whose value is already in ival
if (tk == '"') ival = (int)pp; else tk = Num;
return;
}
// one- and two-character operators
else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }
// a lone '!' is returned as the character '!' itself
else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
else if (tk == '^') { tk = Xor; return; }
else if (tk == '%') { tk = Mod; return; }
else if (tk == '*') { tk = Mul; return; }
else if (tk == '[') { tk = Brak; return; }
else if (tk == '?') { tk = Cond; return; }
// remaining punctuation is returned as its own character code
else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
}
}
// expr - parse an expression whose binary/postfix operators all have a token
// value >= lev, and push its abstract syntax tree onto the node stack n.
//
// The tree is built downward with *--n, so once a sub-expression has been
// parsed, n points at the root of that sub-tree.  Node layouts produced here:
//   Num               n[1] = value
//   Loc               n[1] = d[Val] of the local or parameter
//   Load              n[1] = type, the address sub-tree starts at n+2
//   Assign            n[1] = type, n[2] = address sub-tree, value starts at n+3
//   Inc / Dec         overwrite the tag of a Load node, same layout
//   Cond              n[1] = condition, n[2] = true branch, n[3] = false branch
//   Fun               n[1] = argument list, n[2] = callee address, n[3] = argument count
//   binary operators  n[1] = left operand, right operand starts at n+2
// The argument list is a chain of link words: each link holds the address of
// the previous argument's link (0 ends the chain) and is followed by that
// argument's sub-tree, so the chain starts at the last argument.
//
// When both operands of a binary operator are Num nodes the result is folded
// into the right-hand node instead of emitting an operator node.  Division and
// modulo are folded only when the divisor is non-zero: folding x/0 would make
// the compiler itself trap, so that case is left as an ordinary Div/Mod node.
//
// On return the global ty holds the type of the expression.  Every syntax
// error prints a message prefixed with the current line and calls exit(-1).
void expr(int lev)
{
  int t, *d, *b;

  // ---- primary expressions and prefix operators ----
  if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
  else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; }
  else if (tk == '"') {
    // string literal: ival is the address at which next() stored the text
    *--n = ival; *--n = Num; next();
    while (tk == '"') next(); // adjacent literals keep appending to the same data
    data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR; // step past the text and re-align
  }
  else if (tk == Sizeof) {
    next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
    ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
    while (tk == Mul) { next(); ty = ty + PTR; }
    if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
    *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num;
    ty = INT;
  }
  else if (tk == Id) {
    d = id; next();
    if (tk == '(') {
      // function call; an identifier with no class yet is looked up with dlsym()
      if (!d[Class]) {
        memcpy(dsym, (char *)d[Name], d[Hash] & 63); dsym[d[Hash] & 63] = 0;
        if (d[Val] = (int)dlsym(0, dsym)) d[Class] = Fun;
      }
      if (d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); }
      next();
      t = 0; b = 0;
      // each argument sub-tree is followed by a link word pointing at the previous link
      while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); }
      next();
      *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class];
      ty = d[Type];
    }
    else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; } // enum constant
    else {
      // variable: push its address, then a Load of that address
      if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; }
      else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; }
      else { printf("%d: undefined variable\n", line); exit(-1); }
      *--n = ty = d[Type]; *--n = Load;
    }
  }
  else if (tk == '(') {
    next();
    if (tk == Int || tk == Char) {
      // cast: only changes the recorded type, no node is emitted
      t = (tk == Int) ? INT : CHAR; next();
      while (tk == Mul) { next(); t = t + PTR; }
      if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
      expr(Inc);
      ty = t;
    }
    else {
      expr(Assign);
      if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
    }
  }
  else if (tk == Mul) {
    // dereference
    next(); expr(Inc);
    if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
    *--n = ty; *--n = Load;
  }
  else if (tk == And) {
    // address-of: drop the Load node so only the address sub-tree remains
    next(); expr(Inc);
    if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); }
    ty = ty + PTR;
  }
  else if (tk == '!') {
    // !x is folded for constants, otherwise built as (x == 0)
    next(); expr(Inc);
    if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; }
    ty = INT;
  }
  else if (tk == '~') {
    // ~x is folded for constants, otherwise built as (x ^ -1)
    next(); expr(Inc);
    if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; }
    ty = INT;
  }
  else if (tk == Add) { next(); expr(Inc); ty = INT; }
  else if (tk == Sub) {
    // -x is folded for constants, otherwise built as (x * -1)
    next(); expr(Inc);
    if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; }
    ty = INT;
  }
  else if (tk == Inc || tk == Dec) {
    // pre-increment/decrement: retag the Load node
    t = tk; next(); expr(Inc);
    if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
  }
  else { printf("%d: bad expression\n", line); exit(-1); }

  // ---- binary and postfix operators ----
  while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
    t = ty; b = n; // type and root of the left operand
    if (tk == Assign) {
      next();
      if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
      expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign; // b+2 skips the Load tag and type
    }
    else if (tk == Cond) {
      next();
      expr(Assign);
      if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
      d = n;
      expr(Cond);
      --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond;
    }
    else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = Lor; } ty = INT; }
    else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; }
    else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; }
    else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; }
    else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; }
    else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; }
    else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; }
    else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; }
    else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; }
    else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; }
    else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; }
    else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; }
    else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; }
    else if (tk == Add) {
      next(); expr(Mul);
      // when the left operand's type is above PTR, scale the right operand by sizeof(int)
      if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
      if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
    }
    else if (tk == Sub) {
      next(); expr(Mul);
      if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
      if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; }
    }
    else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; }
    // a zero divisor is never folded: the division is left for the generated code
    else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num && n[1]) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; }
    else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num && n[1]) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; }
    else if (tk == Inc || tk == Dec) {
      // post-increment/decrement: do the pre-operation, then undo the step in the value
      if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
      *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num;
      *--n = (int)b; *--n = (tk == Inc) ? Sub : Add;
      next();
    }
    else if (tk == Brak) {
      // a[i] is built as a Load of (a + scaled i)
      next(); expr(Assign);
      if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
      if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
      else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
      if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
      *--n = ty = t - PTR; *--n = Load;
    }
    else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
  }
}
void stmt()
{
int *a, *b, *c;
if (tk == If) {
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign); a = n;
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
stmt(); b = n;
if (tk == Else) { next(); stmt(); c = n; } else c = 0;
*--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond;
}
else if (tk == While) {
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign); a = n;
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
stmt();
*--n = (int)a; *--n = While;
}
else if (tk == Return) {
next();
if (tk != ';') { expr(Assign); a = n; } else a = 0;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
*--n = (int)a; *--n = Return;
}
else if (tk == '{') {
next();
*--n = ';';
while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; }
next();
}
else if (tk == ';') {
next(); *--n = ';';
}
else {
expr(Assign);
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
}
}
// gen - emit 32-bit x86 machine code at e for the AST rooted at n.
//
// n shadows the global node stack pointer; it is the root of the sub-tree to
// translate (see the node layouts built by expr() and stmt()).  Every
// expression leaves its value in %eax; binary operators evaluate the right
// operand first, push it, evaluate the left operand and pop the right one into
// %ecx.  When src is set, an assembly-like listing is printed alongside.
// Multi-byte opcodes are stored with one int write and e is then advanced by
// the instruction length only, so later output overwrites the surplus bytes.
// An unknown node tag or an unencodable local offset prints an error and
// calls exit(-1).
void gen(int *n)
{
  int i; char *b;

  i = *n;
  if (i == Num) {
    *e++ = 0xb8; *(int *)e = n[1]; e = e+4; if (src) printf(" movl $%d, %%eax\n",n[1]);
  }
  else if (i == Loc) {
    // The slot is addressed as disp8(%ebp) with disp8 = n[1]*4, a signed byte
    // in -128..127, so the usable slot range is -32..31 (32*4 = 128 would wrap
    // around to -128 and address the wrong slot).
    if (n[1] < -32 || n[1] > 31) { printf("%d: gen(lea) out of bounds\n", line); exit(-1); }
    // mask to one byte before shifting so a negative slot is never left-shifted
    *(int *)e = 0x458d + ((n[1] * 4 & 0xff) << 16); e = e+3; if (src) printf(" leal $%d(%%ebp), %%eax\n", n[1]*4);
  }
  else if (i == Load) {
    gen(n+2); // address into %eax
    if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); }
    else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); }
  }
  else if (i == Assign) {
    gen((int *)n[2]); *e++ = 0x50; if (src) printf(" push %%eax\n"); // destination address
    gen(n+3); *e++ = 0x59; if (src) printf(" pop %%ecx\n"); // value in %eax, address in %ecx
    if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); }
    else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); }
  }
  else if (i == Inc || i == Dec) {
    // load, add the signed step, store back; the new value stays in %eax
    gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n");
    if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); }
    else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); }
    i = ((i == Inc) ? 1 : -1) * ((n[1] > PTR) ? sizeof(int) : sizeof(char));
    *e++ = 0xb9; *(int *)e = i; e = e+4; if (src) printf(" movl $%d, %%ecx\n", i);
    *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n");
    *e++ = 0x59; if (src) printf(" pop %%ecx\n");
    if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); }
    else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); }
  }
  else if (i == Cond) {
    gen((int *)n[1]);
    // b remembers the rel32 field of the forward jump so it can be patched later
    *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq <fwd>\n");
    gen((int *)n[2]);
    if (n[3]) {
      *(int *)b = e+5 - b - 4; // false case lands just past the 5-byte jmp emitted next
      *e++ = 0xe9; b = e; e = e + 4; if (src) printf(" jmp <fwd>\n");
      gen((int *)n[3]);
    }
    *(int *)b = e - b - 4;
  }
  else if (i == Lor) {
    gen((int *)n[1]);
    *(int *)e = 0x850fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jne <fwd>\n");
    gen(n+2);
    *(int *)b = e - b - 4;
  }
  else if (i == Lan) {
    gen((int *)n[1]);
    *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq <fwd>\n");
    gen(n+2);
    *(int *)b = e - b - 4;
  }
  else if (i >= Or && i <= Mod) {
    gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n"); // right operand
    gen((int *)n[1]); *e++ = 0x59; if (src) printf(" pop %%ecx\n"); // left in %eax, right in %ecx
    if (i == Or) { *(int *)e = 0xc809; e = e+2; if (src) printf(" orl %%ecx, %%eax\n"); }
    else if (i == Xor) { *(int *)e = 0xc831; e = e+2; if (src) printf(" xorl %%ecx, %%eax\n"); }
    else if (i == And) { *(int *)e = 0xc821; e = e+2; if (src) printf(" andl %%ecx, %%eax\n"); }
    else if (i >= Eq && i <= Ge) {
      // compare, zero %eax, then set %al from the flags
      *(int *)e = 0xc839; e = e+2; if (src) printf(" cmp %%eax, %%ecx\n");
      *e++ = 0xb8; *(int *)e = 0; e = e+4; if (src) printf(" mov $0, %%eax\n");
      if (i == Eq) { *(int *)e = 0xc0940f; if (src) printf(" sete %%al\n"); }
      else if (i == Ne) { *(int *)e = 0xc0950f; if (src) printf(" setne %%al\n"); }
      else if (i == Lt) { *(int *)e = 0xc09c0f; if (src) printf(" setl %%al\n"); }
      else if (i == Gt) { *(int *)e = 0xc09f0f; if (src) printf(" setg %%al\n"); }
      else if (i == Le) { *(int *)e = 0xc09e0f; if (src) printf(" setle %%al\n"); }
      else { *(int *)e = 0xc09d0f; if (src) printf(" setge %%al\n"); }
      e = e+3;
    }
    else if (i == Shl) { *(int *)e = 0xe0d3; e = e+2; if (src) printf(" shl %%cl, %%eax\n"); }
    else if (i == Shr) { *(int *)e = 0xf8d3; e = e+2; if (src) printf(" sar %%cl, %%eax\n"); }
    else if (i == Add) { *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n"); }
    else if (i == Sub) { *(int *)e = 0xc829; e = e+2; if (src) printf(" subl %%ecx, %%eax\n"); }
    else if (i == Mul) { *(int *)e = 0xc1af0f; e = e+3; if (src) printf(" imul %%ecx, %%eax\n"); }
    else if (i == Div) { *(int *)e = 0xf9f799; e = e+3; if (src) printf(" cltd\n idiv %%ecx, %%eax\n"); }
    else if (i == Mod) { *(int *)e = 0x92f9f799; e=e+4; if (src) printf(" cltd\n idiv %%ecx, %%eax\n xchg %%edx, %%eax\n"); }
  }
  else if (i == Fun) {
    // walk the argument chain (last argument first), pushing each value
    i = n[1];
    while (i) {
      gen(((int *)i)+1); *e++ = 0x50; i = *(int *)i; if (src) printf(" push %%eax\n");
    }
    *e++ = 0xe8; *(int *)e = n[2]-(int)e-4; e = e+4; if (src) printf(" call <off32>\n");
    if (n[3]) {
      // caller removes the arguments from the stack
      *(int *)e = 0xc481; e = e+2;
      *(int *)e = n[3]*4; e = e+4; if (src) printf(" add $%d, %%esp\n", n[3]*4);
    }
  }
  else if (i == While) {
    // layout: jmp cond; body; cond: <condition>; test; jne body
    *e++ = 0xe9; b = e; e = e+4; if (src) printf(" jmp <fwd>\n");
    gen(n+2);
    *(int *)b = e - b - 4;
    gen((int *)n[1]);
    *(int *)e = 0x850fc085; e = e+4; if (src) printf(" test %%eax, %%eax\n");
    *(int *)e = b - e; e = e+4; if (src) printf(" jne $%d\n", b - e);
  }
  else if (i == Return) {
    if (n[1]) gen((int *)n[1]); if (src) printf(" mov %%ebp, %%esp\n");
    *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n");
  }
  else if (i == '{') {
    gen((int *)n[1]); gen(n+2); // earlier statements first, then this one
  }
  else if (i == Enter) {
    // function prologue, optional frame of n[1] ints, body, epilogue
    *(int *)e = 0xe58955; e = e+3; if (src) printf(" push %%ebp;\n mov %%esp, %%ebp\n");
    if (n[1]) {
      *(int *)e = 0xec81; e = e+2;
      *(int *)e = n[1]*4; e = e+4; if (src) printf(" subl $%d, %%esp\n", n[1]*4);
    }
    gen(n+2); if (src) printf(" mov %%ebp, %%esp\n");
    *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n");
  }
  else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); }
}
// main - compile the source file named on the command line to x86 code in
// memory and, unless -s was given, run its main() with the remaining arguments.
//
// usage: c5x86 [-s] file ...
//   -s  print an assembly-like listing while generating code; the compiled
//       program is not executed.
// Returns 0 on success and -1 on a usage, I/O, allocation or declaration
// error (errors found inside expr()/stmt()/gen() exit(-1) directly).
int main(int argc, char **argv)
{
  int fd, bt, ty, poolsz, *idmain, *ast;
  int i, *t; // temps

  --argc; ++argv;
  if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
  if (argc < 1) { printf("usage: c5x86 [-s] file ...\n"); return -1; }

  if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }

  poolsz = 256*1024; // arbitrary size
  if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; }
  if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
  if (!(dsym = malloc(64))) { printf("could not malloc(64) dsym\n"); return -1; }
  if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; }
  ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack

  memset(sym, 0, poolsz);
  memset(data, 0, poolsz);

  // POSIX mmap() reports failure with (void *)-1 rather than a null pointer,
  // while the w32.h replacement returns null, so both results are rejected.
  // NOTE(review): 7 and 0x22 are presumably PROT_READ|PROT_WRITE|PROT_EXEC and
  // MAP_PRIVATE|MAP_ANONYMOUS (Linux values) - confirm for other hosts.
  if (!(e = mmap(0, poolsz, 7, 0x22, -1, 0)) || (int)e == -1) { printf("could not mmap() executable memory\n"); return -1; }

  p = "char else enum if int return sizeof while void main";
  i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
  next(); id[Tk] = Char; // handle void type
  next(); idmain = id; // keep track of main

  if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
  if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; }
  p[i] = 0;
  close(fd);

  // parse declarations
  line = 1;
  next();
  while (tk) {
    bt = INT; // basetype
    if (tk == Int) next();
    else if (tk == Char) { next(); bt = CHAR; }
    else if (tk == Enum) {
      next();
      if (tk != '{') next(); // skip an optional enum tag
      if (tk == '{') {
        next();
        i = 0;
        while (tk != '}') {
          if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; }
          next();
          if (tk == Assign) {
            next();
            n = ast; expr(Cond); // the initializer must fold to a single Num node
            if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; }
            i = n[1];
          }
          id[Class] = Num; id[Type] = INT; id[Val] = i++;
          if (tk == ',') next();
        }
        next();
      }
    }
    while (tk != ';' && tk != '}') {
      ty = bt;
      while (tk == Mul) { next(); ty = ty + PTR; }
      if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
      if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
      next();
      id[Type] = ty;
      if (tk == '(') { // function
        id[Class] = Fun;
        id[Val] = (int)e; // the function's code starts at the current emit position
        next(); i = 2; // first parameter gets slot 2
        while (tk != ')') {
          ty = INT;
          if (tk == Int) next();
          else if (tk == Char) { next(); ty = CHAR; }
          while (tk == Mul) { next(); ty = ty + PTR; }
          if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
          if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
          // save the outer meaning of the name so it can be restored after the function
          id[HClass] = id[Class]; id[Class] = Loc;
          id[HType] = id[Type]; id[Type] = ty;
          id[HVal] = id[Val]; id[Val] = i++;
          next();
          if (tk == ',') next();
        }
        next();
        if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
        i = 0; // locals get slots -1, -2, ...
        next();
        while (tk == Int || tk == Char) {
          bt = (tk == Int) ? INT : CHAR;
          next();
          while (tk != ';') {
            ty = bt;
            while (tk == Mul) { next(); ty = ty + PTR; }
            if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
            if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
            id[HClass] = id[Class]; id[Class] = Loc;
            id[HType] = id[Type]; id[Type] = ty;
            id[HVal] = id[Val]; id[Val] = --i;
            next();
            if (tk == ',') next();
          }
          next();
        }
        // build the body's tree from the top of the AST area, wrap it in an
        // Enter node carrying the local count, and translate it
        n = ast;
        *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; }
        *--n = -i; *--n = Enter;
        gen(n);
        id = sym; // unwind symbol table locals
        while (id[Tk]) {
          if (id[Class] == Loc) {
            id[Class] = id[HClass];
            id[Type] = id[HType];
            id[Val] = id[HVal];
          }
          id = id + Idsz;
        }
      }
      else {
        // global variable: reserve one int of the data area
        id[Class] = Glo;
        id[Val] = (int)data;
        data = data + sizeof(int);
      }
      if (tk == ',') next();
    }
    next();
  }

  if (!idmain[Val]) { printf("main() not defined\n"); return -1; }

  if (!src) {
    // emit a small stub that calls the compiled main(argc, argv), then run it
    t = (int *)e;
    *e++ = 0xb8; *(char ***)e = argv; e = e+4; *e++ = 0x50; // movl $argv, %eax; push %eax
    *e++ = 0xb8; *(int *) e = argc; e = e+4; *e++ = 0x50; // movl $argc, %eax; push %eax
    *e++ = 0xe8; *(int *)e = idmain[Val] - (int)e - 4; e = e+4; // call main
    *e++ = 0x81; *e++ = 0xc4; *(int *)e = 8; e = e+4; // add $8, %esp
    *e++ = 0xc3; // ret
    qsort(dsym, 2, 1, (void *)t); // hack to call a function pointer
    printf("exit(0) from c5x86\n");
  }
  return 0;
}

32
w32.h Normal file
View file

@ -0,0 +1,32 @@
#include <windows.h>
// mmap - minimal Windows stand-in for POSIX mmap().
//
// Maps len bytes starting at offset off of the file behind descriptor fildes
// and returns the address of the view.  addr, prot and flags are ignored: the
// mapping object is always created PAGE_EXECUTE_READWRITE and the view is
// always readable, writable and executable.
// NOTE(review): for fildes == -1, _get_osfhandle() is expected to yield
// INVALID_HANDLE_VALUE, which CreateFileMapping() treats as a request for
// page-file backed (anonymous) memory - confirm against the CRT in use.
//
// Returns NULL on failure (not MAP_FAILED as the POSIX function does).
void *mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
{
  HANDLE fm, h;
  void *map;
  const off_t maxSize = off + (off_t)len;

  (void)addr; (void)prot; (void)flags; // accepted for signature compatibility only

  h = (HANDLE)_get_osfhandle(fildes);
  fm = CreateFileMapping(h, NULL, PAGE_EXECUTE_READWRITE, 0, maxSize, NULL);
  if (fm == NULL) return NULL; // do not hand a null handle to MapViewOfFile/CloseHandle
  map = MapViewOfFile(fm, FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE, 0, off, len);
  CloseHandle(fm); // the view stays valid after the mapping handle is closed
  return map;
}
// dlsym - Windows stand-in for the POSIX symbol lookup.
//
// Resolves name against a fixed list of C library functions and returns the
// matching function's address, or 0 when the name is not in the list.
// handle is accepted for signature compatibility and is not used.
void *dlsym(void *handle, char *name)
{
  void *addr = 0;

  (void)handle;

  if (strcmp(name, "open") == 0) addr = (void *)&open;
  else if (strcmp(name, "read") == 0) addr = (void *)&read;
  else if (strcmp(name, "close") == 0) addr = (void *)&close;
  else if (strcmp(name, "printf") == 0) addr = (void *)&printf;
  else if (strcmp(name, "malloc") == 0) addr = (void *)&malloc;
  else if (strcmp(name, "memset") == 0) addr = (void *)&memset;
  else if (strcmp(name, "memcmp") == 0) addr = (void *)&memcmp;
  else if (strcmp(name, "memcpy") == 0) addr = (void *)&memcpy;
  else if (strcmp(name, "mmap") == 0) addr = (void *)&mmap;
  else if (strcmp(name, "dlsym") == 0) addr = (void *)&dlsym;
  else if (strcmp(name, "qsort") == 0) addr = (void *)&qsort;
  else if (strcmp(name, "exit") == 0) addr = (void *)&exit;

  return addr;
}
// NOTE(review): presumably these rename the including file's CHAR and INT
// type constants so they do not clash with the CHAR and INT typedefs that
// <windows.h> introduces - confirm against the enum in the including file.
#define CHAR TYCHAR
#define INT TYINT