Compare commits

...

2 commits

Author SHA1 Message Date
rswier
13835a4b41 Include headers 2016-02-26 01:40:06 -05:00
rswier
d8e61a829c AST + Code Generator
Extends c4 by adding Abstract Syntax Tree creation and back-end code
generation
2016-01-27 02:11:00 -05:00
4 changed files with 798 additions and 124 deletions

View file

@ -1,14 +1,20 @@
c4 - C in four functions
========================
c5 - C in four functions + AST + back-end code generator
========================================================
An exercise in minimalism.
This branch extends **c4.c** by adding:
* Abstract Syntax Tree creation
* Back-end code generator function: **gen()**
* Standard ordering of function parameters on stack
* Native x86 version: **c5x86.c**
* Various optimizations
Try the following:
gcc -o c4 c4.c (you may need the -m32 option on 64bit machines)
./c4 hello.c
./c4 -s hello.c
gcc -o c5 c5.c (you may need the -m32 option on 64bit machines)
./c5 hello.c
./c5 -s hello.c
./c4 c4.c hello.c
./c4 c4.c c4.c hello.c
./c5 c5.c hello.c
./c5 c5.c c5.c hello.c

View file

@ -1,33 +1,39 @@
// c4.c - C in four functions
// c5.c - C in five functions
// char, int, and pointer types
// if, while, return, and expression statements
// just enough features to allow self-compilation and a bit more
// c4.c plus
// abstract syntax tree creation
// back-end code generator
// parameters passed in correct order
// various optimizations
// Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <unistd.h>
#include <fcntl.h>
#ifdef _WIN32
#include "w32.h"
#endif
char *p, *lp, // current position in source code
*data; // data/bss pointer
int *e, *le, // current position in emitted code
*id, // currently parsed identifier
*n, // current node in abstract syntax tree
*sym, // symbol table (simple list of identifiers)
tk, // current token
ival, // current token value
ty, // current expression type
loc, // local variable offset
line, // current line number
src, // print source and assembly flag
debug; // print executed instructions
// tokens and classes (operators last and in precedence order)
enum {
Num = 128, Fun, Sys, Glo, Loc, Id,
Num = 128, Fun, Sys, Glo, Loc, Id, Load, Enter,
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};
@ -35,7 +41,7 @@ enum {
// opcodes
enum { LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,
OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT };
OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT };
// types
enum { CHAR, INT, PTR };
@ -56,7 +62,7 @@ void next()
while (le < e) {
printf("%8.4s", &"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[*++le * 5]);
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[*++le * 5]);
if (*le <= ADJ) printf(" %d\n", *++le); else printf("\n");
}
}
@ -131,12 +137,12 @@ void next()
void expr(int lev)
{
int t, *d;
int t, *d, *b;
if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; }
else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; }
else if (tk == '"') {
*++e = IMM; *++e = ival; next();
*--n = ival; *--n = Num; next();
while (tk == '"') next();
data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
}
@ -145,28 +151,26 @@ void expr(int lev)
ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
while (tk == Mul) { next(); ty = ty + PTR; }
if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
*++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int);
*--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num;
ty = INT;
}
else if (tk == Id) {
d = id; next();
if (tk == '(') {
if (d[Class] != Sys && d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); }
next();
t = 0;
while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); }
t = 0; b = 0;
while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); }
next();
if (d[Class] == Sys) *++e = d[Val];
else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; }
else { printf("%d: bad function call\n", line); exit(-1); }
if (t) { *++e = ADJ; *++e = t; }
*--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class];
ty = d[Type];
}
else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; }
else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; }
else {
if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; }
else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; }
if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; }
else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; }
else { printf("%d: undefined variable\n", line); exit(-1); }
*++e = ((ty = d[Type]) == CHAR) ? LC : LI;
*--n = ty = d[Type]; *--n = Load;
}
}
else if (tk == '(') {
@ -186,94 +190,89 @@ void expr(int lev)
else if (tk == Mul) {
next(); expr(Inc);
if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
*++e = (ty == CHAR) ? LC : LI;
*--n = ty; *--n = Load;
}
else if (tk == And) {
next(); expr(Inc);
if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); }
if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); }
ty = ty + PTR;
}
else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; }
else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; }
else if (tk == '!') {
next(); expr(Inc);
if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; }
ty = INT;
}
else if (tk == '~') {
next(); expr(Inc);
if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; }
ty = INT;
}
else if (tk == Add) { next(); expr(Inc); ty = INT; }
else if (tk == Sub) {
next(); *++e = IMM;
if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; }
next(); expr(Inc);
if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; }
ty = INT;
}
else if (tk == Inc || tk == Dec) {
t = tk; next(); expr(Inc);
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
*++e = PSH;
*++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (t == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
}
else { printf("%d: bad expression\n", line); exit(-1); }
while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
t = ty;
t = ty; b = n;
if (tk == Assign) {
next();
if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI;
if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign;
}
else if (tk == Cond) {
next();
*++e = BZ; d = ++e;
expr(Assign);
if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
*d = (int)(e + 3); *++e = JMP; d = ++e;
d = n;
expr(Cond);
*d = (int)(e + 1);
--n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond;
}
else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; }
else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; }
else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; }
else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; }
else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; }
else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; }
else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; }
else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; }
else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; }
else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; }
else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; }
else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; }
else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; }
else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = Lor; } ty = INT; }
else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; }
else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; }
else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; }
else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; }
else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; }
else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; }
else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; }
else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; }
else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; }
else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; }
else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; }
else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; }
else if (tk == Add) {
next(); *++e = PSH; expr(Mul);
if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
*++e = ADD;
next(); expr(Mul);
if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
}
else if (tk == Sub) {
next(); *++e = PSH; expr(Mul);
if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; }
else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; }
else *++e = SUB;
next(); expr(Mul);
if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; }
}
else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; }
else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; }
else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; }
else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; }
else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; }
else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; }
else if (tk == Inc || tk == Dec) {
if (*e == LC) { *e = PSH; *++e = LC; }
else if (*e == LI) { *e = PSH; *++e = LI; }
else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? ADD : SUB;
*++e = (ty == CHAR) ? SC : SI;
*++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char);
*++e = (tk == Inc) ? SUB : ADD;
if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
*--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num;
*--n = (int)b; *--n = (tk == Inc) ? Sub : Add;
next();
}
else if (tk == Brak) {
next(); *++e = PSH; expr(Assign);
next(); expr(Assign);
if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; }
if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
*++e = ADD;
*++e = ((ty = t - PTR) == CHAR) ? LC : LI;
if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
*--n = ty = t - PTR; *--n = Load;
}
else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
}
@ -281,46 +280,39 @@ void expr(int lev)
void stmt()
{
int *a, *b;
int *a, *b, *c;
if (tk == If) {
next();
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
expr(Assign); a = n;
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e;
stmt();
if (tk == Else) {
*b = (int)(e + 3); *++e = JMP; b = ++e;
next();
stmt();
}
*b = (int)(e + 1);
stmt(); b = n;
if (tk == Else) { next(); stmt(); c = n; } else c = 0;
*--n = (int)c; *--n = (int)b; *--n = (int)a; *--n = Cond;
}
else if (tk == While) {
next();
a = e + 1;
if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); }
expr(Assign);
expr(Assign); a = n;
if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
*++e = BZ; b = ++e;
stmt();
*++e = JMP; *++e = (int)a;
*b = (int)(e + 1);
*--n = (int)a; *--n = While;
}
else if (tk == Return) {
next();
if (tk != ';') expr(Assign);
*++e = LEV;
if (tk != ';') { expr(Assign); a = n; } else a = 0;
if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); }
*--n = (int)a; *--n = Return;
}
else if (tk == '{') {
next();
while (tk != '}') stmt();
*--n = ';';
while (tk != '}') { a = n; stmt(); *--n = (int)a; *--n = '{'; }
next();
}
else if (tk == ';') {
next();
next(); *--n = ';';
}
else {
expr(Assign);
@ -328,16 +320,74 @@ void stmt()
}
}
// gen - back-end code generator: walk the AST node at n and append VM
// instructions to the code buffer through the global emit pointer e.
// The parameter n shadows the global AST pointer of the same name.
// n[0] is the node tag; the remaining words depend on the tag (see each case).
// Forward branches are emitted with a placeholder operand that is patched
// once the target address (e + 1) is known.
void gen(int *n)
{
int i, *a, *b;
i = *n;
// leaf nodes: [Num, value] and [Loc, offset]
if (i == Num) { *++e = IMM; *++e = n[1]; }
else if (i == Loc) { *++e = LEA; *++e = n[1]; }
// [Load, type, <address node at n+2>]: compute the address, then load char or int
else if (i == Load) { gen(n+2); *++e = (n[1] == CHAR) ? LC : LI; }
// [Assign, type, ptr-to-address-node, <value node at n+3>]
else if (i == Assign) { gen((int *)n[2]); *++e = PSH; gen(n+3); *++e = (n[1] == CHAR) ? SC : SI; }
// [Inc|Dec, type, <address node at n+2>]: load, add/subtract the step, store back
else if (i == Inc || i == Dec) {
gen(n+2);
*++e = PSH; *++e = (n[1] == CHAR) ? LC : LI; *++e = PSH;
*++e = IMM; *++e = (n[1] > PTR) ? sizeof(int) : sizeof(char);
*++e = (i == Inc) ? ADD : SUB;
*++e = (n[1] == CHAR) ? SC : SI;
}
// [Cond, ptr-to-condition, ptr-to-then, ptr-to-else-or-0]; used for ?: and if/else
else if (i == Cond) {
gen((int *)n[1]);
*++e = BZ; b = ++e;
gen((int *)n[2]);
// with an else part, the BZ target must skip the JMP and its operand (e + 3)
if (n[3]) { *b = (int)(e + 3); *++e = JMP; b = ++e; gen((int *)n[3]); }
*b = (int)(e + 1);
}
// short-circuit operators: [op, ptr-to-left, <right node at n+2>]
else if (i == Lor) { gen((int *)n[1]); *++e = BNZ; b = ++e; gen(n+2); *b = (int)(e + 1); }
else if (i == Lan) { gen((int *)n[1]); *++e = BZ; b = ++e; gen(n+2); *b = (int)(e + 1); }
// binary operators: [op, ptr-to-left, <right node at n+2>]; left is pushed, right stays in the accumulator
else if (i == Or) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = OR; }
else if (i == Xor) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = XOR; }
else if (i == And) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = AND; }
else if (i == Eq) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = EQ; }
else if (i == Ne) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = NE; }
else if (i == Lt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LT; }
else if (i == Gt) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GT; }
else if (i == Le) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = LE; }
else if (i == Ge) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = GE; }
else if (i == Shl) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHL; }
else if (i == Shr) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SHR; }
else if (i == Add) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = ADD; }
else if (i == Sub) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = SUB; }
else if (i == Mul) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MUL; }
else if (i == Div) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = DIV; }
else if (i == Mod) { gen((int *)n[1]); *++e = PSH; gen(n+2); *++e = MOD; }
// call: [Sys|Fun, ptr-to-argument-list, target, argument count]
else if (i == Sys || i == Fun) {
b = (int *)n[1];
// each list cell is a link word followed by the argument expression at b+1;
// the list head is the argument that was parsed last
while (b) { gen(b+1); *++e = PSH; b = (int *)*b; }
// NOTE: only the JSR is conditional; n[2] is always emitted. For Fun it is
// the JSR operand, for Sys it is emitted as the instruction itself.
if (i == Fun) *++e = JSR; *++e = n[2];
// pop the pushed arguments after the call returns
if (n[3]) { *++e = ADJ; *++e = n[3]; }
}
// [While, ptr-to-condition, <body node at n+2>]: jump to the condition first,
// then branch back to the body start (b + 1) while the condition is non-zero
else if (i == While) {
*++e = JMP; b = ++e; gen(n+2); *b = (int)(e + 1);
gen((int *)n[1]);
*++e = BNZ; *++e = (int)(b + 1);
}
// [Return, ptr-to-value-or-0]
else if (i == Return) { if (n[1]) gen((int *)n[1]); *++e = LEV; }
// ['{', ptr-to-earlier-statements, <latest statement at n+2>]: earlier statements are generated first
else if (i == '{') { gen((int *)n[1]); gen(n+2); }
// [Enter, operand for ENT, <function body at n+2>]
else if (i == Enter) { *++e = ENT; *++e = n[1]; gen(n+2); *++e = LEV; }
// ';' is the empty statement and emits nothing; any other tag is an internal error
else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); }
}
int main(int argc, char **argv)
{
int fd, bt, ty, poolsz, *idmain;
int fd, bt, ty, poolsz, *idmain, *ast;
int *pc, *sp, *bp, a, cycle; // vm registers
int i, *t; // temps
--argc; ++argv;
if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
if (argc > 0 && **argv == '-' && (*argv)[1] == 'd') { debug = 1; --argc; ++argv; }
if (argc < 1) { printf("usage: c4 [-s] [-d] file ...\n"); return -1; }
if (argc < 1) { printf("usage: c5 [-s] [-d] file ...\n"); return -1; }
if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }
@ -346,13 +396,15 @@ int main(int argc, char **argv)
if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; }
if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; }
if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; }
ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack
memset(sym, 0, poolsz);
memset(e, 0, poolsz);
memset(data, 0, poolsz);
p = "char else enum if int return sizeof while "
"open read close printf malloc memset memcmp exit void main";
"open read close printf malloc memset memcmp memcpy mmap dlsym qsort exit void main";
i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table
next(); id[Tk] = Char; // handle void type
@ -381,9 +433,9 @@ int main(int argc, char **argv)
next();
if (tk == Assign) {
next();
if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; }
i = ival;
next();
n = ast; expr(Cond);
if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; }
i = n[1];
}
id[Class] = Num; id[Type] = INT; id[Val] = i++;
if (tk == ',') next();
@ -401,7 +453,7 @@ int main(int argc, char **argv)
if (tk == '(') { // function
id[Class] = Fun;
id[Val] = (int)(e + 1);
next(); i = 0;
next(); i = 2;
while (tk != ')') {
ty = INT;
if (tk == Int) next();
@ -417,7 +469,7 @@ int main(int argc, char **argv)
}
next();
if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
loc = ++i;
i = 0;
next();
while (tk == Int || tk == Char) {
bt = (tk == Int) ? INT : CHAR;
@ -429,15 +481,16 @@ int main(int argc, char **argv)
if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
id[HClass] = id[Class]; id[Class] = Loc;
id[HType] = id[Type]; id[Type] = ty;
id[HVal] = id[Val]; id[Val] = ++i;
id[HVal] = id[Val]; id[Val] = --i;
next();
if (tk == ',') next();
}
next();
}
*++e = ENT; *++e = i - loc;
while (tk != '}') stmt();
*++e = LEV;
n = ast;
*--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; }
*--n = -i; *--n = Enter;
gen(n);
id = sym; // unwind symbol table locals
while (id[Tk]) {
if (id[Class] == Loc) {
@ -465,8 +518,8 @@ int main(int argc, char **argv)
sp = (int *)((int)sp + poolsz);
*--sp = EXIT; // call exit if main returns
*--sp = PSH; t = sp;
*--sp = argc;
*--sp = (int)argv;
*--sp = argc;
*--sp = (int)t;
// run...
@ -477,7 +530,7 @@ int main(int argc, char **argv)
printf("%d> %.4s", cycle,
&"LEA ,IMM ,JMP ,JSR ,BZ ,BNZ ,ENT ,ADJ ,LEV ,LI ,LC ,SI ,SC ,PSH ,"
"OR ,XOR ,AND ,EQ ,NE ,LT ,GT ,LE ,GE ,SHL ,SHR ,ADD ,SUB ,MUL ,DIV ,MOD ,"
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,EXIT,"[i * 5]);
"OPEN,READ,CLOS,PRTF,MALC,MSET,MCMP,MCPY,MMAP,DSYM,QSRT,EXIT,"[i * 5]);
if (i <= ADJ) printf(" %d\n", *pc); else printf("\n");
}
if (i == LEA) a = (int)(bp + *pc++); // load local address
@ -512,13 +565,17 @@ int main(int argc, char **argv)
else if (i == DIV) a = *sp++ / a;
else if (i == MOD) a = *sp++ % a;
else if (i == OPEN) a = open((char *)sp[1], *sp);
else if (i == READ) a = read(sp[2], (char *)sp[1], *sp);
else if (i == OPEN) a = open((char *)*sp, sp[1]);
else if (i == READ) a = read(*sp, (char *)sp[1], sp[2]);
else if (i == CLOS) a = close(*sp);
else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); }
else if (i == PRTF) a = printf((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]);
else if (i == MALC) a = (int)malloc(*sp);
else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp);
else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp);
else if (i == MSET) a = (int)memset((char *)*sp, sp[1], sp[2]);
else if (i == MCMP) a = memcmp((char *)*sp, (char *)sp[1], sp[2]);
else if (i == MCPY) a = (int)memcpy((char *)*sp, (char *)sp[1], sp[2]);
else if (i == MMAP) a = (int)mmap((char *)*sp, sp[1], sp[2], sp[3], sp[4], sp[5]);
else if (i == DSYM) a = (int)dlsym((char *)*sp, (char *)sp[1]);
else if (i == QSRT) qsort((char *)sp, sp[1], sp[2], (void *)sp[3]);
else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; }
else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; }
}

579
c5x86.c Normal file
View file

@ -0,0 +1,579 @@
// c5x86.c - C in five functions (native x86 version)
// c4.c plus
// abstract syntax tree creation
// back-end code generator
// parameters passed in correct order
// various optimizations
// Written by Robert Swierczek
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <fcntl.h>
#ifdef _WIN32
#include "w32.h"
#else
#include <sys/mman.h>
#endif
// Compiler state shared by next(), expr(), stmt() and gen().
// lp marks the start of the source text not yet echoed by next() when src is set.
char *p, *lp, // current position in source code
*e, // current position in emitted code
*data, // data/bss pointer
*dsym; // external function lookup name
// dsym is a scratch buffer: expr() copies an identifier into it and
// NUL-terminates it before calling dlsym().
// n moves downward as nodes are added: every node is written with *--n.
// tk is 0 once next() reaches the end of the source text.
int *id, // currently parsed identifier
*n, // current node in abstract syntax tree
*sym, // symbol table (simple list of identifiers)
tk, // current token
ival, // current token value
ty, // current expression type
line, // current line number
src; // print source and assembly flag
// tokens and classes (operators last and in precedence order)
// Named tokens start at 128; next() returns punctuation such as ';' or '('
// as its raw character value, so the two ranges presumably must not overlap.
// expr() relies on the operator order: its loop runs while (tk >= lev).
// Load and Enter are never produced by next(); they are used as AST node tags.
enum {
Num = 128, Fun, Glo, Loc, Id, Load, Enter,
Char, Else, Enum, If, Int, Return, Sizeof, While,
Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak
};
// types
// expr() adds PTR to a type for each level of indirection and subtracts it on dereference.
enum { CHAR, INT, PTR };
// identifier offsets (since we can't create an ident struct)
// Each symbol table entry is Idsz ints wide. Hash holds (hash << 6) + name length,
// so expr() recovers the length with Hash & 63. Name points into the source text.
// HClass/HType/HVal presumably save a global binding while a local of the same
// name is in scope - TODO confirm against main().
enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz };
// next - scan the next token from the source text at p.
//
// Results are returned through globals:
//   tk   - token code: a value from the token enum, a raw punctuation
//          character, or 0 at end of input
//   ival - value of a number or character literal, or the address of a
//          string literal's bytes in the data area
//   id   - symbol table entry of the identifier or keyword just scanned
//
// White space, '#' lines and '//' comments are skipped. When src is set, each
// completed source line is echoed with its line number.
void next()
{
  char *pp;

  while (tk = *p) {
    ++p;
    if (tk == '\n') {
      if (src) {
        // the precision argument of %.*s must be an int, not a pointer difference
        printf("%d: %.*s", line, (int)(p - lp), lp);
        lp = p;
      }
      ++line;
    }
    else if (tk == '#') {
      // preprocessor lines are ignored entirely
      while (*p != 0 && *p != '\n') ++p;
    }
    else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') {
      pp = p - 1;
      while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_')
        tk = tk * 147 + *p++;
      // the low 6 bits of the hash carry the identifier length
      tk = (tk << 6) + (p - pp);
      id = sym;
      // linear search; a zero Tk field marks the first unused entry
      while (id[Tk]) {
        if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; }
        id = id + Idsz;
      }
      // not found: claim the unused entry for a new identifier
      id[Name] = (int)pp;
      id[Hash] = tk;
      tk = id[Tk] = Id;
      return;
    }
    else if (tk >= '0' && tk <= '9') {
      // decimal when the first digit is non-zero, otherwise hex (0x...) or octal
      if (ival = tk - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; }
      else if (*p == 'x' || *p == 'X') {
        while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F')))
          ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0);
      }
      else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; }
      tk = Num;
      return;
    }
    else if (tk == '/') {
      if (*p == '/') {
        ++p;
        while (*p != 0 && *p != '\n') ++p;
      }
      else {
        tk = Div;
        return;
      }
    }
    else if (tk == '\'' || tk == '"') {
      // string bytes are appended to the data area; a character literal only sets ival
      pp = data;
      while (*p != 0 && *p != tk) {
        // only \n is translated; any other escaped character stands for itself.
        // A backslash that is the last byte of input is taken literally so that
        // p never moves past the terminating NUL.
        if ((ival = *p++) == '\\' && *p) {
          if ((ival = *p++) == 'n') ival = '\n';
        }
        if (tk == '"') *data++ = ival;
      }
      // skip the closing quote; an unterminated literal leaves p on the NUL so
      // the next call reports end of input instead of reading out of bounds
      if (*p) ++p;
      if (tk == '"') ival = (int)pp; else tk = Num;
      return;
    }
    else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; }
    else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; }
    else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; }
    else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; }
    else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; }
    else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; }
    else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; }
    else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; }
    else if (tk == '^') { tk = Xor; return; }
    else if (tk == '%') { tk = Mod; return; }
    else if (tk == '*') { tk = Mul; return; }
    else if (tk == '[') { tk = Brak; return; }
    else if (tk == '?') { tk = Cond; return; }
    // these characters are their own token codes; anything else is skipped
    else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return;
  }
}
// expr - parse one expression and build its abstract syntax tree.
//
// lev is the lowest operator token that may still be consumed; because the
// operator tokens are declared in precedence order, the trailing loop runs
// while (tk >= lev).
//
// The tree is built as a downward-growing stack: every word is stored with
// *--n, and on return the global n points at the root node of the expression
// and ty holds its type. Node layouts (n[0] is the tag):
//   [Num, value]                      constant, or address of a global/string
//   [Loc, value]                      local variable (d[Val])
//   [Load, type] + address node       load through the address that follows
//   [Inc|Dec, type] + address node    a Load header retagged in place
//   [op, left] + right node           binary operator, left is a pointer
//   [Assign, type, address] + value   address points past the Load header
//   [Cond, cond, then, else]          three pointers
//   [Fun, args, target, count]        call; args is a linked list of cells,
//                                     each a link word followed by the argument
// Constant operands are folded at parse time where possible.
// Errors are reported with the current line number and terminate the process.
void expr(int lev)
{
  int t, *d, *b;

  if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); }
  else if (tk == Num) { *--n = ival; *--n = Num; next(); ty = INT; }
  else if (tk == '"') {
    *--n = ival; *--n = Num; next();
    // adjacent string literals are concatenated: next() keeps appending to data
    while (tk == '"') next();
    // round data up to the next int boundary (always leaves at least one byte)
    data = (char *)((int)data + sizeof(int) & -sizeof(int)); ty = PTR;
  }
  else if (tk == Sizeof) {
    next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); }
    ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; }
    while (tk == Mul) { next(); ty = ty + PTR; }
    if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); }
    *--n = (ty == CHAR) ? sizeof(char) : sizeof(int); *--n = Num;
    ty = INT;
  }
  else if (tk == Id) {
    d = id; next();
    if (tk == '(') {
      // an identifier with no class is looked up as an external function
      if (!d[Class]) {
        // Hash & 63 is the identifier length (see next())
        memcpy(dsym, (char *)d[Name], d[Hash] & 63); dsym[d[Hash] & 63] = 0;
        if (d[Val] = (int)dlsym(0, dsym)) d[Class] = Fun;
      }
      if (d[Class] != Fun) { printf("%d: bad function call\n", line); exit(-1); }
      next();
      t = 0; b = 0;
      // each argument gets a link word pointing at the previous argument's
      // cell, so b ends up at the argument that was parsed last
      while (tk != ')') { expr(Assign); *--n = (int)b; b = n; ++t; if (tk == ',') next(); }
      next();
      *--n = t; *--n = d[Val]; *--n = (int)b; *--n = d[Class];
      ty = d[Type];
    }
    else if (d[Class] == Num) { *--n = d[Val]; *--n = Num; ty = INT; }
    else {
      if (d[Class] == Loc) { *--n = d[Val]; *--n = Loc; }
      else if (d[Class] == Glo) { *--n = d[Val]; *--n = Num; }
      else { printf("%d: undefined variable\n", line); exit(-1); }
      *--n = ty = d[Type]; *--n = Load;
    }
  }
  else if (tk == '(') {
    next();
    if (tk == Int || tk == Char) {
      // cast: only the static type changes, no node is added
      t = (tk == Int) ? INT : CHAR; next();
      while (tk == Mul) { next(); t = t + PTR; }
      if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); }
      expr(Inc);
      ty = t;
    }
    else {
      expr(Assign);
      if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); }
    }
  }
  else if (tk == Mul) {
    next(); expr(Inc);
    if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); }
    *--n = ty; *--n = Load;
  }
  else if (tk == And) {
    next(); expr(Inc);
    // address-of: drop the two-word Load header, leaving the address node
    if (*n == Load) n = n+2; else { printf("%d: bad address-of\n", line); exit(-1); }
    ty = ty + PTR;
  }
  else if (tk == '!') {
    next(); expr(Inc);
    // !x is folded for constants, otherwise built as x == 0;
    // n+3 is the operand node sitting above the new Num node and link word
    if (*n == Num) n[1] = !n[1]; else { *--n = 0; *--n = Num; --n; *n = (int)(n+3); *--n = Eq; }
    ty = INT;
  }
  else if (tk == '~') {
    next(); expr(Inc);
    // ~x is built as x ^ -1
    if (*n == Num) n[1] = ~n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Xor; }
    ty = INT;
  }
  else if (tk == Add) { next(); expr(Inc); ty = INT; }
  else if (tk == Sub) {
    next(); expr(Inc);
    // -x is built as x * -1
    if (*n == Num) n[1] = -n[1]; else { *--n = -1; *--n = Num; --n; *n = (int)(n+3); *--n = Mul; }
    ty = INT;
  }
  else if (tk == Inc || tk == Dec) {
    t = tk; next(); expr(Inc);
    // pre-increment: retag the Load header so the node becomes [Inc|Dec, type] + address
    if (*n == Load) *n = t; else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); }
  }
  else { printf("%d: bad expression\n", line); exit(-1); }

  while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method
    // t and b remember the type and root node of the left operand
    t = ty; b = n;
    if (tk == Assign) {
      next();
      if (*n != Load) { printf("%d: bad lvalue in assignment\n", line); exit(-1); }
      // b+2 skips the Load header so the node refers to the address, not the value
      expr(Assign); *--n = (int)(b+2); *--n = ty = t; *--n = Assign;
    }
    else if (tk == Cond) {
      next();
      expr(Assign);
      if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); }
      d = n;
      expr(Cond);
      // the else branch is the node just built (n+1 after reserving its pointer slot)
      --n; *n = (int)(n+1); *--n = (int)d; *--n = (int)b; *--n = Cond;
    }
    else if (tk == Lor) { next(); expr(Lan); if (*n==Num && *b==Num) n[1] = b[1] || n[1]; else { *--n = (int)b; *--n = Lor; } ty = INT; }
    else if (tk == Lan) { next(); expr(Or); if (*n==Num && *b==Num) n[1] = b[1] && n[1]; else { *--n = (int)b; *--n = Lan; } ty = INT; }
    else if (tk == Or) { next(); expr(Xor); if (*n==Num && *b==Num) n[1] = b[1] | n[1]; else { *--n = (int)b; *--n = Or; } ty = INT; }
    else if (tk == Xor) { next(); expr(And); if (*n==Num && *b==Num) n[1] = b[1] ^ n[1]; else { *--n = (int)b; *--n = Xor; } ty = INT; }
    else if (tk == And) { next(); expr(Eq); if (*n==Num && *b==Num) n[1] = b[1] & n[1]; else { *--n = (int)b; *--n = And; } ty = INT; }
    else if (tk == Eq) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] == n[1]; else { *--n = (int)b; *--n = Eq; } ty = INT; }
    else if (tk == Ne) { next(); expr(Lt); if (*n==Num && *b==Num) n[1] = b[1] != n[1]; else { *--n = (int)b; *--n = Ne; } ty = INT; }
    else if (tk == Lt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] < n[1]; else { *--n = (int)b; *--n = Lt; } ty = INT; }
    else if (tk == Gt) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] > n[1]; else { *--n = (int)b; *--n = Gt; } ty = INT; }
    else if (tk == Le) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] <= n[1]; else { *--n = (int)b; *--n = Le; } ty = INT; }
    else if (tk == Ge) { next(); expr(Shl); if (*n==Num && *b==Num) n[1] = b[1] >= n[1]; else { *--n = (int)b; *--n = Ge; } ty = INT; }
    else if (tk == Shl) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] << n[1]; else { *--n = (int)b; *--n = Shl; } ty = INT; }
    else if (tk == Shr) { next(); expr(Add); if (*n==Num && *b==Num) n[1] = b[1] >> n[1]; else { *--n = (int)b; *--n = Shr; } ty = INT; }
    else if (tk == Add) {
      next(); expr(Mul);
      // when the left operand points at ints, scale the right operand by sizeof(int)
      if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
      if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
    }
    else if (tk == Sub) {
      next(); expr(Mul);
      if ((ty = t) > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
      if (*n == Num && *b == Num) n[1] = b[1] - n[1]; else { *--n = (int)b; *--n = Sub; }
    }
    else if (tk == Mul) { next(); expr(Inc); if (*n==Num && *b==Num) n[1] = b[1] * n[1]; else { *--n = (int)b; *--n = Mul; } ty = INT; }
    // Division and remainder are folded only when the divisor is non-zero.
    // Folding x/0 would divide by zero inside the compiler itself, so a
    // normal node is emitted instead and the operation happens at run time.
    else if (tk == Div) { next(); expr(Inc); if (*n==Num && *b==Num && n[1]) n[1] = b[1] / n[1]; else { *--n = (int)b; *--n = Div; } ty = INT; }
    else if (tk == Mod) { next(); expr(Inc); if (*n==Num && *b==Num && n[1]) n[1] = b[1] % n[1]; else { *--n = (int)b; *--n = Mod; } ty = INT; }
    else if (tk == Inc || tk == Dec) {
      // post-increment: retag the Load as Inc/Dec, then undo the step on the
      // result so the expression yields the value from before the update
      if (*n == Load) *n = tk; else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); }
      *--n = (ty > PTR) ? sizeof(int) : sizeof(char); *--n = Num;
      *--n = (int)b; *--n = (tk == Inc) ? Sub : Add;
      next();
    }
    else if (tk == Brak) {
      // a[i] is built as a Load of a + i, with i scaled for int-sized elements
      next(); expr(Assign);
      if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); }
      if (t > PTR) { if (*n == Num) n[1] = n[1] * sizeof(int); else { *--n = sizeof(int); *--n = Num; --n; *n = (int)(n+3); *--n = Mul; } }
      else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); }
      if (*n == Num && *b == Num) n[1] = b[1] + n[1]; else { *--n = (int)b; *--n = Add; }
      *--n = ty = t - PTR; *--n = Load;
    }
    else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); }
  }
}
// stmt - parse one statement starting at the current token tk and push its
// abstract syntax tree node onto the node stack n (which grows downward).
//
// Node layouts produced here:
//   if      : Cond, condition, then-branch, else-branch (0 when absent)
//   while   : While, condition, followed in memory by the body node
//   return  : Return, expression (0 when absent)
//   { ... } : a ';' node, then one '{' node per statement linking back to the
//             node that was on top before that statement was parsed
//   ;       : a lone ';' node
// Anything else is parsed as an expression followed by ';'.
// Syntax errors print a message with the current line number and exit(-1).
void stmt()
{
  int *u, *v, *w;

  if (tk == If) {
    next();
    if (tk != '(') { printf("%d: open paren expected\n", line); exit(-1); }
    next();
    expr(Assign);
    u = n;                        // condition
    if (tk != ')') { printf("%d: close paren expected\n", line); exit(-1); }
    next();
    stmt();
    v = n;                        // then-branch
    w = 0;                        // else-branch stays 0 unless one follows
    if (tk == Else) {
      next();
      stmt();
      w = n;
    }
    *--n = (int)w;
    *--n = (int)v;
    *--n = (int)u;
    *--n = Cond;
    return;
  }

  if (tk == While) {
    next();
    if (tk != '(') { printf("%d: open paren expected\n", line); exit(-1); }
    next();
    expr(Assign);
    u = n;                        // condition
    if (tk != ')') { printf("%d: close paren expected\n", line); exit(-1); }
    next();
    stmt();                       // body ends up directly after the While header
    *--n = (int)u;
    *--n = While;
    return;
  }

  if (tk == Return) {
    next();
    u = 0;                        // no return expression by default
    if (tk != ';') {
      expr(Assign);
      u = n;
    }
    if (tk != ';') { printf("%d: semicolon expected\n", line); exit(-1); }
    next();
    *--n = (int)u;
    *--n = Return;
    return;
  }

  if (tk == '{') {
    next();
    *--n = ';';
    while (tk != '}') {
      u = n;                      // everything parsed so far in this block
      stmt();
      *--n = (int)u;
      *--n = '{';
    }
    next();
    return;
  }

  if (tk == ';') {
    next();
    *--n = ';';
    return;
  }

  // expression statement
  expr(Assign);
  if (tk != ';') { printf("%d: semicolon expected\n", line); exit(-1); }
  next();
}
// gen - emit 32-bit x86 machine code for the abstract syntax tree node at n.
//
// Instruction bytes are stored at the global emit pointer e, which is advanced
// past every instruction written. When the global flag src is set, an
// assembly-style line is printed for each instruction as well. Expression
// nodes leave their result in %eax. An unrecognised node kind prints a
// "compiler error" message and exits.
//
// Multi-byte opcodes are stored with a single 4-byte write and e is then
// advanced by the real instruction length; the surplus bytes are overwritten
// by whatever is emitted next.
void gen(int *n)
{
  int i; char *b;

  i = *n;
  if (i == Num) {
    // immediate n[1] -> %eax
    *e++ = 0xb8; *(int *)e = n[1]; e = e+4; if (src) printf(" movl $%d, %%eax\n",n[1]);
  }
  else if (i == Loc) {
    // n[1] is a word offset from %ebp; it is encoded as the one-byte signed
    // displacement n[1]*4, so only -32..31 (bytes -128..124) are representable.
    if (n[1] < -32 || n[1] > 31) { printf("%d: gen(lea) out of bounds\n", line); exit(-1); }
    // mask to a byte before shifting so negative offsets are never left-shifted
    *(int *)e = 0x458d + (((n[1] * 4) & 0xff) << 16); e = e+3; if (src) printf(" leal $%d(%%ebp), %%eax\n", n[1]*4);
  }
  else if (i == Load) {
    // address expression follows at n+2; n[1] is the type of the loaded value
    gen(n+2);
    if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); }
    else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); }
  }
  else if (i == Assign) {
    // n[2] is the destination address expression, n+3 the value; n[1] is the type
    gen((int *)n[2]); *e++ = 0x50; if (src) printf(" push %%eax\n");
    gen(n+3); *e++ = 0x59; if (src) printf(" pop %%ecx\n");
    if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); }
    else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); }
  }
  else if (i == Inc || i == Dec) {
    // address (n+2) is pushed, the value loaded, stepped and stored back;
    // the updated value remains in %eax
    gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n");
    if (n[1] == CHAR) { *(int *)e = 0x00be0f; e = e+3; if (src) printf(" movsbl (%%eax), %%eax\n"); }
    else { *(int *)e = 0x008b; e = e+2; if (src) printf(" movl (%%eax), %%eax\n"); }
    // step is +/-1, scaled to sizeof(int) when the operand type is above PTR
    i = ((i == Inc) ? 1 : -1) * ((n[1] > PTR) ? sizeof(int) : sizeof(char));
    *e++ = 0xb9; *(int *)e = i; e = e+4; if (src) printf(" movl $%d, %%ecx\n", i);
    *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n");
    *e++ = 0x59; if (src) printf(" pop %%ecx\n");
    if (n[1] == CHAR) { *(int *)e = 0x0188; e = e+2; if (src) printf(" movb %%al, (%%ecx)\n"); }
    else { *(int *)e = 0x0189; e = e+2; if (src) printf(" movl %%eax, (%%ecx)\n"); }
  }
  else if (i == Cond) {
    // n[1] condition, n[2] true branch, n[3] false branch (0 when absent)
    gen((int *)n[1]);
    *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq <fwd>\n");
    gen((int *)n[2]);
    if (n[3]) {
      // the conditional jump must land just past the 5-byte jmp emitted next
      *(int *)b = e+5 - b - 4;
      *e++ = 0xe9; b = e; e = e + 4; if (src) printf(" jmp <fwd>\n");
      gen((int *)n[3]);
    }
    *(int *)b = e - b - 4;       // patch whichever forward jump is still open
  }
  else if (i == Lor) {
    // skip the second operand (n+2) when the first (n[1]) is non-zero
    gen((int *)n[1]);
    *(int *)e = 0x850fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jne <fwd>\n");
    gen(n+2);
    *(int *)b = e - b - 4;
  }
  else if (i == Lan) {
    // skip the second operand (n+2) when the first (n[1]) is zero
    gen((int *)n[1]);
    *(int *)e = 0x840fc085; e = e+4; b = e; e = e+4; if (src) printf(" test %%eax, %%eax\n jeq <fwd>\n");
    gen(n+2);
    *(int *)b = e - b - 4;
  }
  else if (i >= Or && i <= Mod) {
    // binary operator: the operand at n+2 is evaluated first and ends up in
    // %ecx, the operand at n[1] is evaluated second and stays in %eax
    gen(n+2); *e++ = 0x50; if (src) printf(" push %%eax\n");
    gen((int *)n[1]); *e++ = 0x59; if (src) printf(" pop %%ecx\n");
    if (i == Or) { *(int *)e = 0xc809; e = e+2; if (src) printf(" orl %%ecx, %%eax\n"); }
    else if (i == Xor) { *(int *)e = 0xc831; e = e+2; if (src) printf(" xorl %%ecx, %%eax\n"); }
    else if (i == And) { *(int *)e = 0xc821; e = e+2; if (src) printf(" andl %%ecx, %%eax\n"); }
    else if (i >= Eq && i <= Ge) {
      // compare, zero %eax, then set %al from the flags
      *(int *)e = 0xc839; e = e+2; if (src) printf(" cmp %%eax, %%ecx\n");
      *e++ = 0xb8; *(int *)e = 0; e = e+4; if (src) printf(" mov $0, %%eax\n");
      if (i == Eq) { *(int *)e = 0xc0940f; if (src) printf(" sete %%al\n"); }
      else if (i == Ne) { *(int *)e = 0xc0950f; if (src) printf(" setne %%al\n"); }
      else if (i == Lt) { *(int *)e = 0xc09c0f; if (src) printf(" setl %%al\n"); }
      else if (i == Gt) { *(int *)e = 0xc09f0f; if (src) printf(" setg %%al\n"); }
      else if (i == Le) { *(int *)e = 0xc09e0f; if (src) printf(" setle %%al\n"); }
      else { *(int *)e = 0xc09d0f; if (src) printf(" setge %%al\n"); }
      e = e+3;
    }
    else if (i == Shl) { *(int *)e = 0xe0d3; e = e+2; if (src) printf(" shl %%cl, %%eax\n"); }
    else if (i == Shr) { *(int *)e = 0xf8d3; e = e+2; if (src) printf(" sar %%cl, %%eax\n"); }
    else if (i == Add) { *(int *)e = 0xc801; e = e+2; if (src) printf(" addl %%ecx, %%eax\n"); }
    else if (i == Sub) { *(int *)e = 0xc829; e = e+2; if (src) printf(" subl %%ecx, %%eax\n"); }
    else if (i == Mul) { *(int *)e = 0xc1af0f; e = e+3; if (src) printf(" imul %%ecx, %%eax\n"); }
    else if (i == Div) { *(int *)e = 0xf9f799; e = e+3; if (src) printf(" cltd\n idiv %%ecx, %%eax\n"); }
    else if (i == Mod) { *(int *)e = 0x92f9f799; e=e+4; if (src) printf(" cltd\n idiv %%ecx, %%eax\n xchg %%edx, %%eax\n"); }
  }
  else if (i == Fun) {
    // n[1] heads a linked list of arguments: word 0 of each entry points to
    // the next entry, the argument expression starts at word 1
    i = n[1];
    while (i) {
      gen(((int *)i)+1); *e++ = 0x50; i = *(int *)i; if (src) printf(" push %%eax\n");
    }
    // n[2] is the absolute target address, converted to a call-relative offset
    *e++ = 0xe8; *(int *)e = n[2]-(int)e-4; e = e+4; if (src) printf(" call <off32>\n");
    if (n[3]) {
      // drop n[3] argument words from the stack after the call
      *(int *)e = 0xc481; e = e+2;
      *(int *)e = n[3]*4; e = e+4; if (src) printf(" add $%d, %%esp\n", n[3]*4);
    }
  }
  else if (i == While) {
    // layout: jmp to the condition; body (n+2); condition (n[1]); jne back to body
    *e++ = 0xe9; b = e; e = e+4; if (src) printf(" jmp <fwd>\n");
    gen(n+2);
    *(int *)b = e - b - 4;
    gen((int *)n[1]);
    *(int *)e = 0x850fc085; e = e+4; if (src) printf(" test %%eax, %%eax\n");
    // displacement from the end of the jne back to the body start at b+4;
    // computed once so the listing shows the value that is actually encoded
    i = b - e;
    *(int *)e = i; e = e+4; if (src) printf(" jne $%d\n", i);
  }
  else if (i == Return) {
    // optional return expression in n[1], then the function epilogue
    if (n[1]) gen((int *)n[1]);
    if (src) printf(" mov %%ebp, %%esp\n");
    *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n");
  }
  else if (i == '{') {
    // statement list: everything parsed earlier (n[1]) first, then this statement
    gen((int *)n[1]); gen(n+2);
  }
  else if (i == Enter) {
    // function prologue; n[1] is the number of local words to reserve
    *(int *)e = 0xe58955; e = e+3; if (src) printf(" push %%ebp;\n mov %%esp, %%ebp\n");
    if (n[1]) {
      *(int *)e = 0xec81; e = e+2;
      *(int *)e = n[1]*4; e = e+4; if (src) printf(" subl $%d, %%esp\n", n[1]*4);
    }
    // function body, followed by an epilogue for falling off the end
    gen(n+2); if (src) printf(" mov %%ebp, %%esp\n");
    *(int *)e = 0xc35dec89; e = e+4; if (src) printf(" pop %%ebp\n ret\n");
  }
  else if (i != ';') { printf("%d: compiler error gen=%d\n", line, i); exit(-1); }
}
// main - compile the C source file named on the command line to x86 code in
// memory and, unless -s was given, run its main() with the remaining arguments.
//
// Usage: c5x86 [-s] file ...
//   -s  sets the global src flag (gen() then prints an assembly-style listing)
//       and suppresses execution of the compiled program.
//
// Returns 0 on success and -1 on any set-up or declaration error; errors found
// by the statement/expression parser or by gen() terminate through exit(-1).
int main(int argc, char **argv)
{
  int fd, bt, ty, poolsz, *idmain, *ast;
  int i, *t; // temps

  --argc; ++argv;
  if (argc > 0 && **argv == '-' && (*argv)[1] == 's') { src = 1; --argc; ++argv; }
  if (argc < 1) { printf("usage: c5x86 [-s] file ...\n"); return -1; }

  if ((fd = open(*argv, 0)) < 0) { printf("could not open(%s)\n", *argv); return -1; }

  poolsz = 256*1024; // arbitrary size
  if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; }
  if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; }
  if (!(dsym = malloc(64))) { printf("could not malloc(64) dsym\n"); return -1; }
  if (!(ast = malloc(poolsz))) { printf("could not malloc(%d) abstract syntax tree area\n", poolsz); return -1; }
  ast = (int *)((int)ast + poolsz); // abstract syntax tree is most efficiently built as a stack

  memset(sym, 0, poolsz);
  memset(data, 0, poolsz);

  // POSIX mmap() reports failure as (void *)-1 rather than NULL, while the
  // w32.h replacement returns NULL, so both values are treated as failure.
  e = mmap(0, poolsz, 7, 0x22, -1, 0);
  if (!e || (int)e == -1) { printf("could not mmap() executable memory\n"); return -1; }

  p = "char else enum if int return sizeof while void main";
  i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table
  next(); id[Tk] = Char; // handle void type
  next(); idmain = id; // keep track of main

  if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; }
  if ((i = read(fd, p, poolsz-1)) <= 0) { printf("read() returned %d\n", i); return -1; }
  p[i] = 0;
  close(fd);

  // parse declarations
  line = 1;
  next();
  while (tk) {
    bt = INT; // basetype
    if (tk == Int) next();
    else if (tk == Char) { next(); bt = CHAR; }
    else if (tk == Enum) {
      next();
      if (tk != '{') next(); // skip an optional enum tag
      if (tk == '{') {
        next();
        i = 0;
        while (tk != '}') {
          if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; }
          next();
          if (tk == Assign) {
            next();
            // the initializer must fold to a single Num node
            n = ast; expr(Cond);
            if (*n != Num) { printf("%d: bad enum initializer\n", line); return -1; }
            i = n[1];
          }
          id[Class] = Num; id[Type] = INT; id[Val] = i++;
          if (tk == ',') next();
        }
        next();
      }
    }
    while (tk != ';' && tk != '}') {
      ty = bt;
      while (tk == Mul) { next(); ty = ty + PTR; }
      if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; }
      if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; }
      next();
      id[Type] = ty;
      if (tk == '(') { // function
        id[Class] = Fun;
        id[Val] = (int)e; // the function's code starts at the current emit position
        next(); i = 2;    // parameters are numbered upward from 2
        while (tk != ')') {
          ty = INT;
          if (tk == Int) next();
          else if (tk == Char) { next(); ty = CHAR; }
          while (tk == Mul) { next(); ty = ty + PTR; }
          if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; }
          if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; }
          // save the global meaning of the name so it can be restored afterwards
          id[HClass] = id[Class]; id[Class] = Loc;
          id[HType] = id[Type]; id[Type] = ty;
          id[HVal] = id[Val]; id[Val] = i++;
          next();
          if (tk == ',') next();
        }
        next();
        if (tk != '{') { printf("%d: bad function definition\n", line); return -1; }
        i = 0; // locals are numbered downward from -1
        next();
        while (tk == Int || tk == Char) {
          bt = (tk == Int) ? INT : CHAR;
          next();
          while (tk != ';') {
            ty = bt;
            while (tk == Mul) { next(); ty = ty + PTR; }
            if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; }
            if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; }
            id[HClass] = id[Class]; id[Class] = Loc;
            id[HType] = id[Type]; id[Type] = ty;
            id[HVal] = id[Val]; id[Val] = --i;
            next();
            if (tk == ',') next();
          }
          next();
        }
        // build the tree for the whole body, wrap it in an Enter node that
        // carries the local count, and generate code for it
        n = ast;
        *--n = ';'; while (tk != '}') { t = n; stmt(); *--n = (int)t; *--n = '{'; }
        *--n = -i; *--n = Enter;
        gen(n);
        id = sym; // unwind symbol table locals
        while (id[Tk]) {
          if (id[Class] == Loc) {
            id[Class] = id[HClass];
            id[Type] = id[HType];
            id[Val] = id[HVal];
          }
          id = id + Idsz;
        }
      }
      else {
        id[Class] = Glo;
        id[Val] = (int)data;
        data = data + sizeof(int);
      }
      if (tk == ',') next();
    }
    next();
  }

  if (!idmain[Val]) { printf("main() not defined\n"); return -1; }

  if (!src) {
    // emit a small stub that calls the compiled main(argc, argv)
    t = (int *)e;
    *e++ = 0xb8; *(char ***)e = argv; e = e+4; *e++ = 0x50; // movl $argv, %eax; push %eax
    *e++ = 0xb8; *(int *) e = argc; e = e+4; *e++ = 0x50; // movl $argc, %eax; push %eax
    *e++ = 0xe8; *(int *)e = idmain[Val] - (int)e - 4; e = e+4; // call main
    *e++ = 0x81; *e++ = 0xc4; *(int *)e = 8; e = e+4; // add $8, %esp
    *e++ = 0xc3; // ret
    qsort(dsym, 2, 1, (void *)t); // hack to call a function pointer
    printf("exit(0) from c5x86\n");
  }
  return 0;
}

32
w32.h Normal file
View file

@ -0,0 +1,32 @@
#include <windows.h>
void *mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
{
HANDLE fm, h;
void *map;
const off_t maxSize = off + (off_t)len;
h = (HANDLE)_get_osfhandle(fildes);
fm = CreateFileMapping(h, NULL, PAGE_EXECUTE_READWRITE, 0, maxSize, NULL);
map = MapViewOfFile(fm, FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE, 0, off, len);
CloseHandle(fm);
return map;
}
// dlsym - fixed-table replacement for dlsym(): translate the name of one of
// the supported library functions into its address.
// handle is ignored. Returns 0 when name is not one of the known functions.
void *dlsym(void *handle, char *name)
{
  void *fn;

  fn = 0;
  if      (strcmp(name, "open"  ) == 0) fn = &open;
  else if (strcmp(name, "read"  ) == 0) fn = &read;
  else if (strcmp(name, "close" ) == 0) fn = &close;
  else if (strcmp(name, "printf") == 0) fn = &printf;
  else if (strcmp(name, "malloc") == 0) fn = &malloc;
  else if (strcmp(name, "memset") == 0) fn = &memset;
  else if (strcmp(name, "memcmp") == 0) fn = &memcmp;
  else if (strcmp(name, "memcpy") == 0) fn = &memcpy;
  else if (strcmp(name, "mmap"  ) == 0) fn = &mmap;
  else if (strcmp(name, "dlsym" ) == 0) fn = &dlsym;
  else if (strcmp(name, "qsort" ) == 0) fn = &qsort;
  else if (strcmp(name, "exit"  ) == 0) fn = &exit;
  return fn;
}
#define CHAR TYCHAR
#define INT TYINT