// Written by Robert Swierczek // Ported by Marvin Borner // TODO: Fix number conversion (ascii - 48?) #include #include #include #include #include char *p, *lp, // current position in source code *data; // data/bss pointer int *e, *le, // current position in emitted code *id, // currently parsed identifier *sym, // symbol table (simple list of identifiers) tk, // current token ival, // current token value ty, // current expression type loc, // local variable offset line; // current line number // tokens and classes (operators last and in precedence order) enum { Num = 128, Fun, Sys, Glo, Loc, Id, Char, Else, Enum, If, Int, Return, Sizeof, While, Assign, Cond, Lor, Lan, Or, Xor, And, Eq, Ne, Lt, Gt, Le, Ge, Shl, Shr, Add, Sub, Mul, Div, Mod, Inc, Dec, Brak }; // opcodes enum { LEA, IMM, JMP, JSR, BZ, BNZ, ENT, ADJ, LEV, LI, LC, SI, SC, PSH, OR, XOR, AND, EQ, NE, LT, GT, LE, GE, SHL, SHR, ADD, SUB, MUL, DIV, MOD, OPEN, READ, CLOS, PRTF, MALC, FREE, MSET, MCMP, EXIT }; // types enum { CHAR, INT, PTR }; // identifier offsets (since we can't create an ident struct) enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; void next() { char *pp; while ((tk = *p)) { ++p; if (tk == '\n') { ++line; } else if (tk == '#') { while (*p != 0 && *p != '\n') ++p; } else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { pp = p - 1; while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') tk = tk * 147 + *p++; tk = (tk << 6) + (p - pp); id = sym; while (id[Tk]) { if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { tk = id[Tk]; return; } id = id + Idsz; } id[Name] = (int)pp; id[Hash] = tk; tk = id[Tk] = Id; return; } else if (tk >= '0' && tk <= '9') { if ((ival = tk) - '0') { while (*p >= '0' && *p <= '9') ival = ival * 10 + *p++ - '0'; } else if (*p == 'x' || *p == 'X') { while ((tk = *++p) && ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || (tk >= 'A' && tk <= 'F'))) ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); } else { while (*p >= '0' && *p <= '7') ival = ival * 8 + *p++ - '0'; } tk = Num; return; } else if (tk == '/') { if (*p == '/') { ++p; while (*p != 0 && *p != '\n') ++p; } else { tk = Div; return; } } else if (tk == '\'' || tk == '"') { pp = data; while (*p != 0 && *p != tk) { if ((ival = *p++) == '\\') { if ((ival = *p++) == 'n') ival = '\n'; } if (tk == '"') *data++ = ival; } ++p; if (tk == '"') ival = (int)pp; else tk = Num; return; } else if (tk == '=') { if (*p == '=') { ++p; tk = Eq; } else tk = Assign; return; } else if (tk == '+') { if (*p == '+') { ++p; tk = Inc; } else tk = Add; return; } else if (tk == '-') { if (*p == '-') { ++p; tk = Dec; } else tk = Sub; return; } else if (tk == '!') { if (*p == '=') { ++p; tk = Ne; } return; } else if (tk == '<') { if (*p == '=') { ++p; tk = Le; } else if (*p == '<') { ++p; tk = Shl; } else tk = Lt; return; } else if (tk == '>') { if (*p == '=') { ++p; tk = Ge; } else if (*p == '>') { ++p; tk = Shr; } else tk = Gt; return; } else if (tk == '|') { if (*p == '|') { ++p; tk = Lor; } else tk = Or; return; } else if (tk == '&') { if (*p == '&') { ++p; tk = Lan; } else tk = And; return; } else if (tk == '^') { tk = Xor; return; } else if (tk == '%') { tk = Mod; return; } else if (tk == '*') { tk = Mul; return; } else if (tk == '[') { tk = Brak; return; } else if (tk == '?') { tk = Cond; return; } else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || tk == ')' || tk == ']' || tk == ',' || tk == ':') return; } } void expr(int lev) { int t, *d; if (!tk) { printf("%d: unexpected eof in expression\n", line); exit(-1); } else if (tk == Num) { *++e = IMM; *++e = ival; next(); ty = INT; } else if (tk == '"') { *++e = IMM; *++e = ival; next(); while (tk == '"') next(); data = (char *)(((int)data + sizeof(int)) & -sizeof(int)); ty = PTR; } else if (tk == Sizeof) { next(); if (tk == '(') next(); else { printf("%d: open paren expected in sizeof\n", line); exit(-1); } ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } while (tk == Mul) { next(); ty = ty + PTR; } if (tk == ')') next(); else { printf("%d: close paren expected in sizeof\n", line); exit(-1); } *++e = IMM; *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); ty = INT; } else if (tk == Id) { d = id; next(); if (tk == '(') { next(); t = 0; while (tk != ')') { expr(Assign); *++e = PSH; ++t; if (tk == ',') next(); } next(); if (d[Class] == Sys) *++e = d[Val]; else if (d[Class] == Fun) { *++e = JSR; *++e = d[Val]; } else { printf("%d: bad function call\n", line); exit(-1); } if (t) { *++e = ADJ; *++e = t; } ty = d[Type]; } else if (d[Class] == Num) { *++e = IMM; *++e = d[Val]; ty = INT; } else { if (d[Class] == Loc) { *++e = LEA; *++e = loc - d[Val]; } else if (d[Class] == Glo) { *++e = IMM; *++e = d[Val]; } else { printf("%d: undefined variable\n", line); exit(-1); } *++e = ((ty = d[Type]) == CHAR) ? LC : LI; } } else if (tk == '(') { next(); if (tk == Int || tk == Char) { t = (tk == Int) ? INT : CHAR; next(); while (tk == Mul) { next(); t = t + PTR; } if (tk == ')') next(); else { printf("%d: bad cast\n", line); exit(-1); } expr(Inc); ty = t; } else { expr(Assign); if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } } } else if (tk == Mul) { next(); expr(Inc); if (ty > INT) ty = ty - PTR; else { printf("%d: bad dereference\n", line); exit(-1); } *++e = (ty == CHAR) ? LC : LI; } else if (tk == And) { next(); expr(Inc); if (*e == LC || *e == LI) --e; else { printf("%d: bad address-of\n", line); exit(-1); } ty = ty + PTR; } else if (tk == '!') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = 0; *++e = EQ; ty = INT; } else if (tk == '~') { next(); expr(Inc); *++e = PSH; *++e = IMM; *++e = -1; *++e = XOR; ty = INT; } else if (tk == Add) { next(); expr(Inc); ty = INT; } else if (tk == Sub) { next(); *++e = IMM; if (tk == Num) { *++e = -ival; next(); } else { *++e = -1; *++e = PSH; expr(Inc); *++e = MUL; } ty = INT; } else if (tk == Inc || tk == Dec) { t = tk; next(); expr(Inc); if (*e == LC) { *e = PSH; *++e = LC; } else if (*e == LI) { *e = PSH; *++e = LI; } else { printf("%d: bad lvalue in pre-increment\n", line); exit(-1); } *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = (t == Inc) ? ADD : SUB; *++e = (ty == CHAR) ? SC : SI; } else { printf("%d: bad expression\n", line); exit(-1); } while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method t = ty; if (tk == Assign) { next(); if (*e == LC || *e == LI) *e = PSH; else { printf("%d: bad lvalue in assignment\n", line); exit(-1); } expr(Assign); *++e = ((ty = t) == CHAR) ? SC : SI; } else if (tk == Cond) { next(); *++e = BZ; d = ++e; expr(Assign); if (tk == ':') next(); else { printf("%d: conditional missing colon\n", line); exit(-1); } *d = (int)(e + 3); *++e = JMP; d = ++e; expr(Cond); *d = (int)(e + 1); } else if (tk == Lor) { next(); *++e = BNZ; d = ++e; expr(Lan); *d = (int)(e + 1); ty = INT; } else if (tk == Lan) { next(); *++e = BZ; d = ++e; expr(Or); *d = (int)(e + 1); ty = INT; } else if (tk == Or) { next(); *++e = PSH; expr(Xor); *++e = OR; ty = INT; } else if (tk == Xor) { next(); *++e = PSH; expr(And); *++e = XOR; ty = INT; } else if (tk == And) { next(); *++e = PSH; expr(Eq); *++e = AND; ty = INT; } else if (tk == Eq) { next(); *++e = PSH; expr(Lt); *++e = EQ; ty = INT; } else if (tk == Ne) { next(); *++e = PSH; expr(Lt); *++e = NE; ty = INT; } else if (tk == Lt) { next(); *++e = PSH; expr(Shl); *++e = LT; ty = INT; } else if (tk == Gt) { next(); *++e = PSH; expr(Shl); *++e = GT; ty = INT; } else if (tk == Le) { next(); *++e = PSH; expr(Shl); *++e = LE; ty = INT; } else if (tk == Ge) { next(); *++e = PSH; expr(Shl); *++e = GE; ty = INT; } else if (tk == Shl) { next(); *++e = PSH; expr(Add); *++e = SHL; ty = INT; } else if (tk == Shr) { next(); *++e = PSH; expr(Add); *++e = SHR; ty = INT; } else if (tk == Add) { next(); *++e = PSH; expr(Mul); if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } *++e = ADD; } else if (tk == Sub) { next(); *++e = PSH; expr(Mul); if (t > PTR && t == ty) { *++e = SUB; *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = DIV; ty = INT; } else if ((ty = t) > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; *++e = SUB; } else *++e = SUB; } else if (tk == Mul) { next(); *++e = PSH; expr(Inc); *++e = MUL; ty = INT; } else if (tk == Div) { next(); *++e = PSH; expr(Inc); *++e = DIV; ty = INT; } else if (tk == Mod) { next(); *++e = PSH; expr(Inc); *++e = MOD; ty = INT; } else if (tk == Inc || tk == Dec) { if (*e == LC) { *e = PSH; *++e = LC; } else if (*e == LI) { *e = PSH; *++e = LI; } else { printf("%d: bad lvalue in post-increment\n", line); exit(-1); } *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = (tk == Inc) ? ADD : SUB; *++e = (ty == CHAR) ? SC : SI; *++e = PSH; *++e = IMM; *++e = (ty > PTR) ? sizeof(int) : sizeof(char); *++e = (tk == Inc) ? SUB : ADD; next(); } else if (tk == Brak) { next(); *++e = PSH; expr(Assign); if (tk == ']') next(); else { printf("%d: close bracket expected\n", line); exit(-1); } if (t > PTR) { *++e = PSH; *++e = IMM; *++e = sizeof(int); *++e = MUL; } else if (t < PTR) { printf("%d: pointer type expected\n", line); exit(-1); } *++e = ADD; *++e = ((ty = t - PTR) == CHAR) ? LC : LI; } else { printf("%d: compiler error tk=%d\n", line, tk); exit(-1); } } } void stmt() { int *a, *b; if (tk == If) { next(); if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } expr(Assign); if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } *++e = BZ; b = ++e; stmt(); if (tk == Else) { *b = (int)(e + 3); *++e = JMP; b = ++e; next(); stmt(); } *b = (int)(e + 1); } else if (tk == While) { next(); a = e + 1; if (tk == '(') next(); else { printf("%d: open paren expected\n", line); exit(-1); } expr(Assign); if (tk == ')') next(); else { printf("%d: close paren expected\n", line); exit(-1); } *++e = BZ; b = ++e; stmt(); *++e = JMP; *++e = (int)a; *b = (int)(e + 1); } else if (tk == Return) { next(); if (tk != ';') expr(Assign); *++e = LEV; if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } } else if (tk == '{') { next(); while (tk != '}') stmt(); next(); } else if (tk == ';') { next(); } else { expr(Assign); if (tk == ';') next(); else { printf("%d: semicolon expected\n", line); exit(-1); } } } int main(int argc, char **argv) { int bt, poolsz, *idmain; int *pc, *sp, *bp, a, cycle; // vm registers int i, *t; // temps --argc; ++argv; if (argc < 1) { printf("usage: cc file ...\n"); return -1; } poolsz = 256 * 1024; // arbitrary size if (!(sym = malloc(poolsz))) { printf("could not malloc(%d) symbol area\n", poolsz); return -1; } if (!(le = e = malloc(poolsz))) { printf("could not malloc(%d) text area\n", poolsz); return -1; } if (!(data = malloc(poolsz))) { printf("could not malloc(%d) data area\n", poolsz); return -1; } if (!(sp = malloc(poolsz))) { printf("could not malloc(%d) stack area\n", poolsz); return -1; } memset(sym, 0, poolsz); memset(e, 0, poolsz); memset(data, 0, poolsz); p = strdup("char else enum if int return sizeof while " "open read close printf malloc free memset memcmp exit void main"); i = Char; while (i <= While) { next(); id[Tk] = i++; } // add keywords to symbol table i = OPEN; while (i <= EXIT) { next(); id[Class] = Sys; id[Type] = INT; id[Val] = i++; } // add library to symbol table next(); id[Tk] = Char; // handle void type next(); idmain = id; // keep track of main if (!(lp = p = malloc(poolsz))) { printf("could not malloc(%d) source area\n", poolsz); return -1; } void *file; if (!(file = sread(*argv))) { printf("could not read file %s\n", *argv); return -1; } memcpy(p, file, poolsz - 1); //p[i] = 0; // parse declarations line = 1; next(); while (tk) { bt = INT; // basetype if (tk == Int) next(); else if (tk == Char) { next(); bt = CHAR; } else if (tk == Enum) { next(); if (tk != '{') next(); if (tk == '{') { next(); i = 0; while (tk != '}') { if (tk != Id) { printf("%d: bad enum identifier %d\n", line, tk); return -1; } next(); if (tk == Assign) { next(); if (tk != Num) { printf("%d: bad enum initializer\n", line); return -1; } i = ival; next(); } id[Class] = Num; id[Type] = INT; id[Val] = i++; if (tk == ',') next(); } next(); } } while (tk != ';' && tk != '}') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad global declaration\n", line); return -1; } if (id[Class]) { printf("%d: duplicate global definition\n", line); return -1; } next(); id[Type] = ty; if (tk == '(') { // function id[Class] = Fun; id[Val] = (int)(e + 1); next(); i = 0; while (tk != ')') { ty = INT; if (tk == Int) next(); else if (tk == Char) { next(); ty = CHAR; } while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad parameter declaration\n", line); return -1; } if (id[Class] == Loc) { printf("%d: duplicate parameter definition\n", line); return -1; } id[HClass] = id[Class]; id[Class] = Loc; id[HType] = id[Type]; id[Type] = ty; id[HVal] = id[Val]; id[Val] = i++; next(); if (tk == ',') next(); } next(); if (tk != '{') { printf("%d: bad function definition\n", line); return -1; } loc = ++i; next(); while (tk == Int || tk == Char) { bt = (tk == Int) ? INT : CHAR; next(); while (tk != ';') { ty = bt; while (tk == Mul) { next(); ty = ty + PTR; } if (tk != Id) { printf("%d: bad local declaration\n", line); return -1; } if (id[Class] == Loc) { printf("%d: duplicate local definition\n", line); return -1; } id[HClass] = id[Class]; id[Class] = Loc; id[HType] = id[Type]; id[Type] = ty; id[HVal] = id[Val]; id[Val] = ++i; next(); if (tk == ',') next(); } next(); } *++e = ENT; *++e = i - loc; while (tk != '}') stmt(); *++e = LEV; id = sym; // unwind symbol table locals while (id[Tk]) { if (id[Class] == Loc) { id[Class] = id[HClass]; id[Type] = id[HType]; id[Val] = id[HVal]; } id = id + Idsz; } } else { id[Class] = Glo; id[Val] = (int)data; data = data + sizeof(int); } if (tk == ',') next(); } next(); } if (!(pc = (int *)idmain[Val])) { printf("main() not defined\n"); return -1; } // setup stack bp = sp = (int *)((int)sp + poolsz); *--sp = EXIT; // call exit if main returns *--sp = PSH; t = sp; *--sp = argc; *--sp = (int)argv; *--sp = (int)t; // run... cycle = a = 0; while (1) { i = *pc++; ++cycle; if (i == LEA) a = (int)(bp + *pc++); // load local address else if (i == IMM) a = *pc++; // load global address or immediate else if (i == JMP) pc = (int *)*pc; // jump else if (i == JSR) { *--sp = (int)(pc + 1); pc = (int *)*pc; } // jump to subroutine else if (i == BZ) pc = a ? pc + 1 : (int *)*pc; // branch if zero else if (i == BNZ) pc = a ? (int *)*pc : pc + 1; // branch if not zero else if (i == ENT) { *--sp = (int)bp; bp = sp; sp = sp - *pc++; } // enter subroutine else if (i == ADJ) sp = sp + *pc++; // stack adjust else if (i == LEV) { sp = bp; bp = (int *)*sp++; pc = (int *)*sp++; } // leave subroutine else if (i == LI) a = *(int *)a; // load int else if (i == LC) a = *(char *)a; // load char else if (i == SI) *(int *)*sp++ = a; // store int else if (i == SC) a = *(char *)*sp++ = a; // store char else if (i == PSH) *--sp = a; // push else if (i == OR) a = *sp++ | a; else if (i == XOR) a = *sp++ ^ a; else if (i == AND) a = *sp++ & a; else if (i == EQ) a = *sp++ == a; else if (i == NE) a = *sp++ != a; else if (i == LT) a = *sp++ < a; else if (i == GT) a = *sp++ > a; else if (i == LE) a = *sp++ <= a; else if (i == GE) a = *sp++ >= a; else if (i == SHL) a = *sp++ << a; else if (i == SHR) a = *sp++ >> a; else if (i == ADD) a = *sp++ + a; else if (i == SUB) a = *sp++ - a; else if (i == MUL) a = *sp++ * a; else if (i == DIV) a = *sp++ / a; else if (i == MOD) a = *sp++ % a; /* else if (i == OPEN) */ /* a = open((char *)sp[1], *sp); */ /* else if (i == READ) */ /* a = read(sp[2], (char *)sp[1], *sp); */ /* else if (i == CLOS) */ /* a = close(*sp); */ else if (i == PRTF) { t = sp + pc[1]; a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); } else if (i == MALC) a = (int)malloc(*sp); else if (i == FREE) free((void *)*sp); else if (i == MSET) a = (int)memset((char *)sp[2], sp[1], *sp); else if (i == MCMP) a = memcmp((char *)sp[2], (char *)sp[1], *sp); else if (i == EXIT) { printf("exit(%d) cycle = %d\n", *sp, cycle); return *sp; } else { printf("unknown instruction = %d! cycle = %d\n", i, cycle); return -1; } } }