diff options
Diffstat (limited to 'apps/cc.c')
-rw-r--r-- | apps/cc.c | 1083 |
1 files changed, 0 insertions, 1083 deletions
diff --git a/apps/cc.c b/apps/cc.c deleted file mode 100644 index 305d82e..0000000 --- a/apps/cc.c +++ /dev/null @@ -1,1083 +0,0 @@ -// Written by Robert Swierczek -// Ported by Marvin Borner -// TODO: Fix number conversion (ascii - 48?) - -#include <def.h> -#include <mem.h> -#include <print.h> -#include <str.h> -#include <sys.h> - -char *p, *lp, // current position in source code - *data; // data/bss pointer - -int *e, *le, // current position in emitted code - *id, // currently parsed identifier - *sym, // symbol table (simple list of identifiers) - tk, // current token - ival, // current token value - ty, // current expression type - loc, // local variable offset - line; // current line number - -// tokens and classes (operators last and in precedence order) -enum { - Num = 128, - Fun, - Sys, - Glo, - Loc, - Id, - Char, - Else, - Enum, - If, - Int, - Return, - Sizeof, - While, - Assign, - Cond, - Lor, - Lan, - Or, - Xor, - And, - Eq, - Ne, - Lt, - Gt, - Le, - Ge, - Shl, - Shr, - Add, - Sub, - Mul, - Div, - Mod, - Inc, - Dec, - Brak -}; - -// opcodes -enum { - LEA, - IMM, - JMP, - JSR, - BZ, - BNZ, - ENT, - ADJ, - LEV, - LI, - LC, - SI, - SC, - PSH, - OR, - XOR, - AND, - EQ, - NE, - LT, - GT, - LE, - GE, - SHL, - SHR, - ADD, - SUB, - MUL, - DIV, - MOD, - OPEN, - READ, - CLOS, - PRTF, - MALC, - FREE, - MSET, - MCMP, - EXIT -}; - -// types -enum { CHAR, INT, PTR }; - -// identifier offsets (since we can't create an ident struct) -enum { Tk, Hash, Name, Class, Type, Val, HClass, HType, HVal, Idsz }; - -void next() -{ - char *pp; - - while ((tk = *p)) { - ++p; - if (tk == '\n') { - ++line; - } else if (tk == '#') { - while (*p != 0 && *p != '\n') - ++p; - } else if ((tk >= 'a' && tk <= 'z') || (tk >= 'A' && tk <= 'Z') || tk == '_') { - pp = p - 1; - while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || - (*p >= '0' && *p <= '9') || *p == '_') - tk = tk * 147 + *p++; - tk = (tk << 6) + (p - pp); - id = sym; - while (id[Tk]) { - if (tk == id[Hash] && !memcmp((char *)id[Name], pp, p - pp)) { - tk = id[Tk]; - return; - } - id = id + Idsz; - } - id[Name] = (int)pp; - id[Hash] = tk; - tk = id[Tk] = Id; - return; - } else if (tk >= '0' && tk <= '9') { - if ((ival = tk) - '0') { - while (*p >= '0' && *p <= '9') - ival = ival * 10 + *p++ - '0'; - } else if (*p == 'x' || *p == 'X') { - while ((tk = *++p) && - ((tk >= '0' && tk <= '9') || (tk >= 'a' && tk <= 'f') || - (tk >= 'A' && tk <= 'F'))) - ival = ival * 16 + (tk & 15) + (tk >= 'A' ? 9 : 0); - } else { - while (*p >= '0' && *p <= '7') - ival = ival * 8 + *p++ - '0'; - } - tk = Num; - return; - } else if (tk == '/') { - if (*p == '/') { - ++p; - while (*p != 0 && *p != '\n') - ++p; - } else { - tk = Div; - return; - } - } else if (tk == '\'' || tk == '"') { - pp = data; - while (*p != 0 && *p != tk) { - if ((ival = *p++) == '\\') { - if ((ival = *p++) == 'n') - ival = '\n'; - } - if (tk == '"') - *data++ = ival; - } - ++p; - if (tk == '"') - ival = (int)pp; - else - tk = Num; - return; - } else if (tk == '=') { - if (*p == '=') { - ++p; - tk = Eq; - } else - tk = Assign; - return; - } else if (tk == '+') { - if (*p == '+') { - ++p; - tk = Inc; - } else - tk = Add; - return; - } else if (tk == '-') { - if (*p == '-') { - ++p; - tk = Dec; - } else - tk = Sub; - return; - } else if (tk == '!') { - if (*p == '=') { - ++p; - tk = Ne; - } - return; - } else if (tk == '<') { - if (*p == '=') { - ++p; - tk = Le; - } else if (*p == '<') { - ++p; - tk = Shl; - } else - tk = Lt; - return; - } else if (tk == '>') { - if (*p == '=') { - ++p; - tk = Ge; - } else if (*p == '>') { - ++p; - tk = Shr; - } else - tk = Gt; - return; - } else if (tk == '|') { - if (*p == '|') { - ++p; - tk = Lor; - } else - tk = Or; - return; - } else if (tk == '&') { - if (*p == '&') { - ++p; - tk = Lan; - } else - tk = And; - return; - } else if (tk == '^') { - tk = Xor; - return; - } else if (tk == '%') { - tk = Mod; - return; - } else if (tk == '*') { - tk = Mul; - return; - } else if (tk == '[') { - tk = Brak; - return; - } else if (tk == '?') { - tk = Cond; - return; - } else if (tk == '~' || tk == ';' || tk == '{' || tk == '}' || tk == '(' || - tk == ')' || tk == ']' || tk == ',' || tk == ':') - return; - } -} - -void expr(int lev) -{ - int t, *d; - - if (!tk) { - printf("%d: unexpected eof in expression\n", line); - exit(-1); - } else if (tk == Num) { - *++e = IMM; - *++e = ival; - next(); - ty = INT; - } else if (tk == '"') { - *++e = IMM; - *++e = ival; - next(); - while (tk == '"') - next(); - data = (char *)(((int)data + sizeof(int)) & -sizeof(int)); - ty = PTR; - } else if (tk == Sizeof) { - next(); - if (tk == '(') - next(); - else { - printf("%d: open paren expected in sizeof\n", line); - exit(-1); - } - ty = INT; - if (tk == Int) - next(); - else if (tk == Char) { - next(); - ty = CHAR; - } - while (tk == Mul) { - next(); - ty = ty + PTR; - } - if (tk == ')') - next(); - else { - printf("%d: close paren expected in sizeof\n", line); - exit(-1); - } - *++e = IMM; - *++e = (ty == CHAR) ? sizeof(char) : sizeof(int); - ty = INT; - } else if (tk == Id) { - d = id; - next(); - if (tk == '(') { - next(); - t = 0; - while (tk != ')') { - expr(Assign); - *++e = PSH; - ++t; - if (tk == ',') - next(); - } - next(); - if (d[Class] == Sys) - *++e = d[Val]; - else if (d[Class] == Fun) { - *++e = JSR; - *++e = d[Val]; - } else { - printf("%d: bad function call\n", line); - exit(-1); - } - if (t) { - *++e = ADJ; - *++e = t; - } - ty = d[Type]; - } else if (d[Class] == Num) { - *++e = IMM; - *++e = d[Val]; - ty = INT; - } else { - if (d[Class] == Loc) { - *++e = LEA; - *++e = loc - d[Val]; - } else if (d[Class] == Glo) { - *++e = IMM; - *++e = d[Val]; - } else { - printf("%d: undefined variable\n", line); - exit(-1); - } - *++e = ((ty = d[Type]) == CHAR) ? LC : LI; - } - } else if (tk == '(') { - next(); - if (tk == Int || tk == Char) { - t = (tk == Int) ? INT : CHAR; - next(); - while (tk == Mul) { - next(); - t = t + PTR; - } - if (tk == ')') - next(); - else { - printf("%d: bad cast\n", line); - exit(-1); - } - expr(Inc); - ty = t; - } else { - expr(Assign); - if (tk == ')') - next(); - else { - printf("%d: close paren expected\n", line); - exit(-1); - } - } - } else if (tk == Mul) { - next(); - expr(Inc); - if (ty > INT) - ty = ty - PTR; - else { - printf("%d: bad dereference\n", line); - exit(-1); - } - *++e = (ty == CHAR) ? LC : LI; - } else if (tk == And) { - next(); - expr(Inc); - if (*e == LC || *e == LI) - --e; - else { - printf("%d: bad address-of\n", line); - exit(-1); - } - ty = ty + PTR; - } else if (tk == '!') { - next(); - expr(Inc); - *++e = PSH; - *++e = IMM; - *++e = 0; - *++e = EQ; - ty = INT; - } else if (tk == '~') { - next(); - expr(Inc); - *++e = PSH; - *++e = IMM; - *++e = -1; - *++e = XOR; - ty = INT; - } else if (tk == Add) { - next(); - expr(Inc); - ty = INT; - } else if (tk == Sub) { - next(); - *++e = IMM; - if (tk == Num) { - *++e = -ival; - next(); - } else { - *++e = -1; - *++e = PSH; - expr(Inc); - *++e = MUL; - } - ty = INT; - } else if (tk == Inc || tk == Dec) { - t = tk; - next(); - expr(Inc); - if (*e == LC) { - *e = PSH; - *++e = LC; - } else if (*e == LI) { - *e = PSH; - *++e = LI; - } else { - printf("%d: bad lvalue in pre-increment\n", line); - exit(-1); - } - *++e = PSH; - *++e = IMM; - *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (t == Inc) ? ADD : SUB; - *++e = (ty == CHAR) ? SC : SI; - } else { - printf("%d: bad expression\n", line); - exit(-1); - } - - while (tk >= lev) { // "precedence climbing" or "Top Down Operator Precedence" method - t = ty; - if (tk == Assign) { - next(); - if (*e == LC || *e == LI) - *e = PSH; - else { - printf("%d: bad lvalue in assignment\n", line); - exit(-1); - } - expr(Assign); - *++e = ((ty = t) == CHAR) ? SC : SI; - } else if (tk == Cond) { - next(); - *++e = BZ; - d = ++e; - expr(Assign); - if (tk == ':') - next(); - else { - printf("%d: conditional missing colon\n", line); - exit(-1); - } - *d = (int)(e + 3); - *++e = JMP; - d = ++e; - expr(Cond); - *d = (int)(e + 1); - } else if (tk == Lor) { - next(); - *++e = BNZ; - d = ++e; - expr(Lan); - *d = (int)(e + 1); - ty = INT; - } else if (tk == Lan) { - next(); - *++e = BZ; - d = ++e; - expr(Or); - *d = (int)(e + 1); - ty = INT; - } else if (tk == Or) { - next(); - *++e = PSH; - expr(Xor); - *++e = OR; - ty = INT; - } else if (tk == Xor) { - next(); - *++e = PSH; - expr(And); - *++e = XOR; - ty = INT; - } else if (tk == And) { - next(); - *++e = PSH; - expr(Eq); - *++e = AND; - ty = INT; - } else if (tk == Eq) { - next(); - *++e = PSH; - expr(Lt); - *++e = EQ; - ty = INT; - } else if (tk == Ne) { - next(); - *++e = PSH; - expr(Lt); - *++e = NE; - ty = INT; - } else if (tk == Lt) { - next(); - *++e = PSH; - expr(Shl); - *++e = LT; - ty = INT; - } else if (tk == Gt) { - next(); - *++e = PSH; - expr(Shl); - *++e = GT; - ty = INT; - } else if (tk == Le) { - next(); - *++e = PSH; - expr(Shl); - *++e = LE; - ty = INT; - } else if (tk == Ge) { - next(); - *++e = PSH; - expr(Shl); - *++e = GE; - ty = INT; - } else if (tk == Shl) { - next(); - *++e = PSH; - expr(Add); - *++e = SHL; - ty = INT; - } else if (tk == Shr) { - next(); - *++e = PSH; - expr(Add); - *++e = SHR; - ty = INT; - } else if (tk == Add) { - next(); - *++e = PSH; - expr(Mul); - if ((ty = t) > PTR) { - *++e = PSH; - *++e = IMM; - *++e = sizeof(int); - *++e = MUL; - } - *++e = ADD; - } else if (tk == Sub) { - next(); - *++e = PSH; - expr(Mul); - if (t > PTR && t == ty) { - *++e = SUB; - *++e = PSH; - *++e = IMM; - *++e = sizeof(int); - *++e = DIV; - ty = INT; - } else if ((ty = t) > PTR) { - *++e = PSH; - *++e = IMM; - *++e = sizeof(int); - *++e = MUL; - *++e = SUB; - } else - *++e = SUB; - } else if (tk == Mul) { - next(); - *++e = PSH; - expr(Inc); - *++e = MUL; - ty = INT; - } else if (tk == Div) { - next(); - *++e = PSH; - expr(Inc); - *++e = DIV; - ty = INT; - } else if (tk == Mod) { - next(); - *++e = PSH; - expr(Inc); - *++e = MOD; - ty = INT; - } else if (tk == Inc || tk == Dec) { - if (*e == LC) { - *e = PSH; - *++e = LC; - } else if (*e == LI) { - *e = PSH; - *++e = LI; - } else { - printf("%d: bad lvalue in post-increment\n", line); - exit(-1); - } - *++e = PSH; - *++e = IMM; - *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (tk == Inc) ? ADD : SUB; - *++e = (ty == CHAR) ? SC : SI; - *++e = PSH; - *++e = IMM; - *++e = (ty > PTR) ? sizeof(int) : sizeof(char); - *++e = (tk == Inc) ? SUB : ADD; - next(); - } else if (tk == Brak) { - next(); - *++e = PSH; - expr(Assign); - if (tk == ']') - next(); - else { - printf("%d: close bracket expected\n", line); - exit(-1); - } - if (t > PTR) { - *++e = PSH; - *++e = IMM; - *++e = sizeof(int); - *++e = MUL; - } else if (t < PTR) { - printf("%d: pointer type expected\n", line); - exit(-1); - } - *++e = ADD; - *++e = ((ty = t - PTR) == CHAR) ? LC : LI; - } else { - printf("%d: compiler error tk=%d\n", line, tk); - exit(-1); - } - } -} - -void stmt() -{ - int *a, *b; - - if (tk == If) { - next(); - if (tk == '(') - next(); - else { - printf("%d: open paren expected\n", line); - exit(-1); - } - expr(Assign); - if (tk == ')') - next(); - else { - printf("%d: close paren expected\n", line); - exit(-1); - } - *++e = BZ; - b = ++e; - stmt(); - if (tk == Else) { - *b = (int)(e + 3); - *++e = JMP; - b = ++e; - next(); - stmt(); - } - *b = (int)(e + 1); - } else if (tk == While) { - next(); - a = e + 1; - if (tk == '(') - next(); - else { - printf("%d: open paren expected\n", line); - exit(-1); - } - expr(Assign); - if (tk == ')') - next(); - else { - printf("%d: close paren expected\n", line); - exit(-1); - } - *++e = BZ; - b = ++e; - stmt(); - *++e = JMP; - *++e = (int)a; - *b = (int)(e + 1); - } else if (tk == Return) { - next(); - if (tk != ';') - expr(Assign); - *++e = LEV; - if (tk == ';') - next(); - else { - printf("%d: semicolon expected\n", line); - exit(-1); - } - } else if (tk == '{') { - next(); - while (tk != '}') - stmt(); - next(); - } else if (tk == ';') { - next(); - } else { - expr(Assign); - if (tk == ';') - next(); - else { - printf("%d: semicolon expected\n", line); - exit(-1); - } - } -} - -int main(int argc, char **argv) -{ - int bt, poolsz, *idmain; - int *pc, *sp, *bp, a, cycle; // vm registers - int i, *t; // temps - - --argc; - ++argv; - if (argc < 1) { - printf("usage: cc file ...\n"); - return -1; - } - - poolsz = 256 * 1024; // arbitrary size - if (!(sym = malloc(poolsz))) { - printf("could not malloc(%d) symbol area\n", poolsz); - return -1; - } - if (!(le = e = malloc(poolsz))) { - printf("could not malloc(%d) text area\n", poolsz); - return -1; - } - if (!(data = malloc(poolsz))) { - printf("could not malloc(%d) data area\n", poolsz); - return -1; - } - if (!(sp = malloc(poolsz))) { - printf("could not malloc(%d) stack area\n", poolsz); - return -1; - } - - memset(sym, 0, poolsz); - memset(e, 0, poolsz); - memset(data, 0, poolsz); - - p = strdup("char else enum if int return sizeof while " - "open read close printf malloc free memset memcmp exit void main"); - i = Char; - while (i <= While) { - next(); - id[Tk] = i++; - } // add keywords to symbol table - i = OPEN; - while (i <= EXIT) { - next(); - id[Class] = Sys; - id[Type] = INT; - id[Val] = i++; - } // add library to symbol table - next(); - id[Tk] = Char; // handle void type - next(); - idmain = id; // keep track of main - - if (!(lp = p = malloc(poolsz))) { - printf("could not malloc(%d) source area\n", poolsz); - return -1; - } - - void *file; - if (!(file = sread(*argv))) { - printf("could not read file %s\n", *argv); - return -1; - } - - memcpy(p, file, poolsz - 1); - //p[i] = 0; - - // parse declarations - line = 1; - next(); - while (tk) { - bt = INT; // basetype - if (tk == Int) - next(); - else if (tk == Char) { - next(); - bt = CHAR; - } else if (tk == Enum) { - next(); - if (tk != '{') - next(); - if (tk == '{') { - next(); - i = 0; - while (tk != '}') { - if (tk != Id) { - printf("%d: bad enum identifier %d\n", line, tk); - return -1; - } - next(); - if (tk == Assign) { - next(); - if (tk != Num) { - printf("%d: bad enum initializer\n", line); - return -1; - } - i = ival; - next(); - } - id[Class] = Num; - id[Type] = INT; - id[Val] = i++; - if (tk == ',') - next(); - } - next(); - } - } - while (tk != ';' && tk != '}') { - ty = bt; - while (tk == Mul) { - next(); - ty = ty + PTR; - } - if (tk != Id) { - printf("%d: bad global declaration\n", line); - return -1; - } - if (id[Class]) { - printf("%d: duplicate global definition\n", line); - return -1; - } - next(); - id[Type] = ty; - if (tk == '(') { // function - id[Class] = Fun; - id[Val] = (int)(e + 1); - next(); - i = 0; - while (tk != ')') { - ty = INT; - if (tk == Int) - next(); - else if (tk == Char) { - next(); - ty = CHAR; - } - while (tk == Mul) { - next(); - ty = ty + PTR; - } - if (tk != Id) { - printf("%d: bad parameter declaration\n", line); - return -1; - } - if (id[Class] == Loc) { - printf("%d: duplicate parameter definition\n", - line); - return -1; - } - id[HClass] = id[Class]; - id[Class] = Loc; - id[HType] = id[Type]; - id[Type] = ty; - id[HVal] = id[Val]; - id[Val] = i++; - next(); - if (tk == ',') - next(); - } - next(); - if (tk != '{') { - printf("%d: bad function definition\n", line); - return -1; - } - loc = ++i; - next(); - while (tk == Int || tk == Char) { - bt = (tk == Int) ? INT : CHAR; - next(); - while (tk != ';') { - ty = bt; - while (tk == Mul) { - next(); - ty = ty + PTR; - } - if (tk != Id) { - printf("%d: bad local declaration\n", line); - return -1; - } - if (id[Class] == Loc) { - printf("%d: duplicate local definition\n", - line); - return -1; - } - id[HClass] = id[Class]; - id[Class] = Loc; - id[HType] = id[Type]; - id[Type] = ty; - id[HVal] = id[Val]; - id[Val] = ++i; - next(); - if (tk == ',') - next(); - } - next(); - } - *++e = ENT; - *++e = i - loc; - while (tk != '}') - stmt(); - *++e = LEV; - id = sym; // unwind symbol table locals - while (id[Tk]) { - if (id[Class] == Loc) { - id[Class] = id[HClass]; - id[Type] = id[HType]; - id[Val] = id[HVal]; - } - id = id + Idsz; - } - } else { - id[Class] = Glo; - id[Val] = (int)data; - data = data + sizeof(int); - } - if (tk == ',') - next(); - } - next(); - } - - if (!(pc = (int *)idmain[Val])) { - printf("main() not defined\n"); - return -1; - } - - // setup stack - bp = sp = (int *)((int)sp + poolsz); - *--sp = EXIT; // call exit if main returns - *--sp = PSH; - t = sp; - *--sp = argc; - *--sp = (int)argv; - *--sp = (int)t; - - // run... - cycle = a = 0; - while (1) { - i = *pc++; - ++cycle; - if (i == LEA) - a = (int)(bp + *pc++); // load local address - else if (i == IMM) - a = *pc++; // load global address or immediate - else if (i == JMP) - pc = (int *)*pc; // jump - else if (i == JSR) { - *--sp = (int)(pc + 1); - pc = (int *)*pc; - } // jump to subroutine - else if (i == BZ) - pc = a ? pc + 1 : (int *)*pc; // branch if zero - else if (i == BNZ) - pc = a ? (int *)*pc : pc + 1; // branch if not zero - else if (i == ENT) { - *--sp = (int)bp; - bp = sp; - sp = sp - *pc++; - } // enter subroutine - else if (i == ADJ) - sp = sp + *pc++; // stack adjust - else if (i == LEV) { - sp = bp; - bp = (int *)*sp++; - pc = (int *)*sp++; - } // leave subroutine - else if (i == LI) - a = *(int *)a; // load int - else if (i == LC) - a = *(char *)a; // load char - else if (i == SI) - *(int *)*sp++ = a; // store int - else if (i == SC) - a = *(char *)*sp++ = a; // store char - else if (i == PSH) - *--sp = a; // push - - else if (i == OR) - a = *sp++ | a; - else if (i == XOR) - a = *sp++ ^ a; - else if (i == AND) - a = *sp++ & a; - else if (i == EQ) - a = *sp++ == a; - else if (i == NE) - a = *sp++ != a; - else if (i == LT) - a = *sp++ < a; - else if (i == GT) - a = *sp++ > a; - else if (i == LE) - a = *sp++ <= a; - else if (i == GE) - a = *sp++ >= a; - else if (i == SHL) - a = *sp++ << a; - else if (i == SHR) - a = *sp++ >> a; - else if (i == ADD) - a = *sp++ + a; - else if (i == SUB) - a = *sp++ - a; - else if (i == MUL) - a = *sp++ * a; - else if (i == DIV) - a = *sp++ / a; - else if (i == MOD) - a = *sp++ % a; - - /* else if (i == OPEN) */ - /* a = open((char *)sp[1], *sp); */ - /* else if (i == READ) */ - /* a = read(sp[2], (char *)sp[1], *sp); */ - /* else if (i == CLOS) */ - /* a = close(*sp); */ - else if (i == PRTF) { - t = sp + pc[1]; - a = printf((char *)t[-1], t[-2], t[-3], t[-4], t[-5], t[-6]); - } else if (i == MALC) - a = (int)malloc(*sp); - else if (i == FREE) - free((void *)*sp); - else if (i == MSET) - a = (int)memset((char *)sp[2], sp[1], *sp); - else if (i == MCMP) - a = memcmp((char *)sp[2], (char *)sp[1], *sp); - else if (i == EXIT) { - printf("exit(%d) cycle = %d\n", *sp, cycle); - return *sp; - } else { - printf("unknown instruction = %d! cycle = %d\n", i, cycle); - return -1; - } - } -} |