diff options
| Field | Value | Date |
|---|---|---|
| author | Marvin Borner | 2021-04-20 17:42:10 +0200 |
| committer | Marvin Borner | 2021-04-20 17:43:30 +0200 |
| commit | 5e257cbf702e57fd55f8477a63236bdb9ce454f4 (patch) | |
| tree | 09567e1f12f8e617ebd3cfa17e0b773c026c57af | |
| parent | fbf9cfee0d22a2750b894c2ddb19dff2a50810b4 (diff) | |

Awesome stuff

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/inc/lexer.h | 8 |
| -rw-r--r-- | src/lexer.c | 38 |
| -rw-r--r-- | src/parser.c | 85 |
| -rw-r--r-- | src/warnings.c | 6 |
| -rw-r--r-- | test.asm | 2 |

5 files changed, 116 insertions, 23 deletions

```diff
diff --git a/src/inc/lexer.h b/src/inc/lexer.h
index afc3a0a..c3999e7 100644
--- a/src/inc/lexer.h
+++ b/src/inc/lexer.h
@@ -58,6 +58,8 @@ enum token_type {
 	ORG,
 	DB,
 	DW,
+	DATA,
+	BIT,
 	INCLUDE,
 
 	HASH,
@@ -73,7 +75,12 @@ enum token_type {
 	HEX_NUM,
 	BIN_NUM,
 
+	STRING,
+
+	// Registers
 	ACCU,
+	ATR0,
+	ATR1,
 	R0,
 	R1,
 	R2,
@@ -88,7 +95,6 @@ struct token {
 	enum token_type type;
 	char *start;
 	u32 length;
-	void *data;
 };
 
 void token_print(struct token *tok);
diff --git a/src/lexer.c b/src/lexer.c
index 6e2cd94..d4a6d6a 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -5,10 +5,13 @@
 
 #define ALPHA(a) (((a) >= 'a' && (a) <= 'z') || ((a) >= 'A' && (a) <= 'Z'))
 #define NUMERIC(a) ((a) >= '0' && (a) <= '9')
+#define ALPHA_NUMERIC(a) (ALPHA(a) || NUMERIC(a))
+#define HEX_NUMERIC(a) (NUMERIC(a) || ((a) >= 'a' && (a) <= 'f') || ((a) >= 'A' && (a) <= 'F'))
 
-#define CMPSP(tok) (strncmp(tok " ", str, MIN(strlen(tok) + 1, size)) == 0) // CMP with space at end
+#define CMPSP(tok) \
+	(strncasecmp(tok " ", str, MIN(strlen(tok) + 1, size)) == 0) // CMP with space at end
 #define CMPNA(tok) \
-	(strncmp(tok, str, MIN(strlen(tok), size)) == 0 && \
+	(strncasecmp(tok, str, MIN(strlen(tok), size)) == 0 && \
 	 !ALPHA(str[strlen(tok)])) // CMP with non alpha at end
 
 void token_print(struct token *tok)
@@ -28,11 +31,17 @@ struct token token_resolve(char *str, u32 size)
 	u32 length = 0;
 
 	// "Beautiful" ~ Everyone. Probably.
-	if (NUMERIC(str[0]) && !ALPHA(str[1])) {
-		while (NUMERIC(str[length++]))
-			;
-		length--;
-		type = DEC_NUM;
+	if (NUMERIC(str[0])) {
+		while (HEX_NUMERIC(str[length]))
+			length++;
+
+		if (str[length] == 'h' || str[length] == 'H') {
+			type = HEX_NUM;
+		} else if (str[length] == 'b' || str[length] == 'B') {
+			type = BIN_NUM;
+		} else {
+			type = DEC_NUM;
+		}
 	} else if (str[0] == '\n') {
 		type = NEWLINE;
 		length = 1;
@@ -240,16 +249,29 @@ struct token token_resolve(char *str, u32 size)
 	} else if (CMPSP("dw")) {
 		type = DW;
 		length = 2;
+	} else if (CMPSP("data")) { // Lars: "Who cares?"
+		type = DATA;
+		length = 4;
+	} else if (CMPSP("bit")) {
+		type = BIT;
+		length = 3;
 	} else if (CMPSP("include")) {
 		type = INCLUDE;
 		length = 7;
+	} else {
+		if (ALPHA(str[0])) {
+			while (ALPHA_NUMERIC(str[length]))
+				length++;
+
+			if (length)
+				type = STRING;
+		}
 	}
 
 	struct token tok = {
 		.type = type,
 		.start = str,
 		.length = length,
-		.data = 0,
 	};
 
 	return tok;
diff --git a/src/parser.c b/src/parser.c
index 1698638..2e8894b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -25,10 +25,10 @@ static void rom_add(u8 byte)
 }
 
 /**
- * Main parsing
+ * Toks parsing
  */
 
-static inline u32 toks_count(struct token *toks)
+static u32 toks_count(struct token *toks)
 {
 	struct token *p = toks;
 	while (p && p->type)
@@ -36,6 +36,20 @@ static inline u32 toks_count(struct token *toks)
 	return p - toks;
 }
 
+static struct token *toks_peek(struct token *toks, u32 cnt)
+{
+	return toks + cnt;
+}
+
+static struct token *toks_peek_end(struct token *toks)
+{
+	return toks + toks_count(toks) - 1;
+}
+
+/**
+ * Main parsing
+ */
+
 static void parse_nop(struct context *ctx, struct token *toks)
 {
 	UNUSED(ctx);
@@ -93,10 +107,10 @@ static void parse_sjmp(struct context *ctx, struct token *toks)
 
 static void parse_mov(struct context *ctx, struct token *toks)
 {
-	UNUSED(ctx);
-	printf("CNT: %d\n", toks_count(toks));
-	/* if (toks_count(toks) > 4) */
-	/* warnings_add(ctx, "Too many arguments"); */
+	if (toks_peek_end(toks)->type == COMMA)
+		warnings_add(ctx, "Unexpected end of line");
+	else if (toks_count(toks) > 5)
+		warnings_add(ctx, "Too many arguments");
 }
 
 static void parse_orl(struct context *ctx, struct token *toks)
@@ -345,7 +359,21 @@ static void parse_include(struct context *ctx, struct token *toks)
 	UNUSED(toks);
 }
 
-static u32 parse_instruction(struct context *ctx, char *str, u32 size)
+static void parse_string(struct context *ctx, struct token *toks)
+{
+	enum token_type next = toks_peek(toks, 1)->type;
+	if (next == COLON) {
+		// TODO: Add label lookup tables
+	} else if (next == DATA) {
+		// TODO: Constants map
+	} else if (next == BIT) {
+		// TODO: Constants map
+	} else {
+		warnings_add(ctx, "Expected data/bit/colon");
+	}
+}
+
+static u32 parse_line(struct context *ctx, char *str, u32 size)
 {
 	struct token toks[32] = { 0 };
 	u8 tok_ind = 0;
@@ -369,7 +397,11 @@ static u32 parse_instruction(struct context *ctx, char *str, u32 size)
 
 		token_print(&tok);
 
-		assert(tok_ind + 1 < (u8)(sizeof(toks) / sizeof(toks[0])));
+		if (tok_ind + 1 >= (u8)(sizeof(toks) / sizeof(toks[0]))) {
+			warnings_add(ctx, "Token overflow");
+			return str_ind;
+		}
+
 		toks[tok_ind++] = tok;
 		str_ind += tok.length;
 	}
@@ -378,13 +410,13 @@ static u32 parse_instruction(struct context *ctx, char *str, u32 size)
 
 	if (!tok_ind) {
 		warnings_add(ctx, "Parsing failed");
-		return 0;
+		return str_ind;
 	}
 
 	switch (toks[0].type) {
 	case UNKNOWN:
 		warnings_add(ctx, "Unknown instruction");
-		return 0;
+		break;
 	case NEWLINE:
 		break;
 	case NOP:
@@ -540,8 +572,39 @@ static u32 parse_instruction(struct context *ctx, char *str, u32 size)
 	case INCLUDE:
 		parse_include(ctx, toks);
 		break;
+	case STRING:
+		parse_string(ctx, toks);
+		break;
+	case SPACE:
+	case HASH:
+	case DOLLAR:
+	case SLASH:
+	case PLUS:
+	case COMMA:
+	case DOT:
+	case COLON:
+	case SEMICOLON:
+	case DEC_NUM:
+	case HEX_NUM:
+	case BIN_NUM:
+	case ACCU:
+	case ATR0:
+	case ATR1:
+	case R0:
+	case R1:
+	case R2:
+	case R3:
+	case R4:
+	case R5:
+	case R6:
+	case R7:
+	case DATA:
+	case BIT:
+		warnings_add(ctx, "Random non-instruction found");
+		break;
 	default:
 		warnings_add(ctx, "Super-unknown instruction");
+		break;
 	}
 
 	return str_ind;
@@ -570,7 +633,7 @@ u8 parse(char *buf, u32 size)
 			continue;
 		}
 
-		u32 len = parse_instruction(&ctx, buf + i, size - i);
+		u32 len = parse_line(&ctx, buf + i, size - i);
 		i += len;
 		ctx.column += len;
 	}
diff --git a/src/warnings.c b/src/warnings.c
index d9c7be6..b5a7851 100644
--- a/src/warnings.c
+++ b/src/warnings.c
@@ -11,7 +11,7 @@
 
 struct warning {
 	u8 exists;
-	struct context *ctx;
+	struct context ctx;
 	char text[WARNING_LENGTH];
 };
 
@@ -22,7 +22,7 @@ void warnings_add(struct context *ctx, const char *fmt, ...)
 	assert(warning_index + 1 < WARNING_COUNT);
 
 	warnings[warning_index].exists = 1;
-	warnings[warning_index].ctx = ctx;
+	warnings[warning_index].ctx = *ctx;
 
 	va_list ap;
 	va_start(ap, fmt);
@@ -40,7 +40,7 @@ void warnings_print(void)
 			continue;
 
 		/* gui_show_warning(warnings[i].text); */
-		printf("Line %d:%d: %s\n", warnings[i].ctx->line, warnings[i].ctx->column,
+		printf("Line %d:%d: %s\n", warnings[i].ctx.line, warnings[i].ctx.column,
 		       warnings[i].text);
 	}
 }
diff --git a/test.asm b/test.asm
--- a/test.asm
+++ b/test.asm
@@ -1,2 +1,4 @@
+P0 DATA 080h
+
 mov A, #42
 jmp $
```