Awesome stuff

author: Marvin Borner 2021-04-20 17:42:10 +0200
committer: Marvin Borner 2021-04-20 17:43:30 +0200
commit: 5e257cbf702e57fd55f8477a63236bdb9ce454f4 (patch)
tree: 09567e1f12f8e617ebd3cfa17e0b773c026c57af
parent: fbf9cfee0d22a2750b894c2ddb19dff2a50810b4 (diff)
5 files changed, 116 insertions, 23 deletions
diff --git a/src/inc/lexer.h b/src/inc/lexer.h
index afc3a0a..c3999e7 100644
--- a/src/inc/lexer.h
+++ b/src/inc/lexer.h
@@ -58,6 +58,8 @@ enum token_type {
 	ORG,
 	DB,
 	DW,
+	DATA,
+	BIT,
 	INCLUDE,
 
 	HASH,
@@ -73,7 +75,12 @@ enum token_type {
 	HEX_NUM,
 	BIN_NUM,
 
+	STRING,
+
+	// Registers
 	ACCU,
+	ATR0,
+	ATR1,
 	R0,
 	R1,
 	R2,
@@ -88,7 +95,6 @@ struct token {
 	enum token_type type;
 	char *start;
 	u32 length;
-	void *data;
 };
 
 void token_print(struct token *tok);
diff --git a/src/lexer.c b/src/lexer.c
index 6e2cd94..d4a6d6a 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -5,10 +5,13 @@
 
 #define ALPHA(a) (((a) >= 'a' && (a) <= 'z') || ((a) >= 'A' && (a) <= 'Z'))
 #define NUMERIC(a) ((a) >= '0' && (a) <= '9')
+#define ALPHA_NUMERIC(a) (ALPHA(a) || NUMERIC(a))
+#define HEX_NUMERIC(a) (NUMERIC(a) || ((a) >= 'a' && (a) <= 'f') || ((a) >= 'A' && (a) <= 'F'))
 
-#define CMPSP(tok) (strncmp(tok " ", str, MIN(strlen(tok) + 1, size)) == 0) // CMP with space at end
+#define CMPSP(tok)                                                                                 \
+	(strncasecmp(tok " ", str, MIN(strlen(tok) + 1, size)) == 0) // CMP with space at end
 #define CMPNA(tok)                                                                                 \
-	(strncmp(tok, str, MIN(strlen(tok), size)) == 0 &&                                         \
+	(strncasecmp(tok, str, MIN(strlen(tok), size)) == 0 &&                                     \
 	 !ALPHA(str[strlen(tok)])) // CMP with non alpha at end
 
 void token_print(struct token *tok)
@@ -28,11 +31,17 @@ struct token token_resolve(char *str, u32 size)
 	u32 length = 0;
 
 	// "Beautiful" ~ Everyone. Probably.
-	if (NUMERIC(str[0]) && !ALPHA(str[1])) {
-		while (NUMERIC(str[length++]))
-			;
-		length--;
-		type = DEC_NUM;
+	if (NUMERIC(str[0])) {
+		while (HEX_NUMERIC(str[length]))
+			length++;
+
+		if (str[length] == 'h' || str[length] == 'H') {
+			type = HEX_NUM;
+		} else if (str[length] == 'b' || str[length] == 'B') {
+			type = BIN_NUM;
+		} else {
+			type = DEC_NUM;
+		}
 	} else if (str[0] == '\n') {
 		type = NEWLINE;
 		length = 1;
@@ -240,16 +249,29 @@ struct token token_resolve(char *str, u32 size)
 	} else if (CMPSP("dw")) {
 		type = DW;
 		length = 2;
+	} else if (CMPSP("data")) { // Lars: "Who cares?"
+		type = DATA;
+		length = 4;
+	} else if (CMPSP("bit")) {
+		type = BIT;
+		length = 3;
 	} else if (CMPSP("include")) {
 		type = INCLUDE;
 		length = 7;
+	} else {
+		if (ALPHA(str[0])) {
+			while (ALPHA_NUMERIC(str[length]))
+				length++;
+
+			if (length)
+				type = STRING;
+		}
 	}
 
 	struct token tok = {
 		.type = type,
 		.start = str,
 		.length = length,
-		.data = 0,
 	};
 
 	return tok;
diff --git a/src/parser.c b/src/parser.c
index 1698638..2e8894b 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -25,10 +25,10 @@ static void rom_add(u8 byte)
 }
 
 /**
- * Main parsing
+ * Toks parsing
  */
 
-static inline u32 toks_count(struct token *toks)
+static u32 toks_count(struct token *toks)
 {
 	struct token *p = toks;
 	while (p && p->type)
@@ -36,6 +36,20 @@ static inline u32 toks_count(struct token *toks)
 	return p - toks;
 }
 
+static struct token *toks_peek(struct token *toks, u32 cnt)
+{
+	return toks + cnt;
+}
+
+static struct token *toks_peek_end(struct token *toks)
+{
+	return toks + toks_count(toks) - 1;
+}
+
+/**
+ * Main parsing
+ */
+
 static void parse_nop(struct context *ctx, struct token *toks)
 {
 	UNUSED(ctx);
@@ -93,10 +107,10 @@ static void parse_sjmp(struct context *ctx, struct token *toks)
 
 static void parse_mov(struct context *ctx, struct token *toks)
 {
-	UNUSED(ctx);
-	printf("CNT: %d\n", toks_count(toks));
-	/* if (toks_count(toks) > 4) */
-	/* 	warnings_add(ctx, "Too many arguments"); */
+	if (toks_peek_end(toks)->type == COMMA)
+		warnings_add(ctx, "Unexpected end of line");
+	else if (toks_count(toks) > 5)
+		warnings_add(ctx, "Too many arguments");
 }
 
 static void parse_orl(struct context *ctx, struct token *toks)
@@ -345,7 +359,21 @@ static void parse_include(struct context *ctx, struct token *toks)
 	UNUSED(toks);
 }
 
-static u32 parse_instruction(struct context *ctx, char *str, u32 size)
+static void parse_string(struct context *ctx, struct token *toks)
+{
+	enum token_type next = toks_peek(toks, 1)->type;
+	if (next == COLON) {
+		// TODO: Add label lookup tables
+	} else if (next == DATA) {
+		// TODO: Constants map
+	} else if (next == BIT) {
+		// TODO: Constants map
+	} else {
+		warnings_add(ctx, "Expected data/bit/colon");
+	}
+}
+
+static u32 parse_line(struct context *ctx, char *str, u32 size)
 {
 	struct token toks[32] = { 0 };
 	u8 tok_ind = 0;
@@ -369,7 +397,11 @@ static u32 parse_instruction(struct context *ctx, char *str, u32 size)
 
 		token_print(&tok);
 
-		assert(tok_ind + 1 < (u8)(sizeof(toks) / sizeof(toks[0])));
+		if (tok_ind + 1 >= (u8)(sizeof(toks) / sizeof(toks[0]))) {
+			warnings_add(ctx, "Token overflow");
+			return str_ind;
+		}
+
 		toks[tok_ind++] = tok;
 		str_ind += tok.length;
 	}
@@ -378,13 +410,13 @@ static u32 parse_instruction(struct context *ctx, char *str, u32 size)
 
 	if (!tok_ind) {
 		warnings_add(ctx, "Parsing failed");
-		return 0;
+		return str_ind;
 	}
 
 	switch (toks[0].type) {
 	case UNKNOWN:
 		warnings_add(ctx, "Unknown instruction");
-		return 0;
+		break;
 	case NEWLINE:
 		break;
 	case NOP:
@@ -540,8 +572,39 @@ static u32 parse_instruction(struct context *ctx, char *str, u32 size)
 	case INCLUDE:
 		parse_include(ctx, toks);
 		break;
+	case STRING:
+		parse_string(ctx, toks);
+		break;
+	case SPACE:
+	case HASH:
+	case DOLLAR:
+	case SLASH:
+	case PLUS:
+	case COMMA:
+	case DOT:
+	case COLON:
+	case SEMICOLON:
+	case DEC_NUM:
+	case HEX_NUM:
+	case BIN_NUM:
+	case ACCU:
+	case ATR0:
+	case ATR1:
+	case R0:
+	case R1:
+	case R2:
+	case R3:
+	case R4:
+	case R5:
+	case R6:
+	case R7:
+	case DATA:
+	case BIT:
+		warnings_add(ctx, "Random non-instruction found");
+		break;
 	default:
 		warnings_add(ctx, "Super-unknown instruction");
+		break;
 	}
 
 	return str_ind;
@@ -570,7 +633,7 @@ u8 parse(char *buf, u32 size)
 			continue;
 		}
 
-		u32 len = parse_instruction(&ctx, buf + i, size - i);
+		u32 len = parse_line(&ctx, buf + i, size - i);
 		i += len;
 		ctx.column += len;
 	}
diff --git a/src/warnings.c b/src/warnings.c
index d9c7be6..b5a7851 100644
--- a/src/warnings.c
+++ b/src/warnings.c
@@ -11,7 +11,7 @@
 
 struct warning {
 	u8 exists;
-	struct context *ctx;
+	struct context ctx;
 	char text[WARNING_LENGTH];
 };
 
@@ -22,7 +22,7 @@ void warnings_add(struct context *ctx, const char *fmt, ...)
 	assert(warning_index + 1 < WARNING_COUNT);
 
 	warnings[warning_index].exists = 1;
-	warnings[warning_index].ctx = ctx;
+	warnings[warning_index].ctx = *ctx;
 
 	va_list ap;
 	va_start(ap, fmt);
@@ -40,7 +40,7 @@ void warnings_print(void)
 			continue;
 
 		/* gui_show_warning(warnings[i].text); */
-		printf("Line %d:%d: %s\n", warnings[i].ctx->line, warnings[i].ctx->column,
+		printf("Line %d:%d: %s\n", warnings[i].ctx.line, warnings[i].ctx.column,
 		       warnings[i].text);
 	}
 }
diff --git a/test.asm b/test.asm
index 95063e7..f834fd5 100644
--- a/test.asm
+++ b/test.asm
@@ -1,2 +1,4 @@
+P0 DATA 080h
+
 mov A, #42
 jmp $
author	Marvin Borner	2021-04-20 17:42:10 +0200
committer	Marvin Borner	2021-04-20 17:43:30 +0200
commit	5e257cbf702e57fd55f8477a63236bdb9ce454f4 (patch)
tree	09567e1f12f8e617ebd3cfa17e0b773c026c57af
parent	fbf9cfee0d22a2750b894c2ddb19dff2a50810b4 (diff)