Diffstat (limited to 'src/tokenize.c')
-rw-r--r-- | src/tokenize.c | 234
1 files changed, 0 insertions, 234 deletions
diff --git a/src/tokenize.c b/src/tokenize.c
deleted file mode 100644
index 0fa58fe..0000000
--- a/src/tokenize.c
+++ /dev/null
@@ -1,234 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-
-#include <log.h>
-#include <preprocess.h>
-#include <tokenize.h>
-
-// TODO: Do some different limitations for identifiers/types
-
-static size_t peek_identifier(struct ctx *ctx, size_t start, size_t opt_count, ...)
-{
-	if (isdigit(context_getch(ctx, start)))
-		errln(&ctx->location, "Identifiers can't start with numbers");
-
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = context_getch(ctx, i);
-
-		// Check for every option in variadic argument
-		va_list ap;
-		va_start(ap, opt_count);
-		for (size_t j = 0; j < opt_count; j++) {
-			char ch = va_arg(ap, int);
-			if (cur == ch) {
-				va_end(ap);
-				return i;
-			}
-		}
-		va_end(ap);
-
-		if (cur == '\n')
-			errln(&ctx->location, "Unexpected end of line while scanning");
-
-		if (!isalnum(cur) && (cur < '!' || cur > '~'))
-			errln(&ctx->location, "'%c' is not an identifier", cur);
-	}
-
-	errln(&ctx->location, "Unexpected end of buffer while scanning");
-}
-
-static size_t peek_type(struct ctx *ctx, size_t start, size_t opt_count, ...)
-{
-	if (isdigit(context_getch(ctx, start)))
-		errln(&ctx->location, "Types can't start with numbers");
-
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = context_getch(ctx, i);
-
-		// Check for every option in variadic argument
-		va_list ap;
-		va_start(ap, opt_count);
-		for (size_t j = 0; j < opt_count; j++) {
-			char ch = va_arg(ap, int);
-			if (cur == ch) {
-				va_end(ap);
-				return i;
-			}
-		}
-		va_end(ap);
-
-		if (cur == '\n')
-			errln(&ctx->location, "Unexpected end of line while scanning");
-
-		if (!isalnum(cur) && (cur < '!' || cur > '~'))
-			errln(&ctx->location, "'%c' is not an identifier", cur);
-	}
-
-	errln(&ctx->location, "Unexpected end of buffer while scanning");
-}
-
-static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
-{
-	assert(type != UNKNOWN);
-
-	struct token token = { 0 };
-	token.type = type;
-	token.string.start = start;
-	token.string.end = end;
-	token.location = ctx->location;
-
-	ctx->tokens[ctx->token_count] = token;
-
-	ctx->token_count++;
-	assert(ctx->token_count < TOKENS_MAX);
-
-	if (type == NEWLINE) {
-		ctx->location.line++;
-		ctx->location.column = 0;
-	} else {
-		ctx->location.column += end - start;
-	}
-}
-
-void token_print(struct ctx *ctx, struct token *token)
-{
-	assert(token->type != UNKNOWN);
-
-	printf("[token type=%d] ", token->type);
-	if (token->type == NEWLINE || token->type == END) {
-		printf("(Unprintable)\n");
-		return;
-	}
-
-	printf("'%.*s'\n", (int)(token->string.end - token->string.start),
-	       ctx->data + token->string.start);
-}
-
-void tokenize(struct ctx *ctx)
-{
-	enum {
-		PARSE_DECLARATION,
-		PARSE_DEFINITION,
-		PARSE_NUMBER,
-		PARSE_BODY,
-		PARSE_STRING,
-	} state = PARSE_DECLARATION,
-	  prev = PARSE_DECLARATION;
-
-	// TODO: Clean this loop up (move into seperate tokenizing functions)
-
-	size_t start;
-	for (size_t i = 0; i < ctx->size; i++) {
-		const char cur = context_getch(ctx, i);
-
-		// String parsing
-		if (cur == '"') {
-			if (state == PARSE_STRING) {
-				token_add(ctx, STRING, start, i + 1);
-				state = prev;
-			} else {
-				state = PARSE_STRING;
-				start = i;
-			}
-			continue;
-		} else if (state == PARSE_STRING) {
-			continue;
-		}
-
-		if (state != PARSE_BODY) {
-			switch (cur) {
-			case '\0':
-				errln(&ctx->location, "Unexpected end of buffer");
-			case '\n':
-				token_add(ctx, NEWLINE, i, i + 1);
-				continue;
-			case MACRO_SKIP:
-				ctx->location.column++;
-				continue;
-			case MACRO_NEWLINE:
-				ctx->location.line++;
-				continue;
-			default:
-				break;
-			}
-		}
-
-		if (state == PARSE_BODY) {
-			switch (cur) {
-			case '(':
-				token_add(ctx, LPAREN, i, i + 1);
-				continue;
-			case ')':
-				token_add(ctx, RPAREN, i, i + 1);
-				continue;
-			case '\n':
-				token_add(ctx, NEWLINE, i, i + 1);
-				state = PARSE_DECLARATION;
-				continue;
-			default:
-				break;
-			}
-
-			size_t end_ident = peek_identifier(ctx, i, 3, ' ', ')', '\n');
-			token_add(ctx, IDENT, i, end_ident);
-			i = end_ident - (context_getch(ctx, end_ident) != ' ');
-			continue;
-		}
-
-		if (state == PARSE_DECLARATION) {
-			size_t end_ident = peek_identifier(ctx, i, 1, ' ');
-			token_add(ctx, IDENT, i, end_ident);
-
-			size_t start_type = end_ident + 1;
-			while (context_getch(ctx, start_type) != '-' ||
-			       context_getch(ctx, start_type + 1) != '>') {
-				size_t end_type = peek_type(ctx, start_type, 1, ' ');
-				token_add(ctx, TYPE, start_type, end_type);
-				start_type = end_type + 1;
-			}
-
-			if (context_getch(ctx, start_type + 2) != ' ')
-				errln(&ctx->location, "Missing space");
-			token_add(ctx, TYPEDELIM, start_type, start_type + 2);
-
-			start_type += 3;
-			size_t final_type = peek_type(ctx, start_type, 1, '\n');
-			token_add(ctx, TYPE, start_type, final_type);
-
-			i = final_type - 1;
-			state = PARSE_DEFINITION;
-			continue;
-		}
-
-		if (state == PARSE_DEFINITION) {
-			size_t end_ident = peek_identifier(ctx, i, 1, ' ');
-			token_add(ctx, IDENT, i, end_ident);
-
-			size_t start_parameter = end_ident + 1;
-			while (context_getch(ctx, start_parameter) != ':') {
-				size_t end_parameter =
-					peek_identifier(ctx, start_parameter, 1, ' ');
-				token_add(ctx, PARAM, start_parameter, end_parameter);
-				start_parameter = end_parameter + 1;
-			}
-
-			if (context_getch(ctx, start_parameter + 1) != ' ')
-				errln(&ctx->location, "Missing space");
-			token_add(ctx, IDENTDELIM, start_parameter, start_parameter + 1);
-
-			i = start_parameter + 1;
-			state = PARSE_BODY;
-			continue;
-		}
-	}
-
-	/* for (size_t i = 0; i < ctx->token_count; i++) */
-	/*	token_print(ctx, &ctx->tokens[i]); */
-
-	token_add(ctx, END, ctx->size, ctx->size);
-	context_rewind(ctx);
-}