#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>

/*
 * NOTE: the include list in the original was garbled; the standard headers
 * above are the ones this file actually uses. The project-specific headers
 * declaring struct ctx, context_getch(), context_rewind(), errln(),
 * enum token_type, TOKENS_MAX, MACRO_SKIP and MACRO_NEWLINE are assumed to
 * be included here as well.
 */

// TODO: Apply different restrictions to identifiers and types

static size_t peek_identifier(struct ctx *ctx, size_t start, size_t opt_count, ...)
{
	if (isdigit(context_getch(ctx, start)))
		errln(&ctx->location, "Identifiers can't start with numbers");

	for (size_t i = start; i < ctx->size; i++) {
		char cur = context_getch(ctx, i);

		// Check the current character against every stop character
		// passed as a variadic argument
		va_list ap;
		va_start(ap, opt_count);
		for (size_t j = 0; j < opt_count; j++) {
			char ch = va_arg(ap, int);
			if (cur == ch) {
				va_end(ap);
				return i;
			}
		}
		va_end(ap);

		if (cur == '\n')
			errln(&ctx->location, "Unexpected end of line while scanning");

		if (!isalnum(cur) && (cur < '!' || cur > '~'))
			errln(&ctx->location, "'%c' is not an identifier", cur);
	}

	/* errln() is assumed not to return, so no value is returned here */
	errln(&ctx->location, "Unexpected end of buffer while scanning");
}

static size_t peek_type(struct ctx *ctx, size_t start, size_t opt_count, ...)
{
	if (isdigit(context_getch(ctx, start)))
		errln(&ctx->location, "Types can't start with numbers");

	for (size_t i = start; i < ctx->size; i++) {
		char cur = context_getch(ctx, i);

		// Check the current character against every stop character
		// passed as a variadic argument
		va_list ap;
		va_start(ap, opt_count);
		for (size_t j = 0; j < opt_count; j++) {
			char ch = va_arg(ap, int);
			if (cur == ch) {
				va_end(ap);
				return i;
			}
		}
		va_end(ap);

		if (cur == '\n')
			errln(&ctx->location, "Unexpected end of line while scanning");

		if (!isalnum(cur) && (cur < '!' || cur > '~'))
			errln(&ctx->location, "'%c' is not a type", cur);
	}

	errln(&ctx->location, "Unexpected end of buffer while scanning");
}

static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
{
	assert(type != UNKNOWN);

	struct token token = { 0 };
	token.type = type;
	token.string.start = start;
	token.string.end = end;
	token.location = ctx->location;

	ctx->tokens[ctx->token_count] = token;
	ctx->token_count++;
	assert(ctx->token_count < TOKENS_MAX);

	// Keep the source location in sync with the emitted token
	if (type == NEWLINE) {
		ctx->location.line++;
		ctx->location.column = 0;
	} else {
		ctx->location.column += end - start;
	}
}

void token_print(struct ctx *ctx, struct token *token)
{
	assert(token->type != UNKNOWN);

	printf("[token type=%d] ", token->type);

	if (token->type == NEWLINE || token->type == END) {
		printf("(Unprintable)\n");
		return;
	}

	printf("'%.*s'\n", (int)(token->string.end - token->string.start),
	       ctx->data + token->string.start);
}

void tokenize(struct ctx *ctx)
{
	enum {
		PARSE_DECLARATION,
		PARSE_DEFINITION,
		PARSE_NUMBER,
		PARSE_BODY,
		PARSE_STRING,
	} state = PARSE_DECLARATION, prev = PARSE_DECLARATION;

	// TODO: Clean this loop up (move into separate tokenizing functions)
	size_t start;
	for (size_t i = 0; i < ctx->size; i++) {
		const char cur = context_getch(ctx, i);

		// String parsing
		if (cur == '"') {
			if (state == PARSE_STRING) {
				token_add(ctx, STRING, start, i + 1);
				state = prev;
			} else {
				state = PARSE_STRING;
				start = i;
			}
			continue;
		} else if (state == PARSE_STRING) {
			continue;
		}

		if (state != PARSE_BODY) {
			switch (cur) {
			case '\0':
				errln(&ctx->location, "Unexpected end of buffer");
			case '\n':
				token_add(ctx, NEWLINE, i, i + 1);
				continue;
			case MACRO_SKIP:
				ctx->location.column++;
				continue;
			case MACRO_NEWLINE:
				ctx->location.line++;
				continue;
			default:
				break;
			}
		}

		if (state == PARSE_BODY) {
			switch (cur) {
			case '(':
				token_add(ctx, LPAREN, i, i + 1);
				continue;
			case ')':
				token_add(ctx, RPAREN, i, i + 1);
				continue;
			case '\n':
				token_add(ctx, NEWLINE, i, i + 1);
				state = PARSE_DECLARATION;
				continue;
			default:
				break;
			}

			size_t end_ident = peek_identifier(ctx, i, 3, ' ', ')', '\n');
			token_add(ctx, IDENT, i, end_ident);
			// Stay on the delimiter unless it is a plain space
			i = end_ident - (context_getch(ctx, end_ident) != ' ');
			continue;
		}

		if (state == PARSE_DECLARATION) {
			size_t end_ident = peek_identifier(ctx, i, 1, ' ');
			token_add(ctx, IDENT, i, end_ident);

			// Argument types up to the "->" delimiter
			size_t start_type = end_ident + 1;
			while (context_getch(ctx, start_type) != '-' ||
			       context_getch(ctx, start_type + 1) != '>') {
				size_t end_type = peek_type(ctx, start_type, 1, ' ');
				token_add(ctx, TYPE, start_type, end_type);
				start_type = end_type + 1;
			}

			if (context_getch(ctx, start_type + 2) != ' ')
				errln(&ctx->location, "Missing space");

			token_add(ctx, TYPEDELIM, start_type, start_type + 2);
			start_type += 3;

			// Return type runs to the end of the line
			size_t final_type = peek_type(ctx, start_type, 1, '\n');
			token_add(ctx, TYPE, start_type, final_type);

			i = final_type - 1;
			state = PARSE_DEFINITION;
			continue;
		}

		if (state == PARSE_DEFINITION) {
			size_t end_ident = peek_identifier(ctx, i, 1, ' ');
			token_add(ctx, IDENT, i, end_ident);

			// Parameter names up to the ':' delimiter
			size_t start_parameter = end_ident + 1;
			while (context_getch(ctx, start_parameter) != ':') {
				size_t end_parameter =
					peek_identifier(ctx, start_parameter, 1, ' ');
				token_add(ctx, PARAM, start_parameter, end_parameter);
				start_parameter = end_parameter + 1;
			}

			if (context_getch(ctx, start_parameter + 1) != ' ')
				errln(&ctx->location, "Missing space");

			token_add(ctx, IDENTDELIM, start_parameter, start_parameter + 1);

			i = start_parameter + 1;
			state = PARSE_BODY;
			continue;
		}
	}

	/* for (size_t i = 0; i < ctx->token_count; i++) */
	/* 	token_print(ctx, &ctx->tokens[i]); */

	token_add(ctx, END, ctx->size, ctx->size);
	context_rewind(ctx);
}
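/*
 * Illustration only, not part of the original file: a minimal, self-contained
 * sketch of the variadic "stop character" scan that peek_identifier() and
 * peek_type() share above, stripped of the context/error handling. The guard
 * macro and the name scan_until() are hypothetical and exist only for this
 * example; it relies on <stdarg.h> and <stddef.h>, already included above.
 */
#ifdef TOKENIZER_SCAN_EXAMPLE
/* Return the index of the first occurrence of any of the opt_count stop
 * characters in buf[start..len), or len if none is found. */
static size_t scan_until(const char *buf, size_t len, size_t start,
                         size_t opt_count, ...)
{
	for (size_t i = start; i < len; i++) {
		va_list ap;
		va_start(ap, opt_count);
		for (size_t j = 0; j < opt_count; j++) {
			/* char arguments are promoted to int through "..." */
			if (buf[i] == (char)va_arg(ap, int)) {
				va_end(ap);
				return i;
			}
		}
		va_end(ap);
	}
	return len;
}

/* Example: scan_until("name: body", 10, 0, 2, ' ', ':') returns 4,
 * the index of the ':' delimiter. */
#endif /* TOKENIZER_SCAN_EXAMPLE */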