#include #include #include static char next_non_alnum(struct ctx *ctx, size_t start) { for (size_t i = start; i < ctx->size; i++) if (!isalnum(ctx->data[i])) return ctx->data[i]; errln(ctx, "Unexpected end of buffer"); } static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return true; if (!isalnum(cur)) return false; } errln(ctx, "Unexpected end of buffer"); } static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return i; if (!isalnum(cur)) errln(ctx, "'%c' is not alpha-numeric", cur); } errln(ctx, "Unexpected end of buffer"); } static size_t peek_special_to(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return i; if (isalnum(cur) || cur < '!' || cur > '~') errln(ctx, "'%c' is not special", cur); } errln(ctx, "Unexpected end of buffer"); } static size_t peek_to(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return i; } errln(ctx, "Unexpected end of buffer"); } static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end) { struct token token = { 0 }; token.type = type; token.start = start; token.end = end; assert(++ctx->token_count < TOKENS_MAX); ctx->tokens[ctx->token_count] = token; if (type == NEWLINE) { ctx->line++; ctx->column = 0; } else { ctx->column += end - start; } } static void token_print(struct ctx *ctx, struct token *token) { assert(token->type != UNKNOWN); printf("[token type=%d] '", token->type); if (token->type == NEWLINE || token->type == END) { printf("' (Unprintable)\n"); return; } for (size_t i = token->start; i < token->end; i++) printf("%c", ctx->data[i]); printf("'\n"); } void tokens_print(struct ctx *ctx) { for (size_t i = 1; i < ctx->token_count; i++) token_print(ctx, &ctx->tokens[i]); } void tokenize(struct ctx *ctx) { for (size_t i = 0; i < ctx->size; i++) { const char cur = ctx->data[i]; switch (cur) { case '\0': token_add(ctx, END, i, i + 1); return; case '\n': token_add(ctx, NEWLINE, i, i + 1); continue; case ';': token_add(ctx, EOL, i, i + 1); continue; case '(': token_add(ctx, LPAREN, i, i + 1); continue; case ')': token_add(ctx, RPAREN, i, i + 1); continue; case '=': token_add(ctx, EQUAL, i, i + 1); continue; case ' ': ctx->column++; continue; default: break; } if (next_non_alnum(ctx, i) == ':') { // Type with param identifier size_t start_param = peek_alnum_to(ctx, i, ':') + 1; size_t end_param; if (peek_to_is_alnum(ctx, start_param, ' ')) { end_param = peek_alnum_to(ctx, start_param, ' '); } else { // Unnamed identifier ('_') end_param = peek_to(ctx, start_param, ' '); if (end_param - start_param != 1 || ctx->data[start_param] != '_') errln(ctx, "Invalid param identifier"); } token_add(ctx, TYPE, i, start_param - 1); token_add(ctx, TYPEDELIM, start_param - 1, start_param); token_add(ctx, PARAM, start_param, end_param); i = end_param - 1; continue; } if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier size_t end_ident = peek_alnum_to(ctx, i, ' '); token_add(ctx, IDENT, i, end_ident); i = end_ident - 1; } else { // Special/custom operator size_t end_operator = peek_special_to(ctx, i, ' '); token_add(ctx, OPERATOR, i, end_operator); i = end_operator - 1; } } context_rewind(ctx); }