/*
 * NOTE(review): the header names were stripped from the mangled source (only
 * nine bare "#include" tokens survived). This list is reconstructed from the
 * identifiers the code uses -- confirm against the original file.
 */
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h> /* MIN() */

/**
 * Definitions
 */

/* Capacity of the token array allocated for every context. */
#define TOKENS_MAX 4096

/**
 * Structures/enums
 */

/* Kinds of tokens produced by the tokenizer. UNKNOWN is the zero value. */
enum token_type {
	UNKNOWN,
	TYPE,
	TYPEDELIM,
	PARAM,
	IDENT,
	OPERATOR,
	LPAREN,
	RPAREN,
	EQUAL,
	NEWLINE,
	EOL,
	END,
};

/* A token is a half-open byte range [start, end) into ctx->data. */
struct token {
	enum token_type type;
	size_t start, end;
};

/* Everything known about one source file being processed. */
struct ctx {
	size_t line;          /* current 0-based line, used for diagnostics */
	size_t column;        /* current column, used for diagnostics */
	const char *path;     /* borrowed from the caller, never freed here */
	char *raw;            /* file contents, NUL-terminated at raw[size] */
	char *data;           /* preprocessed view; aliases raw after preprocess() */
	size_t size;          /* number of bytes in raw (terminator excluded) */
	size_t token_count;   /* number of tokens recorded so far */
	struct token *tokens; /* array of TOKENS_MAX tokens */
};

/* Defined in the logging section below; needed by context_create(). */
static void err(const char *fmt, ...)
	__attribute__((noreturn, format(printf, 1, 2)));

/**
 * Contexts
 */

/*
 * Reads the file at `path` into a freshly allocated context.
 *
 * The returned context owns `raw` and `tokens`; `path` is only borrowed and
 * must outlive the context. Any failure (unopenable, unseekable, empty or
 * unreadable file, out of memory) prints a message through err() and exits,
 * so the function never returns NULL.
 */
static struct ctx *context_create(const char *path)
{
	FILE *file = fopen(path, "r");
	if (!file)
		err("Couldn't open '%s'", path);

	// Find size of file
	long length = -1;
	if (fseek(file, 0, SEEK_END) == 0)
		length = ftell(file);
	if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
		fclose(file);
		err("Couldn't determine size of '%s'", path);
	}
	if (length == 0) {
		fclose(file);
		err("'%s' is empty", path);
	}

	struct ctx *ctx = calloc(1, sizeof(*ctx));
	struct token *tokens = calloc(TOKENS_MAX, sizeof(*tokens));
	char *raw = malloc((size_t)length + 1);
	if (!ctx || !tokens || !raw) {
		free(raw);
		free(tokens);
		free(ctx);
		fclose(file);
		err("Out of memory while loading '%s'", path);
	}

	// A text-mode read may legitimately yield fewer bytes than the seek
	// offset suggested, so the byte count actually read becomes the size.
	size_t got = fread(raw, 1, (size_t)length, file);
	bool failed = ferror(file) != 0 || got == 0;
	fclose(file);
	if (failed) {
		free(raw);
		free(tokens);
		free(ctx);
		err("Couldn't read '%s'", path);
	}
	raw[got] = 0;

	ctx->path = path; // TODO: strdup?
	ctx->tokens = tokens;
	ctx->raw = raw;
	ctx->size = got;
	return ctx;
}

/*
 * Releases a context and the buffers it owns. Accepts NULL.
 */
static void context_destroy(struct ctx *ctx)
{
	if (!ctx)
		return;

	// data aliases raw once preprocess() has run; free it only when distinct
	if (ctx->data != ctx->raw)
		free(ctx->data);
	free(ctx->raw);
	free(ctx->tokens);
	free(ctx);
}

/*
 * Resets the diagnostic position to the start of the file.
 */
static void context_rewind(struct ctx *ctx)
{
	ctx->line = 0;
	ctx->column = 0;
}

/**
 * Logging
 */

/*
 * Writes a "path:line:column: '<excerpt>': " prefix to `fd`.
 *
 * The buffer (data if set, otherwise raw) is scanned for the byte at
 * ctx->line / ctx->column; the excerpt shows a few bytes around it with the
 * byte following that position highlighted. If the position cannot be found,
 * or lies at the very end of the buffer, nothing is printed. Hitting a NUL
 * byte before the position is reached is treated as fatal: the context is
 * destroyed and the process exits.
 */
static void context_print(FILE *fd, struct ctx *ctx)
{
	const char *data = ctx->data ? ctx->data : ctx->raw;

	// Find line, column
	size_t line = 0, column = 0, index = 0;
	for (; index < ctx->size; index++) {
		char cur = data[index];

		column++;
		if (line == ctx->line && column == ctx->column)
			break;

		if (cur == '\n') {
			line++;
			column = 0;
		} else if (cur == '\0') {
			fprintf(stderr, "Invalid context!");
			context_destroy(ctx);
			exit(1);
		}
	}

	if (++index >= ctx->size)
		return; // Couldn't find context, idc?

	// %zu is the matching conversion for size_t
	fprintf(fd, "\x1B[1;36m%s:%zu:%zu:\x1B[0m '", ctx->path, ctx->line + 1,
		ctx->column + 1);

	// Print line context
	size_t start = ctx->column > 5 ? index - 5 : index;
	size_t end = ctx->size - index > 5 ? index + 5 : index + 1;
	for (size_t i = start; i < end; i++) {
		if (i == index)
			fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
		else
			fprintf(fd, "%c", data[i]);
	}

	fprintf(fd, "': ");
}

/*
 * Reports a fatal, position-annotated error: prints the context prefix and
 * the printf-style message in red to stderr, destroys `ctx`, and exits with
 * status 1. Never returns.
 */
static __attribute__((noreturn, format(printf, 2, 3))) void
errln(struct ctx *ctx, const char *fmt, ...)
{
	context_print(stderr, ctx);

	fprintf(stderr, "\x1B[1;31m");
	va_list ap;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	fprintf(stderr, "\n");
	va_end(ap);
	fprintf(stderr, "\x1B[0m");

	context_destroy(ctx);
	exit(1);
}

/*
 * Reports a fatal error that has no source position: prints the printf-style
 * message in red to stderr and exits with status 1. Never returns.
 */
static __attribute__((noreturn)) void err(const char *fmt, ...)
{
	fprintf(stderr, "\x1B[1;31m");
	va_list ap;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	fprintf(stderr, "\n");
	va_end(ap);
	fprintf(stderr, "\x1B[0m");

	exit(1);
}

/**
 * Preprocessor
 */

/*
 * Blanks out the directive starting at raw[start] (which must be '#') up to,
 * but not including, the next newline or NUL.
 */
static void preprocess_erase(struct ctx *ctx, size_t start)
{
	assert(ctx->raw[start] == '#');

	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->raw[i];
		if (cur == '\n' || cur == '\0')
			break;
		ctx->raw[i] = ' '; // Spaces get skipped by tokenizer anyways
	}
}

/*
 * Handles every '#' found in the first column of a line: "#inc " directives
 * are accepted (and currently ignored), anything else is a fatal error. The
 * directive text is then overwritten with spaces. Afterwards ctx->data points
 * at the processed buffer and the diagnostic position is reset.
 */
static void preprocess(struct ctx *ctx)
{
	for (size_t i = 0; i < ctx->size; i++) {
		const char cur = ctx->raw[i];
		ctx->column++;

		if (cur == '\n') {
			ctx->line++;
			ctx->column = 0;
			continue;
		}
		if (cur == '\0')
			break;
		if (cur != '#' || ctx->column != 1)
			continue;

		if (strncmp(ctx->raw + i + 1, "inc ", MIN(4, ctx->size - i)) == 0) {
			// TODO: Add include features
		} else {
			errln(ctx, "Invalid preprocessing directive");
		}
		preprocess_erase(ctx, i);
	}

	ctx->data = ctx->raw;
	ctx->line = 0;
	ctx->column = 0;
}

/**
 * Tokenizer
 */

/*
 * Returns the first non-alphanumeric byte at or after data[start]; running
 * off the end of the buffer is a fatal error.
 */
static char next_non_alnum(struct ctx *ctx, size_t start)
{
	for (size_t i = start; i < ctx->size; i++) {
		// <ctype.h> functions require a value representable as unsigned char
		if (!isalnum((unsigned char)ctx->data[i]))
			return ctx->data[i];
	}

	errln(ctx, "Unexpected end of buffer");
}

/*
 * Returns true if only alphanumeric bytes lie between data[start] and the
 * next terminator (`ch`, ';' or ')'), false if some other byte comes first;
 * running off the end of the buffer is a fatal error.
 */
static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
{
	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->data[i];

		if (cur == ch || cur == ';' || cur == ')')
			return true;

		if (!isalnum((unsigned char)cur))
			return false;
	}

	errln(ctx, "Unexpected end of buffer");
}
static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return i; if (!isalnum(cur)) errln(ctx, "'%c' is not alpha-numeric", cur); } errln(ctx, "Unexpected end of buffer"); } static size_t peek_special_to(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return i; if (isalnum(cur) || cur < '!' || cur > '~') errln(ctx, "'%c' is not special", cur); } errln(ctx, "Unexpected end of buffer"); } static size_t peek_to(struct ctx *ctx, size_t start, char ch) { for (size_t i = start; i < ctx->size; i++) { char cur = ctx->data[i]; if (cur == ch || cur == ';' || cur == ')') return i; } errln(ctx, "Unexpected end of buffer"); } static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end) { struct token token = { 0 }; token.type = type; token.start = start; token.end = end; assert(++ctx->token_count < TOKENS_MAX); ctx->tokens[ctx->token_count] = token; if (type == NEWLINE) { ctx->line++; ctx->column = 0; } else { ctx->column += end - start; } } static void token_print(struct ctx *ctx, struct token *token) { assert(token->type != UNKNOWN); printf("[token type=%d] '", token->type); if (token->type == NEWLINE || token->type == END) { printf("' (Unprintable)\n"); return; } for (size_t i = token->start; i < token->end; i++) printf("%c", ctx->data[i]); printf("'\n"); } static void tokens_print(struct ctx *ctx) { for (size_t i = 1; i < ctx->token_count; i++) token_print(ctx, &ctx->tokens[i]); } static void tokenize(struct ctx *ctx) { for (size_t i = 0; i < ctx->size; i++) { const char cur = ctx->data[i]; switch (cur) { case '\0': token_add(ctx, END, i, i + 1); return; case '\n': token_add(ctx, NEWLINE, i, i + 1); continue; case ';': token_add(ctx, EOL, i, i + 1); continue; case '(': token_add(ctx, LPAREN, i, i + 1); continue; case 
')': token_add(ctx, RPAREN, i, i + 1); continue; case '=': token_add(ctx, EQUAL, i, i + 1); continue; case ' ': ctx->column++; continue; default: break; } if (next_non_alnum(ctx, i) == ':') { // Type with param identifier size_t start_param = peek_alnum_to(ctx, i, ':') + 1; size_t end_param; if (peek_to_is_alnum(ctx, start_param, ' ')) { end_param = peek_alnum_to(ctx, start_param, ' '); } else { // Unnamed identifier ('_') end_param = peek_to(ctx, start_param, ' '); if (end_param - start_param != 1 || ctx->data[start_param] != '_') errln(ctx, "Invalid param identifier"); } token_add(ctx, TYPE, i, start_param - 1); token_add(ctx, TYPEDELIM, start_param - 1, start_param); token_add(ctx, PARAM, start_param, end_param); i = end_param - 1; continue; } if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier size_t end_ident = peek_alnum_to(ctx, i, ' '); token_add(ctx, IDENT, i, end_ident); i = end_ident - 1; } else { // Special/custom operator size_t end_operator = peek_special_to(ctx, i, ' '); token_add(ctx, OPERATOR, i, end_operator); i = end_operator - 1; } } context_rewind(ctx); } /** * Linter */ static void lint(struct ctx *ctx) { // Lint parens int parens = 0; for (size_t i = 1; i < ctx->token_count; i++) { struct token *token = &ctx->tokens[i]; if (token->type == LPAREN) parens++; else if (token->type == RPAREN) parens--; } if (parens != 0) errln(ctx, "Invalid parens balance"); } /** * Main */ int main(int argc, char *argv[]) { if (argc < 2) err("Not enough arguments!"); struct ctx *ctx = context_create(argv[1]); preprocess(ctx); tokenize(ctx); lint(ctx); tokens_print(ctx); context_destroy(ctx); return 0; }