about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marvin Borner 2021-05-13 12:03:43 +0200
committer Marvin Borner 2021-05-13 12:03:43 +0200
commit f181a8f04dfdfd8829861e0d0d549f39e40081e6 (patch)
tree d937ae185e4a0dd97fd61b65be0cee01ac85876b
parent 879663d7154201ace191425cbddb36dc18f39402 (diff)
New layout
-rw-r--r-- Makefile 13
-rw-r--r-- src/context.c 53
-rw-r--r-- src/inc/context.h 23
-rw-r--r-- src/inc/lint.h 8
-rw-r--r-- src/inc/log.h 9
-rw-r--r-- src/inc/preprocess.h 8
-rw-r--r-- src/inc/tokenize.h 35
-rw-r--r-- src/lint.c 20
-rw-r--r-- src/log.c 77
-rw-r--r-- src/main.c 432
-rw-r--r-- src/preprocess.c 46
-rw-r--r-- src/tokenize.c 175
12 files changed, 467 insertions, 432 deletions
diff --git a/Makefile b/Makefile
index 9fbaca1..6517207 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,13 @@
-CC = gcc
-CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined
-
SOURCEDIR = src
BUILDDIR = build
SOURCES = $(wildcard $(SOURCEDIR)/*.c)
OBJS = $(patsubst $(SOURCEDIR)/%.c, $(BUILDDIR)/%.o, $(SOURCES))
+CC = gcc
+CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined -I$(SOURCEDIR)/inc/
+
all: $(OBJS)
- @$(CC) -o ./$(BUILDDIR)/out $^ $(CFLAGS)
+ @$(CC) -o ./$(BUILDDIR)/out $(CFLAGS) $^
clean:
@$(RM) -rf $(BUILDDIR)
@@ -17,4 +17,7 @@ run: clean all
$(BUILDDIR)/%.o: $(SOURCEDIR)/%.c
@mkdir -p $(BUILDDIR)
- @$(CC) -c -o $@ $< $(CFLAGS)
+ @$(CC) -c -o $@ $(CFLAGS) $<
+
+sync:
+ @make --always-make --dry-run | grep -wE 'gcc|g\+\+' | grep -w '\-c' | jq -nR '[inputs|{directory:"$(PWD)", command:., file: match(" [^ ]+$$").string[1:]}]' > compile_commands.json
diff --git a/src/context.c b/src/context.c
new file mode 100644
index 0000000..3a98a64
--- /dev/null
+++ b/src/context.c
@@ -0,0 +1,53 @@
+#include <assert.h>
+#include <context.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tokenize.h>
+
+struct ctx *context_create(const char *path)
+{
+ struct ctx *ctx = calloc(1, sizeof(*ctx));
+ ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
+ ctx->path = path; // TODO: strdup?
+
+ FILE *file = fopen(path, "r");
+ assert(file);
+
+ // Find size of file
+ fseek(file, 0, SEEK_END);
+ ctx->size = ftell(file);
+ rewind(file);
+ assert(ctx->size);
+
+ ctx->raw = malloc(ctx->size + 1);
+ assert(ctx->raw);
+ fread(ctx->raw, 1, ctx->size, file);
+ fclose(file);
+
+ ctx->raw[ctx->size] = 0;
+
+ return ctx;
+}
+
+void context_destroy(struct ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ if (ctx->raw)
+ free(ctx->raw);
+
+ if (ctx->data && ctx->data != ctx->raw)
+ free(ctx->data);
+
+ if (ctx->tokens)
+ free(ctx->tokens);
+
+ free(ctx);
+}
+
+void context_rewind(struct ctx *ctx)
+{
+ ctx->line = 0;
+ ctx->column = 0;
+}
diff --git a/src/inc/context.h b/src/inc/context.h
new file mode 100644
index 0000000..81a2ca4
--- /dev/null
+++ b/src/inc/context.h
@@ -0,0 +1,23 @@
+#ifndef CONTEXT_H
+#define CONTEXT_H
+
+#include <string.h>
+
+struct ctx {
+ size_t line;
+ size_t column;
+ const char *path;
+
+ char *raw;
+ char *data;
+ size_t size;
+
+ size_t token_count;
+ struct token *tokens;
+};
+
+struct ctx *context_create(const char *path);
+void context_destroy(struct ctx *ctx);
+void context_rewind(struct ctx *ctx);
+
+#endif
diff --git a/src/inc/lint.h b/src/inc/lint.h
new file mode 100644
index 0000000..9c3f808
--- /dev/null
+++ b/src/inc/lint.h
@@ -0,0 +1,8 @@
+#ifndef LINT_H
+#define LINT_H
+
+#include <context.h>
+
+void lint(struct ctx *ctx);
+
+#endif
diff --git a/src/inc/log.h b/src/inc/log.h
new file mode 100644
index 0000000..4698f9c
--- /dev/null
+++ b/src/inc/log.h
@@ -0,0 +1,9 @@
+#ifndef LOG_H
+#define LOG_H
+
+#include <context.h>
+
+__attribute__((noreturn)) void errln(struct ctx *ctx, const char *fmt, ...);
+__attribute__((noreturn)) void err(const char *fmt, ...);
+
+#endif
diff --git a/src/inc/preprocess.h b/src/inc/preprocess.h
new file mode 100644
index 0000000..e57af10
--- /dev/null
+++ b/src/inc/preprocess.h
@@ -0,0 +1,8 @@
+#ifndef PREPROCESS_H
+#define PREPROCESS_H
+
+#include <context.h>
+
+void preprocess(struct ctx *ctx);
+
+#endif
diff --git a/src/inc/tokenize.h b/src/inc/tokenize.h
new file mode 100644
index 0000000..557da9c
--- /dev/null
+++ b/src/inc/tokenize.h
@@ -0,0 +1,35 @@
+#ifndef TOKENIZE_H
+#define TOKENIZE_H
+
+#include <context.h>
+
+#define TOKENS_MAX 4096
+
+enum token_type {
+ UNKNOWN,
+
+ TYPE,
+ TYPEDELIM,
+ PARAM,
+
+ IDENT,
+ OPERATOR,
+
+ LPAREN,
+ RPAREN,
+ EQUAL,
+
+ NEWLINE,
+ EOL,
+ END,
+};
+
+struct token {
+ enum token_type type;
+ size_t start, end;
+};
+
+void tokens_print(struct ctx *ctx);
+void tokenize(struct ctx *ctx);
+
+#endif
diff --git a/src/lint.c b/src/lint.c
new file mode 100644
index 0000000..576c554
--- /dev/null
+++ b/src/lint.c
@@ -0,0 +1,20 @@
+#include <lint.h>
+#include <log.h>
+#include <string.h>
+#include <tokenize.h>
+
+void lint(struct ctx *ctx)
+{
+ // Lint parens
+ int parens = 0;
+ for (size_t i = 1; i < ctx->token_count; i++) {
+ struct token *token = &ctx->tokens[i];
+ if (token->type == LPAREN)
+ parens++;
+ else if (token->type == RPAREN)
+ parens--;
+ }
+
+ if (parens != 0)
+ errln(ctx, "Invalid parens balance");
+}
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..efcf9db
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,77 @@
+#include <log.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static void context_print(FILE *fd, struct ctx *ctx)
+{
+ const char *data = ctx->data ? ctx->data : ctx->raw;
+
+ // Find line, column
+ size_t line = 0, column = 0, index = 0;
+ for (; index < ctx->size; index++) {
+ char cur = data[index];
+
+ column++;
+
+ if (line == ctx->line && column == ctx->column)
+ break;
+
+ if (cur == '\n') {
+ line++;
+ column = 0;
+ continue;
+ } else if (cur == '\0') {
+ fprintf(stderr, "Invalid context!");
+ context_destroy(ctx);
+ exit(1);
+ break;
+ }
+ }
+
+ if (++index >= ctx->size)
+ return; // Couldn't find context, idc?
+
+ fprintf(fd, "\x1B[1;36m%s:%ld:%ld:\x1B[0m '", ctx->path, ctx->line + 1, ctx->column + 1);
+
+ // Print line context
+ size_t start = ctx->column > 5 ? index - 5 : index;
+ size_t end = ctx->size - index > 5 ? index + 5 : index + 1;
+ for (size_t i = start; i < end; i++) {
+ if (i == index) {
+ fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
+ } else {
+ fprintf(fd, "%c", data[i]);
+ }
+ }
+ fprintf(fd, "': ");
+}
+
+void errln(struct ctx *ctx, const char *fmt, ...)
+{
+ context_print(stderr, ctx);
+
+ fprintf(stderr, "\x1B[1;31m");
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ fprintf(stderr, "\x1B[0m");
+
+ context_destroy(ctx);
+ exit(1);
+}
+
+void err(const char *fmt, ...)
+{
+ fprintf(stderr, "\x1B[1;31m");
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ fprintf(stderr, "\x1B[0m");
+
+ exit(1);
+}
diff --git a/src/main.c b/src/main.c
index db4b890..74cc3d6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,430 +1,8 @@
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/param.h>
-
-/**
- * Definitions
- */
-
-#define TOKENS_MAX 4096
-
-/**
- * Structures/enums
- */
-
-enum token_type {
- UNKNOWN,
-
- TYPE,
- TYPEDELIM,
- PARAM,
-
- IDENT,
- OPERATOR,
-
- LPAREN,
- RPAREN,
- EQUAL,
-
- NEWLINE,
- EOL,
- END,
-};
-
-struct token {
- enum token_type type;
- size_t start, end;
-};
-
-struct ctx {
- size_t line;
- size_t column;
- const char *path;
-
- char *raw;
- char *data;
- size_t size;
-
- size_t token_count;
- struct token *tokens;
-};
-
-/**
- * Contexts
- */
-
-static struct ctx *context_create(const char *path)
-{
- struct ctx *ctx = calloc(1, sizeof(*ctx));
- ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
- ctx->path = path; // TODO: strdup?
-
- FILE *file = fopen(path, "r");
- assert(file);
-
- // Find size of file
- fseek(file, 0, SEEK_END);
- ctx->size = ftell(file);
- rewind(file);
- assert(ctx->size);
-
- ctx->raw = malloc(ctx->size + 1);
- assert(ctx->raw);
- fread(ctx->raw, 1, ctx->size, file);
- fclose(file);
-
- ctx->raw[ctx->size] = 0;
-
- return ctx;
-}
-
-static void context_destroy(struct ctx *ctx)
-{
- if (!ctx)
- return;
-
- if (ctx->raw)
- free(ctx->raw);
-
- if (ctx->data && ctx->data != ctx->raw)
- free(ctx->data);
-
- if (ctx->tokens)
- free(ctx->tokens);
-
- free(ctx);
-}
-
-static void context_rewind(struct ctx *ctx)
-{
- ctx->line = 0;
- ctx->column = 0;
-}
-
-/**
- * Logging
- */
-
-static void context_print(FILE *fd, struct ctx *ctx)
-{
- const char *data = ctx->data ? ctx->data : ctx->raw;
-
- // Find line, column
- size_t line = 0, column = 0, index = 0;
- for (; index < ctx->size; index++) {
- char cur = data[index];
-
- column++;
-
- if (line == ctx->line && column == ctx->column)
- break;
-
- if (cur == '\n') {
- line++;
- column = 0;
- continue;
- } else if (cur == '\0') {
- fprintf(stderr, "Invalid context!");
- context_destroy(ctx);
- exit(1);
- break;
- }
- }
-
- if (++index >= ctx->size)
- return; // Couldn't find context, idc?
-
- fprintf(fd, "\x1B[1;36m%s:%ld:%ld:\x1B[0m '", ctx->path, ctx->line + 1, ctx->column + 1);
-
- // Print line context
- size_t start = ctx->column > 5 ? index - 5 : index;
- size_t end = ctx->size - index > 5 ? index + 5 : index + 1;
- for (size_t i = start; i < end; i++) {
- if (i == index) {
- fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
- } else {
- fprintf(fd, "%c", data[i]);
- }
- }
- fprintf(fd, "': ");
-}
-
-static __attribute__((noreturn)) void errln(struct ctx *ctx, const char *fmt, ...)
-{
- context_print(stderr, ctx);
-
- fprintf(stderr, "\x1B[1;31m");
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
- va_end(ap);
- fprintf(stderr, "\x1B[0m");
-
- context_destroy(ctx);
- exit(1);
-}
-
-static __attribute__((noreturn)) void err(const char *fmt, ...)
-{
- fprintf(stderr, "\x1B[1;31m");
- va_list ap;
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
- va_end(ap);
- fprintf(stderr, "\x1B[0m");
-
- exit(1);
-}
-
-/**
- * Preprocessor
- */
-
-static void preprocess_erase(struct ctx *ctx, size_t start)
-{
- assert(ctx->raw[start] == '#');
-
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->raw[i];
- if (cur == '\n' || cur == '\0')
- break;
-
- ctx->raw[i] = ' '; // Spaces get skipped by tokenizer anyways
- }
-}
-
-static void preprocess(struct ctx *ctx)
-{
- for (size_t i = 0; i < ctx->size; i++) {
- const char cur = ctx->raw[i];
-
- ctx->column++;
-
- if (cur == '\n') {
- ctx->line++;
- ctx->column = 0;
- continue;
- } else if (cur == '\0') {
- break;
- } else if (cur == '#' && ctx->column == 1) {
- if (strncmp(ctx->raw + i + 1, "inc ", MIN(4, ctx->size - i)) == 0) {
- // TODO: Add include features
- } else {
- errln(ctx, "Invalid preprocessing directive");
- }
- preprocess_erase(ctx, i);
- }
- }
-
- ctx->data = ctx->raw;
- ctx->line = 0;
- ctx->column = 0;
-}
-
-/**
- * Tokenizer
- */
-
-static char next_non_alnum(struct ctx *ctx, size_t start)
-{
- for (size_t i = start; i < ctx->size; i++)
- if (!isalnum(ctx->data[i]))
- return ctx->data[i];
-
- errln(ctx, "Unexpected end of buffer");
-}
-
-static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
-{
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
-
- if (cur == ch || cur == ';' || cur == ')')
- return true;
-
- if (!isalnum(cur))
- return false;
- }
-
- errln(ctx, "Unexpected end of buffer");
-}
-
-static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
-{
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
-
- if (cur == ch || cur == ';' || cur == ')')
- return i;
-
- if (!isalnum(cur))
- errln(ctx, "'%c' is not alpha-numeric", cur);
- }
-
- errln(ctx, "Unexpected end of buffer");
-}
-
-static size_t peek_special_to(struct ctx *ctx, size_t start, char ch)
-{
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
-
- if (cur == ch || cur == ';' || cur == ')')
- return i;
-
- if (isalnum(cur) || cur < '!' || cur > '~')
- errln(ctx, "'%c' is not special", cur);
- }
-
- errln(ctx, "Unexpected end of buffer");
-}
-
-static size_t peek_to(struct ctx *ctx, size_t start, char ch)
-{
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
-
- if (cur == ch || cur == ';' || cur == ')')
- return i;
- }
-
- errln(ctx, "Unexpected end of buffer");
-}
-
-static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
-{
- struct token token = { 0 };
- token.type = type;
- token.start = start;
- token.end = end;
-
- assert(++ctx->token_count < TOKENS_MAX);
- ctx->tokens[ctx->token_count] = token;
-
- if (type == NEWLINE) {
- ctx->line++;
- ctx->column = 0;
- } else {
- ctx->column += end - start;
- }
-}
-
-static void token_print(struct ctx *ctx, struct token *token)
-{
- assert(token->type != UNKNOWN);
-
- printf("[token type=%d] '", token->type);
- if (token->type == NEWLINE || token->type == END) {
- printf("' (Unprintable)\n");
- return;
- }
-
- for (size_t i = token->start; i < token->end; i++)
- printf("%c", ctx->data[i]);
- printf("'\n");
-}
-
-static void tokens_print(struct ctx *ctx)
-{
- for (size_t i = 1; i < ctx->token_count; i++)
- token_print(ctx, &ctx->tokens[i]);
-}
-
-static void tokenize(struct ctx *ctx)
-{
- for (size_t i = 0; i < ctx->size; i++) {
- const char cur = ctx->data[i];
-
- switch (cur) {
- case '\0':
- token_add(ctx, END, i, i + 1);
- return;
- case '\n':
- token_add(ctx, NEWLINE, i, i + 1);
- continue;
- case ';':
- token_add(ctx, EOL, i, i + 1);
- continue;
- case '(':
- token_add(ctx, LPAREN, i, i + 1);
- continue;
- case ')':
- token_add(ctx, RPAREN, i, i + 1);
- continue;
- case '=':
- token_add(ctx, EQUAL, i, i + 1);
- continue;
- case ' ':
- ctx->column++;
- continue;
- default:
- break;
- }
-
- if (next_non_alnum(ctx, i) == ':') { // Type with param identifier
- size_t start_param = peek_alnum_to(ctx, i, ':') + 1;
- size_t end_param;
- if (peek_to_is_alnum(ctx, start_param, ' ')) {
- end_param = peek_alnum_to(ctx, start_param, ' ');
- } else { // Unnamed identifier ('_')
- end_param = peek_to(ctx, start_param, ' ');
- if (end_param - start_param != 1 || ctx->data[start_param] != '_')
- errln(ctx, "Invalid param identifier");
- }
-
- token_add(ctx, TYPE, i, start_param - 1);
- token_add(ctx, TYPEDELIM, start_param - 1, start_param);
- token_add(ctx, PARAM, start_param, end_param);
-
- i = end_param - 1;
- continue;
- }
-
- if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier
- size_t end_ident = peek_alnum_to(ctx, i, ' ');
- token_add(ctx, IDENT, i, end_ident);
- i = end_ident - 1;
- } else { // Special/custom operator
- size_t end_operator = peek_special_to(ctx, i, ' ');
- token_add(ctx, OPERATOR, i, end_operator);
- i = end_operator - 1;
- }
- }
-
- context_rewind(ctx);
-}
-
-/**
- * Linter
- */
-
-static void lint(struct ctx *ctx)
-{
- // Lint parens
- int parens = 0;
- for (size_t i = 1; i < ctx->token_count; i++) {
- struct token *token = &ctx->tokens[i];
- if (token->type == LPAREN)
- parens++;
- else if (token->type == RPAREN)
- parens--;
- }
-
- if (parens != 0)
- errln(ctx, "Invalid parens balance");
-}
-
-/**
- * Main
- */
+#include <context.h>
+#include <lint.h>
+#include <log.h>
+#include <preprocess.h>
+#include <tokenize.h>
int main(int argc, char *argv[])
{
diff --git a/src/preprocess.c b/src/preprocess.c
new file mode 100644
index 0000000..673bd20
--- /dev/null
+++ b/src/preprocess.c
@@ -0,0 +1,46 @@
+#include <assert.h>
+#include <log.h>
+#include <preprocess.h>
+#include <string.h>
+#include <sys/param.h>
+
+static void preprocess_erase(struct ctx *ctx, size_t start)
+{
+ assert(ctx->raw[start] == '#');
+
+ for (size_t i = start; i < ctx->size; i++) {
+ char cur = ctx->raw[i];
+ if (cur == '\n' || cur == '\0')
+ break;
+
+ ctx->raw[i] = ' '; // Spaces get skipped by tokenizer anyways
+ }
+}
+
+void preprocess(struct ctx *ctx)
+{
+ for (size_t i = 0; i < ctx->size; i++) {
+ const char cur = ctx->raw[i];
+
+ ctx->column++;
+
+ if (cur == '\n') {
+ ctx->line++;
+ ctx->column = 0;
+ continue;
+ } else if (cur == '\0') {
+ break;
+ } else if (cur == '#' && ctx->column == 1) {
+ if (strncmp(ctx->raw + i + 1, "inc ", MIN(4, ctx->size - i)) == 0) {
+ // TODO: Add include features
+ } else {
+ errln(ctx, "Invalid preprocessing directive");
+ }
+ preprocess_erase(ctx, i);
+ }
+ }
+
+ ctx->data = ctx->raw;
+ ctx->line = 0;
+ ctx->column = 0;
+}
diff --git a/src/tokenize.c b/src/tokenize.c
new file mode 100644
index 0000000..3424454
--- /dev/null
+++ b/src/tokenize.c
@@ -0,0 +1,175 @@
+#include <assert.h>
+#include <ctype.h>
+#include <log.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <tokenize.h>
+
+static char next_non_alnum(struct ctx *ctx, size_t start)
+{
+ for (size_t i = start; i < ctx->size; i++)
+ if (!isalnum(ctx->data[i]))
+ return ctx->data[i];
+
+ errln(ctx, "Unexpected end of buffer");
+}
+
+static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
+{
+ for (size_t i = start; i < ctx->size; i++) {
+ char cur = ctx->data[i];
+
+ if (cur == ch || cur == ';' || cur == ')')
+ return true;
+
+ if (!isalnum(cur))
+ return false;
+ }
+
+ errln(ctx, "Unexpected end of buffer");
+}
+
+static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
+{
+ for (size_t i = start; i < ctx->size; i++) {
+ char cur = ctx->data[i];
+
+ if (cur == ch || cur == ';' || cur == ')')
+ return i;
+
+ if (!isalnum(cur))
+ errln(ctx, "'%c' is not alpha-numeric", cur);
+ }
+
+ errln(ctx, "Unexpected end of buffer");
+}
+
+static size_t peek_special_to(struct ctx *ctx, size_t start, char ch)
+{
+ for (size_t i = start; i < ctx->size; i++) {
+ char cur = ctx->data[i];
+
+ if (cur == ch || cur == ';' || cur == ')')
+ return i;
+
+ if (isalnum(cur) || cur < '!' || cur > '~')
+ errln(ctx, "'%c' is not special", cur);
+ }
+
+ errln(ctx, "Unexpected end of buffer");
+}
+
+static size_t peek_to(struct ctx *ctx, size_t start, char ch)
+{
+ for (size_t i = start; i < ctx->size; i++) {
+ char cur = ctx->data[i];
+
+ if (cur == ch || cur == ';' || cur == ')')
+ return i;
+ }
+
+ errln(ctx, "Unexpected end of buffer");
+}
+
+static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
+{
+ struct token token = { 0 };
+ token.type = type;
+ token.start = start;
+ token.end = end;
+
+ assert(++ctx->token_count < TOKENS_MAX);
+ ctx->tokens[ctx->token_count] = token;
+
+ if (type == NEWLINE) {
+ ctx->line++;
+ ctx->column = 0;
+ } else {
+ ctx->column += end - start;
+ }
+}
+
+static void token_print(struct ctx *ctx, struct token *token)
+{
+ assert(token->type != UNKNOWN);
+
+ printf("[token type=%d] '", token->type);
+ if (token->type == NEWLINE || token->type == END) {
+ printf("' (Unprintable)\n");
+ return;
+ }
+
+ for (size_t i = token->start; i < token->end; i++)
+ printf("%c", ctx->data[i]);
+ printf("'\n");
+}
+
+void tokens_print(struct ctx *ctx)
+{
+ for (size_t i = 1; i < ctx->token_count; i++)
+ token_print(ctx, &ctx->tokens[i]);
+}
+
+void tokenize(struct ctx *ctx)
+{
+ for (size_t i = 0; i < ctx->size; i++) {
+ const char cur = ctx->data[i];
+
+ switch (cur) {
+ case '\0':
+ token_add(ctx, END, i, i + 1);
+ return;
+ case '\n':
+ token_add(ctx, NEWLINE, i, i + 1);
+ continue;
+ case ';':
+ token_add(ctx, EOL, i, i + 1);
+ continue;
+ case '(':
+ token_add(ctx, LPAREN, i, i + 1);
+ continue;
+ case ')':
+ token_add(ctx, RPAREN, i, i + 1);
+ continue;
+ case '=':
+ token_add(ctx, EQUAL, i, i + 1);
+ continue;
+ case ' ':
+ ctx->column++;
+ continue;
+ default:
+ break;
+ }
+
+ if (next_non_alnum(ctx, i) == ':') { // Type with param identifier
+ size_t start_param = peek_alnum_to(ctx, i, ':') + 1;
+ size_t end_param;
+ if (peek_to_is_alnum(ctx, start_param, ' ')) {
+ end_param = peek_alnum_to(ctx, start_param, ' ');
+ } else { // Unnamed identifier ('_')
+ end_param = peek_to(ctx, start_param, ' ');
+ if (end_param - start_param != 1 || ctx->data[start_param] != '_')
+ errln(ctx, "Invalid param identifier");
+ }
+
+ token_add(ctx, TYPE, i, start_param - 1);
+ token_add(ctx, TYPEDELIM, start_param - 1, start_param);
+ token_add(ctx, PARAM, start_param, end_param);
+
+ i = end_param - 1;
+ continue;
+ }
+
+ if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier
+ size_t end_ident = peek_alnum_to(ctx, i, ' ');
+ token_add(ctx, IDENT, i, end_ident);
+ i = end_ident - 1;
+ } else { // Special/custom operator
+ size_t end_operator = peek_special_to(ctx, i, ' ');
+ token_add(ctx, OPERATOR, i, end_operator);
+ i = end_operator - 1;
+ }
+ }
+
+ context_rewind(ctx);
+}