New layout

author: Marvin Borner 2021-05-13 12:03:43 +0200
committer: Marvin Borner 2021-05-13 12:03:43 +0200
commit: f181a8f04dfdfd8829861e0d0d549f39e40081e6 (patch)
tree: d937ae185e4a0dd97fd61b65be0cee01ac85876b
parent: 879663d7154201ace191425cbddb36dc18f39402 (diff)
12 files changed, 467 insertions, 432 deletions
diff --git a/Makefile b/Makefile
index 9fbaca1..6517207 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,13 @@
-CC = gcc
-CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined
-
 SOURCEDIR = src
 BUILDDIR = build
 SOURCES = $(wildcard $(SOURCEDIR)/*.c)
 OBJS = $(patsubst $(SOURCEDIR)/%.c, $(BUILDDIR)/%.o, $(SOURCES))
 
+CC = gcc
+CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined -I$(SOURCEDIR)/inc/
+
 all: $(OBJS)
-	@$(CC) -o ./$(BUILDDIR)/out $^ $(CFLAGS)
+	@$(CC) -o ./$(BUILDDIR)/out $(CFLAGS) $^
 
 clean:
 	@$(RM) -rf $(BUILDDIR)
@@ -17,4 +17,11 @@ run: clean all
 
 $(BUILDDIR)/%.o: $(SOURCEDIR)/%.c
 	@mkdir -p $(BUILDDIR)
-	@$(CC) -c -o $@ $< $(CFLAGS)
+	@$(CC) -c -o $@ $(CFLAGS) $<
+
+# Regenerates compile_commands.json from a dry run of the full build (needs
+# jq). $(MAKE) re-invokes the running make, and $(CURDIR) is the directory
+# make itself works in, so `make -C <dir> sync` records the right paths.
+.PHONY: sync
+sync:
+	@$(MAKE) --always-make --dry-run | grep -wE 'gcc|g\+\+' | grep -w '\-c' | jq -nR '[inputs|{directory:"$(CURDIR)", command:., file: match(" [^ ]+$$").string[1:]}]' > compile_commands.json
diff --git a/src/context.c b/src/context.c
new file mode 100644
index 0000000..3a98a64
--- /dev/null
+++ b/src/context.c
@@ -0,0 +1,89 @@
+#include <context.h>
+#include <log.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <tokenize.h>
+
+// Releases ctx and the open file (if any), then exits with "<path>: <reason>"
+static __attribute__((noreturn)) void context_fail(struct ctx *ctx, FILE *file, const char *reason)
+{
+	const char *path = ctx->path; // Not owned by ctx, so it survives the destroy
+	if (file)
+		fclose(file);
+	context_destroy(ctx);
+	err("%s: %s", path, reason);
+}
+
+/**
+ * Creates a context for the file at `path` and loads its contents into the
+ * NUL-terminated buffer ctx->raw (ctx->size doesn't count the terminator).
+ *
+ * `path` is stored by reference, not copied, so it has to outlive the context.
+ * Allocation failures, I/O errors and empty files are fatal: they are reported
+ * through err(), which exits the process, so the result is never NULL.
+ */
+struct ctx *context_create(const char *path)
+{
+	struct ctx *ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		err("%s: Out of memory", path);
+
+	ctx->path = path; // TODO: strdup?
+	ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
+	if (!ctx->tokens)
+		context_fail(ctx, NULL, "Out of memory");
+
+	FILE *file = fopen(path, "r");
+	if (!file)
+		context_fail(ctx, NULL, "Couldn't open file");
+
+	// Find size of file; ftell() signals failure with -1
+	long size = -1;
+	if (fseek(file, 0, SEEK_END) == 0)
+		size = ftell(file);
+	if (size < 0)
+		context_fail(ctx, file, "Couldn't determine file size");
+	if (size == 0)
+		context_fail(ctx, file, "File is empty");
+	rewind(file);
+
+	ctx->raw = malloc((size_t)size + 1);
+	if (!ctx->raw)
+		context_fail(ctx, file, "Out of memory");
+
+	// Trust the number of bytes actually read over the size measured above
+	ctx->size = fread(ctx->raw, 1, (size_t)size, file);
+	if (ferror(file))
+		context_fail(ctx, file, "Couldn't read file");
+	fclose(file);
+
+	ctx->raw[ctx->size] = 0;
+
+	return ctx;
+}
+
+/**
+ * Frees ctx together with its buffers and token array; NULL is ignored.
+ */
+void context_destroy(struct ctx *ctx)
+{
+	if (!ctx)
+		return;
+
+	// data may alias raw (preprocess() sets it up that way): free it only once
+	if (ctx->data != ctx->raw)
+		free(ctx->data);
+
+	free(ctx->raw);
+	free(ctx->tokens);
+	free(ctx);
+}
+
+/**
+ * Sets the tracked line and column back to 0.
+ */
+void context_rewind(struct ctx *ctx)
+{
+	ctx->line = 0;
+	ctx->column = 0;
+}
diff --git a/src/inc/context.h b/src/inc/context.h
new file mode 100644
index 0000000..81a2ca4
--- /dev/null
+++ b/src/inc/context.h
@@ -0,0 +1,23 @@
+#ifndef CONTEXT_H
+#define CONTEXT_H
+
+#include <string.h>
+
+struct ctx { // State of one source file, handed from pass to pass
+	size_t line; // Tracked position; context_rewind() sets it back to 0
+	size_t column;
+	const char *path; // Stored as given to context_create(), not copied
+
+	char *raw; // File contents, NUL-terminated
+	char *data; // Text read by the tokenizer; preprocess() points it at raw
+	size_t size; // Number of bytes in raw, not counting the terminator
+
+	size_t token_count; // Index of the most recently added token
+	struct token *tokens; // TOKENS_MAX slots; slot 0 is never filled
+};
+
+struct ctx *context_create(const char *path); // Loads the file at path
+void context_destroy(struct ctx *ctx); // Frees ctx and its buffers; NULL is ignored
+void context_rewind(struct ctx *ctx); // Sets line and column back to 0
+
+#endif
diff --git a/src/inc/lint.h b/src/inc/lint.h
new file mode 100644
index 0000000..9c3f808
--- /dev/null
+++ b/src/inc/lint.h
@@ -0,0 +1,8 @@
+#ifndef LINT_H
+#define LINT_H
+
+#include <context.h>
+
+void lint(struct ctx *ctx); // Exits via errln() when parentheses don't balance
+
+#endif
diff --git a/src/inc/log.h b/src/inc/log.h
new file mode 100644
index 0000000..4698f9c
--- /dev/null
+++ b/src/inc/log.h
@@ -0,0 +1,15 @@
+#ifndef LOG_H
+#define LOG_H
+
+#include <context.h>
+
+/**
+ * Both functions print a printf-style message to stderr and exit with
+ * status 1. errln() first prints the source location stored in ctx (when it
+ * can be found) and destroys ctx before exiting. The format attribute lets
+ * the compiler check the arguments against the format string.
+ */
+__attribute__((noreturn, format(printf, 2, 3))) void errln(struct ctx *ctx, const char *fmt, ...);
+__attribute__((noreturn, format(printf, 1, 2))) void err(const char *fmt, ...);
+
+#endif
diff --git a/src/inc/preprocess.h b/src/inc/preprocess.h
new file mode 100644
index 0000000..e57af10
--- /dev/null
+++ b/src/inc/preprocess.h
@@ -0,0 +1,8 @@
+#ifndef PREPROCESS_H
+#define PREPROCESS_H
+
+#include <context.h>
+
+void preprocess(struct ctx *ctx); // Checks and blanks out '#' lines, then points data at raw
+
+#endif
diff --git a/src/inc/tokenize.h b/src/inc/tokenize.h
new file mode 100644
index 0000000..557da9c
--- /dev/null
+++ b/src/inc/tokenize.h
@@ -0,0 +1,35 @@
+#ifndef TOKENIZE_H
+#define TOKENIZE_H
+
+#include <context.h>
+
+#define TOKENS_MAX 4096 // Number of slots allocated for ctx->tokens
+
+enum token_type {
+	UNKNOWN, // Zero value; never produced by tokenize()
+
+	TYPE, // Alphanumeric name in front of a ':'
+	TYPEDELIM, // The ':' itself
+	PARAM, // Name after the ':', either alphanumeric or a single '_'
+
+	IDENT, // Alphanumeric word
+	OPERATOR, // Run of printable, non-alphanumeric characters
+
+	LPAREN, // '('
+	RPAREN, // ')'
+	EQUAL, // '='
+
+	NEWLINE, // '\n'
+	EOL, // ';'
+	END, // '\0'
+};
+
+struct token {
+	enum token_type type;
+	size_t start, end; // Byte range [start, end) within ctx->data
+};
+
+void tokens_print(struct ctx *ctx); // Dumps the token list to stdout
+void tokenize(struct ctx *ctx); // Fills ctx->tokens from ctx->data
+
+#endif
diff --git a/src/lint.c b/src/lint.c
new file mode 100644
index 0000000..576c554
--- /dev/null
+++ b/src/lint.c
@@ -0,0 +1,31 @@
+#include <lint.h>
+#include <log.h>
+#include <string.h>
+#include <tokenize.h>
+
+/**
+ * Verifies that every '(' has a matching ')' and that no ')' comes before
+ * its '('. A violation is reported through errln(), which does not return.
+ */
+void lint(struct ctx *ctx)
+{
+	// Lint parens
+	size_t depth = 0;
+
+	// Tokens occupy slots 1..token_count (inclusive); slot 0 stays unused
+	for (size_t i = 1; i <= ctx->token_count; i++) {
+		enum token_type type = ctx->tokens[i].type;
+
+		if (type == LPAREN) {
+			depth++;
+		} else if (type == RPAREN) {
+			// A ')' without an open '(' can't be repaired by later tokens
+			if (depth == 0)
+				errln(ctx, "Invalid parens balance");
+			depth--;
+		}
+	}
+
+	if (depth != 0)
+		errln(ctx, "Invalid parens balance");
+}
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..efcf9db
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,91 @@
+#include <log.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * Prints "<path>:<line>:<column>: '<snippet>': " for the position stored in
+ * ctx, highlighting one character of the snippet. Prints nothing when the
+ * position can't be located; a NUL byte met while searching is fatal.
+ */
+static void context_print(FILE *fd, struct ctx *ctx)
+{
+	const char *data = ctx->data ? ctx->data : ctx->raw;
+
+	// Find line, column
+	size_t line = 0, column = 0, index = 0;
+	for (; index < ctx->size; index++) {
+		char cur = data[index];
+
+		column++;
+
+		if (line == ctx->line && column == ctx->column)
+			break;
+
+		if (cur == '\n') {
+			line++;
+			column = 0;
+		} else if (cur == '\0') {
+			fprintf(stderr, "Invalid context!");
+			context_destroy(ctx);
+			exit(1);
+		}
+	}
+
+	// Step past the character the search stopped on
+	if (++index >= ctx->size)
+		return; // Couldn't find context, idc?
+
+	// line and column are size_t, which takes %zu rather than %ld
+	fprintf(fd, "\x1B[1;36m%s:%zu:%zu:\x1B[0m '", ctx->path, ctx->line + 1, ctx->column + 1);
+
+	// Print line context
+	size_t start = ctx->column > 5 ? index - 5 : index;
+	size_t end = ctx->size - index > 5 ? index + 5 : index + 1;
+	for (size_t i = start; i < end; i++) {
+		if (i == index)
+			fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
+		else
+			fprintf(fd, "%c", data[i]);
+	}
+	fprintf(fd, "': ");
+}
+
+// Prints the formatted message in red, then a newline, to stderr
+static __attribute__((format(printf, 1, 0))) void message_print(const char *fmt, va_list ap)
+{
+	fprintf(stderr, "\x1B[1;31m");
+	vfprintf(stderr, fmt, ap);
+	fprintf(stderr, "\n");
+	fprintf(stderr, "\x1B[0m");
+}
+
+/**
+ * Prints the location stored in ctx (when it can be found) followed by the
+ * printf-style message, then destroys ctx and exits with status 1.
+ */
+void errln(struct ctx *ctx, const char *fmt, ...)
+{
+	context_print(stderr, ctx);
+
+	va_list ap;
+	va_start(ap, fmt);
+	message_print(fmt, ap);
+	va_end(ap);
+
+	context_destroy(ctx);
+	exit(1);
+}
+
+/**
+ * Prints the printf-style message and exits with status 1.
+ */
+void err(const char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	message_print(fmt, ap);
+	va_end(ap);
+
+	exit(1);
+}
diff --git a/src/main.c b/src/main.c
index db4b890..74cc3d6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,430 +1,8 @@
-#include <assert.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/param.h>
-
-/**
- * Definitions
- */
-
-#define TOKENS_MAX 4096
-
-/**
- * Structures/enums
- */
-
-enum token_type {
-	UNKNOWN,
-
-	TYPE,
-	TYPEDELIM,
-	PARAM,
-
-	IDENT,
-	OPERATOR,
-
-	LPAREN,
-	RPAREN,
-	EQUAL,
-
-	NEWLINE,
-	EOL,
-	END,
-};
-
-struct token {
-	enum token_type type;
-	size_t start, end;
-};
-
-struct ctx {
-	size_t line;
-	size_t column;
-	const char *path;
-
-	char *raw;
-	char *data;
-	size_t size;
-
-	size_t token_count;
-	struct token *tokens;
-};
-
-/**
- * Contexts
- */
-
-static struct ctx *context_create(const char *path)
-{
-	struct ctx *ctx = calloc(1, sizeof(*ctx));
-	ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
-	ctx->path = path; // TODO: strdup?
-
-	FILE *file = fopen(path, "r");
-	assert(file);
-
-	// Find size of file
-	fseek(file, 0, SEEK_END);
-	ctx->size = ftell(file);
-	rewind(file);
-	assert(ctx->size);
-
-	ctx->raw = malloc(ctx->size + 1);
-	assert(ctx->raw);
-	fread(ctx->raw, 1, ctx->size, file);
-	fclose(file);
-
-	ctx->raw[ctx->size] = 0;
-
-	return ctx;
-}
-
-static void context_destroy(struct ctx *ctx)
-{
-	if (!ctx)
-		return;
-
-	if (ctx->raw)
-		free(ctx->raw);
-
-	if (ctx->data && ctx->data != ctx->raw)
-		free(ctx->data);
-
-	if (ctx->tokens)
-		free(ctx->tokens);
-
-	free(ctx);
-}
-
-static void context_rewind(struct ctx *ctx)
-{
-	ctx->line = 0;
-	ctx->column = 0;
-}
-
-/**
- * Logging
- */
-
-static void context_print(FILE *fd, struct ctx *ctx)
-{
-	const char *data = ctx->data ? ctx->data : ctx->raw;
-
-	// Find line, column
-	size_t line = 0, column = 0, index = 0;
-	for (; index < ctx->size; index++) {
-		char cur = data[index];
-
-		column++;
-
-		if (line == ctx->line && column == ctx->column)
-			break;
-
-		if (cur == '\n') {
-			line++;
-			column = 0;
-			continue;
-		} else if (cur == '\0') {
-			fprintf(stderr, "Invalid context!");
-			context_destroy(ctx);
-			exit(1);
-			break;
-		}
-	}
-
-	if (++index >= ctx->size)
-		return; // Couldn't find context, idc?
-
-	fprintf(fd, "\x1B[1;36m%s:%ld:%ld:\x1B[0m '", ctx->path, ctx->line + 1, ctx->column + 1);
-
-	// Print line context
-	size_t start = ctx->column > 5 ? index - 5 : index;
-	size_t end = ctx->size - index > 5 ? index + 5 : index + 1;
-	for (size_t i = start; i < end; i++) {
-		if (i == index) {
-			fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
-		} else {
-			fprintf(fd, "%c", data[i]);
-		}
-	}
-	fprintf(fd, "': ");
-}
-
-static __attribute__((noreturn)) void errln(struct ctx *ctx, const char *fmt, ...)
-{
-	context_print(stderr, ctx);
-
-	fprintf(stderr, "\x1B[1;31m");
-	va_list ap;
-	va_start(ap, fmt);
-	vfprintf(stderr, fmt, ap);
-	fprintf(stderr, "\n");
-	va_end(ap);
-	fprintf(stderr, "\x1B[0m");
-
-	context_destroy(ctx);
-	exit(1);
-}
-
-static __attribute__((noreturn)) void err(const char *fmt, ...)
-{
-	fprintf(stderr, "\x1B[1;31m");
-	va_list ap;
-	va_start(ap, fmt);
-	vfprintf(stderr, fmt, ap);
-	fprintf(stderr, "\n");
-	va_end(ap);
-	fprintf(stderr, "\x1B[0m");
-
-	exit(1);
-}
-
-/**
- * Preprocessor
- */
-
-static void preprocess_erase(struct ctx *ctx, size_t start)
-{
-	assert(ctx->raw[start] == '#');
-
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = ctx->raw[i];
-		if (cur == '\n' || cur == '\0')
-			break;
-
-		ctx->raw[i] = ' '; // Spaces get skipped by tokenizer anyways
-	}
-}
-
-static void preprocess(struct ctx *ctx)
-{
-	for (size_t i = 0; i < ctx->size; i++) {
-		const char cur = ctx->raw[i];
-
-		ctx->column++;
-
-		if (cur == '\n') {
-			ctx->line++;
-			ctx->column = 0;
-			continue;
-		} else if (cur == '\0') {
-			break;
-		} else if (cur == '#' && ctx->column == 1) {
-			if (strncmp(ctx->raw + i + 1, "inc ", MIN(4, ctx->size - i)) == 0) {
-				// TODO: Add include features
-			} else {
-				errln(ctx, "Invalid preprocessing directive");
-			}
-			preprocess_erase(ctx, i);
-		}
-	}
-
-	ctx->data = ctx->raw;
-	ctx->line = 0;
-	ctx->column = 0;
-}
-
-/**
- * Tokenizer
- */
-
-static char next_non_alnum(struct ctx *ctx, size_t start)
-{
-	for (size_t i = start; i < ctx->size; i++)
-		if (!isalnum(ctx->data[i]))
-			return ctx->data[i];
-
-	errln(ctx, "Unexpected end of buffer");
-}
-
-static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
-{
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = ctx->data[i];
-
-		if (cur == ch || cur == ';' || cur == ')')
-			return true;
-
-		if (!isalnum(cur))
-			return false;
-	}
-
-	errln(ctx, "Unexpected end of buffer");
-}
-
-static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
-{
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = ctx->data[i];
-
-		if (cur == ch || cur == ';' || cur == ')')
-			return i;
-
-		if (!isalnum(cur))
-			errln(ctx, "'%c' is not alpha-numeric", cur);
-	}
-
-	errln(ctx, "Unexpected end of buffer");
-}
-
-static size_t peek_special_to(struct ctx *ctx, size_t start, char ch)
-{
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = ctx->data[i];
-
-		if (cur == ch || cur == ';' || cur == ')')
-			return i;
-
-		if (isalnum(cur) || cur < '!' || cur > '~')
-			errln(ctx, "'%c' is not special", cur);
-	}
-
-	errln(ctx, "Unexpected end of buffer");
-}
-
-static size_t peek_to(struct ctx *ctx, size_t start, char ch)
-{
-	for (size_t i = start; i < ctx->size; i++) {
-		char cur = ctx->data[i];
-
-		if (cur == ch || cur == ';' || cur == ')')
-			return i;
-	}
-
-	errln(ctx, "Unexpected end of buffer");
-}
-
-static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
-{
-	struct token token = { 0 };
-	token.type = type;
-	token.start = start;
-	token.end = end;
-
-	assert(++ctx->token_count < TOKENS_MAX);
-	ctx->tokens[ctx->token_count] = token;
-
-	if (type == NEWLINE) {
-		ctx->line++;
-		ctx->column = 0;
-	} else {
-		ctx->column += end - start;
-	}
-}
-
-static void token_print(struct ctx *ctx, struct token *token)
-{
-	assert(token->type != UNKNOWN);
-
-	printf("[token type=%d] '", token->type);
-	if (token->type == NEWLINE || token->type == END) {
-		printf("' (Unprintable)\n");
-		return;
-	}
-
-	for (size_t i = token->start; i < token->end; i++)
-		printf("%c", ctx->data[i]);
-	printf("'\n");
-}
-
-static void tokens_print(struct ctx *ctx)
-{
-	for (size_t i = 1; i < ctx->token_count; i++)
-		token_print(ctx, &ctx->tokens[i]);
-}
-
-static void tokenize(struct ctx *ctx)
-{
-	for (size_t i = 0; i < ctx->size; i++) {
-		const char cur = ctx->data[i];
-
-		switch (cur) {
-		case '\0':
-			token_add(ctx, END, i, i + 1);
-			return;
-		case '\n':
-			token_add(ctx, NEWLINE, i, i + 1);
-			continue;
-		case ';':
-			token_add(ctx, EOL, i, i + 1);
-			continue;
-		case '(':
-			token_add(ctx, LPAREN, i, i + 1);
-			continue;
-		case ')':
-			token_add(ctx, RPAREN, i, i + 1);
-			continue;
-		case '=':
-			token_add(ctx, EQUAL, i, i + 1);
-			continue;
-		case ' ':
-			ctx->column++;
-			continue;
-		default:
-			break;
-		}
-
-		if (next_non_alnum(ctx, i) == ':') { // Type with param identifier
-			size_t start_param = peek_alnum_to(ctx, i, ':') + 1;
-			size_t end_param;
-			if (peek_to_is_alnum(ctx, start_param, ' ')) {
-				end_param = peek_alnum_to(ctx, start_param, ' ');
-			} else { // Unnamed identifier ('_')
-				end_param = peek_to(ctx, start_param, ' ');
-				if (end_param - start_param != 1 || ctx->data[start_param] != '_')
-					errln(ctx, "Invalid param identifier");
-			}
-
-			token_add(ctx, TYPE, i, start_param - 1);
-			token_add(ctx, TYPEDELIM, start_param - 1, start_param);
-			token_add(ctx, PARAM, start_param, end_param);
-
-			i = end_param - 1;
-			continue;
-		}
-
-		if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier
-			size_t end_ident = peek_alnum_to(ctx, i, ' ');
-			token_add(ctx, IDENT, i, end_ident);
-			i = end_ident - 1;
-		} else { // Special/custom operator
-			size_t end_operator = peek_special_to(ctx, i, ' ');
-			token_add(ctx, OPERATOR, i, end_operator);
-			i = end_operator - 1;
-		}
-	}
-
-	context_rewind(ctx);
-}
-
-/**
- * Linter
- */
-
-static void lint(struct ctx *ctx)
-{
-	// Lint parens
-	int parens = 0;
-	for (size_t i = 1; i < ctx->token_count; i++) {
-		struct token *token = &ctx->tokens[i];
-		if (token->type == LPAREN)
-			parens++;
-		else if (token->type == RPAREN)
-			parens--;
-	}
-
-	if (parens != 0)
-		errln(ctx, "Invalid parens balance");
-}
-
-/**
- * Main
- */
+#include <context.h>
+#include <lint.h>
+#include <log.h>
+#include <preprocess.h>
+#include <tokenize.h>
 
 int main(int argc, char *argv[])
 {
diff --git a/src/preprocess.c b/src/preprocess.c
new file mode 100644
index 0000000..673bd20
--- /dev/null
+++ b/src/preprocess.c
@@ -0,0 +1,58 @@
+#include <assert.h>
+#include <log.h>
+#include <preprocess.h>
+#include <string.h>
+
+// Overwrites the directive that starts at `start` with spaces, up to but not
+// including the end of its line
+static void preprocess_erase(struct ctx *ctx, size_t start)
+{
+	assert(ctx->raw[start] == '#');
+
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->raw[i];
+		if (cur == '\n' || cur == '\0')
+			break;
+
+		ctx->raw[i] = ' '; // Spaces get skipped by tokenizer anyways
+	}
+}
+
+/**
+ * Handles preprocessing directives, i.e. lines with a '#' in their first
+ * column. Only "#inc " is accepted (its handling is still a TODO); any other
+ * directive is reported through errln(), which does not return. Accepted
+ * directives are blanked out in ctx->raw. Finally ctx->data is pointed at
+ * ctx->raw and the tracked position is rewound.
+ */
+void preprocess(struct ctx *ctx)
+{
+	for (size_t i = 0; i < ctx->size; i++) {
+		const char cur = ctx->raw[i];
+
+		ctx->column++;
+
+		if (cur == '\n') {
+			ctx->line++;
+			ctx->column = 0;
+			continue;
+		}
+
+		if (cur == '\0')
+			break;
+
+		if (cur != '#' || ctx->column != 1)
+			continue;
+
+		// raw is NUL-terminated (see context_create()), so strncmp() can't
+		// read past the end of the buffer even when fewer than 4 bytes remain
+		if (strncmp(ctx->raw + i + 1, "inc ", 4) != 0)
+			errln(ctx, "Invalid preprocessing directive");
+
+		// TODO: Add include features
+		preprocess_erase(ctx, i);
+	}
+
+	ctx->data = ctx->raw;
+	context_rewind(ctx);
+}
diff --git a/src/tokenize.c b/src/tokenize.c
new file mode 100644
index 0000000..3424454
--- /dev/null
+++ b/src/tokenize.c
@@ -0,0 +1,207 @@
+#include <assert.h>
+#include <ctype.h>
+#include <log.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <tokenize.h>
+
+// <ctype.h> functions are undefined for negative arguments other than EOF,
+// so plain char has to go through unsigned char first
+static bool is_alnum(char ch)
+{
+	return isalnum((unsigned char)ch) != 0;
+}
+
+// Every peek helper stops at `ch` as well as at ';' and ')'
+static bool is_stop(char cur, char ch)
+{
+	return cur == ch || cur == ';' || cur == ')';
+}
+
+// Returns the first non-alphanumeric character at or after `start`
+static char next_non_alnum(struct ctx *ctx, size_t start)
+{
+	for (size_t i = start; i < ctx->size; i++)
+		if (!is_alnum(ctx->data[i]))
+			return ctx->data[i];
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+// Tells whether only alphanumeric characters lie between `start` and the
+// next stop character
+static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->data[i];
+
+		if (is_stop(cur, ch))
+			return true;
+
+		if (!is_alnum(cur))
+			return false;
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+// Returns the index of the next stop character; every character in front of
+// it has to be alphanumeric
+static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->data[i];
+
+		if (is_stop(cur, ch))
+			return i;
+
+		if (!is_alnum(cur))
+			errln(ctx, "'%c' is not alpha-numeric", cur);
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+// Returns the index of the next stop character; every character in front of
+// it has to be printable ('!' to '~') and non-alphanumeric
+static size_t peek_special_to(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->data[i];
+
+		if (is_stop(cur, ch))
+			return i;
+
+		if (is_alnum(cur) || cur < '!' || cur > '~')
+			errln(ctx, "'%c' is not special", cur);
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+// Returns the index of the next stop character, whatever precedes it
+static size_t peek_to(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++)
+		if (is_stop(ctx->data[i], ch))
+			return i;
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+// Appends a token spanning [start, end) and advances the tracked position
+static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
+{
+	// Slot 0 stays unused, so at most TOKENS_MAX - 1 tokens fit. This must not
+	// be an assert(): under NDEBUG the increment would vanish along with it
+	if (ctx->token_count + 1 >= TOKENS_MAX)
+		errln(ctx, "Too many tokens");
+
+	struct token *token = &ctx->tokens[++ctx->token_count];
+	token->type = type;
+	token->start = start;
+	token->end = end;
+
+	if (type == NEWLINE) {
+		ctx->line++;
+		ctx->column = 0;
+	} else {
+		ctx->column += end - start;
+	}
+}
+
+// Prints the type and text of a single token to stdout
+static void token_print(struct ctx *ctx, struct token *token)
+{
+	assert(token->type != UNKNOWN);
+
+	printf("[token type=%d] '", (int)token->type);
+	if (token->type == NEWLINE || token->type == END) {
+		printf("' (Unprintable)\n");
+		return;
+	}
+
+	for (size_t i = token->start; i < token->end; i++)
+		printf("%c", ctx->data[i]);
+	printf("'\n");
+}
+
+/**
+ * Prints all tokens of ctx to stdout, one per line.
+ */
+void tokens_print(struct ctx *ctx)
+{
+	// Tokens live in slots 1..token_count, hence the inclusive bound
+	for (size_t i = 1; i <= ctx->token_count; i++)
+		token_print(ctx, &ctx->tokens[i]);
+}
+
+/**
+ * Splits ctx->data into tokens, stored in ctx->tokens[1..token_count].
+ * Malformed input is reported through errln(), which does not return.
+ * The tracked position is rewound before returning.
+ */
+void tokenize(struct ctx *ctx)
+{
+	for (size_t i = 0; i < ctx->size; i++) {
+		const char cur = ctx->data[i];
+
+		switch (cur) {
+		case '\0':
+			token_add(ctx, END, i, i + 1);
+			context_rewind(ctx); // Same final state as the regular exit
+			return;
+		case '\n':
+			token_add(ctx, NEWLINE, i, i + 1);
+			continue;
+		case ';':
+			token_add(ctx, EOL, i, i + 1);
+			continue;
+		case '(':
+			token_add(ctx, LPAREN, i, i + 1);
+			continue;
+		case ')':
+			token_add(ctx, RPAREN, i, i + 1);
+			continue;
+		case '=':
+			token_add(ctx, EQUAL, i, i + 1);
+			continue;
+		case ' ':
+			ctx->column++;
+			continue;
+		default:
+			break;
+		}
+
+		if (next_non_alnum(ctx, i) == ':') { // Type with param identifier
+			size_t start_param = peek_alnum_to(ctx, i, ':') + 1;
+			size_t end_param;
+			if (peek_to_is_alnum(ctx, start_param, ' ')) {
+				end_param = peek_alnum_to(ctx, start_param, ' ');
+			} else { // Unnamed identifier ('_')
+				end_param = peek_to(ctx, start_param, ' ');
+				if (end_param - start_param != 1 || ctx->data[start_param] != '_')
+					errln(ctx, "Invalid param identifier");
+			}
+
+			token_add(ctx, TYPE, i, start_param - 1);
+			token_add(ctx, TYPEDELIM, start_param - 1, start_param);
+			token_add(ctx, PARAM, start_param, end_param);
+
+			i = end_param - 1;
+			continue;
+		}
+
+		if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier
+			size_t end_ident = peek_alnum_to(ctx, i, ' ');
+			token_add(ctx, IDENT, i, end_ident);
+			i = end_ident - 1;
+		} else { // Special/custom operator
+			size_t end_operator = peek_special_to(ctx, i, ' ');
+			token_add(ctx, OPERATOR, i, end_operator);
+			i = end_operator - 1;
+		}
+	}
+
+	context_rewind(ctx);
+}
author	Marvin Borner	2021-05-13 12:03:43 +0200
committer	Marvin Borner	2021-05-13 12:03:43 +0200
commit	f181a8f04dfdfd8829861e0d0d549f39e40081e6 (patch)
tree	d937ae185e4a0dd97fd61b65be0cee01ac85876b
parent	879663d7154201ace191425cbddb36dc18f39402 (diff)