aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marvin Borner, 2021-05-12 19:16:43 +0200
committer: Marvin Borner, 2021-05-12 19:16:43 +0200
commit: 879663d7154201ace191425cbddb36dc18f39402 (patch)
tree: d6b9c95eb0ddd52a2ce6f752fc101696f2e7e1f6
Initial commit
-rw-r--r-- .gitignore 3
-rw-r--r-- Makefile 20
-rw-r--r-- src/main.c 443
-rw-r--r-- test.fun 2
4 files changed, 468 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..017fcf9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+build/
+
+compile_commands.json
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..9fbaca1
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# Compiler and flags. The same CFLAGS (warnings and sanitizers included) are
+# passed both when compiling objects and when linking the final binary.
+CC = gcc
+CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined
+
+# Every src/*.c file is compiled to a matching build/*.o object.
+SOURCEDIR = src
+BUILDDIR = build
+SOURCES = $(wildcard $(SOURCEDIR)/*.c)
+OBJS = $(patsubst $(SOURCEDIR)/%.c, $(BUILDDIR)/%.o, $(SOURCES))
+
+# These targets are commands, not files: without .PHONY a file named "all",
+# "clean" or "run" in this directory would make them appear up to date.
+.PHONY: all clean run
+
+# Link all objects into build/out.
+all: $(OBJS)
+	@$(CC) -o ./$(BUILDDIR)/out $^ $(CFLAGS)
+
+# Remove the whole build directory.
+clean:
+	@$(RM) -rf $(BUILDDIR)
+
+# Rebuild from scratch and run the binary on the sample input.
+run: clean all
+	@./$(BUILDDIR)/out test.fun
+
+# Compile one source file, creating the build directory on demand.
+$(BUILDDIR)/%.o: $(SOURCEDIR)/%.c
+	@mkdir -p $(BUILDDIR)
+	@$(CC) -c -o $@ $< $(CFLAGS)
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..db4b890
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,443 @@
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+/**
+ * Definitions
+ */
+
+#define TOKENS_MAX 4096
+
+/**
+ * Structures/enums
+ */
+
+enum token_type { // Kinds of token stored in the type field of struct token
+ UNKNOWN, // First enumerator (value 0); token_print asserts a token never has this type
+
+ TYPE, // Text before the ':' of a "type:param" pair
+ TYPEDELIM, // The ':' between a type and its param
+ PARAM, // Text after the ':' (alphanumeric name, or '_' for unnamed)
+
+ IDENT, // Alphanumeric identifier
+ OPERATOR, // Run of printable non-alphanumeric characters
+
+ LPAREN, // '('
+ RPAREN, // ')'
+ EQUAL, // '='
+
+ NEWLINE, // '\n'
+ EOL, // ';'
+ END, // '\0' found inside the buffer
+};
+
+struct token { // One token: its kind plus the span of the buffer it covers
+ enum token_type type;
+ size_t start, end; // Offsets into ctx->data; token_print prints bytes start..end-1
+};
+
+struct ctx { // State for one source file: current position, buffers and tokens
+ size_t line; // Current line, 0-based (context_print shows line + 1)
+ size_t column; // Current column within the line
+ const char *path; // Path as passed to context_create (pointer stored, not copied)
+
+ char *raw; // File contents read by context_create, NUL-terminated
+ char *data; // Buffer the tokenizer reads; preprocess() points it at raw
+ size_t size; // Number of bytes in raw, not counting the terminator
+
+ size_t token_count; // Index of the last token written by token_add
+ struct token *tokens; // Array of TOKENS_MAX tokens; token_add never writes index 0
+};
+
+/**
+ * Contexts
+ */
+
+/**
+ * Allocate a context for the file at `path` and load the whole file into
+ * ctx->raw (NUL-terminated; ctx->size is the byte count without terminator).
+ *
+ * The path pointer is stored as-is, not copied. Any failure (allocation,
+ * open, seek, read, or an empty file) prints "<path>: <reason>" to stderr,
+ * releases what was acquired and exits with status 1, so the function never
+ * returns NULL. Failures are checked explicitly rather than with assert() so
+ * that they are still caught when NDEBUG is defined.
+ */
+static struct ctx *context_create(const char *path)
+{
+	const char *reason = NULL;
+	FILE *file = NULL;
+	long length = -1;
+
+	struct ctx *ctx = calloc(1, sizeof(*ctx));
+	if (!ctx) {
+		reason = "out of memory";
+		goto fail;
+	}
+	ctx->path = path; // TODO: strdup?
+
+	ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
+	if (!ctx->tokens) {
+		reason = "out of memory";
+		goto fail;
+	}
+
+	file = fopen(path, "r");
+	if (!file) {
+		reason = strerror(errno);
+		goto fail;
+	}
+
+	// Find size of file; ftell reports errors as -1, so keep it signed here
+	if (fseek(file, 0, SEEK_END) == 0)
+		length = ftell(file);
+	if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
+		reason = strerror(errno);
+		goto fail;
+	}
+	if (length == 0) {
+		reason = "file is empty";
+		goto fail;
+	}
+
+	ctx->raw = malloc((size_t)length + 1);
+	if (!ctx->raw) {
+		reason = "out of memory";
+		goto fail;
+	}
+
+	// Trust the number of bytes actually read, not the size measured above
+	ctx->size = fread(ctx->raw, 1, (size_t)length, file);
+	if (ferror(file) || ctx->size == 0) {
+		reason = "could not read file";
+		goto fail;
+	}
+	fclose(file);
+
+	ctx->raw[ctx->size] = 0;
+
+	return ctx;
+
+fail:
+	fprintf(stderr, "%s: %s\n", path, reason);
+	if (file)
+		fclose(file);
+	if (ctx) {
+		free(ctx->raw);
+		free(ctx->tokens);
+		free(ctx);
+	}
+	exit(1);
+}
+
+/**
+ * Release a context together with every buffer it owns.
+ * Passing NULL is allowed and does nothing.
+ */
+static void context_destroy(struct ctx *ctx)
+{
+	if (ctx == NULL)
+		return;
+
+	// data may be the very same buffer as raw; only free it when it is a
+	// separate allocation, otherwise the buffer would be freed twice
+	char *separate_data = (ctx->data != ctx->raw) ? ctx->data : NULL;
+
+	// free(NULL) is a no-op, so no per-pointer guards are needed
+	free(ctx->raw);
+	free(separate_data);
+	free(ctx->tokens);
+	free(ctx);
+}
+
+/**
+ * Reset the context's position (line and column) back to zero.
+ */
+static void context_rewind(struct ctx *ctx)
+{
+	ctx->column = 0;
+	ctx->line = 0;
+}
+
+/**
+ * Logging
+ */
+
+/**
+ * Print a coloured "path:line:column: '<excerpt>': " prefix to `fd` for the
+ * position stored in ctx->line / ctx->column. The excerpt is a few bytes
+ * around the located index, with the byte at that index highlighted.
+ *
+ * Uses ctx->data when set, otherwise ctx->raw. Nothing is printed when the
+ * position cannot be located inside the buffer. Meeting a NUL byte while
+ * searching is treated as a corrupt context: a message goes to stderr, the
+ * context is destroyed and the process exits with status 1.
+ */
+static void context_print(FILE *fd, struct ctx *ctx)
+{
+	const char *data = ctx->data ? ctx->data : ctx->raw;
+
+	// Find line, column
+	size_t line = 0, column = 0, index = 0;
+	for (; index < ctx->size; index++) {
+		char cur = data[index];
+
+		column++;
+
+		if (line == ctx->line && column == ctx->column)
+			break;
+
+		if (cur == '\n') {
+			line++;
+			column = 0;
+		} else if (cur == '\0') {
+			fprintf(stderr, "Invalid context!");
+			context_destroy(ctx);
+			exit(1);
+		}
+	}
+
+	if (++index >= ctx->size)
+		return; // Couldn't find context, idc?
+
+	// line and column are size_t, which must be printed with %zu
+	fprintf(fd, "\x1B[1;36m%s:%zu:%zu:\x1B[0m '", ctx->path, ctx->line + 1, ctx->column + 1);
+
+	// Print line context
+	size_t start = ctx->column > 5 ? index - 5 : index;
+	size_t end = ctx->size - index > 5 ? index + 5 : index + 1;
+	for (size_t i = start; i < end; i++) {
+		if (i == index) {
+			fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
+		} else {
+			fprintf(fd, "%c", data[i]);
+		}
+	}
+	fprintf(fd, "': ");
+}
+
+/**
+ * Report a fatal error at the context's current position and exit.
+ *
+ * Prints the location prefix from context_print, then the printf-style
+ * message in red followed by a newline, all to stderr. Afterwards the
+ * context is destroyed and the process exits with status 1.
+ *
+ * The format attribute lets the compiler check `fmt` against the variadic
+ * arguments at every call site.
+ */
+static __attribute__((noreturn, format(printf, 2, 3))) void errln(struct ctx *ctx, const char *fmt, ...)
+{
+	context_print(stderr, ctx);
+
+	fprintf(stderr, "\x1B[1;31m");
+	va_list ap;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	fprintf(stderr, "\n");
+	va_end(ap);
+	fprintf(stderr, "\x1B[0m");
+
+	context_destroy(ctx);
+	exit(1);
+}
+
+/**
+ * Report a fatal error that is not tied to a source position and exit.
+ *
+ * Prints the printf-style message in red followed by a newline to stderr,
+ * then exits with status 1.
+ *
+ * The format attribute lets the compiler check `fmt` against the variadic
+ * arguments at every call site.
+ */
+static __attribute__((noreturn, format(printf, 1, 2))) void err(const char *fmt, ...)
+{
+	fprintf(stderr, "\x1B[1;31m");
+	va_list ap;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	fprintf(stderr, "\n");
+	va_end(ap);
+	fprintf(stderr, "\x1B[0m");
+
+	exit(1);
+}
+
+/**
+ * Preprocessor
+ */
+
+/**
+ * Blank out a preprocessor directive in ctx->raw: every byte from the '#' at
+ * `start` up to, but not including, the next newline or NUL becomes a space.
+ */
+static void preprocess_erase(struct ctx *ctx, size_t start)
+{
+	assert(ctx->raw[start] == '#');
+
+	size_t pos = start;
+	while (pos < ctx->size && ctx->raw[pos] != '\n' && ctx->raw[pos] != '\0')
+		ctx->raw[pos++] = ' '; // Spaces get skipped by tokenizer anyways
+}
+
+/**
+ * Scan ctx->raw for lines that begin with '#'. A "#inc " directive is
+ * accepted (and currently only blanked out); any other directive is a fatal
+ * error. When the scan is done, ctx->data is pointed at ctx->raw and the
+ * position is reset to line 0, column 0.
+ */
+static void preprocess(struct ctx *ctx)
+{
+	for (size_t pos = 0; pos < ctx->size; pos++) {
+		const char ch = ctx->raw[pos];
+
+		ctx->column++;
+
+		if (ch == '\0')
+			break;
+
+		if (ch == '\n') {
+			ctx->line++;
+			ctx->column = 0;
+			continue;
+		}
+
+		// Directives are only recognised in the first column of a line
+		if (ch != '#' || ctx->column != 1)
+			continue;
+
+		const size_t cmp_len = MIN(4, ctx->size - pos);
+		if (strncmp(ctx->raw + pos + 1, "inc ", cmp_len) != 0)
+			errln(ctx, "Invalid preprocessing directive");
+
+		// TODO: Add include features
+		preprocess_erase(ctx, pos);
+	}
+
+	ctx->data = ctx->raw;
+	ctx->line = 0;
+	ctx->column = 0;
+}
+
+/**
+ * Tokenizer
+ */
+
+/**
+ * Return the first character at or after `start` in ctx->data that is not
+ * alphanumeric. Exits through errln if the buffer ends before one is found.
+ */
+static char next_non_alnum(struct ctx *ctx, size_t start)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		// <ctype.h> functions take an unsigned char value; a plain char can
+		// be negative for non-ASCII bytes, which is undefined behaviour
+		if (!isalnum((unsigned char)ctx->data[i]))
+			return ctx->data[i];
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+/**
+ * Check whether everything from `start` up to the next terminator is
+ * alphanumeric. A terminator is `ch`, ';' or ')'.
+ *
+ * Returns true when a terminator is reached first (including immediately at
+ * `start`), false when a non-alphanumeric, non-terminator character comes
+ * first. Exits through errln if the buffer ends before either happens.
+ */
+static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->data[i];
+
+		if (cur == ch || cur == ';' || cur == ')')
+			return true;
+
+		// Cast: <ctype.h> functions are undefined for negative char values
+		if (!isalnum((unsigned char)cur))
+			return false;
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+/**
+ * Return the index of the first terminator (`ch`, ';' or ')') at or after
+ * `start`, requiring every character before it to be alphanumeric.
+ *
+ * Exits through errln when a non-alphanumeric character is met first or when
+ * the buffer ends without a terminator.
+ */
+static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->data[i];
+
+		if (cur == ch || cur == ';' || cur == ')')
+			return i;
+
+		// Cast: <ctype.h> functions are undefined for negative char values
+		if (!isalnum((unsigned char)cur))
+			errln(ctx, "'%c' is not alpha-numeric", cur);
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+/**
+ * Return the index of the first terminator (`ch`, ';' or ')') at or after
+ * `start`, requiring every character before it to be "special": inside the
+ * range '!'..'~' and not alphanumeric.
+ *
+ * Exits through errln when any other character is met first or when the
+ * buffer ends without a terminator.
+ */
+static size_t peek_special_to(struct ctx *ctx, size_t start, char ch)
+{
+	for (size_t i = start; i < ctx->size; i++) {
+		char cur = ctx->data[i];
+
+		if (cur == ch || cur == ';' || cur == ')')
+			return i;
+
+		// Cast: <ctype.h> functions are undefined for negative char values
+		if (isalnum((unsigned char)cur) || cur < '!' || cur > '~')
+			errln(ctx, "'%c' is not special", cur);
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+/**
+ * Return the index of the first `ch`, ';' or ')' at or after `start`,
+ * whatever the characters before it are. Exits through errln if the buffer
+ * ends without one.
+ */
+static size_t peek_to(struct ctx *ctx, size_t start, char ch)
+{
+	size_t pos = start;
+
+	while (pos < ctx->size) {
+		const char c = ctx->data[pos];
+
+		if (c == ';' || c == ')' || c == ch)
+			return pos;
+
+		pos++;
+	}
+
+	errln(ctx, "Unexpected end of buffer");
+}
+
+/**
+ * Append a token of kind `type` covering bytes start..end-1 of the buffer,
+ * and advance the context's line/column accordingly.
+ *
+ * Tokens are stored from index 1 upwards; ctx->token_count is the index of
+ * the most recently stored token. Exceeding TOKENS_MAX is a fatal error.
+ */
+static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
+{
+	// Checked explicitly instead of assert(++count < MAX): with NDEBUG the
+	// assert expression vanishes, and the increment and bounds check with it
+	if (ctx->token_count + 1 >= TOKENS_MAX)
+		errln(ctx, "Too many tokens");
+
+	struct token token = { 0 };
+	token.type = type;
+	token.start = start;
+	token.end = end;
+
+	ctx->token_count++;
+	ctx->tokens[ctx->token_count] = token;
+
+	if (type == NEWLINE) {
+		ctx->line++;
+		ctx->column = 0;
+	} else {
+		ctx->column += end - start;
+	}
+}
+
+/**
+ * Print one token to stdout as "[token type=N] '<text>'". NEWLINE and END
+ * tokens are shown with empty text and an "(Unprintable)" marker instead.
+ */
+static void token_print(struct ctx *ctx, struct token *token)
+{
+	assert(token->type != UNKNOWN);
+
+	printf("[token type=%d] '", token->type);
+
+	const bool printable = token->type != NEWLINE && token->type != END;
+	if (!printable) {
+		printf("' (Unprintable)\n");
+		return;
+	}
+
+	size_t pos = token->start;
+	while (pos < token->end)
+		printf("%c", ctx->data[pos++]);
+	printf("'\n");
+}
+
+/**
+ * Print every stored token to stdout, one per line, in order.
+ */
+static void tokens_print(struct ctx *ctx)
+{
+	// token_add stores tokens at indices 1..token_count inclusive, so the
+	// upper bound must be inclusive or the last token is never printed
+	for (size_t i = 1; i <= ctx->token_count; i++)
+		token_print(ctx, &ctx->tokens[i]);
+}
+
+static void tokenize(struct ctx *ctx) // Split ctx->data into tokens, appended through token_add
+{
+ for (size_t i = 0; i < ctx->size; i++) {
+ const char cur = ctx->data[i];
+
+ switch (cur) { // Single-character tokens and skipped characters first
+ case '\0':
+ token_add(ctx, END, i, i + 1);
+ return; // NUL inside the buffer ends tokenizing; this path skips context_rewind
+ case '\n':
+ token_add(ctx, NEWLINE, i, i + 1);
+ continue;
+ case ';':
+ token_add(ctx, EOL, i, i + 1);
+ continue;
+ case '(':
+ token_add(ctx, LPAREN, i, i + 1);
+ continue;
+ case ')':
+ token_add(ctx, RPAREN, i, i + 1);
+ continue;
+ case '=':
+ token_add(ctx, EQUAL, i, i + 1);
+ continue;
+ case ' ':
+ ctx->column++; // Spaces produce no token, they only advance the column
+ continue;
+ default:
+ break; // Anything else starts a multi-character token handled below
+ }
+
+ if (next_non_alnum(ctx, i) == ':') { // Type with param identifier ("type:param")
+ size_t start_param = peek_alnum_to(ctx, i, ':') + 1; // Index just after the ':'
+ size_t end_param;
+ if (peek_to_is_alnum(ctx, start_param, ' ')) { // Named param: alphanumeric up to a terminator
+ end_param = peek_alnum_to(ctx, start_param, ' ');
+ } else { // Unnamed identifier ('_')
+ end_param = peek_to(ctx, start_param, ' ');
+ if (end_param - start_param != 1 || ctx->data[start_param] != '_') // Only a lone '_' is accepted
+ errln(ctx, "Invalid param identifier");
+ }
+
+ token_add(ctx, TYPE, i, start_param - 1); // Everything before the ':'
+ token_add(ctx, TYPEDELIM, start_param - 1, start_param); // The ':' itself
+ token_add(ctx, PARAM, start_param, end_param);
+
+ i = end_param - 1; // The loop's i++ then lands on end_param
+ continue;
+ }
+
+ if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier
+ size_t end_ident = peek_alnum_to(ctx, i, ' ');
+ token_add(ctx, IDENT, i, end_ident);
+ i = end_ident - 1; // The loop's i++ then lands on end_ident
+ } else { // Special/custom operator
+ size_t end_operator = peek_special_to(ctx, i, ' ');
+ token_add(ctx, OPERATOR, i, end_operator);
+ i = end_operator - 1; // The loop's i++ then lands on end_operator
+ }
+ }
+
+ context_rewind(ctx); // Reset line/column, which token_add advanced while scanning
+}
+
+/**
+ * Linter
+ */
+
+/**
+ * Check the token stream for structural mistakes. Currently this verifies
+ * that parentheses are balanced; an imbalance is a fatal error via errln.
+ */
+static void lint(struct ctx *ctx)
+{
+	// Lint parens
+	int depth = 0;
+
+	// token_add stores tokens at indices 1..token_count inclusive, so the
+	// upper bound must be inclusive or a trailing paren is never counted
+	for (size_t i = 1; i <= ctx->token_count; i++) {
+		const struct token *token = &ctx->tokens[i];
+
+		if (token->type == LPAREN) {
+			depth++;
+		} else if (token->type == RPAREN) {
+			// A ')' with no open '(' cannot be fixed by a later '(':
+			// stop with a negative depth so ")(" is reported too
+			if (--depth < 0)
+				break;
+		}
+	}
+
+	if (depth != 0)
+		errln(ctx, "Invalid parens balance");
+}
+
+/**
+ * Main
+ */
+
+/**
+ * Entry point: expects the path of the source file as the first argument,
+ * runs it through the pipeline and prints the resulting tokens.
+ */
+int main(int argc, char *argv[])
+{
+	if (argc < 2)
+		err("Not enough arguments!");
+
+	const char *source_path = argv[1];
+	struct ctx *ctx = context_create(source_path);
+
+	// Pipeline: handle directives, split into tokens, sanity-check, dump
+	preprocess(ctx);
+	tokenize(ctx);
+	lint(ctx);
+	tokens_print(ctx);
+
+	context_destroy(ctx);
+	return 0;
+}
diff --git a/test.fun b/test.fun
new file mode 100644
index 0000000..d15e9a5
--- /dev/null
+++ b/test.fun
@@ -0,0 +1,2 @@
+#inc other.fun
+u32:x u32:a u32:b = a * ((u32:_ u32:c u32:d = c + d) a b);