diff options
author | Marvin Borner | 2021-05-12 19:16:43 +0200 |
---|---|---|
committer | Marvin Borner | 2021-05-12 19:16:43 +0200 |
commit | 879663d7154201ace191425cbddb36dc18f39402 (patch) | |
tree | d6b9c95eb0ddd52a2ce6f752fc101696f2e7e1f6 |
Initial commit
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | src/main.c | 443 | ||||
-rw-r--r-- | test.fun | 2 |
4 files changed, 468 insertions, 0 deletions
/**
 * src/main.c - tokenizer and linter front end for ".fun" source files.
 *
 * Pipeline: context_create() -> preprocess() -> tokenize() -> lint() ->
 * tokens_print().
 *
 * Build (from the accompanying Makefile; objects and the binary go to
 * build/, which is git-ignored together with compile_commands.json):
 *
 *   CC     = gcc
 *   CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith
 *            -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1
 *            -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes
 *            -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op
 *            -Wunreachable-code -Wundef -Wold-style-definition -Wvla
 *            -std=c99 -fsanitize=address -fsanitize=undefined
 *
 *   make all   # compiles src/*.c to build/*.o and links build/out
 *   make run   # clean + all, then runs ./build/out test.fun
 *
 * Sample input (test.fun):
 *
 *   #inc other.fun
 *   u32:x u32:a u32:b = a * ((u32:_ u32:c u32:d = c + d) a b);
 */

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>

/**
 * Definitions
 */

// Capacity of the token array of a context
#define TOKENS_MAX 4096

// Number of characters shown around the error position by context_print
#define CONTEXT_RADIUS 5

/**
 * Structures/enums
 */

enum token_type {
	UNKNOWN,

	TYPE,
	TYPEDELIM,
	PARAM,

	IDENT,
	OPERATOR,

	LPAREN,
	RPAREN,
	EQUAL,

	NEWLINE,
	EOL,
	END,
};

// A token covers the half-open byte range [start, end) of ctx->data
struct token {
	enum token_type type;
	size_t start, end;
};

struct ctx {
	size_t line; // Current line (0-based)
	size_t column; // Characters consumed on the current line
	const char *path; // Borrowed from the caller, never freed

	char *raw; // File contents, NUL-terminated
	char *data; // Preprocessed contents (currently aliases raw)
	size_t size; // Number of bytes in raw, excluding the terminator

	size_t token_count; // Number of used entries in tokens
	struct token *tokens; // Array of TOKENS_MAX entries
};

/**
 * Fatal errors (without source position)
 */

// Prints a printf-style message in red to stderr and exits with status 1
static __attribute__((noreturn, format(printf, 1, 2))) void err(const char *fmt, ...)
{
	fprintf(stderr, "\x1B[1;31m");
	va_list ap;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	fprintf(stderr, "\n");
	va_end(ap);
	fprintf(stderr, "\x1B[0m");

	exit(1);
}

/**
 * Contexts
 */

// Releases a context and everything it owns; NULL is accepted
static void context_destroy(struct ctx *ctx)
{
	if (!ctx)
		return;

	// data aliases raw after preprocessing, don't free it twice
	if (ctx->data != ctx->raw)
		free(ctx->data);

	free(ctx->raw);
	free(ctx->tokens);
	free(ctx);
}

// Resets the source position to the start of the buffer
static void context_rewind(struct ctx *ctx)
{
	ctx->line = 0;
	ctx->column = 0;
}

// Loads the file at path into a new context. Never returns NULL: every
// failure (unreadable/empty file, out of memory) is reported via err().
// The path pointer is stored as-is and must outlive the context.
static struct ctx *context_create(const char *path)
{
	FILE *file = fopen(path, "r");
	if (!file)
		err("Couldn't open '%s': %s", path, strerror(errno));

	// Find size of file
	long size = -1;
	if (fseek(file, 0, SEEK_END) == 0)
		size = ftell(file);
	if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
		fclose(file);
		err("Couldn't determine size of '%s'", path);
	}
	if (size == 0) {
		fclose(file);
		err("'%s' is empty", path);
	}

	struct ctx *ctx = calloc(1, sizeof(*ctx));
	if (ctx) {
		ctx->path = path;
		ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
		ctx->raw = malloc((size_t)size + 1);
	}
	if (!ctx || !ctx->tokens || !ctx->raw) {
		fclose(file);
		context_destroy(ctx);
		err("Out of memory while loading '%s'", path);
	}

	// Trust the number of bytes actually read over the reported size
	ctx->size = fread(ctx->raw, 1, (size_t)size, file);
	bool failed = ferror(file) != 0;
	fclose(file);
	if (failed || ctx->size == 0) {
		context_destroy(ctx);
		err("Couldn't read '%s'", path);
	}

	ctx->raw[ctx->size] = 0;

	return ctx;
}

/**
 * Logging
 */

// Prints "path:line:column: '<excerpt>': " for the current position of ctx
// with the offending character highlighted. Prints nothing if the position
// can't be located (e.g. at column 0 or at the end of the buffer).
static void context_print(FILE *fd, struct ctx *ctx)
{
	const char *data = ctx->data ? ctx->data : ctx->raw;

	// Find the last consumed character (line, column)
	size_t line = 0, column = 0, pos = 0;
	for (; pos < ctx->size; pos++) {
		char cur = data[pos];

		column++;

		if (line == ctx->line && column == ctx->column)
			break;

		if (cur == '\n') {
			line++;
			column = 0;
		} else if (cur == '\0') {
			fprintf(stderr, "Invalid context!\n");
			context_destroy(ctx);
			exit(1);
		}
	}

	// The offending character is the one after the last consumed one
	if (++pos >= ctx->size)
		return; // Couldn't find context

	fprintf(fd, "\x1B[1;36m%s:%zu:%zu:\x1B[0m '", ctx->path, ctx->line + 1, ctx->column + 1);

	// Print line context; pos >= ctx->column holds here, so no underflow
	size_t start = pos - MIN(ctx->column, (size_t)CONTEXT_RADIUS);
	size_t end = MIN(pos + CONTEXT_RADIUS, ctx->size);
	for (size_t i = start; i < end; i++) {
		if (i == pos)
			fprintf(fd, "\x1B[1;32m%c\x1B[0m", data[i]);
		else
			fprintf(fd, "%c", data[i]);
	}
	fprintf(fd, "': ");
}

// Prints the source position followed by a printf-style message in red to
// stderr, destroys the context and exits with status 1
static __attribute__((noreturn, format(printf, 2, 3))) void errln(struct ctx *ctx, const char *fmt,
								  ...)
{
	context_print(stderr, ctx);

	fprintf(stderr, "\x1B[1;31m");
	va_list ap;
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	fprintf(stderr, "\n");
	va_end(ap);
	fprintf(stderr, "\x1B[0m");

	context_destroy(ctx);
	exit(1);
}

/**
 * Preprocessor
 */

// Blanks out the directive starting at the '#' at index start up to (not
// including) the end of its line
static void preprocess_erase(struct ctx *ctx, size_t start)
{
	assert(ctx->raw[start] == '#');

	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->raw[i];
		if (cur == '\n' || cur == '\0')
			break;

		ctx->raw[i] = ' '; // Spaces get skipped by tokenizer anyways
	}
}

// Handles all directives ('#' in the first column) of the raw buffer and
// publishes the result as ctx->data. Unknown directives are fatal.
static void preprocess(struct ctx *ctx)
{
	for (size_t i = 0; i < ctx->size; i++) {
		const char cur = ctx->raw[i];

		ctx->column++;

		if (cur == '\n') {
			ctx->line++;
			ctx->column = 0;
			continue;
		}

		if (cur == '\0')
			break;

		if (cur != '#' || ctx->column != 1)
			continue;

		// raw is NUL-terminated, ctx->size - i bytes follow the '#'
		if (strncmp(ctx->raw + i + 1, "inc ", MIN((size_t)4, ctx->size - i)) != 0)
			errln(ctx, "Invalid preprocessing directive");

		// TODO: Add include features
		preprocess_erase(ctx, i);
	}

	ctx->data = ctx->raw;
	context_rewind(ctx);
}

/**
 * Tokenizer
 */

// isalnum is only defined for unsigned char values and EOF
static bool is_alnum(char ch)
{
	return isalnum((unsigned char)ch) != 0;
}

// Returns the first non-alphanumeric character at or after start
static char next_non_alnum(struct ctx *ctx, size_t start)
{
	for (size_t i = start; i < ctx->size; i++)
		if (!is_alnum(ctx->data[i]))
			return ctx->data[i];

	errln(ctx, "Unexpected end of buffer");
}

// Returns whether only alphanumeric characters lie between start and the
// next delimiter (ch, ';' or ')')
static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
{
	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->data[i];

		if (cur == ch || cur == ';' || cur == ')')
			return true;

		if (!is_alnum(cur))
			return false;
	}

	errln(ctx, "Unexpected end of buffer");
}

// Returns the index of the next delimiter (ch, ';' or ')'); every character
// before it has to be alphanumeric
static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
{
	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->data[i];

		if (cur == ch || cur == ';' || cur == ')')
			return i;

		if (!is_alnum(cur))
			errln(ctx, "'%c' is not alpha-numeric", cur);
	}

	errln(ctx, "Unexpected end of buffer");
}

// Returns the index of the next delimiter (ch, ';' or ')'); every character
// before it has to be printable ASCII punctuation ('!'..'~', not alnum)
static size_t peek_special_to(struct ctx *ctx, size_t start, char ch)
{
	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->data[i];

		if (cur == ch || cur == ';' || cur == ')')
			return i;

		if (is_alnum(cur) || cur < '!' || cur > '~')
			errln(ctx, "'%c' is not special", cur);
	}

	errln(ctx, "Unexpected end of buffer");
}

// Returns the index of the next delimiter (ch, ';' or ')')
static size_t peek_to(struct ctx *ctx, size_t start, char ch)
{
	for (size_t i = start; i < ctx->size; i++) {
		char cur = ctx->data[i];

		if (cur == ch || cur == ';' || cur == ')')
			return i;
	}

	errln(ctx, "Unexpected end of buffer");
}

// Appends a token covering [start, end) and advances the source position.
// Tokens are stored at indices 0..token_count-1; overflowing TOKENS_MAX is
// a fatal error (and not an assert, so it survives NDEBUG builds).
static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
{
	if (ctx->token_count >= TOKENS_MAX)
		errln(ctx, "Too many tokens (limit is %d)", TOKENS_MAX);

	struct token *token = &ctx->tokens[ctx->token_count++];
	token->type = type;
	token->start = start;
	token->end = end;

	if (type == NEWLINE) {
		ctx->line++;
		ctx->column = 0;
	} else {
		ctx->column += end - start;
	}
}

// Prints the type and the text of a single token to stdout
static void token_print(struct ctx *ctx, struct token *token)
{
	assert(token->type != UNKNOWN);

	printf("[token type=%d] '", (int)token->type);
	if (token->type == NEWLINE || token->type == END) {
		printf("' (Unprintable)\n");
		return;
	}

	fwrite(ctx->data + token->start, 1, token->end - token->start, stdout);
	printf("'\n");
}

// Prints every token of the context to stdout
static void tokens_print(struct ctx *ctx)
{
	for (size_t i = 0; i < ctx->token_count; i++)
		token_print(ctx, &ctx->tokens[i]);
}

// Splits ctx->data into tokens. Malformed input is a fatal error.
static void tokenize(struct ctx *ctx)
{
	for (size_t i = 0; i < ctx->size; i++) {
		const char cur = ctx->data[i];

		switch (cur) {
		case '\0':
			token_add(ctx, END, i, i + 1);
			context_rewind(ctx);
			return;
		case '\n':
			token_add(ctx, NEWLINE, i, i + 1);
			continue;
		case ';':
			token_add(ctx, EOL, i, i + 1);
			continue;
		case '(':
			token_add(ctx, LPAREN, i, i + 1);
			continue;
		case ')':
			token_add(ctx, RPAREN, i, i + 1);
			continue;
		case '=':
			token_add(ctx, EQUAL, i, i + 1);
			continue;
		case ' ':
			ctx->column++;
			continue;
		default:
			break;
		}

		if (next_non_alnum(ctx, i) == ':') { // Type with param identifier
			size_t start_param = peek_alnum_to(ctx, i, ':') + 1;
			size_t end_param;
			if (peek_to_is_alnum(ctx, start_param, ' ')) {
				end_param = peek_alnum_to(ctx, start_param, ' ');
			} else { // Unnamed identifier ('_')
				end_param = peek_to(ctx, start_param, ' ');
				if (end_param - start_param != 1 || ctx->data[start_param] != '_')
					errln(ctx, "Invalid param identifier");
			}

			token_add(ctx, TYPE, i, start_param - 1);
			token_add(ctx, TYPEDELIM, start_param - 1, start_param);
			token_add(ctx, PARAM, start_param, end_param);

			i = end_param - 1; // The loop increment moves onto the delimiter
			continue;
		}

		if (peek_to_is_alnum(ctx, i, ' ')) { // General identifier
			size_t end_ident = peek_alnum_to(ctx, i, ' ');
			token_add(ctx, IDENT, i, end_ident);
			i = end_ident - 1;
		} else { // Special/custom operator
			size_t end_operator = peek_special_to(ctx, i, ' ');
			token_add(ctx, OPERATOR, i, end_operator);
			i = end_operator - 1;
		}
	}

	context_rewind(ctx);
}

/**
 * Linter
 */

// Verifies that parentheses are balanced: every ')' needs an earlier '('
// and every '(' has to be closed. Violations are fatal.
static void lint(struct ctx *ctx)
{
	// Lint parens
	size_t depth = 0;
	for (size_t i = 0; i < ctx->token_count; i++) {
		const struct token *token = &ctx->tokens[i];
		if (token->type == LPAREN) {
			depth++;
		} else if (token->type == RPAREN) {
			if (depth == 0) // Closed before opened, e.g. ")("
				errln(ctx, "Invalid parens balance");
			depth--;
		}
	}

	if (depth != 0)
		errln(ctx, "Invalid parens balance");
}

/**
 * Main
 */

// Usage: out <file>; tokenizes the file and prints its tokens
int main(int argc, char *argv[])
{
	if (argc < 2)
		err("Not enough arguments!");

	struct ctx *ctx = context_create(argv[1]);
	preprocess(ctx);
	tokenize(ctx);
	lint(ctx);
	tokens_print(ctx);

	context_destroy(ctx);

	return 0;
}