From bfdc8d2d843c08bd01517256a63518d03da236f6 Mon Sep 17 00:00:00 2001
From: Marvin Borner
Date: Tue, 27 Jul 2021 21:10:34 +0200
Subject: Started treeify

---
 .gitignore           |   1 +
 Makefile             |  18 +++++----
 inc/context.h        |  32 +++++++++++++++
 inc/lint.h           |   8 ++++
 inc/log.h            |   9 +++++
 inc/preprocess.h     |   8 ++++
 inc/tokenize.h       |  35 +++++++++++++++++
 inc/treeify.h        |  71 +++++++++++++++++++++++++++++++++
 src/context.c        |  12 +++++-
 src/inc/context.h    |  23 -----------
 src/inc/lib.h        |  33 ----------------
 src/inc/lint.h       |   8 ----
 src/inc/log.h        |   9 -----
 src/inc/preprocess.h |   8 ----
 src/inc/tokenize.h   |  35 -----------------
 src/lint.c           |   3 +-
 src/log.c            |   5 ++-
 src/main.c           |   3 +-
 src/preprocess.c     |   8 +++-
 src/tokenize.c       |  21 +++++-----
 src/treeify.c        | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++
 21 files changed, 318 insertions(+), 141 deletions(-)
 create mode 100644 inc/context.h
 create mode 100644 inc/lint.h
 create mode 100644 inc/log.h
 create mode 100644 inc/preprocess.h
 create mode 100644 inc/tokenize.h
 create mode 100644 inc/treeify.h
 delete mode 100644 src/inc/context.h
 delete mode 100644 src/inc/lib.h
 delete mode 100644 src/inc/lint.h
 delete mode 100644 src/inc/log.h
 delete mode 100644 src/inc/preprocess.h
 delete mode 100644 src/inc/tokenize.h
 create mode 100644 src/treeify.c

diff --git a/.gitignore b/.gitignore
index 017fcf9..50660f9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 build/
 
+tags
 compile_commands.json
diff --git a/Makefile b/Makefile
index 7e61d70..9f74876 100644
--- a/Makefile
+++ b/Makefile
@@ -1,23 +1,25 @@
-SOURCEDIR = src
-BUILDDIR = build
+INCLUDEDIR = $(CURDIR)/inc
+SOURCEDIR = $(CURDIR)/src
+BUILDDIR = $(CURDIR)/build
 SOURCES = $(wildcard $(SOURCEDIR)/*.c)
 OBJS = $(patsubst $(SOURCEDIR)/%.c, $(BUILDDIR)/%.o, $(SOURCES))
 
-CC = gcc
-CFLAGS = -Ofast -Wall -Wextra -Werror -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined -I$(SOURCEDIR)/inc/
+CC = ccache gcc
+CFLAGS = -Ofast -Wall -Wextra -Werror -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined -fstack-protector-strong -I$(INCLUDEDIR)
 
 all: $(OBJS)
-	@$(CC) -o ./$(BUILDDIR)/out $(CFLAGS) $^
+	@$(CC) -o $(BUILDDIR)/out $(CFLAGS) $^
 
 clean:
 	@$(RM) -rf $(BUILDDIR)
 
-run: clean all
-	@./$(BUILDDIR)/out test.fun
+run: clean all sync
+	@$(BUILDDIR)/out test.fun
 
 $(BUILDDIR)/%.o: $(SOURCEDIR)/%.c
 	@mkdir -p $(BUILDDIR)
 	@$(CC) -c -o $@ $(CFLAGS) $<
 
 sync:
-	@make --always-make --dry-run | grep -wE 'gcc|g\+\+' | grep -w '\-c' | jq -nR '[inputs|{directory:"$(PWD)", command:., file: match(" [^ ]+$$").string[1:]}]' > compile_commands.json
+	@ctags -R --exclude=.git --exclude=build .
+	@$(MAKE) --always-make --dry-run | grep -wE 'gcc|g\+\+' | grep -w '\-c' | jq -nR '[inputs|{directory:".", command:., file: match(" [^ ]+$$").string[1:]}]' > compile_commands.json
diff --git a/inc/context.h b/inc/context.h
new file mode 100644
index 0000000..3f7b686
--- /dev/null
+++ b/inc/context.h
@@ -0,0 +1,32 @@
+#ifndef CONTEXT_H
+#define CONTEXT_H
+
+#include <stddef.h>
+
+typedef struct { // a string kept as an index range instead of a copy
+	size_t start, end; // token_print() reads ctx->data[start] up to, not including, ctx->data[end]
+} ctx_string;
+
+struct ctx {
+	size_t line;
+	size_t column; // preprocess() checks column == 1 before handling a `#` line
+	const char *path;
+
+	char *raw; // context_create() writes a terminating 0 at raw[size]
+	char *data; // buffer that ctx_string offsets index into, see token_print()
+	size_t size; // tokenize() scans indices 0..size-1
+
+	size_t token_count; // pre-incremented by token_add(), so it is the index of the newest token
+	struct token *tokens; // freed by context_destroy(); treeify() starts reading at index 1
+
+	struct {
+		struct tree *head; // made by tree_create() in context_create(), released in context_destroy()
+		struct node *current; // context_create() sets it to NULL; presumably a parse cursor — TODO confirm
+	} tree;
+};
+
+struct ctx *context_create(const char *path);
+void context_destroy(struct ctx *ctx);
+void context_rewind(struct ctx *ctx);
+
+#endif
diff --git a/inc/lint.h b/inc/lint.h
new file mode 100644
index 0000000..9c3f808
--- /dev/null
+++ b/inc/lint.h
@@ -0,0 +1,8 @@
+#ifndef LINT_H
+#define LINT_H
+
+#include <context.h>
+
+void lint(struct ctx *ctx);
+
+#endif
diff --git a/inc/log.h b/inc/log.h
new file mode 100644
index 0000000..4698f9c
--- /dev/null
+++ b/inc/log.h
@@ -0,0 +1,9 @@
+#ifndef LOG_H
+#define LOG_H
+
+#include <context.h>
+
+__attribute__((noreturn)) void errln(struct ctx *ctx, const char *fmt, ...);
+__attribute__((noreturn)) void err(const char *fmt, ...);
+
+#endif
diff --git a/inc/preprocess.h b/inc/preprocess.h
new file mode 100644
index 0000000..e57af10
--- /dev/null
+++ b/inc/preprocess.h
@@ -0,0 +1,8 @@
+#ifndef PREPROCESS_H
+#define PREPROCESS_H
+
+#include <context.h>
+
+void preprocess(struct ctx *ctx);
+
+#endif
diff --git a/inc/tokenize.h b/inc/tokenize.h
new file mode 100644
index 0000000..0142bb9
--- /dev/null
+++ b/inc/tokenize.h
@@ -0,0 +1,35 @@
+#ifndef TOKENIZE_H
+#define TOKENIZE_H
+
+#include <context.h>
+
+#define TOKENS_MAX 4096
+
+enum token_type {
+	UNKNOWN, // value 0, i.e. what a zero-initialised token has; token_print() and treeify() assert against it
+
+	TYPE, // first token of a declaration or parameter, e.g. the `u32` in `u32:f`
+	TYPEDELIM, // expected between TYPE and PARAM by parse_declaration(); presumably `:` — TODO confirm
+	PARAM, // name after the delimiter; parse_declaration() uses it for the declared name and for parameters
+
+	IDENT,
+	OPERATOR,
+
+	LPAREN,
+	RPAREN,
+	EQUAL, // ends the head of a declaration, see parse_declaration()
+
+	NEWLINE, // stepped over by treeify() between statements
+	EOL, // stepped over by treeify(); also ends parse_declaration()'s search for EQUAL
+	END, // stops treeify()
+};
+
+struct token {
+	enum token_type type;
+	ctx_string string; // offsets of the token text; token_print() prints ctx->data[start..end)
+};
+
+void token_print(struct ctx *ctx, struct token *tok);
+void tokenize(struct ctx *ctx);
+
+#endif
diff --git a/inc/treeify.h b/inc/treeify.h
new file mode 100644
index 0000000..3af67f8
--- /dev/null
+++ b/inc/treeify.h
@@ -0,0 +1,71 @@
+#ifndef TREEIFY_H
+#define TREEIFY_H
+
+#include <context.h>
+
+enum node_type { // kind tag held in struct node.type
+	EXPRESSION, // presumably pairs with struct node_expression — TODO confirm
+	DECLARATION, // presumably pairs with struct node_declaration — TODO confirm
+};
+
+/**
+ * Expressions
+ */
+
+// Callee of a call expression: the *f* in (*f* x y)
+struct node_expression_identifier {
+	ctx_string name; // f
+	ctx_string type; // u32
+};
+
+// One argument of a call expression: *x* or *y* in (f *x* *y*)
+struct node_expression_parameter {
+	ctx_string name; // x or y
+	ctx_string type; // u32
+};
+
+// A whole call expression: *(f x y)*
+struct node_expression {
+	struct node_expression_identifier *callee; // f
+	struct node_expression_parameter *parameters; // x y (no element count is stored alongside)
+};
+
+/**
+ * Declarations
+ */
+
+// The declared name and its type: *u32:f* in `u32:f u32:x u32:y = (...)`
+struct node_declaration_callee {
+	ctx_string name; // f (parse_declaration() copies the PARAM token's string)
+	ctx_string type; // u32 (parse_declaration() copies the leading TYPE token's string)
+};
+
+// One declared parameter: *u32:x* or *u32:y* in `u32:f u32:x u32:y = (...)`
+struct node_declaration_parameter {
+	ctx_string name; // x or y
+	ctx_string type; // u32
+};
+
+// Head of a declaration, everything left of `=`: *u32:f u32:x u32:y* = (...) OR
+// *u32:a* = ...
+struct node_declaration {
+	struct node_declaration_callee callee; // f (stored by value)
+	struct node_declaration_parameter *parameters; // x y OR NULL; no count is stored, and parse_declaration() always allocates one spare element
+};
+
+struct node {
+	enum node_type type; // presumably selects which node_* struct data points to — TODO confirm
+	/* struct node *next; */
+	void *data;
+};
+
+struct tree {
+	struct node *node; // NULL in a tree fresh from tree_create()
+};
+
+struct tree *tree_create(void);
+void tree_destroy(struct tree *tree);
+
+void treeify(struct ctx *ctx);
+
+#endif
diff --git a/src/context.c b/src/context.c
index c0898d8..ff0291a 100644
--- a/src/context.c
+++ b/src/context.c
@@ -1,6 +1,10 @@
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
 #include <context.h>
-#include <lib.h>
 #include <tokenize.h>
+#include <treeify.h>
 
 struct ctx *context_create(const char *path)
 {
@@ -24,6 +28,9 @@ struct ctx *context_create(const char *path)
 
 	ctx->raw[ctx->size] = 0;
 
+	ctx->tree.head = tree_create();
+	ctx->tree.current = NULL;
+
 	return ctx;
 }
 
@@ -41,6 +48,9 @@ void context_destroy(struct ctx *ctx)
 	if (ctx->tokens)
 		free(ctx->tokens);
 
+	if (ctx->tree.head)
+		tree_destroy(ctx->tree.head);
+
 	free(ctx);
 }
 
diff --git a/src/inc/context.h b/src/inc/context.h
deleted file mode 100644
index 603f42e..0000000
--- a/src/inc/context.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef CONTEXT_H
-#define CONTEXT_H
-
-#include <lib.h>
-
-struct ctx {
-	size_t line;
-	size_t column;
-	const char *path;
-
-	char *raw;
-	char *data;
-	size_t size;
-
-	size_t token_count;
-	struct token *tokens;
-};
-
-struct ctx *context_create(const char *path);
-void context_destroy(struct ctx *ctx);
-void context_rewind(struct ctx *ctx);
-
-#endif
diff --git a/src/inc/lib.h b/src/inc/lib.h
deleted file mode 100644
index 0402990..0000000
--- a/src/inc/lib.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef LIB_H
-#define LIB_H
-
-// Compatibility imports (mainly for Melvix)
-
-#ifdef __melvix__
-
-#include <arg.h>
-#include <assert.h>
-#include <def.h>
-#include <mem.h>
-#include <print.h>
-#include <str.h>
-#include <sys.h>
-typedef size_t u32;
-
-#elif defined(__linux__) || defined(unix) || defined(__unix__) || defined(__unix) ||               \
-	defined(__APPLE__) || defined(__FreeBSD)
-
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/param.h>
-
-#else
-#error "Unknown operating system"
-#endif
-
-#endif
diff --git a/src/inc/lint.h b/src/inc/lint.h
deleted file mode 100644
index 9c3f808..0000000
--- a/src/inc/lint.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef LINT_H
-#define LINT_H
-
-#include <context.h>
-
-void lint(struct ctx *ctx);
-
-#endif
diff --git a/src/inc/log.h b/src/inc/log.h
deleted file mode 100644
index 4698f9c..0000000
--- a/src/inc/log.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef LOG_H
-#define LOG_H
-
-#include <context.h>
-
-__attribute__((noreturn)) void errln(struct ctx *ctx, const char *fmt, ...);
-__attribute__((noreturn)) void err(const char *fmt, ...);
-
-#endif
diff --git a/src/inc/preprocess.h b/src/inc/preprocess.h
deleted file mode 100644
index e57af10..0000000
--- a/src/inc/preprocess.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef PREPROCESS_H
-#define PREPROCESS_H
-
-#include <context.h>
-
-void preprocess(struct ctx *ctx);
-
-#endif
diff --git a/src/inc/tokenize.h b/src/inc/tokenize.h
deleted file mode 100644
index 557da9c..0000000
--- a/src/inc/tokenize.h
+++ /dev/null
@@ -1,35 +0,0 @@
-#ifndef TOKENIZE_H
-#define TOKENIZE_H
-
-#include <context.h>
-
-#define TOKENS_MAX 4096
-
-enum token_type {
-	UNKNOWN,
-
-	TYPE,
-	TYPEDELIM,
-	PARAM,
-
-	IDENT,
-	OPERATOR,
-
-	LPAREN,
-	RPAREN,
-	EQUAL,
-
-	NEWLINE,
-	EOL,
-	END,
-};
-
-struct token {
-	enum token_type type;
-	size_t start, end;
-};
-
-void tokens_print(struct ctx *ctx);
-void tokenize(struct ctx *ctx);
-
-#endif
diff --git a/src/lint.c b/src/lint.c
index 6b7fd44..5ff1864 100644
--- a/src/lint.c
+++ b/src/lint.c
@@ -1,4 +1,5 @@
-#include <lib.h>
+#include <stddef.h>
+
 #include <lint.h>
 #include <log.h>
 #include <tokenize.h>
diff --git a/src/log.c b/src/log.c
index ee06313..5820a67 100644
--- a/src/log.c
+++ b/src/log.c
@@ -1,4 +1,7 @@
-#include <lib.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
 #include <log.h>
 
 static void context_print(FILE *fd, struct ctx *ctx)
diff --git a/src/main.c b/src/main.c
index 74cc3d6..0e6807d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,6 +3,7 @@
 #include <log.h>
 #include <preprocess.h>
 #include <tokenize.h>
+#include <treeify.h>
 
 int main(int argc, char *argv[])
 {
@@ -13,7 +14,7 @@ int main(int argc, char *argv[])
 	preprocess(ctx);
 	tokenize(ctx);
 	lint(ctx);
-	tokens_print(ctx);
+	treeify(ctx);
 
 	context_destroy(ctx);
 
diff --git a/src/preprocess.c b/src/preprocess.c
index dec55b7..8d9394a 100644
--- a/src/preprocess.c
+++ b/src/preprocess.c
@@ -1,4 +1,8 @@
-#include <lib.h>
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>
+#include <string.h>
+
 #include <log.h>
 #include <preprocess.h>
 
@@ -29,7 +33,7 @@ void preprocess(struct ctx *ctx)
 		} else if (cur == '\0') {
 			break;
 		} else if (cur == '#' && ctx->column == 1) {
-			if (strncmp(ctx->raw + i + 1, "inc ", MIN(4, ctx->size - i)) == 0) {
+			if (strncmp(ctx->raw + i + 1, "inc ", fmin(4, ctx->size - i)) == 0) {
 				// TODO: Add include features
 			} else if (*(ctx->raw + i + 1) == '#') {
 				// Comment
diff --git a/src/tokenize.c b/src/tokenize.c
index 69f17c9..179b251 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -1,4 +1,9 @@
-#include <lib.h>
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
 #include <log.h>
 #include <tokenize.h>
 
@@ -72,8 +77,8 @@ static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_
 {
 	struct token token = { 0 };
 	token.type = type;
-	token.start = start;
-	token.end = end;
+	token.string.start = start;
+	token.string.end = end;
 
 	assert(++ctx->token_count < TOKENS_MAX);
 	ctx->tokens[ctx->token_count] = token;
@@ -86,7 +91,7 @@ static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_
 	}
 }
 
-static void token_print(struct ctx *ctx, struct token *token)
+void token_print(struct ctx *ctx, struct token *token)
 {
 	assert(token->type != UNKNOWN);
 
@@ -96,17 +101,11 @@ static void token_print(struct ctx *ctx, struct token *token)
 		return;
 	}
 
-	for (size_t i = token->start; i < token->end; i++)
+	for (size_t i = token->string.start; i < token->string.end; i++)
 		printf("%c", ctx->data[i]);
 	printf("'\n");
 }
 
-void tokens_print(struct ctx *ctx)
-{
-	for (size_t i = 1; i < ctx->token_count; i++)
-		token_print(ctx, &ctx->tokens[i]);
-}
-
 void tokenize(struct ctx *ctx)
 {
 	for (size_t i = 0; i < ctx->size; i++) {
diff --git a/src/treeify.c b/src/treeify.c
new file mode 100644
index 0000000..32a12c2
--- /dev/null
+++ b/src/treeify.c
@@ -0,0 +1,109 @@
+#include <assert.h>
+#include <stdlib.h>
+
+#include <log.h>
+#include <tokenize.h>
+#include <treeify.h>
+
+__attribute__((noreturn)) static void expected(enum token_type type) // a required token is missing
+{
+	err("Expected token of type %d while creating tree!\n", type);
+}
+
+__attribute__((noreturn)) static void unexpected(struct ctx *ctx, struct token *token)
+{
+	token_print(ctx, token); // show the offending token first; err() below is declared noreturn
+	err("Unexpected token while creating tree!\n");
+}
+
+static struct token *next(struct token *token, size_t i) // token i slots further on; no bounds check
+{
+	return &token[i];
+}
+
+static struct token *parse_declaration(struct ctx *ctx, struct token *token)
+{
+	// Parses `type:name [type:param]... =`; returns the token after `=`, aborts on errors
+	if (next(token, 1)->type != TYPEDELIM)
+		unexpected(ctx, next(token, 1));
+	if (next(token, 2)->type != PARAM)
+		unexpected(ctx, next(token, 2));
+	// Find the `=`; reaching EOL or END first is reported through expected()
+	struct token *iterator = token;
+	while ((iterator = next(iterator, 1))) {
+		if (iterator->type == EQUAL)
+			break;
+		if (iterator->type == EOL || iterator->type == END)
+			expected(EQUAL);
+	}
+	// After the 3-token callee, each parameter is a TYPE TYPEDELIM PARAM triple
+	size_t diff = iterator - token - 3;
+	assert(diff % 3 == 0);
+	struct node_declaration *node = malloc(sizeof(*node));
+	struct node_declaration_parameter *parameters = calloc(diff / 3 + 1, sizeof(*parameters));
+	if (!node || !parameters)
+		err("Out of memory while creating tree!\n");
+	node->callee.type = token->string;
+	node->callee.name = next(token, 2)->string;
+	node->parameters = parameters; // calloc() keeps the spare last element zeroed
+	for (size_t i = 0; i < diff / 3; i++) {
+		struct token *param = token + (i + 1) * 3;
+		assert(param->type == TYPE);
+		assert(next(param, 2)->type == PARAM);
+		parameters[i].type = param->string;
+		parameters[i].name = next(param, 2)->string;
+	}
+	return next(iterator, 1); // NOTE(review): node is not linked into ctx->tree yet, so nothing frees it
+}
+
+static struct token *parse(struct ctx *ctx, struct token *token) // parses one construct; returns the token to resume at
+{
+	switch (token->type) { // every enumerator is listed because CFLAGS enables -Wswitch-enum
+	case TYPE: // a leading type starts a declaration
+		return parse_declaration(ctx, token);
+	case UNKNOWN:
+	case TYPEDELIM:
+	case PARAM:
+	case IDENT:
+	case OPERATOR:
+	case LPAREN:
+	case RPAREN:
+	case EQUAL:
+	case NEWLINE:
+	case EOL:
+	case END:
+	default: // no other token may start a construct yet
+		unexpected(ctx, token);
+	}
+
+	return NULL; // only reachable if unexpected() returned; it ends in err(), which is declared noreturn
+}
+
+struct tree *tree_create(void)
+{
+	// Returns an empty tree (calloc() zero-fills, so node == NULL) or NULL when the
+	// allocation fails; context_destroy() already tolerates a NULL head.
+	return calloc(1, sizeof(struct tree));
+}
+
+void tree_destroy(struct tree *tree) // releases only the struct tree itself
+{
+	// TODO: Destroy nodes (tree->node and whatever it references are not freed yet)
+	free(tree);
+}
+
+void treeify(struct ctx *ctx) // walks ctx->tokens construct by construct until END
+{
+	// Tokens are stored from index 1 on: token_add() pre-increments token_count
+	struct token *cursor = &ctx->tokens[1];
+
+	while (cursor && cursor->type != END) {
+		assert(cursor->type != UNKNOWN);
+
+		// NEWLINE/EOL only separate statements, so they are stepped over
+		if (cursor->type == NEWLINE || cursor->type == EOL)
+			cursor = next(cursor, 1);
+		else
+			cursor = parse(ctx, cursor); // returns the token to resume at
+	}
+}
-- 
cgit v1.2.3