aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/context.c18
-rw-r--r--src/preprocess.c9
-rw-r--r--src/tokenize.c241
-rw-r--r--src/treeify.c196
4 files changed, 286 insertions, 178 deletions
diff --git a/src/context.c b/src/context.c
index be93db0..756c38e 100644
--- a/src/context.c
+++ b/src/context.c
@@ -4,6 +4,8 @@
#include <string.h>
#include <context.h>
+#include <log.h>
+#include <preprocess.h>
#include <tokenize.h>
#include <treeify.h>
@@ -55,6 +57,13 @@ void context_destroy(struct ctx *ctx)
free(ctx);
}
+// Fetch the character at index i of the context buffer. Reports
+// "Unexpected end of buffer" through errln when i is not below ctx->size
+// or when the character stored at i is NUL.
+char context_getch(struct ctx *ctx, size_t i)
+{
+ const int in_bounds = i < ctx->size;
+ if (!in_bounds || ctx->data[i] == '\0')
+ errln(&ctx->location, "Unexpected end of buffer");
+ return ctx->data[i];
+}
+
#define CONTEXT_COUNT 3
void context_print(FILE *fd, struct ctx_location *location)
{
@@ -63,25 +72,26 @@ void context_print(FILE *fd, struct ctx_location *location)
for (size_t line = 0, index = 0; line < end_line;) {
if (line < start_line) {
- if (location->data[index++] == '\n')
+ if (location->data[index] == '\n' || location->data[index] == MACRO_NEWLINE)
line++;
+ index++;
continue;
}
const char *end = strchr(location->data + index, '\n') + 1;
- assert(end);
+ assert(end > location->data);
size_t length = end - (location->data + index) - 1;
if (location->line == line) {
int pointer_length = location->column + 9;
char *pointer = malloc(pointer_length); // Literally a pointer
fprintf(fd,
- "\x1B[1;32m%6lu | %.*s\x1B[1;31m%c\x1B[1;32m%.*s\n\x1B[1;31m%.*s%c\x1B[0m\n",
+ "\x1B[1;32m%6lu | %.*s\x1B[1;31m%c\x1B[1;32m%.*s\n\x1B[1;31m%.*s%s\x1B[0m\n",
line + 1, (int)location->column, location->data + index,
*(location->data + index + location->column),
(int)(length - location->column - 1),
location->data + index + location->column + 1, pointer_length,
- (char *)memset(pointer, '~', pointer_length), '^');
+ (char *)memset(pointer, '~', pointer_length), "^ (around here)");
free(pointer);
} else {
fprintf(fd, "%6lu | %.*s\n", line + 1, (int)length, location->data + index);
diff --git a/src/preprocess.c b/src/preprocess.c
index 3a0c0bc..f4f67f7 100644
--- a/src/preprocess.c
+++ b/src/preprocess.c
@@ -13,10 +13,15 @@ static void preprocess_erase(struct ctx *ctx, size_t start)
for (size_t i = start; i < ctx->size; i++) {
char cur = ctx->data[i];
- if (cur == '\n' || cur == '\0')
+ if (cur == '\0')
break;
- ctx->data[i] = ' '; // Spaces get skipped by tokenizer anyways
+ if (cur == '\n') {
+ ctx->data[i] = MACRO_NEWLINE;
+ break;
+ } else {
+ ctx->data[i] = MACRO_SKIP;
+ }
}
}
diff --git a/src/tokenize.c b/src/tokenize.c
index 2a0eea6..0fa58fe 100644
--- a/src/tokenize.c
+++ b/src/tokenize.c
@@ -1,89 +1,91 @@
#include <assert.h>
#include <ctype.h>
+#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <log.h>
+#include <preprocess.h>
#include <tokenize.h>
-static char next_non_alnum(struct ctx *ctx, size_t start)
-{
- for (size_t i = start; i < ctx->size; i++)
- if (!isalnum(ctx->data[i]))
- return ctx->data[i];
-
- errln(&ctx->location, "Unexpected end of buffer");
-}
+// TODO: Do some different limitations for identifiers/types
-static bool peek_to_is_alnum(struct ctx *ctx, size_t start, char ch)
+// Scan forward from start and return the index of the first character that
+// equals one of the opt_count delimiter characters passed as variadic
+// arguments. Reports an error through errln when the first character is a
+// digit, when a '\n' that is not itself a delimiter is reached, or when a
+// character is neither alphanumeric nor within '!'..'~'.
+static size_t peek_identifier(struct ctx *ctx, size_t start, size_t opt_count, ...)
{
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
-
- if (cur == ch || cur == ';' || cur == ')')
- return true;
-
- if (!isalnum(cur))
- return false;
- }
-
- errln(&ctx->location, "Unexpected end of buffer");
-}
+ // <ctype.h> classifiers need a value representable as unsigned char
+ if (isdigit((unsigned char)context_getch(ctx, start)))
+ errln(&ctx->location, "Identifiers can't start with numbers");
-static size_t peek_alnum_to(struct ctx *ctx, size_t start, char ch)
-{
for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
+ char cur = context_getch(ctx, i);
+
+ // Check for every option in variadic argument
+ va_list ap;
+ va_start(ap, opt_count);
+ for (size_t j = 0; j < opt_count; j++) {
+ char ch = va_arg(ap, int); // char arguments are promoted to int
+ if (cur == ch) {
+ va_end(ap);
+ return i;
+ }
+ }
+ va_end(ap);
- if (cur == ch || cur == ';' || cur == ')')
- return i;
+ if (cur == '\n')
+ errln(&ctx->location, "Unexpected end of line while scanning");
- if (!isalnum(cur))
- errln(&ctx->location, "'%c' is not alpha-numeric", cur);
+ // Cast avoids undefined behaviour for negative char values
+ if (!isalnum((unsigned char)cur) && (cur < '!' || cur > '~'))
+ errln(&ctx->location, "'%c' is not an identifier", cur);
}
- errln(&ctx->location, "Unexpected end of buffer");
+ errln(&ctx->location, "Unexpected end of buffer while scanning");
}
-static size_t peek_identifier(struct ctx *ctx, size_t start, char ch)
+// Scan forward from start and return the index of the first character that
+// equals one of the opt_count delimiter characters passed as variadic
+// arguments. Reports an error through errln when the first character is a
+// digit, when a '\n' that is not itself a delimiter is reached, or when a
+// character is neither alphanumeric nor within '!'..'~'.
+static size_t peek_type(struct ctx *ctx, size_t start, size_t opt_count, ...)
{
+ // <ctype.h> classifiers need a value representable as unsigned char
+ if (isdigit((unsigned char)context_getch(ctx, start)))
+ errln(&ctx->location, "Types can't start with numbers");
+
for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
+ char cur = context_getch(ctx, i);
+
+ // Check for every option in variadic argument
+ va_list ap;
+ va_start(ap, opt_count);
+ for (size_t j = 0; j < opt_count; j++) {
+ char ch = va_arg(ap, int); // char arguments are promoted to int
+ if (cur == ch) {
+ va_end(ap);
+ return i;
+ }
+ }
+ va_end(ap);
- if (cur == ch || cur == ';' || cur == ')')
- return i;
+ if (cur == '\n')
+ errln(&ctx->location, "Unexpected end of line while scanning");
- if (!isalnum(cur) && (cur < '!' || cur > '~'))
+ // Cast avoids undefined behaviour for negative char values
+ if (!isalnum((unsigned char)cur) && (cur < '!' || cur > '~'))
errln(&ctx->location, "'%c' is not an identifier", cur);
}
- errln(&ctx->location, "Unexpected end of buffer");
-}
-
-static size_t peek_to(struct ctx *ctx, size_t start, char ch)
-{
- for (size_t i = start; i < ctx->size; i++) {
- char cur = ctx->data[i];
-
- if (cur == ch || cur == ';' || cur == ')')
- return i;
- }
-
- errln(&ctx->location, "Unexpected end of buffer");
+ errln(&ctx->location, "Unexpected end of buffer while scanning");
}
static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
{
+ assert(type != UNKNOWN);
+
struct token token = { 0 };
token.type = type;
token.string.start = start;
token.string.end = end;
token.location = ctx->location;
- assert(++ctx->token_count < TOKENS_MAX);
ctx->tokens[ctx->token_count] = token;
+ ctx->token_count++;
+ assert(ctx->token_count < TOKENS_MAX);
+
if (type == NEWLINE) {
ctx->location.line++;
ctx->location.column = 0;
@@ -98,7 +100,7 @@ void token_print(struct ctx *ctx, struct token *token)
printf("[token type=%d] ", token->type);
if (token->type == NEWLINE || token->type == END) {
- printf("' (Unprintable)\n");
+ printf("(Unprintable)\n");
return;
}
@@ -108,60 +110,125 @@ void token_print(struct ctx *ctx, struct token *token)
+// Split the context buffer into tokens with a small state machine:
+// PARSE_DECLARATION emits IDENT, TYPE..., TYPEDELIM ("->") and a final TYPE,
+// PARSE_DEFINITION emits IDENT, PARAM... and IDENTDELIM (':'),
+// PARSE_BODY emits LPAREN/RPAREN/IDENT until a newline ends the body, and
+// PARSE_STRING spans a double-quoted STRING token entered from any state.
+// An END token is appended once the whole buffer has been consumed.
void tokenize(struct ctx *ctx)
{
+ enum {
+ PARSE_DECLARATION,
+ PARSE_DEFINITION,
+ PARSE_NUMBER,
+ PARSE_BODY,
+ PARSE_STRING,
+ } state = PARSE_DECLARATION,
+ prev = PARSE_DECLARATION;
+
+ // TODO: Clean this loop up (move into separate tokenizing functions)
+
+ size_t start = 0; // Index of the opening quote of the current string
for (size_t i = 0; i < ctx->size; i++) {
- const char cur = ctx->data[i];
-
- switch (cur) {
- case '\0':
- token_add(ctx, END, i, i + 1);
- return;
- case '\n':
- token_add(ctx, NEWLINE, i, i + 1);
- continue;
- case ';':
- token_add(ctx, EOL, i, i + 1);
- continue;
- case '(':
- token_add(ctx, LPAREN, i, i + 1);
- continue;
- case ')':
- token_add(ctx, RPAREN, i, i + 1);
+ const char cur = context_getch(ctx, i);
+
+ // String parsing
+ if (cur == '"') {
+ if (state == PARSE_STRING) {
+ token_add(ctx, STRING, start, i + 1);
+ state = prev;
+ } else {
+ // Remember the enclosing state so the closing quote restores it
+ prev = state;
+ state = PARSE_STRING;
+ start = i;
+ }
continue;
- case '=':
- token_add(ctx, EQUAL, i, i + 1);
+ } else if (state == PARSE_STRING) {
continue;
- case ' ':
- ctx->location.column++;
+ }
+
+ if (state != PARSE_BODY) {
+ switch (cur) {
+ case '\0':
+ errln(&ctx->location, "Unexpected end of buffer");
+ case '\n':
+ token_add(ctx, NEWLINE, i, i + 1);
+ continue;
+ case MACRO_SKIP:
+ ctx->location.column++;
+ continue;
+ case MACRO_NEWLINE:
+ // Mirror token_add's NEWLINE handling: new line, column restarts
+ ctx->location.line++;
+ ctx->location.column = 0;
+ continue;
+ default:
+ break;
+ }
+ }
+
+ if (state == PARSE_BODY) {
+ switch (cur) {
+ case '(':
+ token_add(ctx, LPAREN, i, i + 1);
+ continue;
+ case ')':
+ token_add(ctx, RPAREN, i, i + 1);
+ continue;
+ case '\n':
+ token_add(ctx, NEWLINE, i, i + 1);
+ state = PARSE_DECLARATION;
+ continue;
+ default:
+ break;
+ }
+
+ size_t end_ident = peek_identifier(ctx, i, 3, ' ', ')', '\n');
+ token_add(ctx, IDENT, i, end_ident);
+ // Step back one unless the delimiter is a space, so ')' and '\n' get tokenized
+ i = end_ident - (context_getch(ctx, end_ident) != ' ');
continue;
- default:
- break;
}
- if (next_non_alnum(ctx, i) == ':') { // Type with param identifier
- size_t start_param = peek_alnum_to(ctx, i, ':') + 1;
- size_t end_param;
- if (peek_to_is_alnum(ctx, start_param, ' ')) {
- end_param = peek_alnum_to(ctx, start_param, ' ');
- } else { // Unnamed identifier ('_')
- end_param = peek_to(ctx, start_param, ' ');
- if (end_param - start_param != 1 || ctx->data[start_param] != '_')
- errln(&ctx->location, "Invalid param identifier");
+ if (state == PARSE_DECLARATION) {
+ size_t end_ident = peek_identifier(ctx, i, 1, ' ');
+ token_add(ctx, IDENT, i, end_ident);
+
+ size_t start_type = end_ident + 1;
+ while (context_getch(ctx, start_type) != '-' ||
+ context_getch(ctx, start_type + 1) != '>') {
+ size_t end_type = peek_type(ctx, start_type, 1, ' ');
+ token_add(ctx, TYPE, start_type, end_type);
+ start_type = end_type + 1;
}
- token_add(ctx, TYPE, i, start_param - 1);
- token_add(ctx, TYPEDELIM, start_param - 1, start_param);
- token_add(ctx, PARAM, start_param, end_param);
+ if (context_getch(ctx, start_type + 2) != ' ')
+ errln(&ctx->location, "Missing space");
+ token_add(ctx, TYPEDELIM, start_type, start_type + 2);
+
+ start_type += 3;
+ size_t final_type = peek_type(ctx, start_type, 1, '\n');
+ token_add(ctx, TYPE, start_type, final_type);
- i = end_param - 1;
+ i = final_type - 1;
+ state = PARSE_DEFINITION;
continue;
}
- if (peek_identifier(ctx, i, ' ')) { // General identifier
- size_t end_ident = peek_to(ctx, i, ' ');
+ if (state == PARSE_DEFINITION) {
+ size_t end_ident = peek_identifier(ctx, i, 1, ' ');
token_add(ctx, IDENT, i, end_ident);
- i = end_ident - 1;
+
+ size_t start_parameter = end_ident + 1;
+ while (context_getch(ctx, start_parameter) != ':') {
+ size_t end_parameter =
+ peek_identifier(ctx, start_parameter, 1, ' ');
+ token_add(ctx, PARAM, start_parameter, end_parameter);
+ start_parameter = end_parameter + 1;
+ }
+
+ if (context_getch(ctx, start_parameter + 1) != ' ')
+ errln(&ctx->location, "Missing space");
+ token_add(ctx, IDENTDELIM, start_parameter, start_parameter + 1);
+
+ i = start_parameter + 1;
+ state = PARSE_BODY;
+ continue;
}
}
+ /* for (size_t i = 0; i < ctx->token_count; i++) */
+ /* token_print(ctx, &ctx->tokens[i]); */
+
+ token_add(ctx, END, ctx->size, ctx->size);
context_rewind(ctx);
}
diff --git a/src/treeify.c b/src/treeify.c
index 5539d21..47f8216 100644
--- a/src/treeify.c
+++ b/src/treeify.c
@@ -6,6 +6,8 @@
#include <tokenize.h>
#include <treeify.h>
+#define INITIAL_PARAMETER_COUNT 3
+
static void __expect(struct ctx *ctx, struct token *token, enum token_type type, const char *file,
int line, const char *func, const char *type_enum)
{
@@ -23,96 +25,110 @@ static struct token *next(struct token *token, size_t i)
return token + i;
}
-static struct token *parse_declaration(struct ctx *ctx, struct token *token);
-static struct token *parse_expression(struct ctx *ctx, struct token *token)
+// Parse one declaration line: IDENT, zero or more TYPE tokens, TYPEDELIM,
+// a final TYPE and a NEWLINE. Builds a node_declaration holding the callee
+// name, the parameter types and the callee type, adds it to the tree and
+// returns the token following the NEWLINE.
+static struct token *parse_declaration(struct ctx *ctx, struct token *token)
{
- struct node_expression *node = malloc(sizeof(*node));
- // TODO: Push expressions into tree (using subtrees)
-
- struct token *iterator = token;
- while ((iterator = next(iterator, 1))) {
- if (iterator->type == LPAREN) {
- iterator = parse_expression(ctx, iterator);
- continue;
- }
+ expect(token, IDENT);
- if (iterator->type == TYPE) {
- iterator = parse_declaration(ctx, iterator);
- continue;
- }
+ struct node_declaration *node = malloc(sizeof(*node));
+ node->callee.name = token->string;
- if (iterator->type == RPAREN)
- break;
+ node->parameters = malloc(INITIAL_PARAMETER_COUNT * sizeof(*node->parameters));
+ size_t param_idx = 0;
- if (iterator->type == EOL || iterator->type == END)
- expect(iterator, RPAREN);
+ token = next(token, 1);
+ while (token->type != TYPEDELIM) {
+ if (token->type == NEWLINE || token->type == END)
+ expect(token, TYPEDELIM);
- expect(iterator, IDENT);
+ if (token->type != TYPE)
+ expect(token, TYPE);
+
+ // Grow by one chunk whenever the current capacity is exhausted, i.e.
+ // before writing slot INITIAL_PARAMETER_COUNT, 2 * INITIAL_PARAMETER_COUNT, ...
+ // (the previous computation kept the capacity one chunk too small and
+ // wrote past the end of the array)
+ if (param_idx > 0 && param_idx % INITIAL_PARAMETER_COUNT == 0)
+ // TODO: Fix realloc failure check (and other mallocs too btw)
+ node->parameters = realloc(node->parameters,
+ (param_idx + INITIAL_PARAMETER_COUNT) *
+ sizeof(*node->parameters));
+
+ node->parameters[param_idx].type = token->string;
+ param_idx++;
+
+ token = next(token, 1);
}
- return iterator;
+ node->parameter_count = param_idx;
+
+ token = next(token, 1);
+ expect(token, TYPE);
+ node->callee.type = token->string;
+
+ tree_add(ctx, DECLARATION, node); // TODO: Push to declaration/signature array instead
+
+ expect(next(token, 1), NEWLINE);
+ return next(token, 2);
}
-static struct token *parse_declaration(struct ctx *ctx, struct token *token)
+// Parse one definition line: IDENT, zero or more PARAM tokens and
+// IDENTDELIM. Builds a node_definition holding the callee name and the
+// parameter names, adds it to the tree, then skips the (not yet parsed)
+// body up to its NEWLINE and returns the token following it.
+static struct token *parse_definition(struct ctx *ctx, struct token *token)
{
- expect(next(token, 1), TYPEDELIM);
- expect(next(token, 2), PARAM);
-
- // Search for equal sign
- struct token *iterator = token;
- while ((iterator = next(iterator, 1))) {
- if (iterator->type == EQUAL)
- break;
- if (iterator->type == EOL || iterator->type == END)
- expect(iterator, EQUAL);
- }
+ expect(token, IDENT);
- struct node_declaration *node = malloc(sizeof(*node));
- node->callee.type = token->string;
- node->callee.name = next(token, 2)->string;
-
- // Magic
- size_t diff = iterator - token - 3;
- assert(diff % 3 == 0);
- node->parameters = malloc((diff / 3 + 1) * sizeof(*node->parameters));
- for (size_t i = 0; i < diff / 3; i++) {
- struct token *param = token + (i + 1) * 3;
- expect(param, TYPE);
- expect(next(param, 2), PARAM);
- node->parameters[i].type = param->string;
- node->parameters[i].name = next(param, 2)->string;
+ struct node_definition *node = malloc(sizeof(*node));
+ node->callee.name = token->string;
+
+ node->parameters = malloc(INITIAL_PARAMETER_COUNT * sizeof(*node->parameters));
+ size_t param_idx = 0;
+
+ token = next(token, 1);
+ while (token->type != IDENTDELIM) {
+ if (token->type == NEWLINE || token->type == END)
+ expect(token, IDENTDELIM);
+
+ if (token->type != PARAM)
+ expect(token, PARAM);
+
+ // Grow by one chunk whenever the current capacity is exhausted, i.e.
+ // before writing slot INITIAL_PARAMETER_COUNT, 2 * INITIAL_PARAMETER_COUNT, ...
+ // (the previous computation kept the capacity one chunk too small and
+ // wrote past the end of the array)
+ if (param_idx > 0 && param_idx % INITIAL_PARAMETER_COUNT == 0)
+ // TODO: Fix realloc failure check (and other mallocs too btw)
+ node->parameters = realloc(node->parameters,
+ (param_idx + INITIAL_PARAMETER_COUNT) *
+ sizeof(*node->parameters));
+
+ node->parameters[param_idx].name = token->string;
+ param_idx++;
+
+ token = next(token, 1);
}
- tree_add(ctx, DECLARATION, node);
+ node->parameter_count = param_idx;
+
+ tree_add(ctx, DEFINITION, node);
- return next(iterator, 1);
+ // TODO: Parse expression
+ while (token->type != NEWLINE)
+ token = next(token, 1);
+
+ return next(token, 1);
+}
+
+// Parse one block: when token is an IDENT, a declaration followed by a
+// definition and a terminating NEWLINE; any other token is stepped over.
+// Returns the token after the one the block ended on.
+static struct token *parse_block(struct ctx *ctx, struct token *token)
+{
+ if (token->type == IDENT) {
+ token = parse_definition(ctx, parse_declaration(ctx, token));
+ expect(token, NEWLINE);
+ }
+
+ return next(token, 1);
+}
+// Run parse_block over the token stream until an END token is reached and
+// return that END token.
static struct token *parse(struct ctx *ctx, struct token *token)
{
- switch (token->type) {
- case LPAREN:
- return parse_expression(ctx, token);
- case TYPE:
- return parse_declaration(ctx, token);
- case RPAREN:
- return next(token, 1);
- case END:
- return NULL;
- case UNKNOWN:
- case TYPEDELIM:
- case PARAM:
- case IDENT:
- case OPERATOR:
- case EQUAL:
- case NEWLINE:
- case SOMETHING:
- case EOL:
- default:
- expect(token, SOMETHING);
- }
+ for (; token->type != END; token = parse_block(ctx, token))
+ ;
- return NULL;
+ return token;
}
struct node *tree_create(void)
@@ -126,7 +142,7 @@ struct node *tree_create(void)
void tree_add(struct ctx *ctx, enum node_type type, void *data)
{
assert(ctx->tree.head);
- struct node *node = malloc(sizeof(*node));
+ struct node *node = calloc(sizeof(*node), 1);
node->type = type;
node->data = data;
if (!ctx->tree.current) {
@@ -139,23 +155,33 @@ void tree_add(struct ctx *ctx, enum node_type type, void *data)
ctx->tree.current = node;
}
-void tree_destroy(struct node *tree)
+// Free every node of the list starting at node. For DEFINITION and
+// DECLARATION nodes the attached data and its parameters array are freed
+// as well; for other node types only the node itself is freed.
+void tree_destroy(struct node *node)
{
- // TODO: Destroy nodes
- free(tree);
+ for (struct node *following; node; node = following) {
+ // Read the link before the node is released
+ following = node->next;
+
+ switch (node->type) {
+ case DEFINITION: {
+ struct node_definition *def = node->data;
+ free(def->parameters);
+ free(def);
+ break;
+ }
+ case DECLARATION: {
+ struct node_declaration *decl = node->data;
+ free(decl->parameters);
+ free(decl);
+ break;
+ }
+ default:
+ break;
+ }
+
+ free(node);
+ }
}
+// Build the tree from the context's tokens: skip leading NEWLINE tokens,
+// parse the remaining stream and require that parsing stopped on END.
void treeify(struct ctx *ctx)
{
- struct token *token = &ctx->tokens[1];
- while (token) {
- if (token->type == NEWLINE || token->type == EOL) {
- token = next(token, 1);
- continue;
- }
- if (token->type == END || token->type == UNKNOWN)
- break;
+ struct token *cursor;
- token = parse(ctx, token);
- }
+ for (cursor = ctx->tokens; cursor->type == NEWLINE; cursor = next(cursor, 1))
+ ;
+
+ cursor = parse(ctx, cursor);
+ expect(cursor, END);
}