Diffstat (limited to 'src/tokenize.c')
-rw-r--r--  src/tokenize.c  234
1 file changed, 0 insertions(+), 234 deletions(-)
diff --git a/src/tokenize.c b/src/tokenize.c
deleted file mode 100644
index 0fa58fe..0000000
--- a/src/tokenize.c
+++ /dev/null
@@ -1,234 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-
-#include <log.h>
-#include <preprocess.h>
-#include <tokenize.h>
-
-// TODO: Apply different restrictions to identifiers and types
-
-static size_t peek_identifier(struct ctx *ctx, size_t start, size_t opt_count, ...)
-{
- if (isdigit(context_getch(ctx, start)))
- errln(&ctx->location, "Identifiers can't start with numbers");
-
- for (size_t i = start; i < ctx->size; i++) {
- char cur = context_getch(ctx, i);
-
- // Check for every option in variadic argument
- va_list ap;
- va_start(ap, opt_count);
- for (size_t j = 0; j < opt_count; j++) {
- char ch = va_arg(ap, int);
- if (cur == ch) {
- va_end(ap);
- return i;
- }
- }
- va_end(ap);
-
- if (cur == '\n')
- errln(&ctx->location, "Unexpected end of line while scanning");
-
- if (!isalnum(cur) && (cur < '!' || cur > '~'))
- errln(&ctx->location, "'%c' is not an identifier", cur);
- }
-
- errln(&ctx->location, "Unexpected end of buffer while scanning");
-}
-
-static size_t peek_type(struct ctx *ctx, size_t start, size_t opt_count, ...)
-{
- if (isdigit(context_getch(ctx, start)))
- errln(&ctx->location, "Types can't start with numbers");
-
- for (size_t i = start; i < ctx->size; i++) {
- char cur = context_getch(ctx, i);
-
- // Check for every option in variadic argument
- va_list ap;
- va_start(ap, opt_count);
- for (size_t j = 0; j < opt_count; j++) {
- char ch = va_arg(ap, int);
- if (cur == ch) {
- va_end(ap);
- return i;
- }
- }
- va_end(ap);
-
- if (cur == '\n')
- errln(&ctx->location, "Unexpected end of line while scanning");
-
- if (!isalnum(cur) && (cur < '!' || cur > '~'))
-			errln(&ctx->location, "'%c' is not a type", cur);
- }
-
- errln(&ctx->location, "Unexpected end of buffer while scanning");
-}
-
-static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
-{
- assert(type != UNKNOWN);
-
- struct token token = { 0 };
- token.type = type;
- token.string.start = start;
- token.string.end = end;
- token.location = ctx->location;
-
- ctx->tokens[ctx->token_count] = token;
-
- ctx->token_count++;
- assert(ctx->token_count < TOKENS_MAX);
-
- if (type == NEWLINE) {
- ctx->location.line++;
- ctx->location.column = 0;
- } else {
- ctx->location.column += end - start;
- }
-}
-
-void token_print(struct ctx *ctx, struct token *token)
-{
- assert(token->type != UNKNOWN);
-
- printf("[token type=%d] ", token->type);
- if (token->type == NEWLINE || token->type == END) {
- printf("(Unprintable)\n");
- return;
- }
-
- printf("'%.*s'\n", (int)(token->string.end - token->string.start),
- ctx->data + token->string.start);
-}
-
-void tokenize(struct ctx *ctx)
-{
- enum {
- PARSE_DECLARATION,
- PARSE_DEFINITION,
- PARSE_NUMBER,
- PARSE_BODY,
- PARSE_STRING,
- } state = PARSE_DECLARATION,
- prev = PARSE_DECLARATION;
-
-	// TODO: Clean up this loop (move into separate tokenizing functions)
-
- size_t start;
- for (size_t i = 0; i < ctx->size; i++) {
- const char cur = context_getch(ctx, i);
-
- // String parsing
- if (cur == '"') {
- if (state == PARSE_STRING) {
- token_add(ctx, STRING, start, i + 1);
- state = prev;
- } else {
- state = PARSE_STRING;
- start = i;
- }
- continue;
- } else if (state == PARSE_STRING) {
- continue;
- }
-
- if (state != PARSE_BODY) {
- switch (cur) {
- case '\0':
- errln(&ctx->location, "Unexpected end of buffer");
- case '\n':
- token_add(ctx, NEWLINE, i, i + 1);
- continue;
- case MACRO_SKIP:
- ctx->location.column++;
- continue;
- case MACRO_NEWLINE:
- ctx->location.line++;
- continue;
- default:
- break;
- }
- }
-
- if (state == PARSE_BODY) {
- switch (cur) {
- case '(':
- token_add(ctx, LPAREN, i, i + 1);
- continue;
- case ')':
- token_add(ctx, RPAREN, i, i + 1);
- continue;
- case '\n':
- token_add(ctx, NEWLINE, i, i + 1);
- state = PARSE_DECLARATION;
- continue;
- default:
- break;
- }
-
- size_t end_ident = peek_identifier(ctx, i, 3, ' ', ')', '\n');
- token_add(ctx, IDENT, i, end_ident);
- i = end_ident - (context_getch(ctx, end_ident) != ' ');
- continue;
- }
-
- if (state == PARSE_DECLARATION) {
- size_t end_ident = peek_identifier(ctx, i, 1, ' ');
- token_add(ctx, IDENT, i, end_ident);
-
- size_t start_type = end_ident + 1;
- while (context_getch(ctx, start_type) != '-' ||
- context_getch(ctx, start_type + 1) != '>') {
- size_t end_type = peek_type(ctx, start_type, 1, ' ');
- token_add(ctx, TYPE, start_type, end_type);
- start_type = end_type + 1;
- }
-
- if (context_getch(ctx, start_type + 2) != ' ')
- errln(&ctx->location, "Missing space");
- token_add(ctx, TYPEDELIM, start_type, start_type + 2);
-
- start_type += 3;
- size_t final_type = peek_type(ctx, start_type, 1, '\n');
- token_add(ctx, TYPE, start_type, final_type);
-
- i = final_type - 1;
- state = PARSE_DEFINITION;
- continue;
- }
-
- if (state == PARSE_DEFINITION) {
- size_t end_ident = peek_identifier(ctx, i, 1, ' ');
- token_add(ctx, IDENT, i, end_ident);
-
- size_t start_parameter = end_ident + 1;
- while (context_getch(ctx, start_parameter) != ':') {
- size_t end_parameter =
- peek_identifier(ctx, start_parameter, 1, ' ');
- token_add(ctx, PARAM, start_parameter, end_parameter);
- start_parameter = end_parameter + 1;
- }
-
- if (context_getch(ctx, start_parameter + 1) != ' ')
- errln(&ctx->location, "Missing space");
- token_add(ctx, IDENTDELIM, start_parameter, start_parameter + 1);
-
- i = start_parameter + 1;
- state = PARSE_BODY;
- continue;
- }
- }
-
- /* for (size_t i = 0; i < ctx->token_count; i++) */
- /* token_print(ctx, &ctx->tokens[i]); */
-
- token_add(ctx, END, ctx->size, ctx->size);
- context_rewind(ctx);
-}