-rw-r--r-- | .gitignore          |   6
-rw-r--r-- | Setup.hs            |   2
-rw-r--r-- | app/Main.hs         |   6
-rw-r--r-- | fun.cabal           |  61
-rw-r--r-- | inc/context.h       |  41
-rw-r--r-- | inc/lint.h          |   8
-rw-r--r-- | inc/log.h           |   8
-rw-r--r-- | inc/preprocess.h    |  11
-rw-r--r-- | inc/tokenize.h      |  40
-rw-r--r-- | inc/treeify.h       |  99
-rw-r--r-- | makefile            |  32
-rw-r--r-- | package.yaml        |  47
-rw-r--r-- | readme.md           |  11
-rwxr-xr-x | run                 |   3
-rw-r--r-- | src/Fun.hs          |  14
-rw-r--r-- | src/Fun/Compiler.hs |  17
-rw-r--r-- | src/Fun/Parser.hs   | 192
-rw-r--r-- | src/Fun/Tree.hs     |  18
-rw-r--r-- | src/context.c       | 111
-rw-r--r-- | src/lint.c          |  21
-rw-r--r-- | src/log.c           |  24
-rw-r--r-- | src/main.c          |  27
-rw-r--r-- | src/preprocess.c    |  59
-rw-r--r-- | src/tokenize.c      | 234
-rw-r--r-- | src/treeify.c       | 187
-rw-r--r-- | stack.yaml          |  68
-rw-r--r-- | stack.yaml.lock     |  20
-rw-r--r-- | test.asm            |  10
-rw-r--r-- | test.fun            |  29
-rw-r--r-- | test/Spec.hs        |   2
30 files changed, 478 insertions, 930 deletions
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,2 @@
-build/
-
-tags
-compile_commands.json
+.stack-work/
+*~
\ No newline at end of file
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/app/Main.hs b/app/Main.hs
new file mode 100644
index 0000000..3444c6b
--- /dev/null
+++ b/app/Main.hs
@@ -0,0 +1,6 @@
+module Main where
+
+import Fun
+
+main :: IO ()
+main = run
diff --git a/fun.cabal b/fun.cabal
new file mode 100644
index 0000000..0d343f5
--- /dev/null
+++ b/fun.cabal
@@ -0,0 +1,61 @@
+cabal-version: 1.12
+
+-- This file has been generated from package.yaml by hpack version 0.34.4.
+--
+-- see: https://github.com/sol/hpack
+
+name: fun
+version: 0.1.0.0
+description: Please see the README on GitHub at <https://github.com/marvinborner/fun#readme>
+homepage: https://github.com/marvinborner/fun#readme
+bug-reports: https://github.com/marvinborner/fun/issues
+author: Marvin Borner
+maintainer: develop@marvinborner.de
+copyright: 2022 Marvin Borner
+license: BSD3
+build-type: Simple
+extra-source-files:
+    readme.md
+
+source-repository head
+  type: git
+  location: https://github.com/marvinborner/fun
+
+library
+  exposed-modules:
+      Fun
+      Fun.Compiler
+      Fun.Parser
+      Fun.Tree
+  other-modules:
+      Paths_fun
+  hs-source-dirs:
+      src
+  build-depends:
+      base >=4.7 && <5
+  default-language: Haskell2010
+
+executable fun-exe
+  main-is: Main.hs
+  other-modules:
+      Paths_fun
+  hs-source-dirs:
+      app
+  ghc-options: -threaded -rtsopts -with-rtsopts=-N
+  build-depends:
+      base >=4.7 && <5
+    , fun
+  default-language: Haskell2010
+
+test-suite fun-test
+  type: exitcode-stdio-1.0
+  main-is: Spec.hs
+  other-modules:
+      Paths_fun
+  hs-source-dirs:
+      test
+  ghc-options: -threaded -rtsopts -with-rtsopts=-N
+  build-depends:
+      base >=4.7 && <5
+    , fun
+  default-language: Haskell2010
diff --git a/inc/context.h b/inc/context.h
deleted file mode 100644
index f9bb5e5..0000000
--- a/inc/context.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef CONTEXT_H
-#define CONTEXT_H
-
-#include <stddef.h>
-#include <stdio.h>
-
-typedef struct {
-        size_t start, end;
-} ctx_string;
-
-struct ctx_location {
-        size_t line;
-        size_t column;
-        const char *path;
-
-        char *data; // Raw
-        size_t size;
-};
-
-struct ctx {
-        struct ctx_location location;
-
-        char *data; // Preprocessed
-        size_t size;
-
-        size_t token_count;
-        struct token *tokens;
-
-        struct {
-                struct node *head;
-                struct node *current;
-        } tree;
-};
-
-struct ctx *context_create(const char *path);
-void context_destroy(struct ctx *ctx);
-char context_getch(struct ctx *ctx, size_t i);
-void context_print(FILE *fd, struct ctx_location *location);
-void context_rewind(struct ctx *ctx);
-
-#endif
diff --git a/inc/lint.h b/inc/lint.h
deleted file mode 100644
index 9c3f808..0000000
--- a/inc/lint.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef LINT_H
-#define LINT_H
-
-#include <context.h>
-
-void lint(struct ctx *ctx);
-
-#endif
diff --git a/inc/log.h b/inc/log.h
deleted file mode 100644
index da82233..0000000
--- a/inc/log.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef LOG_H
-#define LOG_H
-
-#include <context.h>
-
-__attribute__((noreturn)) void errln(struct ctx_location *location, const char *fmt, ...);
-
-#endif
diff --git a/inc/preprocess.h b/inc/preprocess.h
deleted file mode 100644
index 2724068..0000000
--- a/inc/preprocess.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef PREPROCESS_H
-#define PREPROCESS_H
-
-#include <context.h>
-
-#define MACRO_SKIP ((char)128)
-#define MACRO_NEWLINE ((char)129)
-
-void preprocess(struct ctx *ctx);
-
-#endif
diff --git a/inc/tokenize.h b/inc/tokenize.h
deleted file mode 100644
index 61fcd67..0000000
--- a/inc/tokenize.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef TOKENIZE_H
-#define TOKENIZE_H
-
-#include <context.h>
-
-#define TOKENS_MAX 4096
-
-enum token_type {
-        UNKNOWN,
-
-        TYPE,
-        TYPEDELIM,
-
-        IDENT,
-        IDENTDELIM,
-        PARAM,
-
-        STRING,
-        NUMBER,
-        OPERATOR,
-
-        LPAREN,
-        RPAREN,
-
-        NEWLINE,
-        END,
-
-        SOMETHING,
-};
-
-struct token {
-        enum token_type type;
-        struct ctx_location location;
-        ctx_string string;
-};
-
-void token_print(struct ctx *ctx, struct token *token);
-void tokenize(struct ctx *ctx);
-
-#endif
diff --git a/inc/treeify.h b/inc/treeify.h
deleted file mode 100644
index 1e2d3e5..0000000
--- a/inc/treeify.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef TREEIFY_H
-#define TREEIFY_H
-
-#include <context.h>
-
-enum node_type {
-        EXPRESSION,
-        DECLARATION,
-        DEFINITION,
-};
-
-/**
- * Expressions
- */
-
-// (*f* x y)
-struct node_expression_identifier {
-        ctx_string name; // f
-};
-
-enum node_expression_parameter_type {
-        PARAM_TYPE_IDENT,
-        PARAM_TYPE_EXPRESSION,
-};
-
-// (f *x* *y* *(expr)*)
-struct node_expression_parameter {
-        enum node_expression_parameter_type type;
-        union {
-                ctx_string name; // x or y
-                struct node_expression *expression; // (expr)
-        } data;
-};
-
-// (*f x y*)
-struct node_expression {
-        struct node_expression_identifier *callee; // f
-        struct node_expression_parameter *parameters; // x y
-        size_t parameter_count;
-};
-
-/**
- * Declarations
- */
-
-// *f* u32 u32 -> *u32*
-struct node_declaration_callee {
-        ctx_string name; // f
-        ctx_string type; // u32
-};
-
-// f *u32* *u32* -> u32
-struct node_declaration_parameter {
-        ctx_string type; // u32
-};
-
-// *f u32 u32 -> u32*
-struct node_declaration {
-        struct node_declaration_callee callee; // f
-        struct node_declaration_parameter *parameters; // u32 u32 OR NULL
-        size_t parameter_count;
-};
-
-/**
- * Definitions
- */
-
-// *f* a b : expr
-struct node_definition_callee {
-        ctx_string name;
-};
-
-// f *a* *b* : expr
-struct node_definition_parameter {
-        ctx_string name; // u32
-};
-
-// *f a b : expr*
-struct node_definition {
-        struct node_definition_callee callee; // f
-        struct node_definition_parameter *parameters; // a b
-        size_t parameter_count;
-        struct node_expression expression; // expr
-};
-
-struct node {
-        enum node_type type;
-        struct node *prev;
-        struct node *next;
-        void *data;
-};
-
-struct node *tree_create(void);
-void tree_destroy(struct node *tree);
-void tree_add(struct ctx *ctx, enum node_type type, void *data);
-
-void treeify(struct ctx *ctx);
-
-#endif
diff --git a/makefile b/makefile
deleted file mode 100644
index ebf50dc..0000000
--- a/makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-INCLUDEDIR = $(PWD)/inc
-SOURCEDIR = $(PWD)/src
-BUILDDIR = $(PWD)/build
-SOURCES = $(wildcard $(SOURCEDIR)/*.c)
-OBJS = $(patsubst $(SOURCEDIR)/%.c, $(BUILDDIR)/%.o, $(SOURCES))
-
-CA = ccache
-AS = $(CA) nasm
-LD = $(CA) ld
-CC = $(CA) gcc
-CFLAGS = -Ofast -Wall -Wextra -pedantic -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wformat=1 -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wlogical-op -Wunreachable-code -Wundef -Wold-style-definition -Wvla -std=c99 -fsanitize=address -fsanitize=undefined -fstack-protector-strong -I$(INCLUDEDIR)
-
-all: $(OBJS)
-        @$(CC) -o $(BUILDDIR)/out $(CFLAGS) $^
-
-clean:
-        @$(RM) -rf $(BUILDDIR)
-
-run: clean all sync
-        @$(BUILDDIR)/out test.fun
-
-$(BUILDDIR)/%.o: $(SOURCEDIR)/%.c
-        @mkdir -p $(BUILDDIR)
-        @$(CC) -c -o $@ $(CFLAGS) $<
-
-compile:
-        @$(AS) -f elf64 -o $(BUILDDIR)/test.o $(BUILDDIR)/test.asm
-        @$(LD) $(BUILDDIR)/test.o -o $(BUILDDIR)/test
-
-sync:
-        @ctags -R --exclude=.git --exclude=build .
-        @make --always-make --dry-run | grep -wE 'gcc|g\+\+' | grep -w '\-c' | jq -nR '[inputs|{directory:".", command:., file: match(" [^ ]+$$").string[1:]}]' > compile_commands.json
diff --git a/package.yaml b/package.yaml
new file mode 100644
index 0000000..2490e20
--- /dev/null
+++ b/package.yaml
@@ -0,0 +1,47 @@
+name: fun
+version: 0.1.0.0
+github: "marvinborner/fun"
+license: BSD3
+author: "Marvin Borner"
+maintainer: "develop@marvinborner.de"
+copyright: "2022 Marvin Borner"
+
+extra-source-files:
+- readme.md
+
+# Metadata used when publishing your package
+# synopsis: Short description of your package
+# category: Web
+
+# To avoid duplicated efforts in documentation and dealing with the
+# complications of embedding Haddock markup inside cabal files, it is
+# common to point users to the README.md file.
+description: Please see the README on GitHub at <https://github.com/marvinborner/fun#readme>
+
+dependencies:
+- base >= 4.7 && < 5
+
+library:
+  source-dirs: src
+
+executables:
+  fun-exe:
+    main: Main.hs
+    source-dirs: app
+    ghc-options:
+    - -threaded
+    - -rtsopts
+    - -with-rtsopts=-N
+    dependencies:
+    - fun
+
+tests:
+  fun-test:
+    main: Spec.hs
+    source-dirs: test
+    ghc-options:
+    - -threaded
+    - -rtsopts
+    - -with-rtsopts=-N
+    dependencies:
+    - fun
diff --git a/readme.md b/readme.md
@@ -1,7 +1,16 @@
-# Fun with functions
+# Totally Fun Programming Language
+
+This language is purely academic. The following features are - while planned - almost certainly not (yet/correctly) implemented.
 
 ## Features
 
+- Total functional programming: Either
+  - primitive recursion over data
+  - coprimitive corecursion over codata
+- Not Turing-complete (obviously) but PR-complete
+- Machine-oriented
+- No undefined behavior nor non-terminating functions
+- Complete provability
 - Strict grammar
 - Unforgiving syntax
 - Force clean code
diff --git a/run b/run
new file mode 100755
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+stack build && stack exec -- fun-exe test.fun
diff --git a/src/Fun.hs b/src/Fun.hs
new file mode 100644
index 0000000..fac4e23
--- /dev/null
+++ b/src/Fun.hs
@@ -0,0 +1,14 @@
+module Fun where
+
+import Fun.Compiler
+import System.Environment
+
+usage :: IO ()
+usage = putStrLn "Usage: fun <path>"
+
+run :: IO ()
+run = do
+  args <- getArgs
+  case args of
+    [path] -> compile path
+    _ -> usage
diff --git a/src/Fun/Compiler.hs b/src/Fun/Compiler.hs
new file mode 100644
index 0000000..f7e5c85
--- /dev/null
+++ b/src/Fun/Compiler.hs
@@ -0,0 +1,17 @@
+module Fun.Compiler where
+
+import Control.Exception
+import Fun.Parser
+import Fun.Tree
+
+parse :: String -> Block -- TODO: Should be tree
+parse file = case block file of
+  Nothing -> error "Invalid program"
+  Just (a, b) -> a
+
+compile :: String -> IO ()
+compile path = do
+  file <- try $ readFile path
+  case file of
+    Left exception -> print (exception :: IOError)
+    Right file -> putStrLn . show $ parse file
diff --git a/src/Fun/Parser.hs b/src/Fun/Parser.hs
new file mode 100644
index 0000000..f3f5201
--- /dev/null
+++ b/src/Fun/Parser.hs
@@ -0,0 +1,192 @@
+module Fun.Parser where
+
+import Data.Char
+import Fun.Tree
+
+type Parser a = String -> Maybe (a, String)
+
+char :: Parser Char
+char [] = Nothing
+char (x : xs) = Just (x, xs)
+
+digit :: Parser Char
+digit = char <=> isDigit
+
+digits :: Parser String
+digits = iter digit
+
+number :: Parser Integer
+number =
+  literal '-'
+    <-+> digits
+    >>> (\n -> -1 * (read n :: Integer))
+    <|> digits
+    >>> (\n -> read n :: Integer)
+
+space :: Parser Char
+space = char <=> isSpace
+
+newline :: Parser Char
+newline = char <=> (== '\n')
+
+notSpace :: Parser Char
+notSpace = char <=> (not . isSpace)
+
+letter :: Parser Char
+letter = char <=> isAlpha
+
+letters :: Parser String
+letters = iter letter
+
+alphanum :: Parser Char
+alphanum = digit <|> letter
+
+literal :: Char -> Parser Char
+literal c = char <=> (== c)
+
+result :: a -> Parser a
+result a cs = Just (a, cs)
+
+iter :: Parser Char -> Parser String
+iter m = (iterS m) <=> (/= "")
+
+iterS :: Parser a -> Parser [a]
+iterS m = m <+> iterS m >>> (\(x, y) -> x : y) <|> result []
+
+token :: Parser a -> Parser a
+token = (<+-> iterS space)
+
+-- A parser that will accept a given alpha string
+acceptWord :: String -> Parser String
+acceptWord w = token (letters <=> (== w))
+
+-- A parser that will accept a given string
+accept :: String -> Parser String
+accept w = token ((iter notSpace) <=> (== w))
+
+-- Given a parser and a predicate return the parser only if it satisfies the predicate
+infix 7 <=>
+(<=>) :: Parser a -> (a -> Bool) -> Parser a
+(parser <=> predicate) input = case parser input of
+  Nothing -> Nothing
+  Just (a, rest) -> if (predicate a) then Just (a, rest) else Nothing
+
+
+-- Combine two parser together pairing their results up in a tuple
+infixl 6 <+>
+(<+>) :: Parser a -> Parser b -> Parser (a, b)
+(parserA <+> parserB) input = case parserA input of
+  Nothing -> Nothing
+  Just (resultA, remainder) -> case parserB remainder of
+    Nothing -> Nothing
+    Just (resultB, cs) -> Just ((resultA, resultB), cs)
+
+-- Sequence operator that discards the second result
+infixl 6 <+->
+(<+->) :: Parser a -> Parser b -> Parser a
+(parserA <+-> parserB) input = case parserA input of
+  Nothing -> Nothing
+  Just (resultA, remainder) -> case parserB remainder of
+    Nothing -> Nothing
+    Just (_, cs) -> Just (resultA, cs)
+
+-- Sequence operator that discards the first result
+infixl 6 <-+>
+(<-+>) :: Parser a -> Parser b -> Parser b
+(parserA <-+> parserB) input = case parserA input of
+  Nothing -> Nothing
+  Just (resultA, remainder) -> case parserB remainder of
+    Nothing -> Nothing
+    Just (resultB, cs) -> Just (resultB, cs)
+
+-- Transform a parsers result
+infixl 5 >>>
+(>>>) :: Parser a -> (a -> b) -> Parser b
+(parser >>> transformer) input = case parser input of
+  Nothing -> Nothing
+  Just (resultA, remainder) -> Just ((transformer resultA), remainder)
+
+-- Extract a parsers result
+infix 4 +>
+(+>) :: Parser a -> (a -> Parser b) -> Parser b
+(parser +> function) input = case parser input of
+  Nothing -> Nothing
+  Just (a, cs) -> function a cs
+
+-- Combine two parsers using a 'or' type operation
+infixl 3 <|>
+(<|>) :: Parser a -> Parser a -> Parser a
+(parserA <|> parserB) input = case parserA input of
+  Nothing -> parserB input
+  result -> result
+
+----
+
+tree :: Parser Tree
+tree = iterS program >>> Tree
+
+program :: Parser Program
+program = iterS block >>> Program
+
+block :: Parser Block
+block = functionBlock <+-> newline >>> Block
+
+visibility :: Parser Char
+visibility = literal '+' <|> literal '-'
+
+functionBlock :: Parser FunctionBlock
+functionBlock =
+  functionDeclaration
+    <+> iterS functionDefinition
+    >>> (\(a, b) -> FunctionBlock a b)
+
+functionDeclaration :: Parser FunctionDeclaration
+functionDeclaration =
+  functionDeclarationWithoutFlags <|> functionDeclarationWithFlags
+
+functionDeclarationWithoutFlags :: Parser FunctionDeclaration
+functionDeclarationWithoutFlags =
+  functionName
+    <+> functionDeclarationDelimiter
+    <+> iterS functionType
+    <+-> newline
+    >>> (\((a, b), c) -> FunctionDeclarationWithoutFlags a b c)
+
+functionDeclarationWithFlags :: Parser FunctionDeclaration
+functionDeclarationWithFlags =
+  functionName
+    <+> functionDeclarationDelimiter
+    <+> functionTypeList
+    <+-> space
+    <+-> literal '%'
+    <+-> space
+    <+> iterS functionFlag
+    <+-> newline
+    >>> (\(((a, b), c), d) -> FunctionDeclarationWithFlags a b c d)
+
+functionDeclarationDelimiter :: Parser Char
+functionDeclarationDelimiter =
+  space <-+> literal ':' <-+> visibility <+-> literal ':' <+-> space
+
+functionName :: Parser String
+functionName = letter <+> iterS alphanum >>> (\(a, b) -> a : b)
+
+functionTypeList :: Parser [String]
+functionTypeList =
+  iterS ((functionType <+-> space <+-> literal ':' <+-> space) <|> functionType)
+
+functionType :: Parser String
+functionType = letter <+> iterS alphanum >>> (\(a, b) -> a : b)
+
+functionFlag :: Parser String
+functionFlag = letters
+
+functionDefinition :: Parser FunctionDefinition
+functionDefinition =
+  letters
+    <+-> space
+    <+-> literal ':'
+    <+-> space
+    <+> letters
+    <+-> newline
+    >>> (\(a, b) -> FunctionDefinition a b)
diff --git a/src/Fun/Tree.hs b/src/Fun/Tree.hs
new file mode 100644
index 0000000..a7709ee
--- /dev/null
+++ b/src/Fun/Tree.hs
@@ -0,0 +1,18 @@
+module Fun.Tree where
+
+data Tree = Tree [Program]
+  deriving Show
+
+data Program = Program [Block]
+  deriving Show
+
+data Block = Block FunctionBlock -- | Block DataBlock ...
+  deriving Show
+data FunctionBlock = FunctionBlock FunctionDeclaration [FunctionDefinition]
+  deriving Show
+
+data FunctionDeclaration = FunctionDeclarationWithoutFlags String Char [String] | FunctionDeclarationWithFlags String Char [String] [String]
+  deriving Show
+
+data FunctionDefinition = FunctionDefinition String String
+  deriving Show
diff --git a/src/context.c b/src/context.c
deleted file mode 100644
index 756c38e..0000000
--- a/src/context.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include <assert.h>
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <context.h>
-#include <log.h>
-#include <preprocess.h>
-#include <tokenize.h>
-#include <treeify.h>
-
-struct ctx *context_create(const char *path)
-{
-        struct ctx *ctx = calloc(1, sizeof(*ctx));
-        ctx->tokens = calloc(TOKENS_MAX, sizeof(*ctx->tokens));
-        ctx->location.path = path; // TODO: strdup?
-
-        FILE *file = fopen(path, "r");
-        assert(file);
-
-        // Find size of file
-        fseek(file, 0, SEEK_END);
-        ctx->location.size = ftell(file);
-        rewind(file);
-        assert(ctx->location.size);
-
-        ctx->location.data = malloc(ctx->location.size + 1);
-        assert(ctx->location.data);
-        fread(ctx->location.data, 1, ctx->location.size, file);
-        fclose(file);
-
-        ctx->location.data[ctx->location.size] = 0;
-
-        ctx->tree.head = tree_create();
-        ctx->tree.current = NULL;
-
-        return ctx;
-}
-
-void context_destroy(struct ctx *ctx)
-{
-        if (!ctx)
-                return;
-
-        if (ctx->location.data)
-                free(ctx->location.data);
-
-        if (ctx->data && ctx->data != ctx->location.data)
-                free(ctx->data);
-
-        if (ctx->tokens)
-                free(ctx->tokens);
-
-        if (ctx->tree.head)
-                tree_destroy(ctx->tree.head);
-
-        free(ctx);
-}
-
-char context_getch(struct ctx *ctx, size_t i)
-{
-        if (i >= ctx->size || !ctx->data[i])
-                errln(&ctx->location, "Unexpected end of buffer");
-        return ctx->data[i];
-}
-
-#define CONTEXT_COUNT 3
-void context_print(FILE *fd, struct ctx_location *location)
-{
-        size_t start_line = fmax((int)location->line - CONTEXT_COUNT, 0);
-        size_t end_line = location->line + CONTEXT_COUNT + 1;
-
-        for (size_t line = 0, index = 0; line < end_line;) {
-                if (line < start_line) {
-                        if (location->data[index] == '\n' || location->data[index] == MACRO_NEWLINE)
-                                line++;
-                        index++;
-                        continue;
-                }
-
-                const char *end = strchr(location->data + index, '\n') + 1;
-                assert(end > location->data);
-                size_t length = end - (location->data + index) - 1;
-
-                if (location->line == line) {
-                        int pointer_length = location->column + 9;
-                        char *pointer = malloc(pointer_length); // Literally a pointer
-                        fprintf(fd,
-                                "\x1B[1;32m%6lu | %.*s\x1B[1;31m%c\x1B[1;32m%.*s\n\x1B[1;31m%.*s%s\x1B[0m\n",
-                                line + 1, (int)location->column, location->data + index,
-                                *(location->data + index + location->column),
-                                (int)(length - location->column - 1),
-                                location->data + index + location->column + 1, pointer_length,
-                                (char *)memset(pointer, '~', pointer_length), "^ (around here)");
-                        free(pointer);
-                } else {
-                        fprintf(fd, "%6lu | %.*s\n", line + 1, (int)length, location->data + index);
-                }
-
-                index = end - location->data;
-                line++;
-                if (index >= location->size)
-                        return;
-        }
-}
-
-void context_rewind(struct ctx *ctx)
-{
-        ctx->location.line = 0;
-        ctx->location.column = 0;
-}
diff --git a/src/lint.c b/src/lint.c
deleted file mode 100644
index 7c2e9da..0000000
--- a/src/lint.c
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <stddef.h>
-
-#include <lint.h>
-#include <log.h>
-#include <tokenize.h>
-
-void lint(struct ctx *ctx)
-{
-        // Lint parens
-        int parens = 0;
-        for (size_t i = 1; i < ctx->token_count; i++) {
-                struct token *token = &ctx->tokens[i];
-                if (token->type == LPAREN)
-                        parens++;
-                else if (token->type == RPAREN)
-                        parens--;
-        }
-
-        if (parens != 0)
-                errln(&ctx->location, "Invalid parens balance");
-}
diff --git a/src/log.c b/src/log.c
deleted file mode 100644
index 0e59ea2..0000000
--- a/src/log.c
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <assert.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <log.h>
-
-void errln(struct ctx_location *location, const char *fmt, ...)
-{
-        fprintf(stderr, "\x1B[1;36m%s:%lu:%lu:\x1B[0m ", location->path, location->line + 1,
-                location->column + 1);
-
-        fprintf(stderr, "\x1B[1;31mError:\x1B[0m ");
-
-        va_list ap;
-        va_start(ap, fmt);
-        vfprintf(stderr, fmt, ap);
-        va_end(ap);
-
-        fprintf(stderr, "\n");
-        context_print(stderr, location);
-
-        exit(1);
-}
diff --git a/src/main.c b/src/main.c
deleted file mode 100644
index 5827913..0000000
--- a/src/main.c
+++ /dev/null
@@ -1,27 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <context.h>
-#include <lint.h>
-#include <log.h>
-#include <preprocess.h>
-#include <tokenize.h>
-#include <treeify.h>
-
-int main(int argc, char *argv[])
-{
-        if (argc < 2) {
-                fprintf(stderr, "Not enough arguments!");
-                exit(1);
-        }
-
-        struct ctx *ctx = context_create(argv[1]);
-        preprocess(ctx);
-        tokenize(ctx);
-        lint(ctx);
-        treeify(ctx);
-
-        context_destroy(ctx);
-
-        return 0;
-}
diff --git a/src/preprocess.c b/src/preprocess.c
deleted file mode 100644
index f4f67f7..0000000
--- a/src/preprocess.c
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <assert.h>
-#include <math.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <log.h>
-#include <preprocess.h>
-
-static void preprocess_erase(struct ctx *ctx, size_t start)
-{
-        assert(ctx->data[start] == '#');
-
-        for (size_t i = start; i < ctx->size; i++) {
-                char cur = ctx->data[i];
-                if (cur == '\0')
-                        break;
-
-                if (cur == '\n') {
-                        ctx->data[i] = MACRO_NEWLINE;
-                        break;
-                } else {
-                        ctx->data[i] = MACRO_SKIP;
-                }
-        }
-}
-
-void preprocess(struct ctx *ctx)
-{
-        ctx->size = ctx->location.size;
-        ctx->data = malloc(ctx->size);
-        memcpy(ctx->data, ctx->location.data, ctx->size);
-
-        for (size_t i = 0; i < ctx->location.size; i++) {
-                const char cur = ctx->location.data[i];
-
-                ctx->location.column++;
-
-                if (cur == '\n') {
-                        ctx->location.line++;
-                        ctx->location.column = 0;
-                        continue;
-                } else if (cur == '\0') {
-                        break;
-                } else if (cur == '#' && ctx->location.column == 1) {
-                        if (strncmp(ctx->location.data + i + 1, "inc ",
-                                    fmin(4, ctx->location.size - i)) == 0) {
-                                // TODO: Add include features
-                        } else if (*(ctx->location.data + i + 1) == '#') {
-                                // Comment
-                        } else {
-                                errln(&ctx->location, "Invalid preprocessing directive");
-                        }
-                        preprocess_erase(ctx, i);
-                }
-        }
-
-        context_rewind(ctx);
-}
diff --git a/src/tokenize.c b/src/tokenize.c
deleted file mode 100644
index 0fa58fe..0000000
--- a/src/tokenize.c
+++ /dev/null
@@ -1,234 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-
-#include <log.h>
-#include <preprocess.h>
-#include <tokenize.h>
-
-// TODO: Do some different limitations for identifiers/types
-
-static size_t peek_identifier(struct ctx *ctx, size_t start, size_t opt_count, ...)
-{
-        if (isdigit(context_getch(ctx, start)))
-                errln(&ctx->location, "Identifiers can't start with numbers");
-
-        for (size_t i = start; i < ctx->size; i++) {
-                char cur = context_getch(ctx, i);
-
-                // Check for every option in variadic argument
-                va_list ap;
-                va_start(ap, opt_count);
-                for (size_t j = 0; j < opt_count; j++) {
-                        char ch = va_arg(ap, int);
-                        if (cur == ch) {
-                                va_end(ap);
-                                return i;
-                        }
-                }
-                va_end(ap);
-
-                if (cur == '\n')
-                        errln(&ctx->location, "Unexpected end of line while scanning");
-
-                if (!isalnum(cur) && (cur < '!' || cur > '~'))
-                        errln(&ctx->location, "'%c' is not an identifier", cur);
-        }
-
-        errln(&ctx->location, "Unexpected end of buffer while scanning");
-}
-
-static size_t peek_type(struct ctx *ctx, size_t start, size_t opt_count, ...)
-{
-        if (isdigit(context_getch(ctx, start)))
-                errln(&ctx->location, "Types can't start with numbers");
-
-        for (size_t i = start; i < ctx->size; i++) {
-                char cur = context_getch(ctx, i);
-
-                // Check for every option in variadic argument
-                va_list ap;
-                va_start(ap, opt_count);
-                for (size_t j = 0; j < opt_count; j++) {
-                        char ch = va_arg(ap, int);
-                        if (cur == ch) {
-                                va_end(ap);
-                                return i;
-                        }
-                }
-                va_end(ap);
-
-                if (cur == '\n')
-                        errln(&ctx->location, "Unexpected end of line while scanning");
-
-                if (!isalnum(cur) && (cur < '!' || cur > '~'))
-                        errln(&ctx->location, "'%c' is not an identifier", cur);
-        }
-
-        errln(&ctx->location, "Unexpected end of buffer while scanning");
-}
-
-static void token_add(struct ctx *ctx, enum token_type type, size_t start, size_t end)
-{
-        assert(type != UNKNOWN);
-
-        struct token token = { 0 };
-        token.type = type;
-        token.string.start = start;
-        token.string.end = end;
-        token.location = ctx->location;
-
-        ctx->tokens[ctx->token_count] = token;
-
-        ctx->token_count++;
-        assert(ctx->token_count < TOKENS_MAX);
-
-        if (type == NEWLINE) {
-                ctx->location.line++;
-                ctx->location.column = 0;
-        } else {
-                ctx->location.column += end - start;
-        }
-}
-
-void token_print(struct ctx *ctx, struct token *token)
-{
-        assert(token->type != UNKNOWN);
-
-        printf("[token type=%d] ", token->type);
-        if (token->type == NEWLINE || token->type == END) {
-                printf("(Unprintable)\n");
-                return;
-        }
-
-        printf("'%.*s'\n", (int)(token->string.end - token->string.start),
-               ctx->data + token->string.start);
-}
-
-void tokenize(struct ctx *ctx)
-{
-        enum {
-                PARSE_DECLARATION,
-                PARSE_DEFINITION,
-                PARSE_NUMBER,
-                PARSE_BODY,
-                PARSE_STRING,
-        } state = PARSE_DECLARATION,
-          prev = PARSE_DECLARATION;
-
-        // TODO: Clean this loop up (move into seperate tokenizing functions)
-
-        size_t start;
-        for (size_t i = 0; i < ctx->size; i++) {
-                const char cur = context_getch(ctx, i);
-
-                // String parsing
-                if (cur == '"') {
-                        if (state == PARSE_STRING) {
-                                token_add(ctx, STRING, start, i + 1);
-                                state = prev;
-                        } else {
-                                state = PARSE_STRING;
-                                start = i;
-                        }
-                        continue;
-                } else if (state == PARSE_STRING) {
-                        continue;
-                }
-
-                if (state != PARSE_BODY) {
-                        switch (cur) {
-                        case '\0':
-                                errln(&ctx->location, "Unexpected end of buffer");
-                        case '\n':
-                                token_add(ctx, NEWLINE, i, i + 1);
-                                continue;
-                        case MACRO_SKIP:
-                                ctx->location.column++;
-                                continue;
-                        case MACRO_NEWLINE:
-                                ctx->location.line++;
-                                continue;
-                        default:
-                                break;
-                        }
-                }
-
-                if (state == PARSE_BODY) {
-                        switch (cur) {
-                        case '(':
-                                token_add(ctx, LPAREN, i, i + 1);
-                                continue;
-                        case ')':
-                                token_add(ctx, RPAREN, i, i + 1);
-                                continue;
-                        case '\n':
-                                token_add(ctx, NEWLINE, i, i + 1);
-                                state = PARSE_DECLARATION;
-                                continue;
-                        default:
-                                break;
-                        }
-
-                        size_t end_ident = peek_identifier(ctx, i, 3, ' ', ')', '\n');
-                        token_add(ctx, IDENT, i, end_ident);
-                        i = end_ident - (context_getch(ctx, end_ident) != ' ');
-                        continue;
-                }
-
-                if (state == PARSE_DECLARATION) {
-                        size_t end_ident = peek_identifier(ctx, i, 1, ' ');
-                        token_add(ctx, IDENT, i, end_ident);
-
-                        size_t start_type = end_ident + 1;
-                        while (context_getch(ctx, start_type) != '-' ||
-                               context_getch(ctx, start_type + 1) != '>') {
-                                size_t end_type = peek_type(ctx, start_type, 1, ' ');
-                                token_add(ctx, TYPE, start_type, end_type);
-                                start_type = end_type + 1;
-                        }
-
-                        if (context_getch(ctx, start_type + 2) != ' ')
-                                errln(&ctx->location, "Missing space");
-                        token_add(ctx, TYPEDELIM, start_type, start_type + 2);
-
-                        start_type += 3;
-                        size_t final_type = peek_type(ctx, start_type, 1, '\n');
-                        token_add(ctx, TYPE, start_type, final_type);
-
-                        i = final_type - 1;
-                        state = PARSE_DEFINITION;
-                        continue;
-                }
-
-                if (state == PARSE_DEFINITION) {
-                        size_t end_ident = peek_identifier(ctx, i, 1, ' ');
-                        token_add(ctx, IDENT, i, end_ident);
-
-                        size_t start_parameter = end_ident + 1;
-                        while (context_getch(ctx, start_parameter) != ':') {
-                                size_t end_parameter =
-                                        peek_identifier(ctx, start_parameter, 1, ' ');
-                                token_add(ctx, PARAM, start_parameter, end_parameter);
-                                start_parameter = end_parameter + 1;
-                        }
-
-                        if (context_getch(ctx, start_parameter + 1) != ' ')
-                                errln(&ctx->location, "Missing space");
-                        token_add(ctx, IDENTDELIM, start_parameter, start_parameter + 1);
-
-                        i = start_parameter + 1;
-                        state = PARSE_BODY;
-                        continue;
-                }
-        }
-
-        /* for (size_t i = 0; i < ctx->token_count; i++) */
-        /*         token_print(ctx, &ctx->tokens[i]); */
-
-        token_add(ctx, END, ctx->size, ctx->size);
-        context_rewind(ctx);
-}
diff --git a/src/treeify.c b/src/treeify.c
deleted file mode 100644
index 47f8216..0000000
--- a/src/treeify.c
+++ /dev/null
@@ -1,187 +0,0 @@
-#include <assert.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <log.h>
-#include <tokenize.h>
-#include <treeify.h>
-
-#define INITIAL_PARAMETER_COUNT 3
-
-static void __expect(struct ctx *ctx, struct token *token, enum token_type type, const char *file,
-                     int line, const char *func, const char *type_enum)
-{
-        if (token->type != type) {
-                printf("[DBG] %s:%d: %s\n", file, line, func);
-                token_print(ctx, token);
-                errln(&token->location, "Expected token of type %s", type_enum);
-        }
-}
-
-#define expect(token, type) __expect(ctx, token, type, __FILE__, __LINE__, __func__, #type)
-
-static struct token *next(struct token *token, size_t i)
-{
-        return token + i;
-}
-
-static struct token *parse_declaration(struct ctx *ctx, struct token *token)
-{
-        expect(token, IDENT);
-
-        struct node_declaration *node = malloc(sizeof(*node));
-        node->callee.name = token->string;
-
-        node->parameters = malloc(INITIAL_PARAMETER_COUNT * sizeof(*node->parameters));
-        size_t param_idx = 0;
-
-        token = next(token, 1);
-        while (token->type != TYPEDELIM) {
-                if (token->type == NEWLINE || token->type == END)
-                        expect(token, TYPEDELIM);
-
-                if (token->type != TYPE)
-                        expect(token, TYPE);
-
-                // Expand parameter space if necessary
-                if ((param_idx + 1) % INITIAL_PARAMETER_COUNT == 0)
-                        // TODO: Fix realloc failure check (and other mallocs too btw)
-                        node->parameters = realloc(node->parameters,
-                                                   ((param_idx / INITIAL_PARAMETER_COUNT) + 1) *
-                                                           INITIAL_PARAMETER_COUNT *
-                                                           sizeof(*node->parameters));
-
-                node->parameters[param_idx].type = token->string;
-                param_idx++;
-
-                token = next(token, 1);
-        }
-
-        node->parameter_count = param_idx;
-
-        token = next(token, 1);
-        expect(token, TYPE);
-        node->callee.type = token->string;
-
-        tree_add(ctx, DECLARATION, node); // TODO: Push to declaration/signature array instead
-
-        expect(next(token, 1), NEWLINE);
-        return next(token, 2);
-}
-
-static struct token *parse_definition(struct ctx *ctx, struct token *token)
-{
-        expect(token, IDENT);
-
-        struct node_definition *node = malloc(sizeof(*node));
-        node->callee.name = token->string;
-
-        node->parameters = malloc(INITIAL_PARAMETER_COUNT * sizeof(*node->parameters));
-        size_t param_idx = 0;
-
-        token = next(token, 1);
-        while (token->type != IDENTDELIM) {
-                if (token->type == NEWLINE || token->type == END)
-                        expect(token, IDENTDELIM);
-
-                if (token->type != PARAM)
-                        expect(token, PARAM);
-
-                // Expand parameter space if necessary
-                if ((param_idx + 1) % INITIAL_PARAMETER_COUNT == 0)
-                        // TODO: Fix realloc failure check (and other mallocs too btw)
-                        node->parameters = realloc(node->parameters,
-                                                   ((param_idx / INITIAL_PARAMETER_COUNT) + 1) *
-                                                           INITIAL_PARAMETER_COUNT *
-                                                           sizeof(*node->parameters));
-
-                node->parameters[param_idx].name = token->string;
-                param_idx++;
-
-                token = next(token, 1);
-        }
-
-        node->parameter_count = param_idx;
-
-        tree_add(ctx, DEFINITION, node);
-
-        // TODO: Parse expression
-        while (token->type != NEWLINE)
-                token = next(token, 1);
-
-        return next(token, 1);
-}
-
-static struct token *parse_block(struct ctx *ctx, struct token *token)
-{
-        if (token->type != IDENT)
-                return next(token, 1); //&ctx->tokens[ctx->token_count - 1];
-
-        token = parse_declaration(ctx, token);
-        token = parse_definition(ctx, token);
-        expect(token, NEWLINE);
-        return next(token, 1);
-}
-
-static struct token *parse(struct ctx *ctx, struct token *token)
-{
-        while (token->type != END)
-                token = parse_block(ctx, token);
-
-        return token;
-}
-
-struct node *tree_create(void)
-{
-        struct node *tree = malloc(sizeof(*tree));
-        tree->prev = NULL;
-        tree->next = NULL;
-        return tree;
-}
-
-void tree_add(struct ctx *ctx, enum node_type type, void *data)
-{
-        assert(ctx->tree.head);
-        struct node *node = calloc(sizeof(*node), 1);
-        node->type = type;
-        node->data = data;
-        if (!ctx->tree.current) {
-                ctx->tree.head->next = node;
-                node->prev = ctx->tree.head;
-        } else {
-                ctx->tree.current->next = node;
-                node->prev = ctx->tree.current;
-        }
-        ctx->tree.current = node;
-}
-
-void tree_destroy(struct node *node)
-{
-        while (node) {
-                struct node *next = node->next;
-
-                if (node->type == DEFINITION) {
-                        struct node_definition *definition = node->data;
-                        free(definition->parameters);
-                        free(definition);
-                } else if (node->type == DECLARATION) {
-                        struct node_declaration *declaration = node->data;
-                        free(declaration->parameters);
-                        free(declaration);
-                }
-
-                free(node);
-                node = next;
-        }
-}
-
-void treeify(struct ctx *ctx)
-{
-        struct token *token = ctx->tokens;
-
-        while (token->type == NEWLINE)
-                token = next(token, 1);
-
-        token = parse(ctx, token);
-        expect(token, END);
-}
diff --git a/stack.yaml b/stack.yaml
new file mode 100644
index 0000000..7ffdea0
--- /dev/null
+++ b/stack.yaml
@@ -0,0 +1,68 @@
+# This file was automatically generated by 'stack init'
+#
+# Some commonly used options have been documented as comments in this file.
+# For advanced use and comprehensive documentation of the format, please see:
+# https://docs.haskellstack.org/en/stable/yaml_configuration/
+
+# Resolver to choose a 'specific' stackage snapshot or a compiler version.
+# A snapshot resolver dictates the compiler version and the set of packages
+# to be used for project dependencies. For example:
+#
+# resolver: lts-3.5
+# resolver: nightly-2015-09-21
+# resolver: ghc-7.10.2
+#
+# The location of a snapshot can be provided as a file or url. Stack assumes
+# a snapshot provided as a file might change, whereas a url resource does not.
+#
+# resolver: ./custom-snapshot.yaml
+# resolver: https://example.com/snapshots/2018-01-01.yaml
+resolver:
+  url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/25.yaml
+
+# User packages to be built.
+# Various formats can be used as shown in the example below.
+#
+# packages:
+# - some-directory
+# - https://example.com/foo/bar/baz-0.0.2.tar.gz
+#   subdirs:
+#   - auto-update
+#   - wai
packages:
- .
+# Dependency packages to be pulled from upstream that are not in the resolver.
+# These entries can reference officially published versions as well as
+# forks / in-progress versions pinned to a git hash. For example:
+#
+# extra-deps:
+# - acme-missiles-0.3
+# - git: https://github.com/commercialhaskell/stack.git
+#   commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
+#
+extra-deps:
+  - data-tree-print-0.1.0.2@sha256:d845e99f322df70e0c06d6743bf80336f5918d5423498528beb0593a2afc1703,1620
+
+# Override default flag values for local packages and extra-deps
+# flags: {}
+
+# Extra package databases containing global packages
+# extra-package-dbs: []
+
+# Control whether we use the GHC we find on the path
+# system-ghc: true
+#
+# Require a specific version of stack, using version ranges
+# require-stack-version: -any # Default
+# require-stack-version: ">=2.7"
+#
+# Override the architecture used by stack, especially useful on Windows
+# arch: i386
+# arch: x86_64
+#
+# Extra directories used by stack for building
+# extra-include-dirs: [/path/to/dir]
+# extra-lib-dirs: [/path/to/dir]
+#
+# Allow a newer minor version of GHC than the snapshot specifies
+# compiler-check: newer-minor
diff --git a/stack.yaml.lock b/stack.yaml.lock
new file mode 100644
index 0000000..0152bd5
--- /dev/null
+++ b/stack.yaml.lock
@@ -0,0 +1,20 @@
+# This file was autogenerated by Stack.
+# You should not edit this file by hand.
+# For more information, please see the documentation at:
+#   https://docs.haskellstack.org/en/stable/lock_files
+
+packages:
+- completed:
+    hackage: data-tree-print-0.1.0.2@sha256:d845e99f322df70e0c06d6743bf80336f5918d5423498528beb0593a2afc1703,1620
+    pantry-tree:
+      size: 272
+      sha256: b8778eb1b16fddb91b2eed2b25f33a89d1e4f7a533160de4ccbf226f82456135
+  original:
+    hackage: data-tree-print-0.1.0.2@sha256:d845e99f322df70e0c06d6743bf80336f5918d5423498528beb0593a2afc1703,1620
+snapshots:
+- completed:
+    size: 587393
+    url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/25.yaml
+    sha256: 1b74fb5e970497b5aefae56703f1bd44aa648bd1a5ef95c1eb8c29775087e2bf
+  original:
+    url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/25.yaml
diff --git a/test.asm b/test.asm
new file mode 100644
index 0000000..3fb6e31
--- /dev/null
+++ b/test.asm
@@ -0,0 +1,10 @@
+global _start
+section .text
+
+_start:
+        xor ebx, ebx
+        xor eax, eax
+        mov al, 0x1
+        int 0x80
+
+; nasm -f elf64 test.asm && ld -o test.o
diff --git a/test.fun b/test.fun
@@ -1,25 +1,8 @@
-#inc <str.fun>
+test :-: u32 : u32 : u32
+a b : * a b
 
-## TODO: Some kind of caching idea (memoizing?)
-## TODO: Ref/deref memory aread for GC
+text :-: [u8]
+: "hallo"
 
-end? [u8] -> u0
-end? arr : not (first arr)
-
-## = [u8] [u8] -> u0
-## = a b : cond
-## | || (not end? a) (not end? b) : 0
-## | = first a first b : = rest a rest b
-
-text u32 -> [u8]
-text a : "hallo"
-
-test u32 u32 -> u32
-test a b : * a b
-
-## fact u32 -> u32
-## fact n : match
-## | ? zero? : 1
-## | _ : * n (fact (- n 1))
-##
-## main : fact (test 2 3)
+main :+: IO
+: log text
diff --git a/test/Spec.hs b/test/Spec.hs
new file mode 100644
index 0000000..cd4753f
--- /dev/null
+++ b/test/Spec.hs
@@ -0,0 +1,2 @@
+main :: IO ()
+main = putStrLn "Test suite not yet implemented"
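
The following is a minimal, hypothetical usage sketch (not part of this commit) of the parser combinators introduced in src/Fun/Parser.hs above, where Parser a = String -> Maybe (a, String). The definitions and behavior are taken from the diff; the snippet assumes the fun library is in scope, for example via stack ghci.

-- Hypothetical demo module; functionName and digits come from Fun.Parser above.
import Fun.Parser

main :: IO ()
main = do
  -- functionName = letter <+> iterS alphanum: consumes a leading identifier
  -- and leaves the rest of the input untouched.
  print (functionName "test :-: u32 : u32 : u32")
  -- expected: Just ("test"," :-: u32 : u32 : u32")

  -- digits = iter digit: consumes a non-empty run of digits.
  print (digits "123abc")
  -- expected: Just ("123","abc")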