diff options
author | Marvin Borner | 2024-01-17 00:59:12 +0100 |
---|---|---|
committer | Marvin Borner | 2024-01-17 01:10:36 +0100 |
commit | b754e71ca502a5027ea3479eb3ea1a0ac28e5282 (patch) | |
tree | a7d4d9e45f289c6ba08abe56931f8b2f6dea39af |
Initial commit
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | inc/log.h | 11 | ||||
-rw-r--r-- | inc/parse.h | 20 | ||||
-rw-r--r-- | inc/spec.h | 20 | ||||
-rw-r--r-- | inc/target.h | 18 | ||||
-rw-r--r-- | inc/term.h | 33 | ||||
-rw-r--r-- | license | 21 | ||||
-rw-r--r-- | makefile | 54 | ||||
-rw-r--r-- | options.ggo | 8 | ||||
-rw-r--r-- | readme.md | 24 | ||||
-rw-r--r-- | src/.gitignore | 1 | ||||
-rw-r--r-- | src/log.c | 40 | ||||
-rw-r--r-- | src/main.c | 109 | ||||
-rw-r--r-- | src/parse.c | 94 | ||||
-rw-r--r-- | src/target.c | 32 | ||||
-rw-r--r-- | src/targets/unbblc.c | 72 | ||||
-rw-r--r-- | src/targets/unblc.c | 49 | ||||
-rw-r--r-- | src/term.c | 41 |
18 files changed, 652 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c50f973 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.cache/ +build/ + +compile_commands.json +tags diff --git a/inc/log.h b/inc/log.h new file mode 100644 index 0000000..fcae172 --- /dev/null +++ b/inc/log.h @@ -0,0 +1,11 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#ifndef BLOCADE_LOG_H +#define BLOCADE_LOG_H + +void debug(const char *format, ...); +void debug_enable(int enable); +void fatal(const char *format, ...) __attribute__((noreturn)); + +#endif diff --git a/inc/parse.h b/inc/parse.h new file mode 100644 index 0000000..2368199 --- /dev/null +++ b/inc/parse.h @@ -0,0 +1,20 @@ +// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#ifndef BLOC_PARSE_H +#define BLOC_PARSE_H + +#include <stddef.h> + +#include <term.h> +#include <spec.h> + +struct bloc_parsed { + size_t length; + struct term **entries; +}; + +struct bloc_parsed *parse_bloc(const void *bloc); +void free_bloc(struct bloc_parsed *bloc); + +#endif diff --git a/inc/spec.h b/inc/spec.h new file mode 100644 index 0000000..3d8300c --- /dev/null +++ b/inc/spec.h @@ -0,0 +1,20 @@ +// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#ifndef BLOC_SPEC_H +#define BLOC_SPEC_H + +#define BLOC_IDENTIFIER "BLoC" +#define BLOC_IDENTIFIER_LENGTH 4 + +struct bloc_header { + char identifier[BLOC_IDENTIFIER_LENGTH]; + short length; + void *entries; +} __attribute__((packed)); + +struct bloc_entry { + void *expression; +} __attribute__((packed)); + +#endif diff --git a/inc/target.h b/inc/target.h new file mode 100644 index 0000000..9196637 --- /dev/null +++ b/inc/target.h @@ -0,0 +1,18 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#ifndef BLOCADE_TARGET_H +#define BLOCADE_TARGET_H + +#include <stdio.h> + +#include <parse.h> + +struct target_spec { + const char *name; + void (*exec)(struct bloc_parsed *bloc, FILE *file); +}; + +void exec_target(char *name, struct bloc_parsed *bloc, FILE *file); + +#endif diff --git a/inc/term.h b/inc/term.h new file mode 100644 index 0000000..9823fc1 --- /dev/null +++ b/inc/term.h @@ -0,0 +1,33 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#ifndef BLOCADE_TERM_H +#define BLOCADE_TERM_H + +#include <stddef.h> + +typedef enum { INV, ABS, APP, VAR, REF } term_type; + +struct term { + term_type type; + union { + struct { + struct term *term; + } abs; + struct { + struct term *lhs; + struct term *rhs; + } app; + struct { + int index; + } var; + struct { + size_t index; + } ref; + } u; +}; + +struct term *new_term(term_type type); +void free_term(struct term *term); + +#endif @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Marvin Borner + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/makefile b/makefile new file mode 100644 index 0000000..1938be4 --- /dev/null +++ b/makefile @@ -0,0 +1,54 @@ +# Copyright (c) 2023, Marvin Borner <dev@marvinborner.de> +# SPDX-License-Identifier: MIT + +CC = gcc +TG = ctags + +BUILD = ${CURDIR}/build +SRC = ${CURDIR}/src +INC = ${CURDIR}/inc +SRCS = $(wildcard $(SRC)/*.c) $(wildcard $(SRC)/*/*.c) +OBJS = $(patsubst $(SRC)/%.c, $(BUILD)/%.o, $(SRCS)) + +CFLAGS_DEBUG = -fsanitize=leak,undefined,address -g -O0 +CFLAGS_WARNINGS = -Wall -Wextra -Wshadow -Wpointer-arith -Wwrite-strings -Wredundant-decls -Wnested-externs -Wmissing-declarations -Wstrict-prototypes -Wmissing-prototypes -Wcast-qual -Wswitch-default -Wswitch-enum -Wunreachable-code -Wundef -Wold-style-definition -pedantic -Wno-switch-enum +CFLAGS = $(CFLAGS_WARNINGS) -std=c99 -Ofast -I$(INC) + +ifdef DEBUG # TODO: Somehow clean automagically +CFLAGS += $(CFLAGS_DEBUG) +endif + +ifeq ($(PREFIX),) + PREFIX := /usr/local +endif + +all: genopts compile + +full: all sync + +genopts: + @gengetopt -i ${CURDIR}/options.ggo -G --output-dir=$(SRC) + +compile: $(BUILD) $(OBJS) $(BUILD)/blocade + +clean: + @rm -rf $(BUILD)/* + +install: + @install -m 755 $(BUILD)/blocade $(DESTDIR)$(PREFIX)/bin/ + +sync: # Ugly hack + @$(MAKE) $(BUILD)/blocade --always-make --dry-run | grep -wE 'gcc|g\+\+' | grep -w '\-c' | jq -nR '[inputs|{directory:".", command:., file: match(" [^ ]+$$").string[1:]}]' >compile_commands.json + @$(TG) -R --exclude=.git --exclude=build . + +$(BUILD)/%.o: $(SRC)/%.c + @mkdir -p $(@D) + @$(CC) -c -o $@ $(CFLAGS) $< + +$(BUILD)/blocade: $(OBJS) + @$(CC) -o $@ $(CFLAGS) $^ + +.PHONY: all compile clean sync + +$(BUILD): + @mkdir -p $@ diff --git a/options.ggo b/options.ggo new file mode 100644 index 0000000..8488bfd --- /dev/null +++ b/options.ggo @@ -0,0 +1,8 @@ +package "BLoCade" +version "1.0" +purpose "Tool for converting BLoC to various targets" + +option "input" i "input file" string required +option "output" o "output file" string optional +option "verbose" v "enable debug logging output" flag off +option "target" t "choose compilation target" string required diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..a5a85fb --- /dev/null +++ b/readme.md @@ -0,0 +1,24 @@ +# BLoCade + +[BLoC](https://github.com/marvinborner/bloc) is an optimized file format +for binary lambda calculus (BLC). + +BLoCade is the BLoC-aid and turns BLoC files back into executable files +(targets). This is useful for [bruijn](https://bruijn.marvinborner.de), +benchmarking, or general term optimization. + +## Targets + +- BLC (sharing by abstraction): Terms having BLoC entry indices get + abstracted and applied to the respective term. The indices get + converted to De Bruijn indices. Flag `bblc` (bits) and `blc` (ASCII + 0/1). +- BLC (unshared): Every BLoC entry gets reinserted into the original + term. Do not use this if you want efficiency or small files. Flag + `unbblc` (bits) and `unblc` (ASCII 0/1). +- [Effekt](https://effekt-lang.org): Because, why not? Flag `effekt`. +- Planned: Scala, HVM, C, LLVM, JS, Haskell + +## Benchmarks + +To be evaluated. diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..888d678 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1 @@ +cmdline.* diff --git a/src/log.c b/src/log.c new file mode 100644 index 0000000..e452610 --- /dev/null +++ b/src/log.c @@ -0,0 +1,40 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> + +#include <log.h> + +static int debug_enabled = 0; + +void debug(const char *format, ...) +{ + if (!debug_enabled) + return; + + fprintf(stderr, "[DEBUG] "); + + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); +} + +void debug_enable(int enable) +{ + debug_enabled = enable; +} + +void fatal(const char *format, ...) +{ + fprintf(stderr, "[FATAL] "); + + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + + abort(); +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..05e85af --- /dev/null +++ b/src/main.c @@ -0,0 +1,109 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include <target.h> +#include <term.h> +#include <log.h> +#include <parse.h> + +// automatically generated using gengetopt +#include "cmdline.h" + +#define BUF_SIZE 1024 +static char *read_stdin(void) +{ + debug("reading from stdin\n"); + char buffer[BUF_SIZE]; + size_t size = 1; + char *string = malloc(sizeof(char) * BUF_SIZE); + if (!string) + fatal("out of memory!\n"); + string[0] = '\0'; + while (fgets(buffer, BUF_SIZE, stdin)) { + char *old = string; + size += strlen(buffer); + string = realloc(string, size); + if (!string) { + free(old); + return 0; + } + strcat(string, buffer); + } + + if (ferror(stdin)) { + free(string); + fatal("can't read from stdin\n"); + } + return string; +} + +static char *read_file(FILE *f) +{ + fseek(f, 0, SEEK_END); + long fsize = ftell(f); + fseek(f, 0, SEEK_SET); + + char *string = malloc(fsize + 1); + if (!string) + fatal("out of memory!\n"); + int ret = fread(string, fsize, 1, f); + + if (ret != 1) { + free(string); + fatal("can't read file: %s\n", strerror(errno)); + } + + string[fsize] = 0; + return string; +} + +static char *read_path(const char *path) +{ + debug("reading from %s\n", path); + FILE *f = fopen(path, "rb"); + if (!f) + fatal("can't open file %s: %s\n", path, strerror(errno)); + char *string = read_file(f); + fclose(f); + return string; +} + +int main(int argc, char **argv) +{ + struct gengetopt_args_info args; + if (cmdline_parser(argc, argv, &args)) + exit(1); + + debug_enable(args.verbose_flag); + + char *input; + if (args.input_arg[0] == '-') { + input = read_stdin(); + } else { + input = read_path(args.input_arg); + } + + if (!input) + return 1; + + if (args.target_arg) { + struct bloc_parsed *bloc = parse_bloc(input); + + FILE *file = + args.output_arg ? fopen(args.output_arg, "wb") : stdout; + exec_target(args.target_arg, bloc, file); + fclose(file); + + free(input); + free_bloc(bloc); + return 0; + } + + fatal("invalid options: use --help for information\n"); + return 1; +} diff --git a/src/parse.c b/src/parse.c new file mode 100644 index 0000000..84cfcfd --- /dev/null +++ b/src/parse.c @@ -0,0 +1,94 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <term.h> +#include <spec.h> +#include <parse.h> +#include <log.h> + +#define BIT_AT(i) ((term[(i) / 8] & (1 << (7 - ((i) % 8)))) >> (7 - ((i) % 8))) + +// parses bloc's bit-encoded blc +// 010M -> abstraction of M +// 00MN -> application of M and N +// 1X0 -> bruijn index, amount of 1s in X +// 011I -> 2B index to entry +static struct term *parse_bloc_bblc(const char *term, size_t *bit) +{ + struct term *res = 0; + if (!BIT_AT(*bit) && BIT_AT(*bit + 1) && !BIT_AT(*bit + 2)) { + (*bit) += 3; + res = new_term(ABS); + res->u.abs.term = parse_bloc_bblc(term, bit); + } else if (!BIT_AT(*bit) && !BIT_AT(*bit + 1)) { + (*bit) += 2; + res = new_term(APP); + res->u.app.lhs = parse_bloc_bblc(term, bit); + res->u.app.rhs = parse_bloc_bblc(term, bit); + } else if (BIT_AT(*bit)) { + const size_t cur = *bit; + while (BIT_AT(*bit)) + (*bit)++; + res = new_term(VAR); + res->u.var.index = *bit - cur - 1; + (*bit)++; + } else if (!BIT_AT(*bit) && BIT_AT(*bit + 1) && BIT_AT(*bit + 2)) { + (*bit) += 3; + + // selected bit pattern, see readme + int sel = (2 << (BIT_AT(*bit) * 2 + BIT_AT(*bit + 1) + 2)); + (*bit) += 2; + + res = new_term(REF); + size_t index = 0; + for (int i = 0; i < sel; i++) { + index |= BIT_AT(*bit) << i; + (*bit) += 1; + } + res->u.ref.index = index; + } else { + (*bit)++; + res = parse_bloc_bblc(term, bit); + } + return res; +} + +struct bloc_parsed *parse_bloc(const void *bloc) +{ + const struct bloc_header *header = bloc; + if (memcmp(header->identifier, BLOC_IDENTIFIER, + (size_t)BLOC_IDENTIFIER_LENGTH)) { + fatal("invalid BLoC identifier!\n"); + return 0; + } + + struct bloc_parsed *parsed = malloc(sizeof(*parsed)); + parsed->length = header->length; + parsed->entries = malloc(header->length * sizeof(struct term *)); + + const struct bloc_entry *current = (const void *)&header->entries; + for (size_t i = 0; i < parsed->length; i++) { + size_t len = 0; + parsed->entries[i] = + parse_bloc_bblc((const char *)current, &len); + current = + (const struct bloc_entry *)(((const char *)current) + + (len / 8) + (len % 8 != 0)); + } + + return parsed; +} + +void free_bloc(struct bloc_parsed *bloc) +{ + for (size_t i = 0; i < bloc->length; i++) { + free_term(bloc->entries[i]); + } + + free(bloc->entries); + free(bloc); +} diff --git a/src/target.c b/src/target.c new file mode 100644 index 0000000..7a96f1e --- /dev/null +++ b/src/target.c @@ -0,0 +1,32 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <string.h> + +#include <log.h> +#include <target.h> + +extern struct target_spec target_unblc; +extern struct target_spec target_unbblc; + +static struct target_spec *targets[] = { + &target_unblc, + &target_unbblc, +}; + +void exec_target(char *name, struct bloc_parsed *bloc, FILE *file) +{ + int count = sizeof(targets) / sizeof(struct target_spec *); + for (int i = 0; i < count; i++) { + if (!strcmp(targets[i]->name, name)) { + targets[i]->exec(bloc, file); + return; + } + } + + printf("available targets:\n"); + for (int i = 0; i < count; i++) + printf(" %s\n", targets[i]->name); + + fatal("unknown target %s\n", name); +} diff --git a/src/targets/unbblc.c b/src/targets/unbblc.c new file mode 100644 index 0000000..57bc1cf --- /dev/null +++ b/src/targets/unbblc.c @@ -0,0 +1,72 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include <target.h> +#include <parse.h> +#include <log.h> + +static void write_bit(char val, FILE *file, char *byte, int *bit) +{ + if (*bit > 7) { // flush byte + fwrite(byte, 1, 1, file); + *byte = 0; + *bit = 0; + } + + // TODO: which endianness should be default? + if (val) + *byte |= 1UL << *bit; + /* *byte |= 1UL << (7 - *bit); */ + (*bit)++; +} + +static void fprint_unbblc(struct term *term, struct bloc_parsed *bloc, + FILE *file, char *byte, int *bit) +{ + switch (term->type) { + case ABS: + write_bit(0, file, byte, bit); + write_bit(0, file, byte, bit); + fprint_unbblc(term->u.abs.term, bloc, file, byte, bit); + break; + case APP: + write_bit(0, file, byte, bit); + write_bit(1, file, byte, bit); + fprint_unbblc(term->u.app.lhs, bloc, file, byte, bit); + fprint_unbblc(term->u.app.rhs, bloc, file, byte, bit); + break; + case VAR: + for (int i = 0; i <= term->u.var.index; i++) + write_bit(1, file, byte, bit); + write_bit(0, file, byte, bit); + break; + case REF: + if (term->u.ref.index + 1 >= bloc->length) + fatal("invalid ref index %ld\n", term->u.ref.index); + fprint_unbblc( + bloc->entries[bloc->length - term->u.ref.index - 2], + bloc, file, byte, bit); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static void write_unbblc(struct bloc_parsed *bloc, FILE *file) +{ + char byte = 0; + int bit = 0; + fprint_unbblc(bloc->entries[bloc->length - 1], bloc, file, &byte, &bit); + + if (bit) + fwrite(&byte, 1, 1, file); +} + +struct target_spec target_unbblc = { + .name = "unbblc", + .exec = write_unbblc, +}; diff --git a/src/targets/unblc.c b/src/targets/unblc.c new file mode 100644 index 0000000..46d63a5 --- /dev/null +++ b/src/targets/unblc.c @@ -0,0 +1,49 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include <target.h> +#include <parse.h> +#include <log.h> + +static void fprint_blc(struct term *term, struct bloc_parsed *bloc, FILE *file) +{ + switch (term->type) { + case ABS: + fprintf(file, "00"); + fprint_blc(term->u.abs.term, bloc, file); + break; + case APP: + fprintf(file, "01"); + fprint_blc(term->u.app.lhs, bloc, file); + fprint_blc(term->u.app.rhs, bloc, file); + break; + case VAR: + for (int i = 0; i <= term->u.var.index; i++) + fprintf(file, "1"); + fprintf(file, "0"); + break; + case REF: + if (term->u.ref.index + 1 >= bloc->length) + fatal("invalid ref index %ld\n", term->u.ref.index); + fprint_blc(bloc->entries[bloc->length - term->u.ref.index - 2], + bloc, file); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static void write_blc(struct bloc_parsed *bloc, FILE *file) +{ + fprint_blc(bloc->entries[bloc->length - 1], bloc, file); + fprintf(file, "\n"); +} + +struct target_spec target_blc = { + .name = "blc", + .exec = write_blc, +}; diff --git a/src/term.c b/src/term.c new file mode 100644 index 0000000..f93a50c --- /dev/null +++ b/src/term.c @@ -0,0 +1,41 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <term.h> +#include <log.h> + +struct term *new_term(term_type type) +{ + struct term *term = malloc(sizeof(*term)); + if (!term) + fatal("out of memory!\n"); + term->type = type; + return term; +} + +void free_term(struct term *term) +{ + switch (term->type) { + case ABS: + free_term(term->u.abs.term); + free(term); + break; + case APP: + free_term(term->u.app.lhs); + free_term(term->u.app.rhs); + free(term); + break; + case VAR: + free(term); + break; + case REF: + free(term); + break; + default: + fatal("invalid type %d\n", term->type); + } +} |