diff options
author | Marvin Borner | 2024-09-08 02:59:29 +0200 |
---|---|---|
committer | Marvin Borner | 2024-09-08 02:59:29 +0200 |
commit | b517b450316ffd1b6ee9cb0c25085ff3b1d4b75b (patch) | |
tree | c8443878c7994d0180b70c4a4e01c0f12bb86166 | |
parent | 3f84a966ab0eca4ccbab5f1d5fdea089007b4d7f (diff) |
Some implementations
-rw-r--r-- | inc/impl.h | 23 | ||||
-rw-r--r-- | makefile | 6 | ||||
-rw-r--r-- | src/abs.c | 81 | ||||
-rw-r--r-- | src/app.c | 89 | ||||
-rw-r--r-- | src/impl/blc.c | 65 | ||||
-rw-r--r-- | src/impl/blc2.c | 93 | ||||
-rw-r--r-- | src/impl/closed.c | 100 | ||||
-rw-r--r-- | src/impl/mateusz.c | 0 | ||||
-rw-r--r-- | src/main.c | 76 | ||||
-rw-r--r-- | src/parse.c | 7 | ||||
-rw-r--r-- | src/print.c | 22 | ||||
-rw-r--r-- | src/term.c | 3 |
12 files changed, 531 insertions, 34 deletions
diff --git a/inc/impl.h b/inc/impl.h new file mode 100644 index 0000000..115468b --- /dev/null +++ b/inc/impl.h @@ -0,0 +1,23 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#ifndef IMPL_H +#define IMPL_H + +#include <stdio.h> + +#include <term.h> + +typedef struct { + const char *name; + void (*encode)(Term *, FILE *); + Term *(*decode)(FILE *); +} Impl; + +Impl impl_blc(void); +Impl impl_blc2(void); +Impl impl_closed(void); +Impl impl_app(void); +Impl impl_abs(void); + +#endif @@ -7,7 +7,7 @@ TG = ctags BUILD = ${CURDIR}/build SRC = ${CURDIR}/src INC = ${CURDIR}/inc -SRCS = $(wildcard $(SRC)/*.c) +SRCS = $(wildcard $(SRC)/*.c) $(wildcard $(SRC)/impl/*.c) OBJS = $(patsubst $(SRC)/%.c, $(BUILD)/%.o, $(SRCS)) CFLAGS_DEBUG = -fsanitize=address,leak,undefined -g -O0 @@ -29,7 +29,7 @@ full: all sync compile: $(BUILD) $(OBJS) $(BUILD)/blc clean: - @rm -rf $(BUILD)/* + @rm -rf $(BUILD)/ install: @install -m 755 $(BUILD)/blc $(DESTDIR)$(PREFIX)/bin/ @@ -47,4 +47,4 @@ $(BUILD)/blc: $(OBJS) .PHONY: all compile clean sync $(BUILD): - @mkdir -p $@ + @mkdir -p $@ $@/impl/ diff --git a/src/abs.c b/src/abs.c new file mode 100644 index 0000000..a6aa76d --- /dev/null +++ b/src/abs.c @@ -0,0 +1,81 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <log.h> +#include <impl.h> + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + while (1) { + fprintf(fp, "0"); + if (term->u.abs.body->type != ABS) + break; + term = term->u.abs.body; + } + fprintf(fp, "01"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "01"); + encode(term->u.app.lhs, fp); + encode(term->u.app.rhs, fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + if (count == 0) { + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + } else { + Term *head = term_new(ABS); + + res = head; + for (size_t i = 0; i < count - 1; i++) { + res->u.abs.body = term_new(ABS); + res = res->u.abs.body; + } + res->u.abs.body = decode(fp); + res = head; + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_abs(void) +{ + return (Impl){ + .name = "abs", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/app.c b/src/app.c new file mode 100644 index 0000000..cc7eda9 --- /dev/null +++ b/src/app.c @@ -0,0 +1,89 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <log.h> +#include <impl.h> + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "01"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "0"); + + Term *stack[128] = { 0 }; // TODO: HEAP! + int stacked = 0; + + while (1) { + fprintf(fp, "0"); + stack[stacked++] = term->u.app.rhs; + term = term->u.app.lhs; + if (term->type != APP) + break; + } + + fprintf(fp, "1"); + encode(term, fp); + for (int i = stacked - 1; i >= 0; i--) + encode(stack[i], fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + if (count == 0) { + res = term_new(ABS); + res->u.abs.body = decode(fp); + } else { // foldl1 + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + + for (size_t i = 0; i < count - 1; i++) { + Term *prev = res; + res = term_new(APP); + res->u.app.lhs = prev; + res->u.app.rhs = decode(fp); + } + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_app(void) +{ + return (Impl){ + .name = "app", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/blc.c b/src/impl/blc.c index e69de29..893b856 100644 --- a/src/impl/blc.c +++ b/src/impl/blc.c @@ -0,0 +1,65 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <log.h> +#include <impl.h> + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "00"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "01"); + encode(term->u.app.lhs, fp); + encode(term->u.app.rhs, fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + char b = getc(fp); + if (b == '0') { + res = term_new(ABS); + res->u.abs.body = decode(fp); + } else if (b == '1') { + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_blc(void) +{ + return (Impl){ + .name = "blc", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/blc2.c b/src/impl/blc2.c index e69de29..3cf7ff8 100644 --- a/src/impl/blc2.c +++ b/src/impl/blc2.c @@ -0,0 +1,93 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +#include <log.h> +#include <impl.h> +#include <stdint.h> + +// haskell by John Tromp +static void levenshtein_encode(size_t n, FILE *fp) +{ + if (n == 0) { + fprintf(fp, "10"); + return; + } + + fprintf(fp, "1"); + + size_t length = (8 * sizeof(n)) - __builtin_clzll(n); + levenshtein_encode(length - 1, fp); + + if (length == 1) + return; + + for (size_t i = 1 << (length - 2); i != 0; i >>= 1) + fprintf(fp, "%d", (i & n) != 0); +} + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "00"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "01"); + encode(term->u.app.lhs, fp); + encode(term->u.app.rhs, fp); + break; + case IDX: + levenshtein_encode(term->u.index, fp); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +// by John Tromp +static size_t levenshtein_decode(FILE *fp) +{ + if (getc(fp) == '0') + return 0; + size_t x, l = levenshtein_decode(fp); + for (x = 1; l--;) + x = 2 * x + (getc(fp) != '0'); + return x; +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + char b = getc(fp); + if (b == '0') { + res = term_new(ABS); + res->u.abs.body = decode(fp); + } else if (b == '1') { + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = levenshtein_decode(fp); + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_blc2(void) +{ + return (Impl){ + .name = "blc2", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/closed.c b/src/impl/closed.c new file mode 100644 index 0000000..5b607d9 --- /dev/null +++ b/src/impl/closed.c @@ -0,0 +1,100 @@ +// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +// only works for closed terms! + +// technique proposed by Mateusz NaĆciszewski (afaik) +// implemented by John Tromp in Haskell + +#include <log.h> +#include <impl.h> +#include <stdint.h> + +static void encode_index(size_t n, size_t i, FILE *fp) +{ + if (n == 0) + fatal("invalid index (must be closed)\n"); + + if (i == 0) { + fprintf(fp, "%s", n == 1 ? "1" : "10"); + } else { + fprintf(fp, "1"); + encode_index(n - 1, i - 1, fp); + } +} + +static void encode_level(size_t n, Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "%s", n == 0 ? "0" : "00"); + encode_level(n + 1, term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "%s", n == 0 ? "1" : "01"); + encode_level(n, term->u.app.lhs, fp); + encode_level(n, term->u.app.rhs, fp); + break; + case IDX: + encode_index(n, term->u.index, fp); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static void encode(Term *term, FILE *fp) +{ + encode_level(0, term, fp); +} + +static Term *decode_level(size_t n, FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0' && n == 0) { + res = term_new(ABS); + res->u.abs.body = decode_level(n + 1, fp); + } else if (a == '0') { + char b = getc(fp); + + if (b == '0') { + res = term_new(ABS); + res->u.abs.body = decode_level(n + 1, fp); + } else if (b == '1') { + res = term_new(APP); + res->u.app.lhs = decode_level(n, fp); + res->u.app.rhs = decode_level(n, fp); + } + } else if (a == '1' && n == 0) { + res = term_new(APP); + res->u.app.lhs = decode_level(n, fp); + res->u.app.rhs = decode_level(n, fp); + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (--n != 0 && getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +static Term *decode(FILE *fp) +{ + return decode_level(0, fp); +} + +Impl impl_closed(void) +{ + return (Impl){ + .name = "closed", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/mateusz.c b/src/impl/mateusz.c deleted file mode 100644 index e69de29..0000000 --- a/src/impl/mateusz.c +++ /dev/null @@ -1,15 +1,85 @@ // Copyright (c) 2024, Marvin Borner <dev@marvinborner.de> // SPDX-License-Identifier: MIT +#define _DEFAULT_SOURCE #include <stdio.h> +#include <dirent.h> +#include <sys/stat.h> #include <term.h> #include <parse.h> #include <print.h> +#include <impl.h> + +#define N_IMPLS 5 + +static size_t file_size(FILE *fp) +{ + struct stat st; + fstat(fileno(fp), &st); + return st.st_size; +} + +static size_t test_impl(Impl impl, Term *term, FILE *out) +{ + rewind(out); + impl.encode(term, out); + size_t size = ftell(out); + rewind(out); + + Term *recon = impl.decode(out); + term_diff(term, recon); + term_free(recon); + + return size; +} + +static void test_all(Impl impls[N_IMPLS], const char *dir_path, FILE *out) +{ + char path[1024]; + struct dirent *dir_entry; + DIR *dir = opendir(dir_path); + while ((dir_entry = readdir(dir))) { + if (dir_entry->d_type != DT_REG) + continue; + + sprintf(path, "%s/%s", dir_path, dir_entry->d_name); + printf("%s\n", path); + + FILE *fp = fopen(path, "rb"); + + Term *term = parse_blc_fp(fp); + + for (Impl *impl = impls; impl != impls + N_IMPLS; impl++) { + size_t prev = file_size(fp); + size_t after = test_impl(*impl, term, out); + printf("%s: %lu -> %lu (%f%% reduction)\n", impl->name, + prev, after, + (double)((long)prev - (long)after) / + (double)prev * 100); + } + + term_free(term); + fclose(fp); + + printf("\n"); + } + closedir(dir); +} int main(void) { - Term *term = parse_blc_fp(stdin); - print_bruijn(term); - printf("\n"); + Impl impls[N_IMPLS] = { impl_blc(), impl_blc2(), impl_closed(), + impl_abs(), impl_app() }; + + FILE *out = tmpfile(); + + const char *dir_path = "test/small"; + test_all(impls, dir_path, out); + dir_path = "test/medium"; + test_all(impls, dir_path, out); + /* dir_path = "test/large"; */ + /* test_all(impls, dir_path, out); */ + + fclose(out); } diff --git a/src/parse.c b/src/parse.c index f2f89cd..86b084f 100644 --- a/src/parse.c +++ b/src/parse.c @@ -16,15 +16,12 @@ Term *parse_blc_fp(FILE *fp) if (b == '0') { res = term_new(ABS); res->u.abs.body = parse_blc_fp(fp); - } - if (b == '1') { + } else if (b == '1') { res = term_new(APP); res->u.app.lhs = parse_blc_fp(fp); res->u.app.rhs = parse_blc_fp(fp); } - } - - if (a == '1') { + } else if (a == '1') { res = term_new(IDX); res->u.index = 0; while (getc(fp) == '1') diff --git a/src/print.c b/src/print.c index b6493e1..35278a1 100644 --- a/src/print.c +++ b/src/print.c @@ -28,25 +28,3 @@ void print_bruijn(Term *term) fatal("invalid type %d\n", term->type); } } - -void print_blc(Term *term) -{ - switch (term->type) { - case ABS: - printf("00"); - print_blc(term->u.abs.body); - break; - case APP: - printf("01"); - print_blc(term->u.app.lhs); - print_blc(term->u.app.rhs); - break; - case IDX: - for (size_t i = 0; i <= term->u.index; i++) - printf("1"); - printf("0"); - break; - default: - fatal("invalid type %d\n", term->type); - } -} @@ -44,7 +44,8 @@ void term_diff(Term *a, Term *b) print_bruijn(a); fprintf(stderr, "\nTerm b: "); print_bruijn(b); - fatal("\ntype mismatch %d %d\n", a->type, b->type); + fprintf(stderr, "\n"); + fatal("type mismatch %d %d\n", a->type, b->type); } switch (a->type) { |