aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarvin Borner2024-09-08 02:59:29 +0200
committerMarvin Borner2024-09-08 02:59:29 +0200
commitb517b450316ffd1b6ee9cb0c25085ff3b1d4b75b (patch)
treec8443878c7994d0180b70c4a4e01c0f12bb86166
parent3f84a966ab0eca4ccbab5f1d5fdea089007b4d7f (diff)
Some implementations
-rw-r--r--inc/impl.h23
-rw-r--r--makefile6
-rw-r--r--src/abs.c81
-rw-r--r--src/app.c89
-rw-r--r--src/impl/blc.c65
-rw-r--r--src/impl/blc2.c93
-rw-r--r--src/impl/closed.c100
-rw-r--r--src/impl/mateusz.c0
-rw-r--r--src/main.c76
-rw-r--r--src/parse.c7
-rw-r--r--src/print.c22
-rw-r--r--src/term.c3
12 files changed, 531 insertions, 34 deletions
diff --git a/inc/impl.h b/inc/impl.h
new file mode 100644
index 0000000..115468b
--- /dev/null
+++ b/inc/impl.h
@@ -0,0 +1,23 @@
+// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#ifndef IMPL_H
+#define IMPL_H
+
+#include <stdio.h>
+
+#include <term.h>
+
+typedef struct {
+ const char *name;
+ void (*encode)(Term *, FILE *);
+ Term *(*decode)(FILE *);
+} Impl;
+
+Impl impl_blc(void);
+Impl impl_blc2(void);
+Impl impl_closed(void);
+Impl impl_app(void);
+Impl impl_abs(void);
+
+#endif
diff --git a/makefile b/makefile
index bcd8d97..e755b25 100644
--- a/makefile
+++ b/makefile
@@ -7,7 +7,7 @@ TG = ctags
BUILD = ${CURDIR}/build
SRC = ${CURDIR}/src
INC = ${CURDIR}/inc
-SRCS = $(wildcard $(SRC)/*.c)
+SRCS = $(wildcard $(SRC)/*.c) $(wildcard $(SRC)/impl/*.c)
OBJS = $(patsubst $(SRC)/%.c, $(BUILD)/%.o, $(SRCS))
CFLAGS_DEBUG = -fsanitize=address,leak,undefined -g -O0
@@ -29,7 +29,7 @@ full: all sync
compile: $(BUILD) $(OBJS) $(BUILD)/blc
clean:
- @rm -rf $(BUILD)/*
+ @rm -rf $(BUILD)/
install:
@install -m 755 $(BUILD)/blc $(DESTDIR)$(PREFIX)/bin/
@@ -47,4 +47,4 @@ $(BUILD)/blc: $(OBJS)
.PHONY: all compile clean sync
$(BUILD):
- @mkdir -p $@
+ @mkdir -p $@ $@/impl/
diff --git a/src/abs.c b/src/abs.c
new file mode 100644
index 0000000..a6aa76d
--- /dev/null
+++ b/src/abs.c
@@ -0,0 +1,81 @@
+// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#include <log.h>
+#include <impl.h>
+
+static void encode(Term *term, FILE *fp)
+{
+ switch (term->type) {
+ case ABS:
+ while (1) {
+ fprintf(fp, "0");
+ if (term->u.abs.body->type != ABS)
+ break;
+ term = term->u.abs.body;
+ }
+ fprintf(fp, "01");
+ encode(term->u.abs.body, fp);
+ break;
+ case APP:
+ fprintf(fp, "01");
+ encode(term->u.app.lhs, fp);
+ encode(term->u.app.rhs, fp);
+ break;
+ case IDX:
+ for (size_t i = 0; i <= term->u.index; i++)
+ fprintf(fp, "1");
+ fprintf(fp, "0");
+ break;
+ default:
+ fatal("invalid type %d\n", term->type);
+ }
+}
+
+static Term *decode(FILE *fp)
+{
+ Term *res = 0;
+
+ char a = getc(fp);
+
+ if (a == '0') {
+ size_t count = 0;
+ while (getc(fp) == '0')
+ count++;
+
+ if (count == 0) {
+ res = term_new(APP);
+ res->u.app.lhs = decode(fp);
+ res->u.app.rhs = decode(fp);
+ } else {
+ Term *head = term_new(ABS);
+
+ res = head;
+ for (size_t i = 0; i < count - 1; i++) {
+ res->u.abs.body = term_new(ABS);
+ res = res->u.abs.body;
+ }
+ res->u.abs.body = decode(fp);
+ res = head;
+ }
+ } else if (a == '1') {
+ res = term_new(IDX);
+ res->u.index = 0;
+ while (getc(fp) == '1')
+ res->u.index++;
+ }
+
+ if (!res)
+ fatal("invalid parsing state!\n");
+
+ return res;
+}
+
+Impl impl_abs(void)
+{
+ return (Impl){
+ .name = "abs",
+ .encode = encode,
+ .decode = decode,
+ };
+}
diff --git a/src/app.c b/src/app.c
new file mode 100644
index 0000000..cc7eda9
--- /dev/null
+++ b/src/app.c
@@ -0,0 +1,89 @@
+// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#include <log.h>
+#include <impl.h>
+
+static void encode(Term *term, FILE *fp)
+{
+ switch (term->type) {
+ case ABS:
+ fprintf(fp, "01");
+ encode(term->u.abs.body, fp);
+ break;
+ case APP:
+ fprintf(fp, "0");
+
+ Term *stack[128] = { 0 }; // TODO: HEAP!
+ int stacked = 0;
+
+ while (1) {
+ fprintf(fp, "0");
+ stack[stacked++] = term->u.app.rhs;
+ term = term->u.app.lhs;
+ if (term->type != APP)
+ break;
+ }
+
+ fprintf(fp, "1");
+ encode(term, fp);
+ for (int i = stacked - 1; i >= 0; i--)
+ encode(stack[i], fp);
+ break;
+ case IDX:
+ for (size_t i = 0; i <= term->u.index; i++)
+ fprintf(fp, "1");
+ fprintf(fp, "0");
+ break;
+ default:
+ fatal("invalid type %d\n", term->type);
+ }
+}
+
+static Term *decode(FILE *fp)
+{
+ Term *res = 0;
+
+ char a = getc(fp);
+
+ if (a == '0') {
+ size_t count = 0;
+ while (getc(fp) == '0')
+ count++;
+
+ if (count == 0) {
+ res = term_new(ABS);
+ res->u.abs.body = decode(fp);
+ } else { // foldl1
+ res = term_new(APP);
+ res->u.app.lhs = decode(fp);
+ res->u.app.rhs = decode(fp);
+
+ for (size_t i = 0; i < count - 1; i++) {
+ Term *prev = res;
+ res = term_new(APP);
+ res->u.app.lhs = prev;
+ res->u.app.rhs = decode(fp);
+ }
+ }
+ } else if (a == '1') {
+ res = term_new(IDX);
+ res->u.index = 0;
+ while (getc(fp) == '1')
+ res->u.index++;
+ }
+
+ if (!res)
+ fatal("invalid parsing state!\n");
+
+ return res;
+}
+
+Impl impl_app(void)
+{
+ return (Impl){
+ .name = "app",
+ .encode = encode,
+ .decode = decode,
+ };
+}
diff --git a/src/impl/blc.c b/src/impl/blc.c
index e69de29..893b856 100644
--- a/src/impl/blc.c
+++ b/src/impl/blc.c
@@ -0,0 +1,65 @@
+// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#include <log.h>
+#include <impl.h>
+
+static void encode(Term *term, FILE *fp)
+{
+ switch (term->type) {
+ case ABS:
+ fprintf(fp, "00");
+ encode(term->u.abs.body, fp);
+ break;
+ case APP:
+ fprintf(fp, "01");
+ encode(term->u.app.lhs, fp);
+ encode(term->u.app.rhs, fp);
+ break;
+ case IDX:
+ for (size_t i = 0; i <= term->u.index; i++)
+ fprintf(fp, "1");
+ fprintf(fp, "0");
+ break;
+ default:
+ fatal("invalid type %d\n", term->type);
+ }
+}
+
+static Term *decode(FILE *fp)
+{
+ Term *res = 0;
+
+ char a = getc(fp);
+
+ if (a == '0') {
+ char b = getc(fp);
+ if (b == '0') {
+ res = term_new(ABS);
+ res->u.abs.body = decode(fp);
+ } else if (b == '1') {
+ res = term_new(APP);
+ res->u.app.lhs = decode(fp);
+ res->u.app.rhs = decode(fp);
+ }
+ } else if (a == '1') {
+ res = term_new(IDX);
+ res->u.index = 0;
+ while (getc(fp) == '1')
+ res->u.index++;
+ }
+
+ if (!res)
+ fatal("invalid parsing state!\n");
+
+ return res;
+}
+
+Impl impl_blc(void)
+{
+ return (Impl){
+ .name = "blc",
+ .encode = encode,
+ .decode = decode,
+ };
+}
diff --git a/src/impl/blc2.c b/src/impl/blc2.c
index e69de29..3cf7ff8 100644
--- a/src/impl/blc2.c
+++ b/src/impl/blc2.c
@@ -0,0 +1,93 @@
+// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#include <log.h>
+#include <impl.h>
+#include <stdint.h>
+
+// haskell by John Tromp
+static void levenshtein_encode(size_t n, FILE *fp)
+{
+ if (n == 0) {
+ fprintf(fp, "10");
+ return;
+ }
+
+ fprintf(fp, "1");
+
+ size_t length = (8 * sizeof(n)) - __builtin_clzll(n);
+ levenshtein_encode(length - 1, fp);
+
+ if (length == 1)
+ return;
+
+ for (size_t i = 1 << (length - 2); i != 0; i >>= 1)
+ fprintf(fp, "%d", (i & n) != 0);
+}
+
+static void encode(Term *term, FILE *fp)
+{
+ switch (term->type) {
+ case ABS:
+ fprintf(fp, "00");
+ encode(term->u.abs.body, fp);
+ break;
+ case APP:
+ fprintf(fp, "01");
+ encode(term->u.app.lhs, fp);
+ encode(term->u.app.rhs, fp);
+ break;
+ case IDX:
+ levenshtein_encode(term->u.index, fp);
+ break;
+ default:
+ fatal("invalid type %d\n", term->type);
+ }
+}
+
+// by John Tromp
+static size_t levenshtein_decode(FILE *fp)
+{
+ if (getc(fp) == '0')
+ return 0;
+ size_t x, l = levenshtein_decode(fp);
+ for (x = 1; l--;)
+ x = 2 * x + (getc(fp) != '0');
+ return x;
+}
+
+static Term *decode(FILE *fp)
+{
+ Term *res = 0;
+
+ char a = getc(fp);
+
+ if (a == '0') {
+ char b = getc(fp);
+ if (b == '0') {
+ res = term_new(ABS);
+ res->u.abs.body = decode(fp);
+ } else if (b == '1') {
+ res = term_new(APP);
+ res->u.app.lhs = decode(fp);
+ res->u.app.rhs = decode(fp);
+ }
+ } else if (a == '1') {
+ res = term_new(IDX);
+ res->u.index = levenshtein_decode(fp);
+ }
+
+ if (!res)
+ fatal("invalid parsing state!\n");
+
+ return res;
+}
+
+Impl impl_blc2(void)
+{
+ return (Impl){
+ .name = "blc2",
+ .encode = encode,
+ .decode = decode,
+ };
+}
diff --git a/src/impl/closed.c b/src/impl/closed.c
new file mode 100644
index 0000000..5b607d9
--- /dev/null
+++ b/src/impl/closed.c
@@ -0,0 +1,100 @@
+// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+// only works for closed terms!
+
+// technique proposed by Mateusz Naƛciszewski (afaik)
+// implemented by John Tromp in Haskell
+
+#include <log.h>
+#include <impl.h>
+#include <stdint.h>
+
+static void encode_index(size_t n, size_t i, FILE *fp)
+{
+ if (n == 0)
+ fatal("invalid index (must be closed)\n");
+
+ if (i == 0) {
+ fprintf(fp, "%s", n == 1 ? "1" : "10");
+ } else {
+ fprintf(fp, "1");
+ encode_index(n - 1, i - 1, fp);
+ }
+}
+
+static void encode_level(size_t n, Term *term, FILE *fp)
+{
+ switch (term->type) {
+ case ABS:
+ fprintf(fp, "%s", n == 0 ? "0" : "00");
+ encode_level(n + 1, term->u.abs.body, fp);
+ break;
+ case APP:
+ fprintf(fp, "%s", n == 0 ? "1" : "01");
+ encode_level(n, term->u.app.lhs, fp);
+ encode_level(n, term->u.app.rhs, fp);
+ break;
+ case IDX:
+ encode_index(n, term->u.index, fp);
+ break;
+ default:
+ fatal("invalid type %d\n", term->type);
+ }
+}
+
+static void encode(Term *term, FILE *fp)
+{
+ encode_level(0, term, fp);
+}
+
+static Term *decode_level(size_t n, FILE *fp)
+{
+ Term *res = 0;
+
+ char a = getc(fp);
+
+ if (a == '0' && n == 0) {
+ res = term_new(ABS);
+ res->u.abs.body = decode_level(n + 1, fp);
+ } else if (a == '0') {
+ char b = getc(fp);
+
+ if (b == '0') {
+ res = term_new(ABS);
+ res->u.abs.body = decode_level(n + 1, fp);
+ } else if (b == '1') {
+ res = term_new(APP);
+ res->u.app.lhs = decode_level(n, fp);
+ res->u.app.rhs = decode_level(n, fp);
+ }
+ } else if (a == '1' && n == 0) {
+ res = term_new(APP);
+ res->u.app.lhs = decode_level(n, fp);
+ res->u.app.rhs = decode_level(n, fp);
+ } else if (a == '1') {
+ res = term_new(IDX);
+ res->u.index = 0;
+ while (--n != 0 && getc(fp) == '1')
+ res->u.index++;
+ }
+
+ if (!res)
+ fatal("invalid parsing state!\n");
+
+ return res;
+}
+
+static Term *decode(FILE *fp)
+{
+ return decode_level(0, fp);
+}
+
+Impl impl_closed(void)
+{
+ return (Impl){
+ .name = "closed",
+ .encode = encode,
+ .decode = decode,
+ };
+}
diff --git a/src/impl/mateusz.c b/src/impl/mateusz.c
deleted file mode 100644
index e69de29..0000000
--- a/src/impl/mateusz.c
+++ /dev/null
diff --git a/src/main.c b/src/main.c
index 802bfca..75dbb6a 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,15 +1,85 @@
// Copyright (c) 2024, Marvin Borner <dev@marvinborner.de>
// SPDX-License-Identifier: MIT
+#define _DEFAULT_SOURCE
#include <stdio.h>
+#include <dirent.h>
+#include <sys/stat.h>
#include <term.h>
#include <parse.h>
#include <print.h>
+#include <impl.h>
+
+#define N_IMPLS 5
+
+static size_t file_size(FILE *fp)
+{
+ struct stat st;
+ fstat(fileno(fp), &st);
+ return st.st_size;
+}
+
+static size_t test_impl(Impl impl, Term *term, FILE *out)
+{
+ rewind(out);
+ impl.encode(term, out);
+ size_t size = ftell(out);
+ rewind(out);
+
+ Term *recon = impl.decode(out);
+ term_diff(term, recon);
+ term_free(recon);
+
+ return size;
+}
+
+static void test_all(Impl impls[N_IMPLS], const char *dir_path, FILE *out)
+{
+ char path[1024];
+ struct dirent *dir_entry;
+ DIR *dir = opendir(dir_path);
+ while ((dir_entry = readdir(dir))) {
+ if (dir_entry->d_type != DT_REG)
+ continue;
+
+ sprintf(path, "%s/%s", dir_path, dir_entry->d_name);
+ printf("%s\n", path);
+
+ FILE *fp = fopen(path, "rb");
+
+ Term *term = parse_blc_fp(fp);
+
+ for (Impl *impl = impls; impl != impls + N_IMPLS; impl++) {
+ size_t prev = file_size(fp);
+ size_t after = test_impl(*impl, term, out);
+ printf("%s: %lu -> %lu (%f%% reduction)\n", impl->name,
+ prev, after,
+ (double)((long)prev - (long)after) /
+ (double)prev * 100);
+ }
+
+ term_free(term);
+ fclose(fp);
+
+ printf("\n");
+ }
+ closedir(dir);
+}
int main(void)
{
- Term *term = parse_blc_fp(stdin);
- print_bruijn(term);
- printf("\n");
+ Impl impls[N_IMPLS] = { impl_blc(), impl_blc2(), impl_closed(),
+ impl_abs(), impl_app() };
+
+ FILE *out = tmpfile();
+
+ const char *dir_path = "test/small";
+ test_all(impls, dir_path, out);
+ dir_path = "test/medium";
+ test_all(impls, dir_path, out);
+ /* dir_path = "test/large"; */
+ /* test_all(impls, dir_path, out); */
+
+ fclose(out);
}
diff --git a/src/parse.c b/src/parse.c
index f2f89cd..86b084f 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -16,15 +16,12 @@ Term *parse_blc_fp(FILE *fp)
if (b == '0') {
res = term_new(ABS);
res->u.abs.body = parse_blc_fp(fp);
- }
- if (b == '1') {
+ } else if (b == '1') {
res = term_new(APP);
res->u.app.lhs = parse_blc_fp(fp);
res->u.app.rhs = parse_blc_fp(fp);
}
- }
-
- if (a == '1') {
+ } else if (a == '1') {
res = term_new(IDX);
res->u.index = 0;
while (getc(fp) == '1')
diff --git a/src/print.c b/src/print.c
index b6493e1..35278a1 100644
--- a/src/print.c
+++ b/src/print.c
@@ -28,25 +28,3 @@ void print_bruijn(Term *term)
fatal("invalid type %d\n", term->type);
}
}
-
-void print_blc(Term *term)
-{
- switch (term->type) {
- case ABS:
- printf("00");
- print_blc(term->u.abs.body);
- break;
- case APP:
- printf("01");
- print_blc(term->u.app.lhs);
- print_blc(term->u.app.rhs);
- break;
- case IDX:
- for (size_t i = 0; i <= term->u.index; i++)
- printf("1");
- printf("0");
- break;
- default:
- fatal("invalid type %d\n", term->type);
- }
-}
diff --git a/src/term.c b/src/term.c
index e68611e..3074bfc 100644
--- a/src/term.c
+++ b/src/term.c
@@ -44,7 +44,8 @@ void term_diff(Term *a, Term *b)
print_bruijn(a);
fprintf(stderr, "\nTerm b: ");
print_bruijn(b);
- fatal("\ntype mismatch %d %d\n", a->type, b->type);
+ fprintf(stderr, "\n");
+ fatal("type mismatch %d %d\n", a->type, b->type);
}
switch (a->type) {