From 70179233d87c34b0f94521ba2752cda6ec77271d Mon Sep 17 00:00:00 2001 From: Marvin Borner Date: Sun, 8 Sep 2024 18:29:26 +0200 Subject: Add translation mode and statistics --- src/abs.c | 81 ---------------------------------- src/app.c | 89 ------------------------------------- src/impl/abs.c | 81 ++++++++++++++++++++++++++++++++++ src/impl/abs_app_right.c | 104 +++++++++++++++++++++++++++++++++++++++++++ src/impl/app_both.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++ src/impl/app_left.c | 89 +++++++++++++++++++++++++++++++++++++ src/impl/app_right.c | 89 +++++++++++++++++++++++++++++++++++++ src/impl/merged_abs.c | 0 src/impl/merged_app.c | 0 src/impl/merged_both.c | 0 src/main.c | 68 ++++++++++++++++++++++------ 11 files changed, 530 insertions(+), 183 deletions(-) delete mode 100644 src/abs.c delete mode 100644 src/app.c create mode 100644 src/impl/abs.c create mode 100644 src/impl/abs_app_right.c create mode 100644 src/impl/app_both.c create mode 100644 src/impl/app_left.c create mode 100644 src/impl/app_right.c delete mode 100644 src/impl/merged_abs.c delete mode 100644 src/impl/merged_app.c delete mode 100644 src/impl/merged_both.c (limited to 'src') diff --git a/src/abs.c b/src/abs.c deleted file mode 100644 index a6aa76d..0000000 --- a/src/abs.c +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2024, Marvin Borner -// SPDX-License-Identifier: MIT - -#include -#include - -static void encode(Term *term, FILE *fp) -{ - switch (term->type) { - case ABS: - while (1) { - fprintf(fp, "0"); - if (term->u.abs.body->type != ABS) - break; - term = term->u.abs.body; - } - fprintf(fp, "01"); - encode(term->u.abs.body, fp); - break; - case APP: - fprintf(fp, "01"); - encode(term->u.app.lhs, fp); - encode(term->u.app.rhs, fp); - break; - case IDX: - for (size_t i = 0; i <= term->u.index; i++) - fprintf(fp, "1"); - fprintf(fp, "0"); - break; - default: - fatal("invalid type %d\n", term->type); - } -} - -static Term *decode(FILE *fp) -{ - Term *res = 0; - - char a = getc(fp); - - if (a == '0') { - size_t count = 0; - while (getc(fp) == '0') - count++; - - if (count == 0) { - res = term_new(APP); - res->u.app.lhs = decode(fp); - res->u.app.rhs = decode(fp); - } else { - Term *head = term_new(ABS); - - res = head; - for (size_t i = 0; i < count - 1; i++) { - res->u.abs.body = term_new(ABS); - res = res->u.abs.body; - } - res->u.abs.body = decode(fp); - res = head; - } - } else if (a == '1') { - res = term_new(IDX); - res->u.index = 0; - while (getc(fp) == '1') - res->u.index++; - } - - if (!res) - fatal("invalid parsing state!\n"); - - return res; -} - -Impl impl_abs(void) -{ - return (Impl){ - .name = "abs", - .encode = encode, - .decode = decode, - }; -} diff --git a/src/app.c b/src/app.c deleted file mode 100644 index cc7eda9..0000000 --- a/src/app.c +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2024, Marvin Borner -// SPDX-License-Identifier: MIT - -#include -#include - -static void encode(Term *term, FILE *fp) -{ - switch (term->type) { - case ABS: - fprintf(fp, "01"); - encode(term->u.abs.body, fp); - break; - case APP: - fprintf(fp, "0"); - - Term *stack[128] = { 0 }; // TODO: HEAP! - int stacked = 0; - - while (1) { - fprintf(fp, "0"); - stack[stacked++] = term->u.app.rhs; - term = term->u.app.lhs; - if (term->type != APP) - break; - } - - fprintf(fp, "1"); - encode(term, fp); - for (int i = stacked - 1; i >= 0; i--) - encode(stack[i], fp); - break; - case IDX: - for (size_t i = 0; i <= term->u.index; i++) - fprintf(fp, "1"); - fprintf(fp, "0"); - break; - default: - fatal("invalid type %d\n", term->type); - } -} - -static Term *decode(FILE *fp) -{ - Term *res = 0; - - char a = getc(fp); - - if (a == '0') { - size_t count = 0; - while (getc(fp) == '0') - count++; - - if (count == 0) { - res = term_new(ABS); - res->u.abs.body = decode(fp); - } else { // foldl1 - res = term_new(APP); - res->u.app.lhs = decode(fp); - res->u.app.rhs = decode(fp); - - for (size_t i = 0; i < count - 1; i++) { - Term *prev = res; - res = term_new(APP); - res->u.app.lhs = prev; - res->u.app.rhs = decode(fp); - } - } - } else if (a == '1') { - res = term_new(IDX); - res->u.index = 0; - while (getc(fp) == '1') - res->u.index++; - } - - if (!res) - fatal("invalid parsing state!\n"); - - return res; -} - -Impl impl_app(void) -{ - return (Impl){ - .name = "app", - .encode = encode, - .decode = decode, - }; -} diff --git a/src/impl/abs.c b/src/impl/abs.c new file mode 100644 index 0000000..a6aa76d --- /dev/null +++ b/src/impl/abs.c @@ -0,0 +1,81 @@ +// Copyright (c) 2024, Marvin Borner +// SPDX-License-Identifier: MIT + +#include +#include + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + while (1) { + fprintf(fp, "0"); + if (term->u.abs.body->type != ABS) + break; + term = term->u.abs.body; + } + fprintf(fp, "01"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "01"); + encode(term->u.app.lhs, fp); + encode(term->u.app.rhs, fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + if (count == 0) { + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + } else { + Term *head = term_new(ABS); + + res = head; + for (size_t i = 0; i < count - 1; i++) { + res->u.abs.body = term_new(ABS); + res = res->u.abs.body; + } + res->u.abs.body = decode(fp); + res = head; + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_abs(void) +{ + return (Impl){ + .name = "abs", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/abs_app_right.c b/src/impl/abs_app_right.c new file mode 100644 index 0000000..a346013 --- /dev/null +++ b/src/impl/abs_app_right.c @@ -0,0 +1,104 @@ +// Copyright (c) 2024, Marvin Borner +// SPDX-License-Identifier: MIT + +#include +#include + +// my blc 3 for now! + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + while (1) { + fprintf(fp, "0"); + if (term->u.abs.body->type != ABS) + break; + term = term->u.abs.body; + } + fprintf(fp, "10"); + encode(term->u.abs.body, fp); + break; + case APP:; + Term *stack[128] = { 0 }; // TODO: HEAP! + int stacked = 0; + + while (1) { + fprintf(fp, "0"); + stack[stacked++] = term->u.app.rhs; + term = term->u.app.lhs; + if (term->type != APP) + break; + } + + fprintf(fp, "11"); + encode(term, fp); + for (int i = stacked - 1; i >= 0; i--) + encode(stack[i], fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + char b = getc(fp); + + if (b == '0') { + Term *head = term_new(ABS); + + res = head; + for (size_t i = 0; i < count; i++) { + res->u.abs.body = term_new(ABS); + res = res->u.abs.body; + } + res->u.abs.body = decode(fp); + res = head; + } else if (b == '1') { + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + + for (size_t i = 0; i < count; i++) { + Term *prev = res; + res = term_new(APP); + res->u.app.lhs = prev; + res->u.app.rhs = decode(fp); + } + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_abs_app_left(void) +{ + return (Impl){ + .name = "abs_app_left", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/app_both.c b/src/impl/app_both.c new file mode 100644 index 0000000..47f78aa --- /dev/null +++ b/src/impl/app_both.c @@ -0,0 +1,112 @@ +// Copyright (c) 2024, Marvin Borner +// SPDX-License-Identifier: MIT + +#include +#include + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "01"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "0"); + + Term *stack[128] = { 0 }; // TODO: HEAP! + int stacked = 0; + + // TODO: there's probably something here about finding optimal + // left/right folds but this is ok for now + + int fold_right = term->u.app.rhs->type == APP; + + while (1) { + fprintf(fp, "0"); + stack[stacked++] = + fold_right ? term->u.app.lhs : term->u.app.rhs; + term = fold_right ? term->u.app.rhs : term->u.app.lhs; + if (term->type != APP) + break; + } + + fprintf(fp, "1%d", fold_right); + if (fold_right) { + for (int i = 0; i < stacked; i++) + encode(stack[i], fp); + encode(term, fp); + } else { + encode(term, fp); + for (int i = stacked - 1; i >= 0; i--) + encode(stack[i], fp); + } + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + if (count == 0) { + res = term_new(ABS); + res->u.abs.body = decode(fp); + } else if (getc(fp) == '1') { // fold right + Term *head = term_new(APP); + res = head; + for (size_t i = 0; i < count - 1; i++) { + res->u.app.lhs = decode(fp); + res->u.app.rhs = term_new(APP); + res = res->u.app.rhs; + } + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + res = head; + } else { // fold left + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + + for (size_t i = 0; i < count - 1; i++) { + Term *prev = res; + res = term_new(APP); + res->u.app.lhs = prev; + res->u.app.rhs = decode(fp); + } + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_app_both(void) +{ + return (Impl){ + .name = "app_both", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/app_left.c b/src/impl/app_left.c new file mode 100644 index 0000000..dcc62cd --- /dev/null +++ b/src/impl/app_left.c @@ -0,0 +1,89 @@ +// Copyright (c) 2024, Marvin Borner +// SPDX-License-Identifier: MIT + +#include +#include + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "01"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "0"); + + Term *stack[128] = { 0 }; // TODO: HEAP! + int stacked = 0; + + while (1) { + fprintf(fp, "0"); + stack[stacked++] = term->u.app.rhs; + term = term->u.app.lhs; + if (term->type != APP) + break; + } + + fprintf(fp, "1"); + encode(term, fp); + for (int i = stacked - 1; i >= 0; i--) + encode(stack[i], fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + if (count == 0) { + res = term_new(ABS); + res->u.abs.body = decode(fp); + } else { // foldl1 + res = term_new(APP); + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + + for (size_t i = 0; i < count - 1; i++) { + Term *prev = res; + res = term_new(APP); + res->u.app.lhs = prev; + res->u.app.rhs = decode(fp); + } + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_app_left(void) +{ + return (Impl){ + .name = "app_left", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/app_right.c b/src/impl/app_right.c new file mode 100644 index 0000000..2583741 --- /dev/null +++ b/src/impl/app_right.c @@ -0,0 +1,89 @@ +// Copyright (c) 2024, Marvin Borner +// SPDX-License-Identifier: MIT + +#include +#include + +static void encode(Term *term, FILE *fp) +{ + switch (term->type) { + case ABS: + fprintf(fp, "01"); + encode(term->u.abs.body, fp); + break; + case APP: + fprintf(fp, "0"); + + Term *stack[128] = { 0 }; // TODO: HEAP! + int stacked = 0; + + while (1) { + fprintf(fp, "0"); + stack[stacked++] = term->u.app.lhs; + term = term->u.app.rhs; + if (term->type != APP) + break; + } + + fprintf(fp, "1"); + for (int i = 0; i < stacked; i++) + encode(stack[i], fp); + encode(term, fp); + break; + case IDX: + for (size_t i = 0; i <= term->u.index; i++) + fprintf(fp, "1"); + fprintf(fp, "0"); + break; + default: + fatal("invalid type %d\n", term->type); + } +} + +static Term *decode(FILE *fp) +{ + Term *res = 0; + + char a = getc(fp); + + if (a == '0') { + size_t count = 0; + while (getc(fp) == '0') + count++; + + if (count == 0) { + res = term_new(ABS); + res->u.abs.body = decode(fp); + } else { + Term *head = term_new(APP); + res = head; + for (size_t i = 0; i < count - 1; i++) { + res->u.app.lhs = decode(fp); + res->u.app.rhs = term_new(APP); + res = res->u.app.rhs; + } + res->u.app.lhs = decode(fp); + res->u.app.rhs = decode(fp); + res = head; + } + } else if (a == '1') { + res = term_new(IDX); + res->u.index = 0; + while (getc(fp) == '1') + res->u.index++; + } + + if (!res) + fatal("invalid parsing state!\n"); + + return res; +} + +Impl impl_app_right(void) +{ + return (Impl){ + .name = "app_right", + .encode = encode, + .decode = decode, + }; +} diff --git a/src/impl/merged_abs.c b/src/impl/merged_abs.c deleted file mode 100644 index e69de29..0000000 diff --git a/src/impl/merged_app.c b/src/impl/merged_app.c deleted file mode 100644 index e69de29..0000000 diff --git a/src/impl/merged_both.c b/src/impl/merged_both.c deleted file mode 100644 index e69de29..0000000 diff --git a/src/main.c b/src/main.c index 75dbb6a..9609142 100644 --- a/src/main.c +++ b/src/main.c @@ -5,13 +5,15 @@ #include #include #include +#include #include #include +#include #include #include -#define N_IMPLS 5 +#define N_IMPLS 8 static size_t file_size(FILE *fp) { @@ -36,6 +38,8 @@ static size_t test_impl(Impl impl, Term *term, FILE *out) static void test_all(Impl impls[N_IMPLS], const char *dir_path, FILE *out) { + double results[N_IMPLS] = { 0 }; + char path[1024]; struct dirent *dir_entry; DIR *dir = opendir(dir_path); @@ -44,7 +48,7 @@ static void test_all(Impl impls[N_IMPLS], const char *dir_path, FILE *out) continue; sprintf(path, "%s/%s", dir_path, dir_entry->d_name); - printf("%s\n", path); + debug("%s\n", path); FILE *fp = fopen(path, "rb"); @@ -53,33 +57,71 @@ static void test_all(Impl impls[N_IMPLS], const char *dir_path, FILE *out) for (Impl *impl = impls; impl != impls + N_IMPLS; impl++) { size_t prev = file_size(fp); size_t after = test_impl(*impl, term, out); - printf("%s: %lu -> %lu (%f%% reduction)\n", impl->name, - prev, after, - (double)((long)prev - (long)after) / - (double)prev * 100); + double change = (double)((long)prev - (long)after) / + (double)prev * 100; + debug("%s: %lu -> %lu (%f%% reduction)\n", impl->name, + prev, after, change); + results[impl - impls] += change; + results[impl - impls] /= 2.0; } term_free(term); fclose(fp); - printf("\n"); + debug("\n"); } closedir(dir); + + printf("\n--- %s ---\n", dir_path); + for (size_t i = 0; i < N_IMPLS; i++) + printf("%s: avg. %f%% reduction\n", impls[i].name, results[i]); } -int main(void) +static void test(Impl impls[N_IMPLS]) { - Impl impls[N_IMPLS] = { impl_blc(), impl_blc2(), impl_closed(), - impl_abs(), impl_app() }; - FILE *out = tmpfile(); const char *dir_path = "test/small"; test_all(impls, dir_path, out); dir_path = "test/medium"; test_all(impls, dir_path, out); - /* dir_path = "test/large"; */ - /* test_all(impls, dir_path, out); */ + dir_path = "test/large"; + test_all(impls, dir_path, out); fclose(out); } + +static Impl find_impl(const char *impl, Impl impls[N_IMPLS]) +{ + for (int i = 0; i < N_IMPLS; i++) { + if (!strcmp(impl, impls[i].name)) + return impls[i]; + } + fatal("encoding '%s' not found!\n", impl); +} + +int main(int argc, char *argv[]) +{ +#ifdef DEBUG + debug_enable(DEBUG); +#endif + Impl impls[N_IMPLS] = { impl_blc(), impl_blc2(), + impl_closed(), impl_abs(), + impl_app_left(), impl_app_right(), + impl_app_both(), impl_abs_app_left() }; + if (argc == 1) { + test(impls); + return 0; + } + + if (argc != 3) { + fatal("invalid arguments, please use `blc2blc `\n"); + } + + Impl from = find_impl(argv[1], impls); + Impl to = find_impl(argv[2], impls); + Term *term = from.decode(stdin); + to.encode(term, stdout); + + return 0; +} -- cgit v1.2.3