diff options
-rw-r--r-- | readme.md | 20 | ||||
-rw-r--r-- | src/build.c | 25 | ||||
-rw-r--r-- | src/main.c | 2 | ||||
-rw-r--r-- | src/parse.c | 9 |
4 files changed, 36 insertions, 20 deletions
@@ -13,15 +13,17 @@ first show you some results: 1. The [bruijn](https://github.com/marvinborner/bruijn) expression `fac (+30)`, where `fac` is the factorial implementation from `std/Math`: - - the original expression takes 2000 bytes in bit-encoded BLC - - the same expression in BLoC needs only 423 bytes + - the original expression takes 1200 bytes in bit-encoded BLC + - the same expression in BLoC needs only 348 bytes 2. [My solution](https://github.com/marvinborner/bruijn/blob/main/samples/aoc/2022/01/solve.bruijn) for the “Advent of Code” challenge [2022/01](https://adventofcode.com/2022/day/1) in [bruijn](https://github.com/marvinborner/bruijn): - the original expression takes 6258 bytes in bit-encoded BLC - - the same expression in BLoC needs only 1169 bytes + - the same expression in BLoC needs only 946 bytes + +You can find these examples in `test/`. ## Format @@ -50,7 +52,7 @@ following derivation of normal bit-encoded BLC: (\*): The encoding of indices is quite special: $I=XA$, where $X\in\\{00,01,10,11\\}$ and length of binary index $A$ is -$L(A)\in\\{1,2,4,8\\}$ byte respectively. +$L(A)\in\\{1,2,4,8\\}$ byte respectively (see `src/{build,parse}.c`). The final program will be in the last entry. The indices start counting from the number of entries down to 0. @@ -100,13 +102,3 @@ There seem to be problems with *very* big files: comparison test. I’ve not been able to reproduce this bug with any other file and 8cc itself is too huge to comfortably debug the issue. If you’re reading this: Please help me :( - -Also the current optimizer does not always make the best deduplication -choices. It seems like finding the optimal deduplications requires quite -complex algorithms which would probably be rather inefficient. - -For example, as of right now the length of an expression as seen by the -deduplicator doesn’t consider the change of occurrence count when -sub-expressions get replaced by a reference to another expression. This -results in entries like `(0 <1234>)` that would not have needed to be -deduplicated. diff --git a/src/build.c b/src/build.c index e396407..d90b714 100644 --- a/src/build.c +++ b/src/build.c @@ -46,9 +46,28 @@ static void rec_write_bblc(struct tree *tree, FILE *file, char *byte, int *bit) write_bit(1, file, byte, bit); write_bit(1, file, byte, bit); - // TODO: The bit-order of encoded shorts is kinda arbitrary - short ref = tree->u.ref.table_index; - for (int i = 0; i < 16; i++) + int ref = tree->u.ref.table_index; + int bits = 0; + + // write index length bit prefixes + if (ref < 2 << 7) { + bits = 8; + write_bit(0, file, byte, bit); + write_bit(0, file, byte, bit); + } else if (ref < 2 << 15) { + bits = 16; + write_bit(0, file, byte, bit); + write_bit(1, file, byte, bit); + } else if (ref < 2 << 31) { + bits = 32; + write_bit(1, file, byte, bit); + write_bit(0, file, byte, bit); + } else if (ref < 2 << 63) { + bits = 64; // i wanna see that program lol + write_bit(1, file, byte, bit); + write_bit(1, file, byte, bit); + } + for (int i = 0; i < bits; i++) write_bit((ref >> i) & 1, file, byte, bit); break; default: @@ -107,7 +107,7 @@ static void test(char *input) char *input_2 = read_file(temp_blc); struct term *parsed_2 = parse_blc(input_2); fseek(temp_blc, 0, SEEK_END); - fprintf(stderr, "size blc: ~%lu\n", ftell(temp_blc) / 8); + fprintf(stderr, "size blc: %lu\n", ftell(temp_blc) / 8 + 1); fclose(temp_blc); free(input_2); debug("parsed reconstructed blc\n"); diff --git a/src/parse.c b/src/parse.c index f6e047d..cf873c6 100644 --- a/src/parse.c +++ b/src/parse.c @@ -71,9 +71,14 @@ static struct term *parse_bloc_bblc(const char *term, size_t *bit) (*bit)++; } else if (!BIT_AT(*bit) && BIT_AT(*bit + 1) && BIT_AT(*bit + 2)) { (*bit) += 3; + + // selected bit pattern, see readme + int sel = (2 << (BIT_AT(*bit) * 2 + BIT_AT(*bit + 1) + 2)); + (*bit) += 2; + res = new_term(REF); - short index = 0; - for (int i = 0; i < 16; i++) { + size_t index = 0; + for (int i = 0; i < sel; i++) { index |= (BIT_AT(*bit) >> (7 - (*bit % 8))) << i; (*bit) += 1; } |