aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--readme.md20
-rw-r--r--src/build.c25
-rw-r--r--src/main.c2
-rw-r--r--src/parse.c9
4 files changed, 36 insertions, 20 deletions
diff --git a/readme.md b/readme.md
index 80da044..c03933d 100644
--- a/readme.md
+++ b/readme.md
@@ -13,15 +13,17 @@ first show you some results:
1. The [bruijn](https://github.com/marvinborner/bruijn) expression
`fac (+30)`, where `fac` is the factorial implementation from
`std/Math`:
- - the original expression takes 2000 bytes in bit-encoded BLC
- - the same expression in BLoC needs only 423 bytes
+ - the original expression takes 1200 bytes in bit-encoded BLC
+ - the same expression in BLoC needs only 348 bytes
2. [My
solution](https://github.com/marvinborner/bruijn/blob/main/samples/aoc/2022/01/solve.bruijn)
for the “Advent of Code” challenge
[2022/01](https://adventofcode.com/2022/day/1) in
[bruijn](https://github.com/marvinborner/bruijn):
- the original expression takes 6258 bytes in bit-encoded BLC
- - the same expression in BLoC needs only 1169 bytes
+ - the same expression in BLoC needs only 946 bytes
+
+You can find these examples in `test/`.
## Format
@@ -50,7 +52,7 @@ following derivation of normal bit-encoded BLC:
(\*): The encoding of indices is quite special: $I=XA$, where
$X\in\\{00,01,10,11\\}$ and length of binary index $A$ is
-$L(A)\in\\{1,2,4,8\\}$ byte respectively.
+$L(A)\in\\{1,2,4,8\\}$ byte respectively (see `src/{build,parse}.c`).
The final program will be in the last entry. The indices start counting
from the number of entries down to 0.
@@ -100,13 +102,3 @@ There seem to be problems with *very* big files:
comparison test. I’ve not been able to reproduce this bug with any other
file and 8cc itself is too huge to comfortably debug the issue. If
you’re reading this: Please help me :(
-
-Also the current optimizer does not always make the best deduplication
-choices. It seems like finding the optimal deduplications requires quite
-complex algorithms which would probably be rather inefficient.
-
-For example, as of right now the length of an expression as seen by the
-deduplicator doesn’t consider the change of occurrence count when
-sub-expressions get replaced by a reference to another expression. This
-results in entries like `(0 <1234>)` that would not have needed to be
-deduplicated.
diff --git a/src/build.c b/src/build.c
index e396407..d90b714 100644
--- a/src/build.c
+++ b/src/build.c
@@ -46,9 +46,28 @@ static void rec_write_bblc(struct tree *tree, FILE *file, char *byte, int *bit)
write_bit(1, file, byte, bit);
write_bit(1, file, byte, bit);
- // TODO: The bit-order of encoded shorts is kinda arbitrary
- short ref = tree->u.ref.table_index;
- for (int i = 0; i < 16; i++)
+ int ref = tree->u.ref.table_index;
+ int bits = 0;
+
+ // write index length bit prefixes
+ if (ref < 2 << 7) {
+ bits = 8;
+ write_bit(0, file, byte, bit);
+ write_bit(0, file, byte, bit);
+ } else if (ref < 2 << 15) {
+ bits = 16;
+ write_bit(0, file, byte, bit);
+ write_bit(1, file, byte, bit);
+ } else if (ref < 2 << 31) {
+ bits = 32;
+ write_bit(1, file, byte, bit);
+ write_bit(0, file, byte, bit);
+ } else if (ref < 2 << 63) {
+ bits = 64; // i wanna see that program lol
+ write_bit(1, file, byte, bit);
+ write_bit(1, file, byte, bit);
+ }
+ for (int i = 0; i < bits; i++)
write_bit((ref >> i) & 1, file, byte, bit);
break;
default:
diff --git a/src/main.c b/src/main.c
index 1d0a36f..785a70d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -107,7 +107,7 @@ static void test(char *input)
char *input_2 = read_file(temp_blc);
struct term *parsed_2 = parse_blc(input_2);
fseek(temp_blc, 0, SEEK_END);
- fprintf(stderr, "size blc: ~%lu\n", ftell(temp_blc) / 8);
+ fprintf(stderr, "size blc: %lu\n", ftell(temp_blc) / 8 + 1);
fclose(temp_blc);
free(input_2);
debug("parsed reconstructed blc\n");
diff --git a/src/parse.c b/src/parse.c
index f6e047d..cf873c6 100644
--- a/src/parse.c
+++ b/src/parse.c
@@ -71,9 +71,14 @@ static struct term *parse_bloc_bblc(const char *term, size_t *bit)
(*bit)++;
} else if (!BIT_AT(*bit) && BIT_AT(*bit + 1) && BIT_AT(*bit + 2)) {
(*bit) += 3;
+
+ // selected bit pattern, see readme
+ int sel = (2 << (BIT_AT(*bit) * 2 + BIT_AT(*bit + 1) + 2));
+ (*bit) += 2;
+
res = new_term(REF);
- short index = 0;
- for (int i = 0; i < 16; i++) {
+ size_t index = 0;
+ for (int i = 0; i < sel; i++) {
index |= (BIT_AT(*bit) >> (7 - (*bit % 8))) << i;
(*bit) += 1;
}