aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/build.c2
-rw-r--r--src/main.c13
-rw-r--r--src/optimize.c168
-rw-r--r--src/pqueue.c7
-rw-r--r--src/tree.c53
5 files changed, 223 insertions, 20 deletions
diff --git a/src/build.c b/src/build.c
index 9fdcad4..e396407 100644
--- a/src/build.c
+++ b/src/build.c
@@ -47,7 +47,7 @@ static void rec_write_bblc(struct tree *tree, FILE *file, char *byte, int *bit)
write_bit(1, file, byte, bit);
// TODO: The bit-order of encoded shorts is kinda arbitrary
- short ref = tree->u.ref.index;
+ short ref = tree->u.ref.table_index;
for (int i = 0; i < 16; i++)
write_bit((ref >> i) & 1, file, byte, bit);
break;
diff --git a/src/main.c b/src/main.c
index 4258281..1d0a36f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <term.h>
+#include <optimize.h>
#include <log.h>
#include <print.h>
#include <tree.h>
@@ -84,7 +85,11 @@ static void test(char *input)
debug("parsed original blc\n");
debug("merging duplicates\n");
- struct list *table = tree_merge_duplicates(parsed_1);
+ void *all_trees = 0;
+ struct tree *tree = tree_merge_duplicates(parsed_1, &all_trees);
+
+ debug("optimizing tree\n");
+ struct list *table = optimize_tree(tree, &all_trees);
FILE *temp_bloc = tmpfile();
write_bloc(table, temp_bloc);
@@ -123,7 +128,11 @@ static void from_blc(char *input, char *output_path)
debug("parsed blc\n");
debug("merging duplicates\n");
- struct list *table = tree_merge_duplicates(parsed);
+ void *all_trees = 0;
+ struct tree *tree = tree_merge_duplicates(parsed, &all_trees);
+
+ debug("optimizing tree\n");
+ struct list *table = optimize_tree(tree, &all_trees);
FILE *file = fopen(output_path, "wb");
write_bloc(table, file);
diff --git a/src/optimize.c b/src/optimize.c
new file mode 100644
index 0000000..a79299e
--- /dev/null
+++ b/src/optimize.c
@@ -0,0 +1,168 @@
+// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+// most of the actual optimizing is done in tree.c
+// this file does some extra steps to optimize the tree even further
+
+#include <search.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <log.h>
+#include <optimize.h>
+#include <pqueue.h>
+
+struct tree_tracker {
+ uint32_t hash;
+ struct tree *tree;
+ int count; // reference/occurrence count
+ size_t position; // in queue
+};
+
+struct hash_to_tree {
+ uint32_t hash;
+ struct tree *tree;
+};
+
+// comparison_fn_t for tsearch
+static int hash_compare(const void *_a, const void *_b)
+{
+ const struct tree_tracker *a = _a;
+ const struct tree_tracker *b = _b;
+
+ if (a->hash < b->hash)
+ return -1;
+ if (a->hash > b->hash)
+ return 1;
+ return 0;
+}
+
+// constructs a tree/map/set of all hashes and their occurrence count
+// this is needed because the index count changes (currently untracked, see README)
+// during tree invalidation and less used indices should get shorter encodings
+static void generate_index_mappings(struct tree *tree, void **all_trees,
+ void **set)
+{
+ switch (tree->type) {
+ case ABS:
+ generate_index_mappings(tree->u.abs.term, all_trees, set);
+ break;
+ case APP:
+ generate_index_mappings(tree->u.app.lhs, all_trees, set);
+ generate_index_mappings(tree->u.app.rhs, all_trees, set);
+ break;
+ case VAR:
+ break;
+ case REF:;
+ // increase count of reference
+ struct tree_tracker *element = malloc(sizeof(*element));
+ if (!element)
+ fatal("out of memory!\n");
+ element->hash = tree->u.ref.hash;
+ struct tree_tracker **handle =
+ tsearch(element, set, hash_compare);
+ if (*handle == element) { // first of its kind
+ struct hash_to_tree *tree_element =
+ malloc(sizeof(*tree_element));
+ tree_element->hash = tree->u.ref.hash;
+ struct hash_to_tree **ref_tree =
+ tfind(element, all_trees, hash_compare);
+ if (!ref_tree)
+ fatal("referred tree not found!\n");
+ free(tree_element);
+
+ element->count = 1;
+ element->tree = (*ref_tree)->tree;
+ assert(element->tree);
+ generate_index_mappings(element->tree, all_trees, set);
+ } else {
+ free(element); // already exists, not needed
+ (*handle)->count++;
+ }
+ break;
+ default:
+ fatal("invalid type %d\n", tree->type);
+ }
+}
+
+// sets corresponding table_index of references
+static void fix_tree(struct tree *tree, void **set)
+{
+ switch (tree->type) {
+ case ABS:
+ fix_tree(tree->u.abs.term, set);
+ break;
+ case APP:
+ fix_tree(tree->u.app.lhs, set);
+ fix_tree(tree->u.app.rhs, set);
+ break;
+ case VAR:
+ break;
+ case REF:;
+ struct tree_tracker *element = malloc(sizeof(*element));
+ element->hash = tree->u.ref.hash;
+ if (!element)
+ fatal("out of memory!\n");
+ struct tree_tracker **handle =
+ tfind(element, set, hash_compare);
+ assert(handle); // must exist
+ free(element);
+ tree->u.ref.table_index = (*handle)->position;
+ fix_tree((*handle)->tree, set);
+ break;
+ default:
+ fatal("invalid type %d\n", tree->type);
+ }
+}
+
+// priority of candidate -> occurrence count
+static pqueue_pri_t get_pri(void *a)
+{
+ return ((struct tree_tracker *)a)->count;
+}
+
+static int cmp_pri(pqueue_pri_t next, pqueue_pri_t curr)
+{
+ return next < curr;
+}
+
+static void set_pos(void *a, size_t pos)
+{
+ ((struct tree_tracker *)a)->position = pos - 1;
+}
+
+static struct pqueue *set_queue; // i know this is stupid but tsearch is stupider
+static void walk(void *data, VISIT which, int depth)
+{
+ (void)depth;
+ if (which != preorder && which != leaf)
+ return;
+
+ struct tree_tracker *element = *(struct tree_tracker **)data;
+ // TODO: Merge elements with =1 references
+ if (element->count) { // only insert elements with references
+ pqueue_insert(set_queue, element);
+ }
+}
+
+struct list *optimize_tree(struct tree *tree, void **all_trees)
+{
+ void *set = 0;
+ generate_index_mappings(tree, all_trees, &set);
+
+ // pqueue from mappings: hash -> tree_tracker
+ set_queue = pqueue_init(2 << 7, cmp_pri, get_pri, set_pos);
+ twalk(set, walk);
+
+ fix_tree(tree, &set);
+
+ struct list *list = list_add(0, tree);
+
+ for (size_t i = 1; i < pqueue_size(set_queue) + 1; i++) {
+ struct tree_tracker *element = set_queue->d[i];
+ list = list_add(list, element->tree);
+ }
+ pqueue_free(set_queue);
+
+ return list;
+}
diff --git a/src/pqueue.c b/src/pqueue.c
index 8e7eca7..c9b7d34 100644
--- a/src/pqueue.c
+++ b/src/pqueue.c
@@ -35,7 +35,7 @@
#define parent(i) ((i) >> 1)
struct pqueue *pqueue_init(size_t n, pqueue_cmp_pri_f cmppri,
- pqueue_get_pri_f getpri)
+ pqueue_get_pri_f getpri, pqueue_set_pos_f setpos)
{
struct pqueue *q;
@@ -52,6 +52,7 @@ struct pqueue *pqueue_init(size_t n, pqueue_cmp_pri_f cmppri,
q->avail = q->step = (n + 1); /* see comment above about n+1 */
q->cmppri = cmppri;
q->getpri = getpri;
+ q->setpos = setpos;
return q;
}
@@ -78,9 +79,11 @@ static void bubble_up(struct pqueue *q, size_t i)
((i > 1) && q->cmppri(q->getpri(q->d[parent_node]), moving_pri));
i = parent_node, parent_node = parent(i)) {
q->d[i] = q->d[parent_node];
+ q->setpos(q->d[i], i);
}
q->d[i] = moving_node;
+ q->setpos(moving_node, i);
}
static size_t maxchild(struct pqueue *q, size_t i)
@@ -107,10 +110,12 @@ static void percolate_down(struct pqueue *q, size_t i)
while ((child_node = maxchild(q, i)) &&
q->cmppri(moving_pri, q->getpri(q->d[child_node]))) {
q->d[i] = q->d[child_node];
+ q->setpos(q->d[i], i);
i = child_node;
}
q->d[i] = moving_node;
+ q->setpos(moving_node, i);
}
int pqueue_insert(struct pqueue *q, void *d)
diff --git a/src/tree.c b/src/tree.c
index 755b5e2..8fef9d1 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -50,7 +50,7 @@ static uint32_t murmur3_32(const uint8_t *key, size_t len, uint32_t seed)
}
static struct list *list_end = 0;
-static struct list *list_add(struct list *list, void *data)
+struct list *list_add(struct list *list, void *data)
{
struct list *new = malloc(sizeof(*new));
if (!new)
@@ -62,16 +62,22 @@ static struct list *list_add(struct list *list, void *data)
}
// element of the tsearch tree
-struct set_element {
+struct hash_to_list {
uint32_t hash;
struct list *list;
};
+// element of the tsearch tree
+struct hash_to_tree {
+ uint32_t hash;
+ struct tree *tree;
+};
+
// comparison_fn_t for tsearch
static int hash_compare(const void *_a, const void *_b)
{
- const struct set_element *a = _a;
- const struct set_element *b = _b;
+ const struct hash_to_list *a = _a;
+ const struct hash_to_list *b = _b;
if (a->hash < b->hash)
return -1;
@@ -124,12 +130,12 @@ static struct tree *build_tree(struct term *term, void **set)
if (tree->size < 10) // not suitable for deduplication
return tree;
- struct set_element *element = malloc(sizeof(*element));
+ struct hash_to_list *element = malloc(sizeof(*element));
if (!element)
fatal("out of memory!\n");
element->hash = tree->hash;
- struct set_element **handle = tsearch(element, set, hash_compare);
+ struct hash_to_list **handle = tsearch(element, set, hash_compare);
if (*handle == element) { // first of its kind
element->list = list_add(list_end, tree);
return tree;
@@ -164,7 +170,6 @@ static struct tree *clone_tree_root(struct tree *tree)
default:
free(new);
fatal("invalid type %d\n", tree->type);
- return 0;
}
return new;
@@ -230,7 +235,7 @@ static void ref_invalidated_tree(struct tree *tree)
if (tree->state != INVALIDATED_TREE &&
tree->state != VALIDATED_TREE) { // is reffed
tree->type = REF;
- tree->u.ref.index = tree->state - 1;
+ tree->u.ref.hash = tree->state;
tree->state = VALIDATED_TREE;
}
}
@@ -247,7 +252,13 @@ static int cmp_pri(pqueue_pri_t next, pqueue_pri_t curr)
return next < curr;
}
-struct list *tree_merge_duplicates(struct term *term)
+static void set_pos(void *a, size_t position)
+{
+ (void)a;
+ (void)position;
+}
+
+struct tree *tree_merge_duplicates(struct term *term, void **all_trees)
{
debug("building the merkle tree and deduplication set\n");
@@ -260,22 +271,25 @@ struct list *tree_merge_duplicates(struct term *term)
// construct priority queue while deleting set
// ~> sorts the candidates by get_pri
debug("constructing priority queue\n");
- struct pqueue *prioritized = pqueue_init(2 << 15, cmp_pri, get_pri);
+ struct pqueue *prioritized =
+ pqueue_init(2 << 15, cmp_pri, get_pri, set_pos);
if (!prioritized)
fatal("can't create pqueue\n");
while (set) {
- struct set_element *element = *(struct set_element **)set;
+ struct hash_to_list *element = *(struct hash_to_list **)set;
pqueue_insert(prioritized, element->list);
tdelete(element, &set, hash_compare);
free(element);
}
- struct list *final = list_end;
struct list *invalidated = list_end;
// add longest (=> blueprint/structure of expression)
struct list *longest = pqueue_pop(prioritized);
- final = list_add(final, longest->data);
+ struct hash_to_tree *element = malloc(sizeof(*element));
+ element->hash = ((struct tree *)longest->data)->hash;
+ element->tree = longest->data;
+ tsearch(element, all_trees, hash_compare);
debug("iterating priority queue, invalidating duplicates\n");
struct list *iterator;
@@ -294,7 +308,14 @@ struct list *tree_merge_duplicates(struct term *term)
// clone root so it doesn't get replaced by a ref to itself
struct tree *cloned_head = clone_tree_root(head);
cloned_head->state = INVALIDATED_TREE;
- final = list_add(final, cloned_head);
+
+ element = malloc(sizeof(*element));
+ element->hash = cloned_head->hash;
+ element->tree = cloned_head;
+ struct hash_to_tree **handle =
+ tsearch(element, all_trees, hash_compare);
+ if (*handle != element)
+ free(element); // already exists, not needed
// invalidate all subtrees
// invalidated trees will be replaced with a reference
@@ -303,7 +324,7 @@ struct list *tree_merge_duplicates(struct term *term)
invalidate_tree(list->data, list->val);
// keep a ref for later replacement
- ((struct tree *)list->data)->state = final->val - 1;
+ ((struct tree *)list->data)->state = head->hash;
invalidated = list_add(invalidated, list->data);
list = list->next;
@@ -323,7 +344,7 @@ struct list *tree_merge_duplicates(struct term *term)
// destroy prioritized list
pqueue_free(prioritized);
- return final;
+ return longest->data;
}
void tree_destroy(struct list *table)