diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/build.c | 2 | ||||
-rw-r--r-- | src/main.c | 13 | ||||
-rw-r--r-- | src/optimize.c | 168 | ||||
-rw-r--r-- | src/pqueue.c | 7 | ||||
-rw-r--r-- | src/tree.c | 53 |
5 files changed, 223 insertions, 20 deletions
diff --git a/src/build.c b/src/build.c index 9fdcad4..e396407 100644 --- a/src/build.c +++ b/src/build.c @@ -47,7 +47,7 @@ static void rec_write_bblc(struct tree *tree, FILE *file, char *byte, int *bit) write_bit(1, file, byte, bit); // TODO: The bit-order of encoded shorts is kinda arbitrary - short ref = tree->u.ref.index; + short ref = tree->u.ref.table_index; for (int i = 0; i < 16; i++) write_bit((ref >> i) & 1, file, byte, bit); break; @@ -7,6 +7,7 @@ #include <stdlib.h> #include <term.h> +#include <optimize.h> #include <log.h> #include <print.h> #include <tree.h> @@ -84,7 +85,11 @@ static void test(char *input) debug("parsed original blc\n"); debug("merging duplicates\n"); - struct list *table = tree_merge_duplicates(parsed_1); + void *all_trees = 0; + struct tree *tree = tree_merge_duplicates(parsed_1, &all_trees); + + debug("optimizing tree\n"); + struct list *table = optimize_tree(tree, &all_trees); FILE *temp_bloc = tmpfile(); write_bloc(table, temp_bloc); @@ -123,7 +128,11 @@ static void from_blc(char *input, char *output_path) debug("parsed blc\n"); debug("merging duplicates\n"); - struct list *table = tree_merge_duplicates(parsed); + void *all_trees = 0; + struct tree *tree = tree_merge_duplicates(parsed, &all_trees); + + debug("optimizing tree\n"); + struct list *table = optimize_tree(tree, &all_trees); FILE *file = fopen(output_path, "wb"); write_bloc(table, file); diff --git a/src/optimize.c b/src/optimize.c new file mode 100644 index 0000000..a79299e --- /dev/null +++ b/src/optimize.c @@ -0,0 +1,168 @@ +// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de> +// SPDX-License-Identifier: MIT + +// most of the actual optimizing is done in tree.c +// this file does some extra steps to optimize the tree even further + +#include <search.h> +#include <assert.h> +#include <stdlib.h> + +#include <log.h> +#include <optimize.h> +#include <pqueue.h> + +struct tree_tracker { + uint32_t hash; + struct tree *tree; + int count; // reference/occurrence count + size_t position; // in queue +}; + +struct hash_to_tree { + uint32_t hash; + struct tree *tree; +}; + +// comparison_fn_t for tsearch +static int hash_compare(const void *_a, const void *_b) +{ + const struct tree_tracker *a = _a; + const struct tree_tracker *b = _b; + + if (a->hash < b->hash) + return -1; + if (a->hash > b->hash) + return 1; + return 0; +} + +// constructs a tree/map/set of all hashes and their occurrence count +// this is needed because the index count changes (currently untracked, see README) +// during tree invalidation and less used indices should get shorter encodings +static void generate_index_mappings(struct tree *tree, void **all_trees, + void **set) +{ + switch (tree->type) { + case ABS: + generate_index_mappings(tree->u.abs.term, all_trees, set); + break; + case APP: + generate_index_mappings(tree->u.app.lhs, all_trees, set); + generate_index_mappings(tree->u.app.rhs, all_trees, set); + break; + case VAR: + break; + case REF:; + // increase count of reference + struct tree_tracker *element = malloc(sizeof(*element)); + if (!element) + fatal("out of memory!\n"); + element->hash = tree->u.ref.hash; + struct tree_tracker **handle = + tsearch(element, set, hash_compare); + if (*handle == element) { // first of its kind + struct hash_to_tree *tree_element = + malloc(sizeof(*tree_element)); + tree_element->hash = tree->u.ref.hash; + struct hash_to_tree **ref_tree = + tfind(element, all_trees, hash_compare); + if (!ref_tree) + fatal("referred tree not found!\n"); + free(tree_element); + + element->count = 1; + element->tree = (*ref_tree)->tree; + assert(element->tree); + generate_index_mappings(element->tree, all_trees, set); + } else { + free(element); // already exists, not needed + (*handle)->count++; + } + break; + default: + fatal("invalid type %d\n", tree->type); + } +} + +// sets corresponding table_index of references +static void fix_tree(struct tree *tree, void **set) +{ + switch (tree->type) { + case ABS: + fix_tree(tree->u.abs.term, set); + break; + case APP: + fix_tree(tree->u.app.lhs, set); + fix_tree(tree->u.app.rhs, set); + break; + case VAR: + break; + case REF:; + struct tree_tracker *element = malloc(sizeof(*element)); + element->hash = tree->u.ref.hash; + if (!element) + fatal("out of memory!\n"); + struct tree_tracker **handle = + tfind(element, set, hash_compare); + assert(handle); // must exist + free(element); + tree->u.ref.table_index = (*handle)->position; + fix_tree((*handle)->tree, set); + break; + default: + fatal("invalid type %d\n", tree->type); + } +} + +// priority of candidate -> occurrence count +static pqueue_pri_t get_pri(void *a) +{ + return ((struct tree_tracker *)a)->count; +} + +static int cmp_pri(pqueue_pri_t next, pqueue_pri_t curr) +{ + return next < curr; +} + +static void set_pos(void *a, size_t pos) +{ + ((struct tree_tracker *)a)->position = pos - 1; +} + +static struct pqueue *set_queue; // i know this is stupid but tsearch is stupider +static void walk(void *data, VISIT which, int depth) +{ + (void)depth; + if (which != preorder && which != leaf) + return; + + struct tree_tracker *element = *(struct tree_tracker **)data; + // TODO: Merge elements with =1 references + if (element->count) { // only insert elements with references + pqueue_insert(set_queue, element); + } +} + +struct list *optimize_tree(struct tree *tree, void **all_trees) +{ + void *set = 0; + generate_index_mappings(tree, all_trees, &set); + + // pqueue from mappings: hash -> tree_tracker + set_queue = pqueue_init(2 << 7, cmp_pri, get_pri, set_pos); + twalk(set, walk); + + fix_tree(tree, &set); + + struct list *list = list_add(0, tree); + + for (size_t i = 1; i < pqueue_size(set_queue) + 1; i++) { + struct tree_tracker *element = set_queue->d[i]; + list = list_add(list, element->tree); + } + pqueue_free(set_queue); + + return list; +} diff --git a/src/pqueue.c b/src/pqueue.c index 8e7eca7..c9b7d34 100644 --- a/src/pqueue.c +++ b/src/pqueue.c @@ -35,7 +35,7 @@ #define parent(i) ((i) >> 1) struct pqueue *pqueue_init(size_t n, pqueue_cmp_pri_f cmppri, - pqueue_get_pri_f getpri) + pqueue_get_pri_f getpri, pqueue_set_pos_f setpos) { struct pqueue *q; @@ -52,6 +52,7 @@ struct pqueue *pqueue_init(size_t n, pqueue_cmp_pri_f cmppri, q->avail = q->step = (n + 1); /* see comment above about n+1 */ q->cmppri = cmppri; q->getpri = getpri; + q->setpos = setpos; return q; } @@ -78,9 +79,11 @@ static void bubble_up(struct pqueue *q, size_t i) ((i > 1) && q->cmppri(q->getpri(q->d[parent_node]), moving_pri)); i = parent_node, parent_node = parent(i)) { q->d[i] = q->d[parent_node]; + q->setpos(q->d[i], i); } q->d[i] = moving_node; + q->setpos(moving_node, i); } static size_t maxchild(struct pqueue *q, size_t i) @@ -107,10 +110,12 @@ static void percolate_down(struct pqueue *q, size_t i) while ((child_node = maxchild(q, i)) && q->cmppri(moving_pri, q->getpri(q->d[child_node]))) { q->d[i] = q->d[child_node]; + q->setpos(q->d[i], i); i = child_node; } q->d[i] = moving_node; + q->setpos(moving_node, i); } int pqueue_insert(struct pqueue *q, void *d) @@ -50,7 +50,7 @@ static uint32_t murmur3_32(const uint8_t *key, size_t len, uint32_t seed) } static struct list *list_end = 0; -static struct list *list_add(struct list *list, void *data) +struct list *list_add(struct list *list, void *data) { struct list *new = malloc(sizeof(*new)); if (!new) @@ -62,16 +62,22 @@ static struct list *list_add(struct list *list, void *data) } // element of the tsearch tree -struct set_element { +struct hash_to_list { uint32_t hash; struct list *list; }; +// element of the tsearch tree +struct hash_to_tree { + uint32_t hash; + struct tree *tree; +}; + // comparison_fn_t for tsearch static int hash_compare(const void *_a, const void *_b) { - const struct set_element *a = _a; - const struct set_element *b = _b; + const struct hash_to_list *a = _a; + const struct hash_to_list *b = _b; if (a->hash < b->hash) return -1; @@ -124,12 +130,12 @@ static struct tree *build_tree(struct term *term, void **set) if (tree->size < 10) // not suitable for deduplication return tree; - struct set_element *element = malloc(sizeof(*element)); + struct hash_to_list *element = malloc(sizeof(*element)); if (!element) fatal("out of memory!\n"); element->hash = tree->hash; - struct set_element **handle = tsearch(element, set, hash_compare); + struct hash_to_list **handle = tsearch(element, set, hash_compare); if (*handle == element) { // first of its kind element->list = list_add(list_end, tree); return tree; @@ -164,7 +170,6 @@ static struct tree *clone_tree_root(struct tree *tree) default: free(new); fatal("invalid type %d\n", tree->type); - return 0; } return new; @@ -230,7 +235,7 @@ static void ref_invalidated_tree(struct tree *tree) if (tree->state != INVALIDATED_TREE && tree->state != VALIDATED_TREE) { // is reffed tree->type = REF; - tree->u.ref.index = tree->state - 1; + tree->u.ref.hash = tree->state; tree->state = VALIDATED_TREE; } } @@ -247,7 +252,13 @@ static int cmp_pri(pqueue_pri_t next, pqueue_pri_t curr) return next < curr; } -struct list *tree_merge_duplicates(struct term *term) +static void set_pos(void *a, size_t position) +{ + (void)a; + (void)position; +} + +struct tree *tree_merge_duplicates(struct term *term, void **all_trees) { debug("building the merkle tree and deduplication set\n"); @@ -260,22 +271,25 @@ struct list *tree_merge_duplicates(struct term *term) // construct priority queue while deleting set // ~> sorts the candidates by get_pri debug("constructing priority queue\n"); - struct pqueue *prioritized = pqueue_init(2 << 15, cmp_pri, get_pri); + struct pqueue *prioritized = + pqueue_init(2 << 15, cmp_pri, get_pri, set_pos); if (!prioritized) fatal("can't create pqueue\n"); while (set) { - struct set_element *element = *(struct set_element **)set; + struct hash_to_list *element = *(struct hash_to_list **)set; pqueue_insert(prioritized, element->list); tdelete(element, &set, hash_compare); free(element); } - struct list *final = list_end; struct list *invalidated = list_end; // add longest (=> blueprint/structure of expression) struct list *longest = pqueue_pop(prioritized); - final = list_add(final, longest->data); + struct hash_to_tree *element = malloc(sizeof(*element)); + element->hash = ((struct tree *)longest->data)->hash; + element->tree = longest->data; + tsearch(element, all_trees, hash_compare); debug("iterating priority queue, invalidating duplicates\n"); struct list *iterator; @@ -294,7 +308,14 @@ struct list *tree_merge_duplicates(struct term *term) // clone root so it doesn't get replaced by a ref to itself struct tree *cloned_head = clone_tree_root(head); cloned_head->state = INVALIDATED_TREE; - final = list_add(final, cloned_head); + + element = malloc(sizeof(*element)); + element->hash = cloned_head->hash; + element->tree = cloned_head; + struct hash_to_tree **handle = + tsearch(element, all_trees, hash_compare); + if (*handle != element) + free(element); // already exists, not needed // invalidate all subtrees // invalidated trees will be replaced with a reference @@ -303,7 +324,7 @@ struct list *tree_merge_duplicates(struct term *term) invalidate_tree(list->data, list->val); // keep a ref for later replacement - ((struct tree *)list->data)->state = final->val - 1; + ((struct tree *)list->data)->state = head->hash; invalidated = list_add(invalidated, list->data); list = list->next; @@ -323,7 +344,7 @@ struct list *tree_merge_duplicates(struct term *term) // destroy prioritized list pqueue_free(prioritized); - return final; + return longest->data; } void tree_destroy(struct list *table) |