author     Marvin Borner  2023-02-17 18:11:28 +0100
committer  Marvin Borner  2023-02-17 18:11:28 +0100
commit     9b0ddd467c52c2848efa28f5faa3f2be9b6cc384 (patch)
tree       b72fdc16953ab43232426cc150eda8bc6546957c
parent     4e106e78a98f7a241fc2681fefc0996a34207045 (diff)
Switched to CHAMP library
-rw-r--r--  inc/hamt.h    30
-rw-r--r--  inc/store.h   264
-rw-r--r--  readme.md     27
-rw-r--r--  src/hamt.c    461
-rw-r--r--  src/store.c   1135
5 files changed, 1423 insertions, 494 deletions
diff --git a/inc/hamt.h b/inc/hamt.h deleted file mode 100644 index da5f876..0000000 --- a/inc/hamt.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef HAMT_H -#define HAMT_H - -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> - -typedef int (*hamt_cmp_fn)(const void *lhs, const void *rhs); -typedef uint32_t (*hamt_key_hash_fn)(const void *key, const size_t gen); - -typedef struct hamt_impl *HAMT; - -struct hamt_allocator { - void *(*malloc)(const size_t size); - void *(*realloc)(void *chunk, const size_t size); - void (*free)(void *chunk); -}; - -extern struct hamt_allocator hamt_allocator_default; - -HAMT hamt_create(hamt_key_hash_fn key_hash, hamt_cmp_fn key_cmp, - struct hamt_allocator *ator); -void hamt_delete(HAMT); - -void *hamt_get(const HAMT trie, void *key); -HAMT hamt_pset(const HAMT trie, void *key, void *value); - -typedef struct hamt_iterator_impl *hamt_iterator; - -#endif diff --git a/inc/store.h b/inc/store.h new file mode 100644 index 0000000..a6df112 --- /dev/null +++ b/inc/store.h @@ -0,0 +1,264 @@ +/* + * MIT License + * + * Copyright (c) 2020 Samuel Vogelsanger <vogelsangersamuel@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef STORE_STORE_H +#define STORE_STORE_H + +#include <stdint.h> +#include <stddef.h> + +#ifndef STORE_VERBOSITY +#define STORE_VERBOSITY 0 +#endif + +#define DEBUG_NOTICE(fmt, ...) \ + do { \ + if (STORE_VERBOSITY >= 5) \ + fprintf(stderr, "DEBUG: store: " fmt, __VA_ARGS__); \ + } while (0) +#define DEBUG_WARN(fmt, ...) \ + do { \ + if (STORE_VERBOSITY >= 4) \ + fprintf(stderr, "DEBUG: store: " fmt, __VA_ARGS__); \ + } while (0) + +#ifndef STORE_KEY_T +#define STORE_KEY_T void * +#endif + +#ifndef STORE_VALUE_T +#define STORE_VALUE_T void * +#endif + +/** + * These are mostly for convenience + */ + +#define STORE_HASHFN_T(name) uint32_t (*name)(const STORE_KEY_T) +#define STORE_EQUALSFN_T(name) \ + int (*name)(const STORE_KEY_T left, const STORE_KEY_T right) +#define STORE_ASSOCFN_T(name) \ + STORE_VALUE_T(*name) \ + (const STORE_KEY_T key, const STORE_VALUE_T old_value, void *user_data) +#define STORE_VALUE_EQUALSFN_T(name) \ + int (*name)(const STORE_VALUE_T left, const STORE_VALUE_T right) + +/** + * These macros help with defining the various callbacks. 
Use them like so: + * @code{c} + * STORE_MAKE_EQUALSFN(equals_int, left, right) + * { + * return left == right; + * } + * @endcode + */ + +#define STORE_MAKE_HASHFN(name, arg_1) uint32_t name(const STORE_KEY_T arg_1) +#define STORE_MAKE_EQUALSFN(name, arg_l, arg_r) \ + int name(const STORE_KEY_T arg_l, const STORE_KEY_T arg_r) +#define STORE_MAKE_ASSOCFN(name, key_arg, value_arg, user_data_arg) \ + STORE_VALUE_T name(const STORE_KEY_T key_arg, \ + const STORE_VALUE_T value_arg, void *user_data_arg) +#define STORE_MAKE_VALUE_EQUALSFN(name, arg_l, arg_r) \ + int name(const STORE_VALUE_T arg_l, const STORE_VALUE_T arg_r) + +struct store { + volatile uint32_t ref_count; + unsigned length; + struct node *root; + + STORE_HASHFN_T(hash); + STORE_EQUALSFN_T(equals); +}; + +/** + * Creates a new map with the given hash and equals functions. This implementation is based on the assumption that if + * two keys are equal, their hashes must be equal as well. This is commonly known as the Java Hashcode contract. + * + * The reference count of a new map is zero. + * + * @param hash + * @param equals + * @return + */ +struct store *store_new(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals)); + +/** + * Destroys a store. Doesn't clean up the stored key-value-pairs. + * + * @param old + */ +void store_destroy(struct store **store); + +/** + * Atomically increases the reference count of a map. + * + * @param store + * @return + */ +struct store *store_acquire(const struct store *store); + +/** + * Atomically decreases the reference count of a map and calls store_destroy if it caused the count to drop to zero. + * + * In either case then sets the reference to NULL. + * + * @param store + */ +void store_release(struct store **store); + +/** + * Returns the number of entries in store. + * + * @param store + * @return the number of entries + */ +unsigned store_length(const struct store *store); + +/** + * Looks up key and sets *value_receiver to the associated value. Doesn't change value_receiver if key is not set. + * + * @param store + * @param key + * @param found is set to 0 if key is not set + * @return + */ +STORE_VALUE_T store_get(const struct store *store, const STORE_KEY_T key, + int *found); + +/** + * Returns a new map derived from store but with key set to value. + * If replaced is not NULL, sets it to indicate if the key is present in store. + * + * Reference count of the new map is zero. + * + * @param store + * @param key + * @param value + * @param replaced + * @return a new store + */ +struct store *store_set(const struct store *store, const STORE_KEY_T key, + const STORE_VALUE_T value, int *replaced); + +/** + * Returns a new map derived from store but without a mapping for key. + * + * Reference count of the new map is zero. + * + * @param store + * @param key + * @param modified + * @return + */ +struct store *store_del(const struct store *store, const STORE_KEY_T key, + int *modified); + +/** + * Creates a new store with the given hash and equals functions, and inserts the given keys and values. + * Only the first 'length' elements from keys and values are inserted. + * + * Reference count of the new map is zero. + * + * @param hash + * @param equals + * @param keys + * @param values + * @param length + * @return + */ +struct store *store_of(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals), + STORE_KEY_T *keys, STORE_VALUE_T *values, size_t length); + +/** + * Returns a new map derived from store, but with key set to the return value of fn. 
+ * fn is passed the key, the current value for key, and user_data. + * If key is not present in store, NULL is passed in place of the key and current value. + * + * Reference count of the new map is zero. + * + * @param store + * @param key + * @param fn + * @param user_data + * @return + */ +struct store *store_assoc(const struct store *store, const STORE_KEY_T key, + STORE_ASSOCFN_T(fn), const void *user_data); + +/** + * Compares two maps for equality. A lot of short-circuiting is done on the assumption that unequal hashes + * (for both keys and values) imply inequality. This is commonly known as the Java Hashcode contract: If two values + * are equal, their hashes must be equal as well. + * + * @param left + * @param right + * @return + */ +int store_equals(const struct store *left, const struct store *right, + STORE_VALUE_EQUALSFN_T(value_equals)); + +/** + * An iterator for store. Meant to be put on the stack. + */ +struct store_iter { + int stack_level; + unsigned element_cursor; + unsigned element_arity; + unsigned branch_cursor_stack[8]; + unsigned branch_arity_stack[8]; + const void *node_stack[8]; +}; + +/** + * Initializes an iterator with a store. + * + * Example: + * @code{.c} + * struct store_iter iter; + * STORE_KEY_T key; + * STORE_VAL_T val; + * + * store_iter_init(&iter, store); + * while(store_iter_next(&iter, &key, &val)) { + * // do something with key and value + * } + * @endcode + * + * @param iter + * @param store + */ +void store_iter_init(struct store_iter *iter, const struct store *store); + +/** + * Advances iter and points key_receiver and value_receiver to the next pair. + * + * @param iter + * @param key_receiver + * @param value_receiver + * @return 0 if the end of the store has been reached + */ +int store_iter_next(struct store_iter *iter, STORE_KEY_T *key_receiver, + STORE_VALUE_T *value_receiver); + +#endif @@ -1,15 +1,36 @@ # calm -> **c**all-by-need **a**bstract **l**ambda **m**achine +> **c**alm **a**bstract **l**ambda **m**achine -- Strong reduction (reduction inside abstractions) of call-by-need - lambda calculus +- **Strong** reduction (reduction inside abstractions) of + **call-by-need** lambda calculus - Originally intended as reducer of the [`bruijn`](https://github.com/marvinborner/bruijn) programming language - Useful for proof assistants or as a high-level lambda-term reducer of functional programming languages - Based on bleeding-edge research results +- Exponentially big normal forms of the family $e_n=λx.c_nωx$, where + $ω:=λx.xx$ and $c_n$ denotes the $n$th Church numeral, consume only + a linear in $n$ amount of memory and are computed in linear + time\[0\] + +## Why C? + +You might realize that the reduction transitions are of a very +functional nature and aren't that obvious to implement in non-functional +languages. + +I used C because **(1)** most functional languages use automatic memory +management (often using periodic garbage collecting) which unnecessarily +slows down the transitions, **(2)** there aren't that many efficient +lambda calculus reducers in C out there, **(3)** it's a fun challenge +and **(4)** I like C. 
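As a quick orientation for the new `inc/store.h` API added above, here is a minimal usage sketch. It assumes the header is on the include path and smuggles integers through the default `void *` key/value slots; `hash_int` and `equals_int` are illustrative names, not part of the library:

```c
#include <stdint.h>
#include <stdio.h>
#include "store.h"

/* Illustrative callbacks: keys are ints cast into the void* key slot,
 * so the identity hash trivially satisfies the "equal keys have equal
 * hashes" contract the header documents. */
STORE_MAKE_HASHFN(hash_int, key)
{
	return (uint32_t)(uintptr_t)key;
}

STORE_MAKE_EQUALSFN(equals_int, left, right)
{
	return left == right;
}

int main(void)
{
	/* Fresh stores have reference count zero; acquire what you keep. */
	struct store *v0 = store_acquire(store_new(hash_int, equals_int));

	/* store_set is persistent: v0 is untouched, v1 is a new version
	 * that shares structure with v0. */
	int replaced;
	struct store *v1 = store_acquire(
		store_set(v0, (void *)1, (void *)42, &replaced));

	int found;
	void *val = store_get(v1, (void *)1, &found);
	printf("len(v0)=%u len(v1)=%u found=%d val=%ld\n",
	       store_length(v0), store_length(v1), found, (long)val);

	store_release(&v1); /* count drops to zero, v1 is freed */
	store_release(&v0);
	return 0;
}
```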
+ +## Libraries + +- [CHAMP](https://github.com/ammut/immutable-c-ollections) \[MIT\]: + Underrated efficient hash array mapped trie ## Research diff --git a/src/hamt.c b/src/hamt.c deleted file mode 100644 index f9bd8b2..0000000 --- a/src/hamt.c +++ /dev/null @@ -1,461 +0,0 @@ -#include "hamt.h" - -#include <assert.h> -#include <stdbool.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -/* Pointer tagging */ -#define HAMT_TAG_MASK 0x3 /* last two bits */ -#define HAMT_TAG_VALUE 0x1 -#define tagged(__p) (hamt_node *)((uintptr_t)__p | HAMT_TAG_VALUE) -#define untagged(__p) (hamt_node *)((uintptr_t)__p & ~HAMT_TAG_MASK) -#define is_value(__p) (((uintptr_t)__p & HAMT_TAG_MASK) == HAMT_TAG_VALUE) - -/* Bit fiddling */ -#define index_clear_bit(_index, _n) _index & ~(1ul << _n) -#define index_set_bit(_index, _n) _index | (1ul << _n) - -/* Node data structure */ -#define TABLE(a) a->as.table.ptr -#define INDEX(a) a->as.table.index -#define VALUE(a) a->as.kv.value -#define KEY(a) a->as.kv.key - -/* Memory management */ -#define mem_alloc(ator, size) (ator)->malloc(size) -#define mem_realloc(ator, ptr, size) (ator)->realloc(ptr, size) -#define mem_free(ator, ptr) (ator)->free(ptr) -/* Default allocator uses system malloc */ -struct hamt_allocator hamt_allocator_default = { malloc, realloc, free }; - -typedef struct hamt_node { - union { - struct { - void *value; /* tagged pointer */ - const void *key; - } kv; - struct { - struct hamt_node *ptr; - uint32_t index; - } table; - } as; -} hamt_node; - -struct hamt_stats { - size_t table_sizes[32]; -}; - -/* Opaque user-facing implementation */ -struct hamt_impl { - struct hamt_node *root; - size_t size; - hamt_key_hash_fn key_hash; - hamt_cmp_fn key_cmp; - struct hamt_allocator *ator; - struct hamt_stats stats; -}; - -/* hash_stateing w/ state management */ -typedef struct hash_state { - const void *key; - hamt_key_hash_fn hash_fn; - uint32_t hash; - size_t depth; - size_t shift; -} hash_state; - -/* Search results */ -typedef enum { - SEARCH_SUCCESS, - SEARCH_FAIL_NOTFOUND, - SEARCH_FAIL_KEYMISMATCH -} search_status; - -typedef struct search_result { - search_status status; - hamt_node *anchor; - hamt_node *value; - hash_state *hash; -} search_result; - -/* Removal results */ -typedef enum { REMOVE_SUCCESS, REMOVE_GATHERED, REMOVE_NOTFOUND } remove_status; - -typedef struct remove_result { - remove_status status; - void *value; -} remove_result; - -typedef struct path_result { - union { - search_result sr; - remove_result rr; - } u; - hamt_node *root; -} path_result; - -static inline hash_state *hash_next(hash_state *h) -{ - h->depth += 1; - h->shift += 5; - if (h->shift > 30) { - h->hash = h->hash_fn(h->key, h->depth / 5); - h->shift = 0; - } - return h; -} - -static inline uint32_t hash_get_index(const hash_state *h) -{ - return (h->hash >> h->shift) & 0x1f; -} - -static int get_popcount(uint32_t n) -{ - return __builtin_popcount(n); -} - -static int get_pos(uint32_t sparse_index, uint32_t bitmap) -{ - return get_popcount(bitmap & ((1ul << sparse_index) - 1)); -} - -static inline bool has_index(const hamt_node *anchor, size_t index) -{ - assert(anchor && "anchor must not be NULL"); - assert(index < 32 && "index must not be larger than 31"); - return INDEX(anchor) & (1ul << index); -} - -static hamt_node *table_allocate(struct hamt_impl *h, size_t size) -{ - if (size) - h->stats.table_sizes[size - 1] += 1; - return (hamt_node *)mem_alloc(h->ator, (size * sizeof(hamt_node))); -} - -static void table_free(struct 
hamt_impl *h, hamt_node *ptr, size_t size) -{ - mem_free(h->ator, ptr); - if (size) - h->stats.table_sizes[size - 1] -= 1; -} - -static hamt_node *table_extend(struct hamt_impl *h, hamt_node *anchor, - size_t n_rows, uint32_t index, uint32_t pos) -{ - hamt_node *new_table = table_allocate(h, n_rows + 1); - if (!new_table) - return NULL; - if (n_rows > 0) { - /* copy over table */ - memcpy(&new_table[0], &TABLE(anchor)[0], - pos * sizeof(hamt_node)); - /* note: this works since (n_rows - pos) == 0 for cases - * where we're adding the new k/v pair at the end (i.e. memcpy(a, b, 0) - * is a nop) */ - memcpy(&new_table[pos + 1], &TABLE(anchor)[pos], - (n_rows - pos) * sizeof(hamt_node)); - } - assert(!is_value(VALUE(anchor)) && "URGS"); - table_free(h, TABLE(anchor), n_rows); - TABLE(anchor) = new_table; - INDEX(anchor) |= (1ul << index); - return anchor; -} - -static hamt_node *table_shrink(struct hamt_impl *h, hamt_node *anchor, - size_t n_rows, uint32_t index, uint32_t pos) -{ - /* debug assertions */ - assert(anchor && "Anchor cannot be NULL"); - assert(!is_value(VALUE(anchor)) && - "Invariant: shrinking a table requires an internal node"); - - hamt_node *new_table = NULL; - uint32_t new_index = 0; - if (n_rows > 0) { - new_table = table_allocate(h, n_rows - 1); - if (!new_table) - return NULL; - new_index = INDEX(anchor) & ~(1ul << index); - memcpy(&new_table[0], &TABLE(anchor)[0], - pos * sizeof(hamt_node)); - memcpy(&new_table[pos], &TABLE(anchor)[pos + 1], - (n_rows - pos - 1) * sizeof(hamt_node)); - } - table_free(h, TABLE(anchor), n_rows); - INDEX(anchor) = new_index; - TABLE(anchor) = new_table; - return anchor; -} - -static hamt_node *table_gather(struct hamt_impl *h, hamt_node *anchor, - uint32_t pos) -{ - /* debug assertions */ - assert(anchor && "Anchor cannot be NULL"); - assert(!is_value(VALUE(anchor)) && - "Invariant: gathering a table requires an internal anchor"); - assert((pos == 0 || pos == 1) && "pos must be 0 or 1"); - - int n_rows = get_popcount(INDEX(anchor)); - assert((n_rows == 2 || n_rows == 1) && - "Table must have size 1 or 2 to gather"); - hamt_node *table = TABLE(anchor); - KEY(anchor) = table[pos].as.kv.key; - VALUE(anchor) = table[pos].as.kv.value; /* already tagged */ - table_free(h, table, n_rows); - return anchor; -} - -static hamt_node *table_dup(struct hamt_impl *h, hamt_node *anchor) -{ - int n_rows = get_popcount(INDEX(anchor)); - hamt_node *new_table = table_allocate(h, n_rows); - if (new_table && TABLE(anchor)) { - memcpy(&new_table[0], &TABLE(anchor)[0], - n_rows * sizeof(hamt_node)); - } - return new_table; -} - -HAMT hamt_create(hamt_key_hash_fn key_hash, hamt_cmp_fn key_cmp, - struct hamt_allocator *ator) -{ - struct hamt_impl *trie = mem_alloc(ator, sizeof(struct hamt_impl)); - trie->ator = ator; - trie->root = mem_alloc(ator, sizeof(hamt_node)); - memset(trie->root, 0, sizeof(hamt_node)); - trie->size = 0; - trie->key_hash = key_hash; - trie->key_cmp = key_cmp; - // memset(trie->stats.table_sizes, 0, 32*sizeof(size_t)); - trie->stats = (struct hamt_stats){ .table_sizes = { 0 } }; - return trie; -} - -static HAMT hamt_dup(HAMT h) -{ - struct hamt_impl *trie = mem_alloc(h->ator, sizeof(struct hamt_impl)); - trie->ator = h->ator; - trie->root = mem_alloc(h->ator, sizeof(hamt_node)); - memcpy(trie->root, h->root, sizeof(hamt_node)); - trie->size = h->size; - trie->key_hash = h->key_hash; - trie->key_cmp = h->key_cmp; - memcpy(&trie->stats, &h->stats, sizeof(struct hamt_stats)); - return trie; -} - -static const hamt_node *insert_kv(struct hamt_impl 
*h, hamt_node *anchor, - hash_state *hash, void *key, void *value) -{ - /* calculate position in new table */ - uint32_t ix = hash_get_index(hash); - uint32_t new_index = INDEX(anchor) | (1ul << ix); - int pos = get_pos(ix, new_index); - /* extend table */ - size_t n_rows = get_popcount(INDEX(anchor)); - anchor = table_extend(h, anchor, n_rows, ix, pos); - if (!anchor) - return NULL; - hamt_node *new_table = TABLE(anchor); - /* set new k/v pair */ - new_table[pos].as.kv.key = key; - new_table[pos].as.kv.value = tagged(value); - /* return a pointer to the inserted k/v pair */ - return &new_table[pos]; -} - -static const hamt_node *insert_table(struct hamt_impl *h, hamt_node *anchor, - hash_state *hash, void *key, void *value) -{ - /* FIXME: check for alloc failure and bail out correctly (deleting the - * incomplete subtree */ - - /* Collect everything we know about the existing value */ - hash_state *x_hash = - &(hash_state){ .key = KEY(anchor), - .hash_fn = hash->hash_fn, - .hash = hash->hash_fn(KEY(anchor), - hash->depth / 5), - .depth = hash->depth, - .shift = hash->shift }; - void *x_value = VALUE(anchor); /* tagged (!) value ptr */ - /* increase depth until the hashes diverge, building a list - * of tables along the way */ - hash_state *next_hash = hash_next(hash); - hash_state *x_next_hash = hash_next(x_hash); - uint32_t next_index = hash_get_index(next_hash); - uint32_t x_next_index = hash_get_index(x_next_hash); - while (x_next_index == next_index) { - TABLE(anchor) = table_allocate(h, 1); - INDEX(anchor) = (1ul << next_index); - next_hash = hash_next(next_hash); - x_next_hash = hash_next(x_next_hash); - next_index = hash_get_index(next_hash); - x_next_index = hash_get_index(x_next_hash); - anchor = TABLE(anchor); - } - /* the hashes are different, let's allocate a table with two - * entries to store the existing and new values */ - TABLE(anchor) = table_allocate(h, 2); - INDEX(anchor) = (1ul << next_index) | (1ul << x_next_index); - /* determine the proper position in the allocated table */ - int x_pos = get_pos(x_next_index, INDEX(anchor)); - int pos = get_pos(next_index, INDEX(anchor)); - /* fill in the existing value; no need to tag the value pointer - * since it is already tagged. 
*/ - TABLE(anchor)[x_pos].as.kv.key = (const void *)x_hash->key; - TABLE(anchor)[x_pos].as.kv.value = x_value; - /* fill in the new key/value pair, tagging the pointer to the - * new value to mark it as a value ptr */ - TABLE(anchor)[pos].as.kv.key = key; - TABLE(anchor)[pos].as.kv.value = tagged(value); - - return &TABLE(anchor)[pos]; -} - -static search_result search_recursive(struct hamt_impl *h, hamt_node *anchor, - hash_state *hash, hamt_cmp_fn cmp_eq, - const void *key, hamt_node *path) -{ - assert(!is_value(VALUE(anchor)) && - "Invariant: path copy requires an internal node"); - hamt_node *copy = path; - if (path) { - /* copy the table we're pointing to */ - TABLE(copy) = table_dup(h, anchor); - INDEX(copy) = INDEX(anchor); - assert(!is_value(VALUE(copy)) && - "Copy caused a leaf/internal switch"); - } else { - copy = anchor; - } - - /* determine the expected index in table */ - uint32_t expected_index = hash_get_index(hash); - /* check if the expected index is set */ - if (has_index(copy, expected_index)) { - /* if yes, get the compact index to address the array */ - int pos = get_pos(expected_index, INDEX(copy)); - /* index into the table and check what type of entry we're looking at */ - hamt_node *next = &TABLE(copy)[pos]; - if (is_value(VALUE(next))) { - if ((*cmp_eq)(key, KEY(next)) == 0) { - /* keys match */ - search_result result = { .status = - SEARCH_SUCCESS, - .anchor = copy, - .value = next, - .hash = hash }; - return result; - } - /* not found: same hash but different key */ - search_result result = { - .status = SEARCH_FAIL_KEYMISMATCH, - .anchor = copy, - .value = next, - .hash = hash - }; - return result; - } else { - /* For table entries, recurse to the next level */ - assert(TABLE(next) != NULL && - "invariant: table ptrs must not be NULL"); - return search_recursive(h, next, hash_next(hash), - cmp_eq, key, - path ? 
next : NULL); - } - } - /* expected index is not set, terminate search */ - search_result result = { .status = SEARCH_FAIL_NOTFOUND, - .anchor = copy, - .value = NULL, - .hash = hash }; - return result; -} - -void *hamt_get(const HAMT trie, void *key) -{ - hash_state *hash = &(hash_state){ .key = key, - .hash_fn = trie->key_hash, - .hash = trie->key_hash(key, 0), - .depth = 0, - .shift = 0 }; - search_result sr = search_recursive(trie, trie->root, hash, - trie->key_cmp, key, NULL); - if (sr.status == SEARCH_SUCCESS) { - return untagged(sr.VALUE(value)); - } - return NULL; -} - -static path_result search(struct hamt_impl *h, hamt_node *anchor, - hash_state *hash, hamt_cmp_fn cmp_eq, const void *key) -{ - path_result pr; - pr.root = mem_alloc(h->ator, sizeof(hamt_node)); - pr.u.sr = search_recursive(h, anchor, hash, cmp_eq, key, pr.root); - return pr; -} - -HAMT hamt_pset(HAMT h, void *key, void *value) -{ - hash_state *hash = &(hash_state){ .key = key, - .hash_fn = h->key_hash, - .hash = h->key_hash(key, 0), - .depth = 0, - .shift = 0 }; - HAMT cp = hamt_dup(h); - path_result pr = search(h, h->root, hash, h->key_cmp, key); - cp->root = pr.root; - switch (pr.u.sr.status) { - case SEARCH_SUCCESS: - pr.u.sr.VALUE(value) = tagged(value); - break; - case SEARCH_FAIL_NOTFOUND: - if (insert_kv(h, pr.u.sr.anchor, pr.u.sr.hash, key, value) != - NULL) { - cp->size += 1; - } - break; - case SEARCH_FAIL_KEYMISMATCH: - if (insert_table(h, pr.u.sr.value, pr.u.sr.hash, key, value) != - NULL) { - cp->size += 1; - } - break; - default: - fprintf(stderr, "Invalid result status\n"); - } - return cp; -} - -/* delete recursively from anchor */ -static void delete_recursive(struct hamt_impl *h, hamt_node *anchor) -{ - if (TABLE(anchor)) { - assert(!is_value(VALUE(anchor)) && - "delete requires an internal node"); - size_t n = get_popcount(INDEX(anchor)); - for (size_t i = 0; i < n; ++i) { - if (!is_value(TABLE(anchor)[i].as.kv.value)) { - delete_recursive(h, &TABLE(anchor)[i]); - } - } - table_free(h, TABLE(anchor), n); - TABLE(anchor) = NULL; - } -} - -void hamt_delete(HAMT h) -{ - delete_recursive(h, h->root); - mem_free(h->ator, h->root); - mem_free(h->ator, h); -} diff --git a/src/store.c b/src/store.c new file mode 100644 index 0000000..92d7099 --- /dev/null +++ b/src/store.c @@ -0,0 +1,1135 @@ +/* + * MIT License + * + * Copyright (c) 2020 Samuel Vogelsanger <vogelsangersamuel@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/* + * All the ref-counting specific code was marked with a "//reference counting" comment. If you need to modify this to + * work with your own memory policy, it is recommended to start looking at those places to understand when and where + * memory is allocated and freed. + */ + +#include <malloc.h> +#include <stdint.h> +#include <stdio.h> +#include <stdatomic.h> // reference counting +#include <string.h> + +#include <store.h> + +#define store_node_debug_fmt \ + "node{element_arity=%u, element_map=%08x, branch_arity=%u, branch_map=%08x, ref_count=%u}" +#define store_node_debug_args(node) \ + node->element_arity, node->element_map, node->branch_arity, \ + node->branch_map, node->ref_count + +#define HASH_PARTITION_WIDTH 5u +#define HASH_TOTAL_WIDTH (8 * sizeof(uint32_t)) + +/* + * Helper functions + */ + +static unsigned bitcount(uint32_t value) +{ + // taken from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + value = value - + ((value >> 1u) & 0x55555555u); // reuse input as temporary + value = (value & 0x33333333u) + ((value >> 2u) & 0x33333333u); // temp + return (((value + (value >> 4u)) & 0xF0F0F0Fu) * 0x1010101u) >> + 24u; // count +} + +static uint32_t store_mask(uint32_t hash, unsigned shift) +{ + return (hash >> shift) & ((1u << HASH_PARTITION_WIDTH) - 1); +} + +static unsigned store_index(uint32_t bitmap, uint32_t bitpos) +{ + return bitcount(bitmap & (bitpos - 1)); +} + +/* + * Data structure definitions + */ + +struct kv { + STORE_KEY_T key; + STORE_VALUE_T val; +}; + +#define STORE_NODE_ELEMENT_T struct kv +#define STORE_NODE_BRANCH_T struct node * + +struct node { + uint8_t element_arity; + uint8_t branch_arity; + volatile uint16_t ref_count; // reference counting + uint32_t element_map; + uint32_t branch_map; + STORE_NODE_ELEMENT_T content[]; +}; + +struct collision_node { + uint8_t element_arity; // MUST SHARE LAYOUT WITH struct node + uint8_t branch_arity; // MUST SHARE LAYOUT WITH struct node + volatile uint16_t + ref_count; // MUST SHARE LAYOUT WITH struct node // reference counting + STORE_NODE_ELEMENT_T content[]; +}; + +static const struct node empty_node = { + .branch_arity = 0, + .element_arity = 0, + .ref_count = 1, + .branch_map = 0, + .element_map = 0, +}; + +#define STORE_NODE_ELEMENTS(node) (node)->content +#define STORE_NODE_BRANCHES(node) \ + ((STORE_NODE_BRANCH_T const *)&(node)->content[(node)->element_arity]) + +#define STORE_NODE_ELEMENTS_SIZE(length) \ + (sizeof(STORE_NODE_ELEMENT_T) * (length)) +#define STORE_NODE_BRANCHES_SIZE(length) \ + (sizeof(STORE_NODE_BRANCH_T) * (length)) + +#define STORE_NODE_ELEMENT_AT(node, bitpos) \ + STORE_NODE_ELEMENTS(node)[store_index(node->element_map, bitpos)] +#define STORE_NODE_BRANCH_AT(node, bitpos) \ + STORE_NODE_BRANCHES(node)[store_index(node->branch_map, bitpos)] + +/* + * static function declarations + */ + +// node constructor +static struct node *node_new(uint32_t element_map, uint32_t branch_map, + STORE_NODE_ELEMENT_T const *elements, + uint8_t element_arity, + STORE_NODE_BRANCH_T const *branches, + uint8_t branch_arity); + +// collision node variant +static struct collision_node * +collision_node_new(const STORE_NODE_ELEMENT_T *values, uint8_t element_arity); + +// destructor +static void node_destroy(struct node *node); + +// reference counting +static inline struct node *store_node_acquire(const struct node *node); + +// reference counting +static inline void store_node_release(const struct node *node); + +// top-level functions +static STORE_VALUE_T node_get(const 
struct node *node, STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, uint32_t hash, + unsigned shift, int *found); + +static struct node *node_update(const struct node *node, STORE_HASHFN_T(hashfn), + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + const STORE_VALUE_T value, uint32_t hash, + unsigned shift, int *found); + +static struct node *node_assoc(const struct node *node, STORE_HASHFN_T(hashfn), + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + STORE_ASSOCFN_T(fn), const void *user_data, + uint32_t hash, unsigned shift, int *found); + +static struct node *node_del(const struct node *node, STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, uint32_t hash, + unsigned shift, int *modified); + +// collision node variants +static STORE_VALUE_T collision_node_get(const struct collision_node *node, + STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, int *found); + +static struct collision_node * +collision_node_update(const struct collision_node *node, + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + const STORE_VALUE_T value, int *found); + +static struct collision_node * +collision_node_assoc(const struct collision_node *node, + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + STORE_ASSOCFN_T(fn), const void *user_data, int *found); + +static struct collision_node * +collision_node_del(const struct collision_node *node, STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, int *modified); + +// helper functions for creation of modified nodes +static struct node *node_merge(uint32_t hash_l, const STORE_KEY_T key_l, + const STORE_VALUE_T value_l, uint32_t hash_r, + const STORE_KEY_T key_r, + const STORE_VALUE_T value_r, unsigned shift); + +static struct node *node_clone_pullup(const struct node *node, uint32_t bitpos, + const struct kv element); + +static struct node *node_clone_update_branch(const struct node *node, + uint32_t bitpos, + struct node *branch); + +static struct node *node_clone_pushdown(const struct node *node, + uint32_t bitpos, struct node *branch); + +static struct node *node_clone_insert_element(const struct node *node, + uint32_t bitpos, + const STORE_KEY_T key, + const STORE_VALUE_T value); + +static struct node *node_clone_update_element(const struct node *node, + uint32_t bitpos, + const STORE_VALUE_T value); + +static struct node *node_clone_remove_element(const struct node *node, + uint32_t bitpos); + +// collision node variants +static struct collision_node * +collision_node_clone_insert_element(const struct collision_node *node, + const STORE_KEY_T key, + const STORE_VALUE_T value); + +static struct collision_node * +collision_node_clone_update_element(const struct collision_node *node, + unsigned index, const STORE_VALUE_T value); + +static struct collision_node * +collision_node_clone_remove_element(const struct collision_node *node, + unsigned index); + +// equality +static int node_equals(const struct node *left, const struct node *right, + STORE_EQUALSFN_T(key_equals), + STORE_VALUE_EQUALSFN_T(value_equals), unsigned shift); + +static int collision_node_equals(const struct collision_node *left, + const struct collision_node *right, + STORE_EQUALSFN_T(key_equals), + STORE_VALUE_EQUALSFN_T(value_equals)); + +// store private constructor +static struct store *store_from(struct node *root, unsigned length, + STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals)); + +// iterator helper functions +static void iter_push(struct store_iter *iterator, const struct node *node); + +static void iter_pop(struct store_iter *iterator); + +/* + * definitions + */ + +static 
void node_destroy(struct node *node) +{ + DEBUG_NOTICE(" destroying " store_node_debug_fmt "@%p\n", + store_node_debug_args(node), (void *)node); + + // reference counting + STORE_NODE_BRANCH_T *branches = + (STORE_NODE_BRANCH_T *)STORE_NODE_BRANCHES(node); + for (int i = 0; i < node->branch_arity; ++i) { + store_node_release(branches[i]); + } + + free(node); +} + +// reference counting +static inline struct node *store_node_acquire(const struct node *node) +{ + if (node == &empty_node) + return (struct node *)node; + atomic_fetch_add((uint16_t *)&node->ref_count, 1u); + return (struct node *)node; +} + +// reference counting +static inline void store_node_release(const struct node *node) +{ + if (node == &empty_node) + return; + if (atomic_fetch_sub((uint16_t *)&node->ref_count, 1u) == 1) + node_destroy((struct node *)node); +} + +/** + * WARNING: all branches in <code>branches</code> are "acquired", i.e. their reference count is incremented. + * Do not pass an "almost correct" list of branches. + */ +static struct node *node_new(uint32_t element_map, uint32_t branch_map, + STORE_NODE_ELEMENT_T const *elements, + uint8_t element_arity, + STORE_NODE_BRANCH_T const *branches, + uint8_t branch_arity) +{ + const size_t content_size = STORE_NODE_ELEMENTS_SIZE(element_arity) + + STORE_NODE_BRANCHES_SIZE(branch_arity); + struct node *result = malloc(sizeof(*result) + content_size); + + result->element_arity = element_arity; + result->branch_arity = branch_arity; + result->ref_count = 0; + result->element_map = element_map; + result->branch_map = branch_map; + + if (elements) { + memcpy(STORE_NODE_ELEMENTS(result), elements, + STORE_NODE_ELEMENTS_SIZE(element_arity)); + } + + STORE_NODE_BRANCH_T *branches_dest = + (STORE_NODE_BRANCH_T *)STORE_NODE_BRANCHES(result); + // reference counting + for (int i = 0; i < branch_arity; ++i) { + branches_dest[i] = store_node_acquire(branches[i]); + } + + return result; +} + +static STORE_VALUE_T collision_node_get(const struct collision_node *node, + STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, int *found) +{ + for (unsigned i = 0; i < node->element_arity; ++i) { + struct kv kv = node->content[i]; + if (equals(kv.key, key)) { + *found = 1; + return kv.val; + } + } + + *found = 0; + return (STORE_VALUE_T)0; +} + +static STORE_VALUE_T node_get(const struct node *node, STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, uint32_t hash, + unsigned shift, int *found) +{ + if (shift >= HASH_TOTAL_WIDTH) + return collision_node_get((const struct collision_node *)node, + equals, key, found); + + const uint32_t bitpos = 1u << store_mask(hash, shift); + + if (node->branch_map & bitpos) { + return node_get(STORE_NODE_BRANCH_AT(node, bitpos), equals, key, + hash, shift + HASH_PARTITION_WIDTH, found); + + } else if (node->element_map & bitpos) { + STORE_NODE_ELEMENT_T kv = STORE_NODE_ELEMENT_AT(node, bitpos); + if (equals(kv.key, key)) { + *found = 1; + return kv.val; + } + } + + *found = 0; + return (STORE_VALUE_T)0; +} + +static struct node *node_clone_insert_element(const struct node *node, + uint32_t bitpos, + const STORE_KEY_T key, + const STORE_VALUE_T value) +{ + STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH]; + const unsigned index = store_index(node->element_map, bitpos); + + // copy <element_arity> chunks in total + memcpy(elements, STORE_NODE_ELEMENTS(node), + STORE_NODE_ELEMENTS_SIZE(index)); // copy first <index> chunks + elements[index].key = (STORE_KEY_T)key; + elements[index].val = (STORE_VALUE_T)value; + memcpy(&elements[index + 1], // 
start copying into one-past-<index> + &STORE_NODE_ELEMENTS(node)[index], // start copying from <index> + STORE_NODE_ELEMENTS_SIZE( + node->element_arity - + index) // <index> chunks already copied, <element_arity> - <index> remaining + ); + + return node_new(node->element_map | bitpos, node->branch_map, elements, + node->element_arity + 1, STORE_NODE_BRANCHES(node), + node->branch_arity); +} + +static struct node *node_clone_update_element(const struct node *node, + uint32_t bitpos, + const STORE_VALUE_T value) +{ + STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH]; + const unsigned index = store_index(node->element_map, bitpos); + + memcpy(elements, STORE_NODE_ELEMENTS(node), + STORE_NODE_ELEMENTS_SIZE(node->element_arity)); + elements[index].val = (STORE_VALUE_T)value; + return node_new(node->element_map, node->branch_map, elements, + node->element_arity, STORE_NODE_BRANCHES(node), + node->branch_arity); +} + +static struct node *node_clone_update_branch(const struct node *node, + uint32_t bitpos, + struct node *branch) +{ + STORE_NODE_BRANCH_T branches[1u << HASH_PARTITION_WIDTH]; + const unsigned index = store_index(node->branch_map, bitpos); + + memcpy(branches, STORE_NODE_BRANCHES(node), + STORE_NODE_BRANCHES_SIZE(node->branch_arity)); + branches[index] = branch; + return node_new(node->element_map, node->branch_map, + STORE_NODE_ELEMENTS(node), node->element_arity, + branches, node->branch_arity); +} + +static struct node *node_clone_pushdown(const struct node *node, + uint32_t bitpos, struct node *branch) +{ + STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH]; + STORE_NODE_BRANCH_T branches[1u << HASH_PARTITION_WIDTH]; + const unsigned element_index = store_index(node->element_map, bitpos); + const unsigned branch_index = store_index(node->branch_map, bitpos); + + memcpy(elements, STORE_NODE_ELEMENTS(node), + STORE_NODE_ELEMENTS_SIZE(element_index)); + memcpy(&elements[element_index], + &STORE_NODE_ELEMENTS(node)[element_index + 1], + STORE_NODE_ELEMENTS_SIZE(node->element_arity - + (element_index + 1))); + + memcpy(branches, STORE_NODE_BRANCHES(node), + STORE_NODE_BRANCHES_SIZE(branch_index)); + memcpy(&branches[branch_index + 1], + &STORE_NODE_BRANCHES(node)[branch_index], + STORE_NODE_BRANCHES_SIZE(node->branch_arity - branch_index)); + branches[branch_index] = branch; + + return node_new(node->element_map & ~bitpos, node->branch_map | bitpos, + elements, node->element_arity - 1, branches, + node->branch_arity + 1); +} + +static struct collision_node * +collision_node_new(const STORE_NODE_ELEMENT_T *values, uint8_t element_arity) +{ + size_t content_size = sizeof(STORE_NODE_ELEMENT_T) * element_arity; + struct collision_node *result = malloc(sizeof(*result) + content_size); + + result->element_arity = element_arity; + result->branch_arity = 0; + result->ref_count = 0; + + memcpy(result->content, values, + STORE_NODE_ELEMENTS_SIZE(element_arity)); + + return result; +} + +static struct node *node_merge(uint32_t hash_l, const STORE_KEY_T key_l, + const STORE_VALUE_T value_l, uint32_t hash_r, + const STORE_KEY_T key_r, + const STORE_VALUE_T value_r, unsigned shift) +{ + uint32_t bitpos_l = 1u << store_mask(hash_l, shift); + uint32_t bitpos_r = 1u << store_mask(hash_r, shift); + + if (shift >= HASH_TOTAL_WIDTH) { + STORE_NODE_ELEMENT_T elements[2]; + elements[0].key = (STORE_KEY_T)key_l; + elements[0].val = (STORE_VALUE_T)value_l; + elements[1].key = (STORE_KEY_T)key_r; + elements[1].val = (STORE_VALUE_T)value_r; + + return (struct node *)collision_node_new(elements, 
2); + + } else if (bitpos_l != bitpos_r) { + STORE_NODE_ELEMENT_T elements[2]; + + if (bitpos_l <= bitpos_r) { + elements[0].key = (STORE_KEY_T)key_l; + elements[0].val = (STORE_VALUE_T)value_l; + elements[1].key = (STORE_KEY_T)key_r; + elements[1].val = (STORE_VALUE_T)value_r; + } else { + elements[0].key = (STORE_KEY_T)key_r; + elements[0].val = (STORE_VALUE_T)value_r; + elements[1].key = (STORE_KEY_T)key_l; + elements[1].val = (STORE_VALUE_T)value_l; + } + + return node_new(bitpos_l | bitpos_r, 0u, elements, 2, NULL, 0); + + } else { + struct node *sub_node = + node_merge(hash_l, key_l, value_l, hash_r, key_r, + value_r, shift + HASH_PARTITION_WIDTH); + + return node_new(0, bitpos_l, NULL, 0, &sub_node, 1); + } +} + +static struct collision_node * +collision_node_clone_update_element(const struct collision_node *node, + unsigned index, const STORE_VALUE_T value) +{ + STORE_NODE_ELEMENT_T elements[node->element_arity]; + + memcpy(elements, node->content, + STORE_NODE_ELEMENTS_SIZE(node->element_arity)); + elements[index].val = (STORE_VALUE_T)value; + + return collision_node_new(elements, node->element_arity); +} + +static struct collision_node * +collision_node_clone_insert_element(const struct collision_node *node, + const STORE_KEY_T key, + const STORE_VALUE_T value) +{ + STORE_NODE_ELEMENT_T elements[node->element_arity + 1]; + + memcpy(elements, node->content, + STORE_NODE_ELEMENTS_SIZE(node->element_arity)); + elements[node->element_arity].key = (STORE_KEY_T)key; + elements[node->element_arity].val = (STORE_VALUE_T)value; + + return collision_node_new(elements, node->element_arity + 1); +} + +static struct collision_node * +collision_node_update(const struct collision_node *node, + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + const STORE_VALUE_T value, int *found) +{ + for (unsigned i = 0; i < node->element_arity; ++i) { + struct kv kv = node->content[i]; + if (equals(kv.key, key)) { + *found = 1; + + return collision_node_clone_update_element(node, i, + value); + } + } + + return collision_node_clone_insert_element(node, key, value); +} + +static struct node *node_update(const struct node *node, STORE_HASHFN_T(hashfn), + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + const STORE_VALUE_T value, uint32_t hash, + unsigned shift, int *found) +{ + if (shift >= HASH_TOTAL_WIDTH) + return (struct node *)collision_node_update( + (const struct collision_node *)node, equals, key, value, + found); + + const uint32_t bitpos = 1u << store_mask(hash, shift); + + if (node->branch_map & bitpos) { + const struct node *sub_node = + STORE_NODE_BRANCH_AT(node, bitpos); + struct node *new_sub_node = + node_update(sub_node, hashfn, equals, key, value, hash, + shift + HASH_PARTITION_WIDTH, found); + return node_clone_update_branch(node, bitpos, new_sub_node); + + } else if (node->element_map & bitpos) { + const STORE_KEY_T current_key = + STORE_NODE_ELEMENT_AT(node, bitpos).key; + + if (equals(current_key, key)) { + *found = 1; + return node_clone_update_element(node, bitpos, value); + + } else { + const STORE_VALUE_T current_value = + STORE_NODE_ELEMENT_AT(node, bitpos).val; + struct node *sub_node = + node_merge(hashfn(current_key), current_key, + current_value, hashfn(key), key, + value, shift + HASH_PARTITION_WIDTH); + return node_clone_pushdown(node, bitpos, sub_node); + } + + } else { + return node_clone_insert_element(node, bitpos, key, value); + } +} + +static struct node *node_clone_remove_element(const struct node *node, + uint32_t bitpos) +{ + DEBUG_NOTICE("removing element with bit 
position 0x%x\n", bitpos); + + STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH]; + const unsigned index = store_index(node->element_map, bitpos); + + memcpy(elements, STORE_NODE_ELEMENTS(node), + STORE_NODE_ELEMENTS_SIZE(index)); + memcpy(&elements[index], &STORE_NODE_ELEMENTS(node)[index + 1], + STORE_NODE_ELEMENTS_SIZE(node->element_arity - (index + 1))); + + return node_new(node->element_map & ~bitpos, node->branch_map, elements, + node->element_arity - 1, STORE_NODE_BRANCHES(node), + node->branch_arity); +} + +/* + * 'Pullup' is the inverse of pushdown. + * It's the process of 'pulling an entry up' from a branch, inlining it as an element instead. + */ +static struct node *node_clone_pullup(const struct node *node, uint32_t bitpos, + const struct kv element) +{ + STORE_NODE_BRANCH_T branches[1u << HASH_PARTITION_WIDTH]; + STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH]; + const unsigned branch_index = store_index(node->branch_map, bitpos); + const unsigned element_index = store_index(node->element_map, bitpos); + + memcpy(branches, STORE_NODE_BRANCHES(node), + STORE_NODE_BRANCHES_SIZE(branch_index)); + memcpy(&branches[branch_index], + &STORE_NODE_BRANCHES(node)[branch_index + 1], + STORE_NODE_BRANCHES_SIZE(node->branch_arity - + (branch_index + 1))); + + memcpy(elements, STORE_NODE_ELEMENTS(node), + STORE_NODE_ELEMENTS_SIZE(element_index)); + elements[element_index] = element; + memcpy(&elements[element_index + 1], + &STORE_NODE_ELEMENTS(node)[element_index], + STORE_NODE_ELEMENTS_SIZE(node->element_arity - element_index)); + + return node_new(node->element_map | bitpos, node->branch_map & ~bitpos, + elements, node->element_arity + 1, branches, + node->branch_arity - 1); +} + +static struct collision_node * +collision_node_clone_remove_element(const struct collision_node *node, + unsigned index) +{ + STORE_NODE_ELEMENT_T elements[node->element_arity - 1]; + + memcpy(elements, node->content, STORE_NODE_ELEMENTS_SIZE(index)); + memcpy(elements, &node->content[index + 1], + STORE_NODE_ELEMENTS_SIZE(node->element_arity - (index + 1))); + + return collision_node_new(elements, node->element_arity - 1); +} + +/** + * If only one element remains, the returned node will be passed up the tree - to where knowledge of hash collision + * nodes is inappropriate. In that case, this will return a normal <code>struct node *</code> instead. + * + * Consider the only(!) place where this is called: at the start of node_del, if the hash is exhausted. The returned + * value is then immediately returned to the previous call of node_del, where it is evaluated as new_sub_node of + * type struct node, and its members branch_arity and element_arity are evaluated. this requires us to have those + * members be at the exact same place in both struct node and struct collision_node. + * + * @return + */ +static struct collision_node * +collision_node_del(const struct collision_node *node, STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, int *modified) +{ + for (unsigned i = 0; i < node->element_arity; ++i) { + struct kv kv = node->content[i]; + if (equals(kv.key, key)) { + *modified = 1; + if (node->element_arity == 2) { + STORE_NODE_ELEMENT_T elements[1] = { + node->content[i ? 
0 : 1] + }; + return (struct collision_node *)node_new( + 0, 0, elements, 1, NULL, 0); + + } else { + return collision_node_clone_remove_element(node, + i); + } + } + } + + return NULL; +} + +static struct node *node_del(const struct node *node, STORE_EQUALSFN_T(equals), + const STORE_KEY_T key, uint32_t hash, + unsigned shift, int *modified) +{ + if (shift >= HASH_TOTAL_WIDTH) + return (struct node *)collision_node_del( + (const struct collision_node *)node, equals, key, + modified); + + const uint32_t bitpos = 1u << store_mask(hash, shift); + + if (node->element_map & bitpos) { + if (equals(STORE_NODE_ELEMENT_AT(node, bitpos).key, key)) { + *modified = 1; + if (node->element_arity + node->branch_arity == + 1) // only possible for the root node + return (struct node *)&empty_node; + else + return node_clone_remove_element(node, bitpos); + } else { + return NULL; // returning from node_del with *modified == 0 means abort immediately + } + + } else if (node->branch_map & bitpos) { + struct node *sub_node = STORE_NODE_BRANCH_AT(node, bitpos); + struct node *new_sub_node = + node_del(sub_node, equals, key, hash, + shift + HASH_PARTITION_WIDTH, modified); + + if (!*modified) + return NULL; // returning from node_del with *modified == 0 means abort immediately + + if (node->branch_arity + node->element_arity == + 1) { // node is a 'passthrough' + if (new_sub_node->branch_arity * 2 + + new_sub_node->element_arity == + 1) { // new_sub_node is non-canonical, propagate for inlining + new_sub_node->element_map = bitpos; + return new_sub_node; + } else { // canonical, bubble modified trie to the top + return node_clone_update_branch(node, bitpos, + new_sub_node); + } + + } else if (new_sub_node->branch_arity * 2 + + new_sub_node->element_arity == + 1) { // new_sub_node is non-canonical + const struct kv remaining_element = + STORE_NODE_ELEMENTS(new_sub_node)[0]; + node_destroy(new_sub_node); + return node_clone_pullup(node, bitpos, + remaining_element); + + } else { // both node and new_sub_node are canonical + return node_clone_update_branch(node, bitpos, + new_sub_node); + } + + } else { + return NULL; + } +} + +static struct collision_node * +collision_node_assoc(const struct collision_node *node, + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + STORE_ASSOCFN_T(fn), const void *user_data, int *found) +{ + STORE_VALUE_T new_value; + for (unsigned i = 0; i < node->element_arity; ++i) { + struct kv kv = node->content[i]; + if (equals(kv.key, key)) { + *found = 1; + STORE_VALUE_T old_value = kv.val; + new_value = fn(key, old_value, (void *)user_data); + return collision_node_clone_update_element(node, i, + new_value); + } + } + + new_value = fn((STORE_KEY_T)0, (STORE_VALUE_T)0, (void *)user_data); + return collision_node_clone_insert_element(node, key, new_value); +} + +static struct node *node_assoc(const struct node *node, STORE_HASHFN_T(hashfn), + STORE_EQUALSFN_T(equals), const STORE_KEY_T key, + STORE_ASSOCFN_T(fn), const void *user_data, + uint32_t hash, unsigned shift, int *found) +{ + if (shift >= HASH_TOTAL_WIDTH) + return (struct node *)collision_node_assoc( + (const struct collision_node *)node, equals, key, fn, + user_data, found); + + const uint32_t bitpos = 1u << store_mask(hash, shift); + + if (node->branch_map & bitpos) { + const struct node *sub_node = + STORE_NODE_BRANCH_AT(node, bitpos); + struct node *new_sub_node = + node_assoc(sub_node, hashfn, equals, key, fn, user_data, + hash, shift + HASH_PARTITION_WIDTH, found); + return node_clone_update_branch(node, bitpos, 
new_sub_node); + + } else if (node->element_map & bitpos) { + const STORE_KEY_T current_key = + STORE_NODE_ELEMENT_AT(node, bitpos).key; + + if (equals(current_key, key)) { + *found = 1; + const STORE_VALUE_T old_value = + STORE_NODE_ELEMENT_AT(node, bitpos).val; + STORE_VALUE_T new_value = + fn(key, old_value, (void *)user_data); + return node_clone_update_element(node, bitpos, + new_value); + + } else { + const STORE_VALUE_T current_value = + STORE_NODE_ELEMENT_AT(node, bitpos).val; + const STORE_VALUE_T new_value = + fn((STORE_KEY_T)0, (STORE_VALUE_T)0, + (void *)user_data); + struct node *sub_node = + node_merge(hashfn(current_key), current_key, + current_value, hash, key, new_value, + shift + HASH_PARTITION_WIDTH); + return node_clone_pushdown(node, bitpos, sub_node); + } + + } else { + const STORE_VALUE_T value = + fn((STORE_KEY_T)0, (STORE_VALUE_T)0, (void *)user_data); + return node_clone_insert_element(node, bitpos, key, value); + } +} + +static int collision_node_equals(const struct collision_node *left, + const struct collision_node *right, + STORE_EQUALSFN_T(key_equals), + STORE_VALUE_EQUALSFN_T(value_equals)) +{ + if (left == right) + return 1; + if (left->element_arity != right->element_arity) + return 0; + + for (unsigned left_i = 0; left_i < left->element_arity; ++left_i) { + struct kv left_element = STORE_NODE_ELEMENTS(left)[left_i]; + + for (unsigned right_i = 0; right_i < right->element_arity; + ++right_i) { + struct kv right_element = + STORE_NODE_ELEMENTS(right)[right_i]; + + if (key_equals(left_element.key, right_element.key) && + value_equals(left_element.val, right_element.val)) + goto found_matching_element; + } + return 0; // compared left_element to all elements in right node, no match. + + found_matching_element: + continue; + } + return 1; // compared all elements in left node, never had an element without match. 
+} + +static int node_equals(const struct node *left, const struct node *right, + STORE_EQUALSFN_T(key_equals), + STORE_VALUE_EQUALSFN_T(value_equals), unsigned shift) +{ + if (shift >= HASH_TOTAL_WIDTH) + return collision_node_equals((struct collision_node *)left, + (struct collision_node *)right, + key_equals, value_equals); + if (left == right) + return 1; + if (left->element_map != right->element_map) + return 0; + if (left->branch_map != right->branch_map) + return 0; + for (unsigned i = 0; i < left->element_arity; ++i) { + struct kv left_element = STORE_NODE_ELEMENTS(left)[i]; + struct kv right_element = STORE_NODE_ELEMENTS(right)[i]; + if (!key_equals(left_element.key, right_element.key) || + !value_equals(left_element.val, right_element.val)) + return 0; + } + for (unsigned i = 0; i < left->branch_arity; ++i) { + struct node *left_branch = STORE_NODE_BRANCHES(left)[i]; + struct node *right_branch = STORE_NODE_BRANCHES(right)[i]; + if (!node_equals(left_branch, right_branch, key_equals, + value_equals, shift + HASH_PARTITION_WIDTH)) + return 0; + } + return 1; +} + +static struct store *store_from(struct node *root, unsigned length, + STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals)) +{ + struct store *result = malloc(sizeof(*result)); + result->ref_count = 0; + result->root = root; + result->length = length; + result->hash = hash; + result->equals = equals; + return result; +} + +void store_destroy(struct store **store) +{ + DEBUG_NOTICE("destroying store@%p\n", (void *)*store); + store_node_release((*store)->root); + free(*store); + *store = NULL; +} + +struct store *store_new(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals)) +{ + return store_from((struct node *)&empty_node, 0, hash, equals); +} + +struct store *store_acquire(const struct store *store) +{ + atomic_fetch_add((uint32_t *)&store->ref_count, 1u); + DEBUG_NOTICE("ACQ %p: %d\n", (void *)store, store->ref_count); + return (struct store *)store; +} + +void store_release(struct store **store) +{ + DEBUG_NOTICE("REL %p: %d\n", (void *)*store, (*store)->ref_count - 1); + if (atomic_fetch_sub((uint32_t *)&((*store)->ref_count), 1u) == 1u) + store_destroy((struct store **)store); +} + +struct store *store_of(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals), + STORE_KEY_T *keys, STORE_VALUE_T *values, size_t length) +{ + struct store *result = store_new(hash, equals); + while (length--) { + struct store *tmp = + store_set(result, keys[length], values[length], NULL); + store_destroy(&result); + result = tmp; + } + return result; +} + +unsigned store_length(const struct store *store) +{ + return store->length; +} + +struct store *store_set(const struct store *store, const STORE_KEY_T key, + const STORE_VALUE_T value, int *replaced) +{ + const uint32_t hash = store->hash(key); + int found = 0; + int *found_p = replaced ? replaced : &found; + *found_p = 0; + struct node *new_root = store_node_acquire( + node_update(store->root, store->hash, store->equals, key, value, + hash, 0, found_p)); + return store_from(new_root, store->length + (*found_p ? 0 : 1), + store->hash, store->equals); +} + +STORE_VALUE_T store_get(const struct store *store, const STORE_KEY_T key, + int *found) +{ + uint32_t hash = store->hash(key); + int tmp = 0; + return node_get(store->root, store->equals, key, hash, 0, + found ? found : &tmp); +} + +struct store *store_del(const struct store *store, const STORE_KEY_T key, + int *modified) +{ + const uint32_t hash = store->hash(key); + int found = 0; + int *found_p = modified ? 
modified : &found; + *found_p = 0; + struct node *new_root = + node_del(store->root, store->equals, key, hash, 0, found_p); + if (!*found_p) + return (struct store *)store; + return store_from(store_node_acquire(new_root), store->length - 1, + store->hash, store->equals); +} + +struct store *store_assoc(const struct store *store, const STORE_KEY_T key, + STORE_ASSOCFN_T(fn), const void *user_data) +{ + const uint32_t hash = store->hash(key); + int found = 0; + struct node *new_root = store_node_acquire( + node_assoc(store->root, store->hash, store->equals, key, fn, + user_data, hash, 0, &found)); + return store_from(new_root, store->length + (found ? 0 : 1), + store->hash, store->equals); +} + +int store_equals(const struct store *left, const struct store *right, + STORE_VALUE_EQUALSFN_T(value_equals)) +{ + if (left == right) + return 1; + else if (store_length(left) != store_length(right)) + return 0; + else + return node_equals(left->root, right->root, left->equals, + value_equals, 0); +} + +static const char *indent(unsigned level) +{ + const char *spaces = + " "; + return spaces + 4 * (20 - level); +} + +#define iprintf(level, fmt, ...) printf("%s" fmt, indent(level), __VA_ARGS__) + +static char *format_binary(uint32_t value, char *buffer) +{ + for (char *pos = buffer + 31; pos >= buffer; --pos) { + if (value & 1u) + *pos = '1'; + else + *pos = '0'; + value = value >> 1u; + } + return buffer; +} + +static void store_node_repr(const struct node *node, const char *kp, + const char *vp, unsigned shift, unsigned i_level) +{ + if (shift >= HASH_TOTAL_WIDTH) { + iprintf(i_level, "\"collision node (omitted)\"%s", ""); + return; + } + char map_buf[33]; + printf("{\n"); + iprintf(i_level, "\"element_map\": 0b%.32s,\n", + format_binary(node->element_map, map_buf)); + iprintf(i_level, "\"element_arity\": %u,\n", node->element_arity); + iprintf(i_level, "\"branch_map\": 0b%.32s,\n", + format_binary(node->branch_map, map_buf)); + iprintf(i_level, "\"branch_arity\": %u,\n", node->branch_arity); + iprintf(i_level, "\"elements\": {\n%s", ""); + for (unsigned i = 0; i < node->element_arity; ++i) { + STORE_NODE_ELEMENT_T el = STORE_NODE_ELEMENTS(node)[i]; + iprintf(i_level + 1, "\"%s", ""); + printf(kp, el.key); + printf("\": "); + printf(vp, el.val); + printf(",\n"); + } + iprintf(i_level, "},\n%s", ""); + iprintf(i_level, "\"nodes\": [\n%s", ""); + for (unsigned i = 0; i < node->branch_arity; ++i) { + STORE_NODE_BRANCH_T n = STORE_NODE_BRANCHES(node)[i]; + iprintf(i_level + 1, "%s", ""); + store_node_repr(n, kp, vp, shift + HASH_PARTITION_WIDTH, + i_level + 2); + printf(",\n"); + } + iprintf(i_level, "],\n%s", ""); + iprintf(i_level - 1, "}%s", ""); +} + +void store_repr(const struct store *store, const char *key_prefix, + const char *value_prefix) +{ + printf("{\n"); + iprintf(1, "\"length\": %d,\n", store->length); + iprintf(1, "\"root\": %s", ""); + store_node_repr(store->root, key_prefix, value_prefix, 0, 2); + printf("\n}\n"); +} + +void store_iter_init(struct store_iter *iterator, const struct store *store) +{ + iterator->stack_level = 0; + iterator->element_cursor = 0; + iterator->element_arity = store->root->element_arity; + iterator->branch_cursor_stack[0] = 0; + iterator->branch_arity_stack[0] = store->root->branch_arity; + iterator->node_stack[0] = store->root; +} + +static void iter_push(struct store_iter *iterator, const struct node *node) +{ + iterator->stack_level += 1; + iterator->element_cursor = 0; + iterator->element_arity = node->element_arity; + 
iterator->branch_cursor_stack[iterator->stack_level] = 0; + iterator->branch_arity_stack[iterator->stack_level] = + node->branch_arity; + iterator->node_stack[iterator->stack_level] = node; +} + +static void iter_pop(struct store_iter *iterator) +{ + iterator->stack_level -= 1; +} + +int store_iter_next(struct store_iter *iterator, STORE_KEY_T *key, + STORE_VALUE_T *value) +{ + if (iterator->stack_level == -1) + return 0; + + const struct node *current_node = + iterator->node_stack[iterator->stack_level]; + unsigned *branch_cursor = + iterator->branch_cursor_stack + iterator->stack_level; + if (*branch_cursor == 0 && + iterator->element_cursor < + current_node->element_arity) { // todo: write test for this + *key = STORE_NODE_ELEMENTS( + current_node)[iterator->element_cursor] + .key; + *value = STORE_NODE_ELEMENTS( + current_node)[iterator->element_cursor] + .val; + ++iterator->element_cursor; + return 1; + + } else { + if (*branch_cursor < + iterator->branch_arity_stack[iterator->stack_level]) { + iter_push(iterator, + STORE_NODE_BRANCHES( + current_node)[*branch_cursor]); + ++*branch_cursor; + return store_iter_next(iterator, key, value); + + } else { + iter_pop(iterator); + return store_iter_next(iterator, key, value); + } + } +} |
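To round the diff off, a hedged sketch of the two remaining entry points added above: `store_assoc` used as an upsert, and the fixed-depth iterator. `bump` and `dump` are hypothetical helpers, and the int-as-pointer convention from the earlier sketch is assumed:

```c
#include <stdint.h>
#include <stdio.h>
#include "store.h"

/* Upsert via store_assoc: when the key is absent, node_assoc calls fn
 * with a null key and value, so treat that as "insert 1". (Ambiguous
 * if 0 is a legitimate key in this int-as-pointer encoding.) */
STORE_MAKE_ASSOCFN(bump, key, old_value, user_data)
{
	(void)user_data;
	if (!key)
		return (STORE_VALUE_T)1;
	return (STORE_VALUE_T)((uintptr_t)old_value + 1);
}

/* Walk all key/value pairs with the stack-allocated iterator. */
static void dump(const struct store *s)
{
	struct store_iter iter;
	STORE_KEY_T key;
	STORE_VALUE_T val;

	store_iter_init(&iter, s);
	while (store_iter_next(&iter, &key, &val))
		printf("%ld -> %ld\n", (long)key, (long)val);
}
```

The iterator's stacks are fixed at depth 8 because a 32-bit hash split into 5-bit partitions yields at most seven trie levels before the hash is exhausted, plus one slot for a collision node.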