author     Marvin Borner   2023-02-17 18:11:28 +0100
committer  Marvin Borner   2023-02-17 18:11:28 +0100
commit     9b0ddd467c52c2848efa28f5faa3f2be9b6cc384 (patch)
tree       b72fdc16953ab43232426cc150eda8bc6546957c
parent     4e106e78a98f7a241fc2681fefc0996a34207045 (diff)
Switched to CHAMP library
-rw-r--r--   inc/hamt.h     30
-rw-r--r--   inc/store.h   264
-rw-r--r--   readme.md      27
-rw-r--r--   src/hamt.c    461
-rw-r--r--   src/store.c  1135
5 files changed, 1423 insertions, 494 deletions
diff --git a/inc/hamt.h b/inc/hamt.h
deleted file mode 100644
index da5f876..0000000
--- a/inc/hamt.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef HAMT_H
-#define HAMT_H
-
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-
-typedef int (*hamt_cmp_fn)(const void *lhs, const void *rhs);
-typedef uint32_t (*hamt_key_hash_fn)(const void *key, const size_t gen);
-
-typedef struct hamt_impl *HAMT;
-
-struct hamt_allocator {
- void *(*malloc)(const size_t size);
- void *(*realloc)(void *chunk, const size_t size);
- void (*free)(void *chunk);
-};
-
-extern struct hamt_allocator hamt_allocator_default;
-
-HAMT hamt_create(hamt_key_hash_fn key_hash, hamt_cmp_fn key_cmp,
- struct hamt_allocator *ator);
-void hamt_delete(HAMT);
-
-void *hamt_get(const HAMT trie, void *key);
-HAMT hamt_pset(const HAMT trie, void *key, void *value);
-
-typedef struct hamt_iterator_impl *hamt_iterator;
-
-#endif
diff --git a/inc/store.h b/inc/store.h
new file mode 100644
index 0000000..a6df112
--- /dev/null
+++ b/inc/store.h
@@ -0,0 +1,264 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2020 Samuel Vogelsanger <vogelsangersamuel@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef STORE_STORE_H
+#define STORE_STORE_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifndef STORE_VERBOSITY
+#define STORE_VERBOSITY 0
+#endif
+
+#define DEBUG_NOTICE(fmt, ...) \
+ do { \
+ if (STORE_VERBOSITY >= 5) \
+ fprintf(stderr, "DEBUG: store: " fmt, __VA_ARGS__); \
+ } while (0)
+#define DEBUG_WARN(fmt, ...) \
+ do { \
+ if (STORE_VERBOSITY >= 4) \
+ fprintf(stderr, "DEBUG: store: " fmt, __VA_ARGS__); \
+ } while (0)
+
+#ifndef STORE_KEY_T
+#define STORE_KEY_T void *
+#endif
+
+#ifndef STORE_VALUE_T
+#define STORE_VALUE_T void *
+#endif
+
+/**
+ * These are mostly for convenience
+ */
+
+#define STORE_HASHFN_T(name) uint32_t (*name)(const STORE_KEY_T)
+#define STORE_EQUALSFN_T(name) \
+ int (*name)(const STORE_KEY_T left, const STORE_KEY_T right)
+#define STORE_ASSOCFN_T(name) \
+ STORE_VALUE_T(*name) \
+ (const STORE_KEY_T key, const STORE_VALUE_T old_value, void *user_data)
+#define STORE_VALUE_EQUALSFN_T(name) \
+ int (*name)(const STORE_VALUE_T left, const STORE_VALUE_T right)
+
+/**
+ * These macros help with defining the various callbacks. Use them like so:
+ * @code{c}
+ * STORE_MAKE_EQUALSFN(equals_int, left, right)
+ * {
+ * return left == right;
+ * }
+ * @endcode
+ */
+
+#define STORE_MAKE_HASHFN(name, arg_1) uint32_t name(const STORE_KEY_T arg_1)
+#define STORE_MAKE_EQUALSFN(name, arg_l, arg_r) \
+ int name(const STORE_KEY_T arg_l, const STORE_KEY_T arg_r)
+#define STORE_MAKE_ASSOCFN(name, key_arg, value_arg, user_data_arg) \
+ STORE_VALUE_T name(const STORE_KEY_T key_arg, \
+ const STORE_VALUE_T value_arg, void *user_data_arg)
+#define STORE_MAKE_VALUE_EQUALSFN(name, arg_l, arg_r) \
+ int name(const STORE_VALUE_T arg_l, const STORE_VALUE_T arg_r)
+
+struct store {
+ volatile uint32_t ref_count;
+ unsigned length;
+ struct node *root;
+
+ STORE_HASHFN_T(hash);
+ STORE_EQUALSFN_T(equals);
+};
+
+/**
+ * Creates a new map with the given hash and equals functions. This implementation is based on the assumption that if
+ * two keys are equal, their hashes must be equal as well. This is commonly known as the Java Hashcode contract.
+ *
+ * The reference count of a new map is zero.
+ *
+ * @param hash
+ * @param equals
+ * @return
+ */
+struct store *store_new(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals));
+
+/**
+ * Destroys a store. Doesn't clean up the stored key-value pairs.
+ *
+ * @param store
+ */
+void store_destroy(struct store **store);
+
+/**
+ * Atomically increases the reference count of a map.
+ *
+ * @param store
+ * @return
+ */
+struct store *store_acquire(const struct store *store);
+
+/**
+ * Atomically decreases the reference count of a map and calls store_destroy if it caused the count to drop to zero.
+ *
+ * In either case, the reference is then set to NULL.
+ *
+ * @param store
+ */
+void store_release(struct store **store);
+
+/**
+ * Returns the number of entries in store.
+ *
+ * @param store
+ * @return the number of entries
+ */
+unsigned store_length(const struct store *store);
+
+/**
+ * Looks up key and returns the associated value, or 0 (cast to the value type) if key is not set.
+ *
+ * @param store
+ * @param key
+ * @param found is set to 0 if key is not set
+ * @return
+ */
+STORE_VALUE_T store_get(const struct store *store, const STORE_KEY_T key,
+ int *found);
+
+/**
+ * Returns a new map derived from store but with key set to value.
+ * If replaced is not NULL, sets it to indicate if the key is present in store.
+ *
+ * Reference count of the new map is zero.
+ *
+ * @param store
+ * @param key
+ * @param value
+ * @param replaced
+ * @return a new store
+ */
+struct store *store_set(const struct store *store, const STORE_KEY_T key,
+ const STORE_VALUE_T value, int *replaced);
+
+/**
+ * Returns a new map derived from store but without a mapping for key.
+ *
+ * Reference count of the new map is zero.
+ *
+ * @param store
+ * @param key
+ * @param modified
+ * @return
+ */
+struct store *store_del(const struct store *store, const STORE_KEY_T key,
+ int *modified);
+
+/**
+ * Creates a new store with the given hash and equals functions, and inserts the given keys and values.
+ * Only the first 'length' elements from keys and values are inserted.
+ *
+ * Reference count of the new map is zero.
+ *
+ * @param hash
+ * @param equals
+ * @param keys
+ * @param values
+ * @param length
+ * @return
+ */
+struct store *store_of(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals),
+ STORE_KEY_T *keys, STORE_VALUE_T *values, size_t length);
+
+/**
+ * Returns a new map derived from store, but with key set to the return value of fn.
+ * fn is passed the key, the current value for key, and user_data.
+ * If key is not present in store, NULL is passed in place of the key and current value.
+ *
+ * Reference count of the new map is zero.
+ *
+ * @param store
+ * @param key
+ * @param fn
+ * @param user_data
+ * @return
+ */
+struct store *store_assoc(const struct store *store, const STORE_KEY_T key,
+ STORE_ASSOCFN_T(fn), const void *user_data);
+
+/**
+ * Compares two maps for equality. A lot of short-circuiting is done on the assumption that unequal hashes
+ * (for both keys and values) imply inequality. This is commonly known as the Java Hashcode contract: If two values
+ * are equal, their hashes must be equal as well.
+ *
+ * @param left
+ * @param right
+ * @return
+ */
+int store_equals(const struct store *left, const struct store *right,
+ STORE_VALUE_EQUALSFN_T(value_equals));
+
+/**
+ * An iterator for store. Meant to be put on the stack.
+ */
+struct store_iter {
+ int stack_level;
+ unsigned element_cursor;
+ unsigned element_arity;
+ unsigned branch_cursor_stack[8];
+ unsigned branch_arity_stack[8];
+ const void *node_stack[8];
+};
+
+/**
+ * Initializes an iterator with a store.
+ *
+ * Example:
+ * @code{.c}
+ * struct store_iter iter;
+ * STORE_KEY_T key;
+ * STORE_VALUE_T val;
+ *
+ * store_iter_init(&iter, store);
+ * while(store_iter_next(&iter, &key, &val)) {
+ * // do something with key and value
+ * }
+ * @endcode
+ *
+ * @param iter
+ * @param store
+ */
+void store_iter_init(struct store_iter *iter, const struct store *store);
+
+/**
+ * Advances iter and points key_receiver and value_receiver to the next pair.
+ *
+ * @param iter
+ * @param key_receiver
+ * @param value_receiver
+ * @return 0 if the end of the store has been reached
+ */
+int store_iter_next(struct store_iter *iter, STORE_KEY_T *key_receiver,
+ STORE_VALUE_T *value_receiver);
+
+#endif
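
As a point of reference (not part of this commit), a minimal usage sketch of the store API declared above could look as follows; the pointer-sized integer keys, string values, and the two callback definitions are assumptions made purely for illustration:

```c
#include <stdint.h>
#include <stdio.h>

#include "store.h"

/* Callbacks for pointer-sized integer keys (STORE_KEY_T defaults to void *). */
STORE_MAKE_HASHFN(hash_uptr, key)
{
	return (uint32_t)(uintptr_t)key;
}

STORE_MAKE_EQUALSFN(equals_uptr, left, right)
{
	return left == right;
}

int main(void)
{
	/* new stores start with a reference count of zero, so acquire them */
	struct store *a = store_acquire(store_new(hash_uptr, equals_uptr));

	/* store_set leaves `a` untouched and returns a derived map */
	struct store *b = store_acquire(store_set(a, (void *)1, "one", NULL));

	int found;
	printf("b[1] = %s\n", (char *)store_get(b, (void *)1, &found));
	printf("lengths: a=%u b=%u\n", store_length(a), store_length(b));

	/* iterate over all key/value pairs of b */
	struct store_iter iter;
	STORE_KEY_T key;
	STORE_VALUE_T val;
	store_iter_init(&iter, b);
	while (store_iter_next(&iter, &key, &val))
		printf("%lu -> %s\n", (unsigned long)(uintptr_t)key,
		       (char *)val);

	store_release(&b); /* drops the count to zero and frees b */
	store_release(&a);
	return 0;
}
```

Because derived maps share unchanged subtrees with their parents (every branch is reference-counted), releasing one store never invalidates another that is still held.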
diff --git a/readme.md b/readme.md
index 9fac6be..4c3d25d 100644
--- a/readme.md
+++ b/readme.md
@@ -1,15 +1,36 @@
# calm
-> **c**all-by-need **a**bstract **l**ambda **m**achine
+> **c**alm **a**bstract **l**ambda **m**achine
-- Strong reduction (reduction inside abstractions) of call-by-need
- lambda calculus
+- **Strong** reduction (reduction inside abstractions) of
+ **call-by-need** lambda calculus
- Originally intended as reducer of the
[`bruijn`](https://github.com/marvinborner/bruijn) programming
language
- Useful for proof assistants or as a high-level lambda-term reducer
of functional programming languages
- Based on bleeding-edge research results
+- Exponentially big normal forms of the family $e_n=λx.c_nωx$, where
+ $ω:=λx.xx$ and $c_n$ denotes the $n$th Church numeral, consume only
+  an amount of memory linear in $n$ and are computed in linear
+ time\[0\]
+
+## Why C?
+
+You might realize that the reduction transitions are of a very
+functional nature and aren't that obvious to implement in non-functional
+languages.
+
+I used C because **(1)** most functional languages use automatic memory
+management (often using periodic garbage collecting) which unnecessarily
+slows down the transitions, **(2)** there aren't that many efficient
+lambda calculus reducers in C out there, **(3)** it's a fun challenge
+and **(4)** I like C.
+
+## Libraries
+
+- [CHAMP](https://github.com/ammut/immutable-c-ollections) \[MIT\]:
+ Underrated efficient hash array mapped trie
## Research
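
To make the memory claim in the readme bullet above concrete (an illustration, not part of the commit): with $c_3 = λf.λx.f(f(f\,x))$, the term $e_3 = λx.c_3\,ω\,x$ normalizes to $λx.((xx)(xx))((xx)(xx))$. Since $c_{n+1}\,ω\,x$ reduces to $t\,t$ where $t$ is the normal form of $c_n\,ω\,x$, the printed normal form doubles in size at every step from $n$ to $n+1$, yet each level is just two copies of the previous term, so a reducer that shares subterms needs only a constant amount of additional memory per level.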
diff --git a/src/hamt.c b/src/hamt.c
deleted file mode 100644
index f9bd8b2..0000000
--- a/src/hamt.c
+++ /dev/null
@@ -1,461 +0,0 @@
-#include "hamt.h"
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* Pointer tagging */
-#define HAMT_TAG_MASK 0x3 /* last two bits */
-#define HAMT_TAG_VALUE 0x1
-#define tagged(__p) (hamt_node *)((uintptr_t)__p | HAMT_TAG_VALUE)
-#define untagged(__p) (hamt_node *)((uintptr_t)__p & ~HAMT_TAG_MASK)
-#define is_value(__p) (((uintptr_t)__p & HAMT_TAG_MASK) == HAMT_TAG_VALUE)
-
-/* Bit fiddling */
-#define index_clear_bit(_index, _n) _index & ~(1ul << _n)
-#define index_set_bit(_index, _n) _index | (1ul << _n)
-
-/* Node data structure */
-#define TABLE(a) a->as.table.ptr
-#define INDEX(a) a->as.table.index
-#define VALUE(a) a->as.kv.value
-#define KEY(a) a->as.kv.key
-
-/* Memory management */
-#define mem_alloc(ator, size) (ator)->malloc(size)
-#define mem_realloc(ator, ptr, size) (ator)->realloc(ptr, size)
-#define mem_free(ator, ptr) (ator)->free(ptr)
-/* Default allocator uses system malloc */
-struct hamt_allocator hamt_allocator_default = { malloc, realloc, free };
-
-typedef struct hamt_node {
- union {
- struct {
- void *value; /* tagged pointer */
- const void *key;
- } kv;
- struct {
- struct hamt_node *ptr;
- uint32_t index;
- } table;
- } as;
-} hamt_node;
-
-struct hamt_stats {
- size_t table_sizes[32];
-};
-
-/* Opaque user-facing implementation */
-struct hamt_impl {
- struct hamt_node *root;
- size_t size;
- hamt_key_hash_fn key_hash;
- hamt_cmp_fn key_cmp;
- struct hamt_allocator *ator;
- struct hamt_stats stats;
-};
-
-/* hash_stateing w/ state management */
-typedef struct hash_state {
- const void *key;
- hamt_key_hash_fn hash_fn;
- uint32_t hash;
- size_t depth;
- size_t shift;
-} hash_state;
-
-/* Search results */
-typedef enum {
- SEARCH_SUCCESS,
- SEARCH_FAIL_NOTFOUND,
- SEARCH_FAIL_KEYMISMATCH
-} search_status;
-
-typedef struct search_result {
- search_status status;
- hamt_node *anchor;
- hamt_node *value;
- hash_state *hash;
-} search_result;
-
-/* Removal results */
-typedef enum { REMOVE_SUCCESS, REMOVE_GATHERED, REMOVE_NOTFOUND } remove_status;
-
-typedef struct remove_result {
- remove_status status;
- void *value;
-} remove_result;
-
-typedef struct path_result {
- union {
- search_result sr;
- remove_result rr;
- } u;
- hamt_node *root;
-} path_result;
-
-static inline hash_state *hash_next(hash_state *h)
-{
- h->depth += 1;
- h->shift += 5;
- if (h->shift > 30) {
- h->hash = h->hash_fn(h->key, h->depth / 5);
- h->shift = 0;
- }
- return h;
-}
-
-static inline uint32_t hash_get_index(const hash_state *h)
-{
- return (h->hash >> h->shift) & 0x1f;
-}
-
-static int get_popcount(uint32_t n)
-{
- return __builtin_popcount(n);
-}
-
-static int get_pos(uint32_t sparse_index, uint32_t bitmap)
-{
- return get_popcount(bitmap & ((1ul << sparse_index) - 1));
-}
-
-static inline bool has_index(const hamt_node *anchor, size_t index)
-{
- assert(anchor && "anchor must not be NULL");
- assert(index < 32 && "index must not be larger than 31");
- return INDEX(anchor) & (1ul << index);
-}
-
-static hamt_node *table_allocate(struct hamt_impl *h, size_t size)
-{
- if (size)
- h->stats.table_sizes[size - 1] += 1;
- return (hamt_node *)mem_alloc(h->ator, (size * sizeof(hamt_node)));
-}
-
-static void table_free(struct hamt_impl *h, hamt_node *ptr, size_t size)
-{
- mem_free(h->ator, ptr);
- if (size)
- h->stats.table_sizes[size - 1] -= 1;
-}
-
-static hamt_node *table_extend(struct hamt_impl *h, hamt_node *anchor,
- size_t n_rows, uint32_t index, uint32_t pos)
-{
- hamt_node *new_table = table_allocate(h, n_rows + 1);
- if (!new_table)
- return NULL;
- if (n_rows > 0) {
- /* copy over table */
- memcpy(&new_table[0], &TABLE(anchor)[0],
- pos * sizeof(hamt_node));
- /* note: this works since (n_rows - pos) == 0 for cases
- * where we're adding the new k/v pair at the end (i.e. memcpy(a, b, 0)
- * is a nop) */
- memcpy(&new_table[pos + 1], &TABLE(anchor)[pos],
- (n_rows - pos) * sizeof(hamt_node));
- }
- assert(!is_value(VALUE(anchor)) && "URGS");
- table_free(h, TABLE(anchor), n_rows);
- TABLE(anchor) = new_table;
- INDEX(anchor) |= (1ul << index);
- return anchor;
-}
-
-static hamt_node *table_shrink(struct hamt_impl *h, hamt_node *anchor,
- size_t n_rows, uint32_t index, uint32_t pos)
-{
- /* debug assertions */
- assert(anchor && "Anchor cannot be NULL");
- assert(!is_value(VALUE(anchor)) &&
- "Invariant: shrinking a table requires an internal node");
-
- hamt_node *new_table = NULL;
- uint32_t new_index = 0;
- if (n_rows > 0) {
- new_table = table_allocate(h, n_rows - 1);
- if (!new_table)
- return NULL;
- new_index = INDEX(anchor) & ~(1ul << index);
- memcpy(&new_table[0], &TABLE(anchor)[0],
- pos * sizeof(hamt_node));
- memcpy(&new_table[pos], &TABLE(anchor)[pos + 1],
- (n_rows - pos - 1) * sizeof(hamt_node));
- }
- table_free(h, TABLE(anchor), n_rows);
- INDEX(anchor) = new_index;
- TABLE(anchor) = new_table;
- return anchor;
-}
-
-static hamt_node *table_gather(struct hamt_impl *h, hamt_node *anchor,
- uint32_t pos)
-{
- /* debug assertions */
- assert(anchor && "Anchor cannot be NULL");
- assert(!is_value(VALUE(anchor)) &&
- "Invariant: gathering a table requires an internal anchor");
- assert((pos == 0 || pos == 1) && "pos must be 0 or 1");
-
- int n_rows = get_popcount(INDEX(anchor));
- assert((n_rows == 2 || n_rows == 1) &&
- "Table must have size 1 or 2 to gather");
- hamt_node *table = TABLE(anchor);
- KEY(anchor) = table[pos].as.kv.key;
- VALUE(anchor) = table[pos].as.kv.value; /* already tagged */
- table_free(h, table, n_rows);
- return anchor;
-}
-
-static hamt_node *table_dup(struct hamt_impl *h, hamt_node *anchor)
-{
- int n_rows = get_popcount(INDEX(anchor));
- hamt_node *new_table = table_allocate(h, n_rows);
- if (new_table && TABLE(anchor)) {
- memcpy(&new_table[0], &TABLE(anchor)[0],
- n_rows * sizeof(hamt_node));
- }
- return new_table;
-}
-
-HAMT hamt_create(hamt_key_hash_fn key_hash, hamt_cmp_fn key_cmp,
- struct hamt_allocator *ator)
-{
- struct hamt_impl *trie = mem_alloc(ator, sizeof(struct hamt_impl));
- trie->ator = ator;
- trie->root = mem_alloc(ator, sizeof(hamt_node));
- memset(trie->root, 0, sizeof(hamt_node));
- trie->size = 0;
- trie->key_hash = key_hash;
- trie->key_cmp = key_cmp;
- // memset(trie->stats.table_sizes, 0, 32*sizeof(size_t));
- trie->stats = (struct hamt_stats){ .table_sizes = { 0 } };
- return trie;
-}
-
-static HAMT hamt_dup(HAMT h)
-{
- struct hamt_impl *trie = mem_alloc(h->ator, sizeof(struct hamt_impl));
- trie->ator = h->ator;
- trie->root = mem_alloc(h->ator, sizeof(hamt_node));
- memcpy(trie->root, h->root, sizeof(hamt_node));
- trie->size = h->size;
- trie->key_hash = h->key_hash;
- trie->key_cmp = h->key_cmp;
- memcpy(&trie->stats, &h->stats, sizeof(struct hamt_stats));
- return trie;
-}
-
-static const hamt_node *insert_kv(struct hamt_impl *h, hamt_node *anchor,
- hash_state *hash, void *key, void *value)
-{
- /* calculate position in new table */
- uint32_t ix = hash_get_index(hash);
- uint32_t new_index = INDEX(anchor) | (1ul << ix);
- int pos = get_pos(ix, new_index);
- /* extend table */
- size_t n_rows = get_popcount(INDEX(anchor));
- anchor = table_extend(h, anchor, n_rows, ix, pos);
- if (!anchor)
- return NULL;
- hamt_node *new_table = TABLE(anchor);
- /* set new k/v pair */
- new_table[pos].as.kv.key = key;
- new_table[pos].as.kv.value = tagged(value);
- /* return a pointer to the inserted k/v pair */
- return &new_table[pos];
-}
-
-static const hamt_node *insert_table(struct hamt_impl *h, hamt_node *anchor,
- hash_state *hash, void *key, void *value)
-{
- /* FIXME: check for alloc failure and bail out correctly (deleting the
- * incomplete subtree */
-
- /* Collect everything we know about the existing value */
- hash_state *x_hash =
- &(hash_state){ .key = KEY(anchor),
- .hash_fn = hash->hash_fn,
- .hash = hash->hash_fn(KEY(anchor),
- hash->depth / 5),
- .depth = hash->depth,
- .shift = hash->shift };
- void *x_value = VALUE(anchor); /* tagged (!) value ptr */
- /* increase depth until the hashes diverge, building a list
- * of tables along the way */
- hash_state *next_hash = hash_next(hash);
- hash_state *x_next_hash = hash_next(x_hash);
- uint32_t next_index = hash_get_index(next_hash);
- uint32_t x_next_index = hash_get_index(x_next_hash);
- while (x_next_index == next_index) {
- TABLE(anchor) = table_allocate(h, 1);
- INDEX(anchor) = (1ul << next_index);
- next_hash = hash_next(next_hash);
- x_next_hash = hash_next(x_next_hash);
- next_index = hash_get_index(next_hash);
- x_next_index = hash_get_index(x_next_hash);
- anchor = TABLE(anchor);
- }
- /* the hashes are different, let's allocate a table with two
- * entries to store the existing and new values */
- TABLE(anchor) = table_allocate(h, 2);
- INDEX(anchor) = (1ul << next_index) | (1ul << x_next_index);
- /* determine the proper position in the allocated table */
- int x_pos = get_pos(x_next_index, INDEX(anchor));
- int pos = get_pos(next_index, INDEX(anchor));
- /* fill in the existing value; no need to tag the value pointer
- * since it is already tagged. */
- TABLE(anchor)[x_pos].as.kv.key = (const void *)x_hash->key;
- TABLE(anchor)[x_pos].as.kv.value = x_value;
- /* fill in the new key/value pair, tagging the pointer to the
- * new value to mark it as a value ptr */
- TABLE(anchor)[pos].as.kv.key = key;
- TABLE(anchor)[pos].as.kv.value = tagged(value);
-
- return &TABLE(anchor)[pos];
-}
-
-static search_result search_recursive(struct hamt_impl *h, hamt_node *anchor,
- hash_state *hash, hamt_cmp_fn cmp_eq,
- const void *key, hamt_node *path)
-{
- assert(!is_value(VALUE(anchor)) &&
- "Invariant: path copy requires an internal node");
- hamt_node *copy = path;
- if (path) {
- /* copy the table we're pointing to */
- TABLE(copy) = table_dup(h, anchor);
- INDEX(copy) = INDEX(anchor);
- assert(!is_value(VALUE(copy)) &&
- "Copy caused a leaf/internal switch");
- } else {
- copy = anchor;
- }
-
- /* determine the expected index in table */
- uint32_t expected_index = hash_get_index(hash);
- /* check if the expected index is set */
- if (has_index(copy, expected_index)) {
- /* if yes, get the compact index to address the array */
- int pos = get_pos(expected_index, INDEX(copy));
- /* index into the table and check what type of entry we're looking at */
- hamt_node *next = &TABLE(copy)[pos];
- if (is_value(VALUE(next))) {
- if ((*cmp_eq)(key, KEY(next)) == 0) {
- /* keys match */
- search_result result = { .status =
- SEARCH_SUCCESS,
- .anchor = copy,
- .value = next,
- .hash = hash };
- return result;
- }
- /* not found: same hash but different key */
- search_result result = {
- .status = SEARCH_FAIL_KEYMISMATCH,
- .anchor = copy,
- .value = next,
- .hash = hash
- };
- return result;
- } else {
- /* For table entries, recurse to the next level */
- assert(TABLE(next) != NULL &&
- "invariant: table ptrs must not be NULL");
- return search_recursive(h, next, hash_next(hash),
- cmp_eq, key,
- path ? next : NULL);
- }
- }
- /* expected index is not set, terminate search */
- search_result result = { .status = SEARCH_FAIL_NOTFOUND,
- .anchor = copy,
- .value = NULL,
- .hash = hash };
- return result;
-}
-
-void *hamt_get(const HAMT trie, void *key)
-{
- hash_state *hash = &(hash_state){ .key = key,
- .hash_fn = trie->key_hash,
- .hash = trie->key_hash(key, 0),
- .depth = 0,
- .shift = 0 };
- search_result sr = search_recursive(trie, trie->root, hash,
- trie->key_cmp, key, NULL);
- if (sr.status == SEARCH_SUCCESS) {
- return untagged(sr.VALUE(value));
- }
- return NULL;
-}
-
-static path_result search(struct hamt_impl *h, hamt_node *anchor,
- hash_state *hash, hamt_cmp_fn cmp_eq, const void *key)
-{
- path_result pr;
- pr.root = mem_alloc(h->ator, sizeof(hamt_node));
- pr.u.sr = search_recursive(h, anchor, hash, cmp_eq, key, pr.root);
- return pr;
-}
-
-HAMT hamt_pset(HAMT h, void *key, void *value)
-{
- hash_state *hash = &(hash_state){ .key = key,
- .hash_fn = h->key_hash,
- .hash = h->key_hash(key, 0),
- .depth = 0,
- .shift = 0 };
- HAMT cp = hamt_dup(h);
- path_result pr = search(h, h->root, hash, h->key_cmp, key);
- cp->root = pr.root;
- switch (pr.u.sr.status) {
- case SEARCH_SUCCESS:
- pr.u.sr.VALUE(value) = tagged(value);
- break;
- case SEARCH_FAIL_NOTFOUND:
- if (insert_kv(h, pr.u.sr.anchor, pr.u.sr.hash, key, value) !=
- NULL) {
- cp->size += 1;
- }
- break;
- case SEARCH_FAIL_KEYMISMATCH:
- if (insert_table(h, pr.u.sr.value, pr.u.sr.hash, key, value) !=
- NULL) {
- cp->size += 1;
- }
- break;
- default:
- fprintf(stderr, "Invalid result status\n");
- }
- return cp;
-}
-
-/* delete recursively from anchor */
-static void delete_recursive(struct hamt_impl *h, hamt_node *anchor)
-{
- if (TABLE(anchor)) {
- assert(!is_value(VALUE(anchor)) &&
- "delete requires an internal node");
- size_t n = get_popcount(INDEX(anchor));
- for (size_t i = 0; i < n; ++i) {
- if (!is_value(TABLE(anchor)[i].as.kv.value)) {
- delete_recursive(h, &TABLE(anchor)[i]);
- }
- }
- table_free(h, TABLE(anchor), n);
- TABLE(anchor) = NULL;
- }
-}
-
-void hamt_delete(HAMT h)
-{
- delete_recursive(h, h->root);
- mem_free(h->ator, h->root);
- mem_free(h->ator, h);
-}
diff --git a/src/store.c b/src/store.c
new file mode 100644
index 0000000..92d7099
--- /dev/null
+++ b/src/store.c
@@ -0,0 +1,1135 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2020 Samuel Vogelsanger <vogelsangersamuel@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * All the ref-counting specific code was marked with a "//reference counting" comment. If you need to modify this to
+ * work with your own memory policy, it is recommended to start looking at those places to understand when and where
+ * memory is allocated and freed.
+ */
+
+#include <malloc.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdatomic.h> // reference counting
+#include <string.h>
+
+#include <store.h>
+
+#define store_node_debug_fmt \
+ "node{element_arity=%u, element_map=%08x, branch_arity=%u, branch_map=%08x, ref_count=%u}"
+#define store_node_debug_args(node) \
+ node->element_arity, node->element_map, node->branch_arity, \
+ node->branch_map, node->ref_count
+
+#define HASH_PARTITION_WIDTH 5u
+#define HASH_TOTAL_WIDTH (8 * sizeof(uint32_t))
+
+/*
+ * Helper functions
+ */
+
+static unsigned bitcount(uint32_t value)
+{
+ // taken from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ value = value -
+ ((value >> 1u) & 0x55555555u); // reuse input as temporary
+ value = (value & 0x33333333u) + ((value >> 2u) & 0x33333333u); // temp
+ return (((value + (value >> 4u)) & 0xF0F0F0Fu) * 0x1010101u) >>
+ 24u; // count
+}
+
+static uint32_t store_mask(uint32_t hash, unsigned shift)
+{
+ return (hash >> shift) & ((1u << HASH_PARTITION_WIDTH) - 1);
+}
+
+static unsigned store_index(uint32_t bitmap, uint32_t bitpos)
+{
+ return bitcount(bitmap & (bitpos - 1));
+}
+
+/*
+ * Data structure definitions
+ */
+
+struct kv {
+ STORE_KEY_T key;
+ STORE_VALUE_T val;
+};
+
+#define STORE_NODE_ELEMENT_T struct kv
+#define STORE_NODE_BRANCH_T struct node *
+
+struct node {
+ uint8_t element_arity;
+ uint8_t branch_arity;
+ volatile uint16_t ref_count; // reference counting
+ uint32_t element_map;
+ uint32_t branch_map;
+ STORE_NODE_ELEMENT_T content[];
+};
+
+struct collision_node {
+ uint8_t element_arity; // MUST SHARE LAYOUT WITH struct node
+ uint8_t branch_arity; // MUST SHARE LAYOUT WITH struct node
+ volatile uint16_t
+ ref_count; // MUST SHARE LAYOUT WITH struct node // reference counting
+ STORE_NODE_ELEMENT_T content[];
+};
+
+static const struct node empty_node = {
+ .branch_arity = 0,
+ .element_arity = 0,
+ .ref_count = 1,
+ .branch_map = 0,
+ .element_map = 0,
+};
+
+#define STORE_NODE_ELEMENTS(node) (node)->content
+#define STORE_NODE_BRANCHES(node) \
+ ((STORE_NODE_BRANCH_T const *)&(node)->content[(node)->element_arity])
+
+#define STORE_NODE_ELEMENTS_SIZE(length) \
+ (sizeof(STORE_NODE_ELEMENT_T) * (length))
+#define STORE_NODE_BRANCHES_SIZE(length) \
+ (sizeof(STORE_NODE_BRANCH_T) * (length))
+
+#define STORE_NODE_ELEMENT_AT(node, bitpos) \
+ STORE_NODE_ELEMENTS(node)[store_index(node->element_map, bitpos)]
+#define STORE_NODE_BRANCH_AT(node, bitpos) \
+ STORE_NODE_BRANCHES(node)[store_index(node->branch_map, bitpos)]
+
+/*
+ * static function declarations
+ */
+
+// node constructor
+static struct node *node_new(uint32_t element_map, uint32_t branch_map,
+ STORE_NODE_ELEMENT_T const *elements,
+ uint8_t element_arity,
+ STORE_NODE_BRANCH_T const *branches,
+ uint8_t branch_arity);
+
+// collision node variant
+static struct collision_node *
+collision_node_new(const STORE_NODE_ELEMENT_T *values, uint8_t element_arity);
+
+// destructor
+static void node_destroy(struct node *node);
+
+// reference counting
+static inline struct node *store_node_acquire(const struct node *node);
+
+// reference counting
+static inline void store_node_release(const struct node *node);
+
+// top-level functions
+static STORE_VALUE_T node_get(const struct node *node, STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, uint32_t hash,
+ unsigned shift, int *found);
+
+static struct node *node_update(const struct node *node, STORE_HASHFN_T(hashfn),
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ const STORE_VALUE_T value, uint32_t hash,
+ unsigned shift, int *found);
+
+static struct node *node_assoc(const struct node *node, STORE_HASHFN_T(hashfn),
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ STORE_ASSOCFN_T(fn), const void *user_data,
+ uint32_t hash, unsigned shift, int *found);
+
+static struct node *node_del(const struct node *node, STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, uint32_t hash,
+ unsigned shift, int *modified);
+
+// collision node variants
+static STORE_VALUE_T collision_node_get(const struct collision_node *node,
+ STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, int *found);
+
+static struct collision_node *
+collision_node_update(const struct collision_node *node,
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ const STORE_VALUE_T value, int *found);
+
+static struct collision_node *
+collision_node_assoc(const struct collision_node *node,
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ STORE_ASSOCFN_T(fn), const void *user_data, int *found);
+
+static struct collision_node *
+collision_node_del(const struct collision_node *node, STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, int *modified);
+
+// helper functions for creation of modified nodes
+static struct node *node_merge(uint32_t hash_l, const STORE_KEY_T key_l,
+ const STORE_VALUE_T value_l, uint32_t hash_r,
+ const STORE_KEY_T key_r,
+ const STORE_VALUE_T value_r, unsigned shift);
+
+static struct node *node_clone_pullup(const struct node *node, uint32_t bitpos,
+ const struct kv element);
+
+static struct node *node_clone_update_branch(const struct node *node,
+ uint32_t bitpos,
+ struct node *branch);
+
+static struct node *node_clone_pushdown(const struct node *node,
+ uint32_t bitpos, struct node *branch);
+
+static struct node *node_clone_insert_element(const struct node *node,
+ uint32_t bitpos,
+ const STORE_KEY_T key,
+ const STORE_VALUE_T value);
+
+static struct node *node_clone_update_element(const struct node *node,
+ uint32_t bitpos,
+ const STORE_VALUE_T value);
+
+static struct node *node_clone_remove_element(const struct node *node,
+ uint32_t bitpos);
+
+// collision node variants
+static struct collision_node *
+collision_node_clone_insert_element(const struct collision_node *node,
+ const STORE_KEY_T key,
+ const STORE_VALUE_T value);
+
+static struct collision_node *
+collision_node_clone_update_element(const struct collision_node *node,
+ unsigned index, const STORE_VALUE_T value);
+
+static struct collision_node *
+collision_node_clone_remove_element(const struct collision_node *node,
+ unsigned index);
+
+// equality
+static int node_equals(const struct node *left, const struct node *right,
+ STORE_EQUALSFN_T(key_equals),
+ STORE_VALUE_EQUALSFN_T(value_equals), unsigned shift);
+
+static int collision_node_equals(const struct collision_node *left,
+ const struct collision_node *right,
+ STORE_EQUALSFN_T(key_equals),
+ STORE_VALUE_EQUALSFN_T(value_equals));
+
+// store private constructor
+static struct store *store_from(struct node *root, unsigned length,
+ STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals));
+
+// iterator helper functions
+static void iter_push(struct store_iter *iterator, const struct node *node);
+
+static void iter_pop(struct store_iter *iterator);
+
+/*
+ * definitions
+ */
+
+static void node_destroy(struct node *node)
+{
+ DEBUG_NOTICE(" destroying " store_node_debug_fmt "@%p\n",
+ store_node_debug_args(node), (void *)node);
+
+ // reference counting
+ STORE_NODE_BRANCH_T *branches =
+ (STORE_NODE_BRANCH_T *)STORE_NODE_BRANCHES(node);
+ for (int i = 0; i < node->branch_arity; ++i) {
+ store_node_release(branches[i]);
+ }
+
+ free(node);
+}
+
+// reference counting
+static inline struct node *store_node_acquire(const struct node *node)
+{
+ if (node == &empty_node)
+ return (struct node *)node;
+ atomic_fetch_add((uint16_t *)&node->ref_count, 1u);
+ return (struct node *)node;
+}
+
+// reference counting
+static inline void store_node_release(const struct node *node)
+{
+ if (node == &empty_node)
+ return;
+ if (atomic_fetch_sub((uint16_t *)&node->ref_count, 1u) == 1)
+ node_destroy((struct node *)node);
+}
+
+/**
+ * WARNING: all branches in <code>branches</code> are "acquired", i.e. their reference count is incremented.
+ * Do not pass an "almost correct" list of branches.
+ */
+static struct node *node_new(uint32_t element_map, uint32_t branch_map,
+ STORE_NODE_ELEMENT_T const *elements,
+ uint8_t element_arity,
+ STORE_NODE_BRANCH_T const *branches,
+ uint8_t branch_arity)
+{
+ const size_t content_size = STORE_NODE_ELEMENTS_SIZE(element_arity) +
+ STORE_NODE_BRANCHES_SIZE(branch_arity);
+ struct node *result = malloc(sizeof(*result) + content_size);
+
+ result->element_arity = element_arity;
+ result->branch_arity = branch_arity;
+ result->ref_count = 0;
+ result->element_map = element_map;
+ result->branch_map = branch_map;
+
+ if (elements) {
+ memcpy(STORE_NODE_ELEMENTS(result), elements,
+ STORE_NODE_ELEMENTS_SIZE(element_arity));
+ }
+
+ STORE_NODE_BRANCH_T *branches_dest =
+ (STORE_NODE_BRANCH_T *)STORE_NODE_BRANCHES(result);
+ // reference counting
+ for (int i = 0; i < branch_arity; ++i) {
+ branches_dest[i] = store_node_acquire(branches[i]);
+ }
+
+ return result;
+}
+
+static STORE_VALUE_T collision_node_get(const struct collision_node *node,
+ STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, int *found)
+{
+ for (unsigned i = 0; i < node->element_arity; ++i) {
+ struct kv kv = node->content[i];
+ if (equals(kv.key, key)) {
+ *found = 1;
+ return kv.val;
+ }
+ }
+
+ *found = 0;
+ return (STORE_VALUE_T)0;
+}
+
+static STORE_VALUE_T node_get(const struct node *node, STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, uint32_t hash,
+ unsigned shift, int *found)
+{
+ if (shift >= HASH_TOTAL_WIDTH)
+ return collision_node_get((const struct collision_node *)node,
+ equals, key, found);
+
+ const uint32_t bitpos = 1u << store_mask(hash, shift);
+
+ if (node->branch_map & bitpos) {
+ return node_get(STORE_NODE_BRANCH_AT(node, bitpos), equals, key,
+ hash, shift + HASH_PARTITION_WIDTH, found);
+
+ } else if (node->element_map & bitpos) {
+ STORE_NODE_ELEMENT_T kv = STORE_NODE_ELEMENT_AT(node, bitpos);
+ if (equals(kv.key, key)) {
+ *found = 1;
+ return kv.val;
+ }
+ }
+
+ *found = 0;
+ return (STORE_VALUE_T)0;
+}
+
+static struct node *node_clone_insert_element(const struct node *node,
+ uint32_t bitpos,
+ const STORE_KEY_T key,
+ const STORE_VALUE_T value)
+{
+ STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH];
+ const unsigned index = store_index(node->element_map, bitpos);
+
+ // copy <element_arity> chunks in total
+ memcpy(elements, STORE_NODE_ELEMENTS(node),
+ STORE_NODE_ELEMENTS_SIZE(index)); // copy first <index> chunks
+ elements[index].key = (STORE_KEY_T)key;
+ elements[index].val = (STORE_VALUE_T)value;
+ memcpy(&elements[index + 1], // start copying into one-past-<index>
+ &STORE_NODE_ELEMENTS(node)[index], // start copying from <index>
+ STORE_NODE_ELEMENTS_SIZE(
+ node->element_arity -
+ index) // <index> chunks already copied, <element_arity> - <index> remaining
+ );
+
+ return node_new(node->element_map | bitpos, node->branch_map, elements,
+ node->element_arity + 1, STORE_NODE_BRANCHES(node),
+ node->branch_arity);
+}
+
+static struct node *node_clone_update_element(const struct node *node,
+ uint32_t bitpos,
+ const STORE_VALUE_T value)
+{
+ STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH];
+ const unsigned index = store_index(node->element_map, bitpos);
+
+ memcpy(elements, STORE_NODE_ELEMENTS(node),
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity));
+ elements[index].val = (STORE_VALUE_T)value;
+ return node_new(node->element_map, node->branch_map, elements,
+ node->element_arity, STORE_NODE_BRANCHES(node),
+ node->branch_arity);
+}
+
+static struct node *node_clone_update_branch(const struct node *node,
+ uint32_t bitpos,
+ struct node *branch)
+{
+ STORE_NODE_BRANCH_T branches[1u << HASH_PARTITION_WIDTH];
+ const unsigned index = store_index(node->branch_map, bitpos);
+
+ memcpy(branches, STORE_NODE_BRANCHES(node),
+ STORE_NODE_BRANCHES_SIZE(node->branch_arity));
+ branches[index] = branch;
+ return node_new(node->element_map, node->branch_map,
+ STORE_NODE_ELEMENTS(node), node->element_arity,
+ branches, node->branch_arity);
+}
+
+static struct node *node_clone_pushdown(const struct node *node,
+ uint32_t bitpos, struct node *branch)
+{
+ STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH];
+ STORE_NODE_BRANCH_T branches[1u << HASH_PARTITION_WIDTH];
+ const unsigned element_index = store_index(node->element_map, bitpos);
+ const unsigned branch_index = store_index(node->branch_map, bitpos);
+
+ memcpy(elements, STORE_NODE_ELEMENTS(node),
+ STORE_NODE_ELEMENTS_SIZE(element_index));
+ memcpy(&elements[element_index],
+ &STORE_NODE_ELEMENTS(node)[element_index + 1],
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity -
+ (element_index + 1)));
+
+ memcpy(branches, STORE_NODE_BRANCHES(node),
+ STORE_NODE_BRANCHES_SIZE(branch_index));
+ memcpy(&branches[branch_index + 1],
+ &STORE_NODE_BRANCHES(node)[branch_index],
+ STORE_NODE_BRANCHES_SIZE(node->branch_arity - branch_index));
+ branches[branch_index] = branch;
+
+ return node_new(node->element_map & ~bitpos, node->branch_map | bitpos,
+ elements, node->element_arity - 1, branches,
+ node->branch_arity + 1);
+}
+
+static struct collision_node *
+collision_node_new(const STORE_NODE_ELEMENT_T *values, uint8_t element_arity)
+{
+ size_t content_size = sizeof(STORE_NODE_ELEMENT_T) * element_arity;
+ struct collision_node *result = malloc(sizeof(*result) + content_size);
+
+ result->element_arity = element_arity;
+ result->branch_arity = 0;
+ result->ref_count = 0;
+
+ memcpy(result->content, values,
+ STORE_NODE_ELEMENTS_SIZE(element_arity));
+
+ return result;
+}
+
+static struct node *node_merge(uint32_t hash_l, const STORE_KEY_T key_l,
+ const STORE_VALUE_T value_l, uint32_t hash_r,
+ const STORE_KEY_T key_r,
+ const STORE_VALUE_T value_r, unsigned shift)
+{
+ uint32_t bitpos_l = 1u << store_mask(hash_l, shift);
+ uint32_t bitpos_r = 1u << store_mask(hash_r, shift);
+
+ if (shift >= HASH_TOTAL_WIDTH) {
+ STORE_NODE_ELEMENT_T elements[2];
+ elements[0].key = (STORE_KEY_T)key_l;
+ elements[0].val = (STORE_VALUE_T)value_l;
+ elements[1].key = (STORE_KEY_T)key_r;
+ elements[1].val = (STORE_VALUE_T)value_r;
+
+ return (struct node *)collision_node_new(elements, 2);
+
+ } else if (bitpos_l != bitpos_r) {
+ STORE_NODE_ELEMENT_T elements[2];
+
+ if (bitpos_l <= bitpos_r) {
+ elements[0].key = (STORE_KEY_T)key_l;
+ elements[0].val = (STORE_VALUE_T)value_l;
+ elements[1].key = (STORE_KEY_T)key_r;
+ elements[1].val = (STORE_VALUE_T)value_r;
+ } else {
+ elements[0].key = (STORE_KEY_T)key_r;
+ elements[0].val = (STORE_VALUE_T)value_r;
+ elements[1].key = (STORE_KEY_T)key_l;
+ elements[1].val = (STORE_VALUE_T)value_l;
+ }
+
+ return node_new(bitpos_l | bitpos_r, 0u, elements, 2, NULL, 0);
+
+ } else {
+ struct node *sub_node =
+ node_merge(hash_l, key_l, value_l, hash_r, key_r,
+ value_r, shift + HASH_PARTITION_WIDTH);
+
+ return node_new(0, bitpos_l, NULL, 0, &sub_node, 1);
+ }
+}
+
+static struct collision_node *
+collision_node_clone_update_element(const struct collision_node *node,
+ unsigned index, const STORE_VALUE_T value)
+{
+ STORE_NODE_ELEMENT_T elements[node->element_arity];
+
+ memcpy(elements, node->content,
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity));
+ elements[index].val = (STORE_VALUE_T)value;
+
+ return collision_node_new(elements, node->element_arity);
+}
+
+static struct collision_node *
+collision_node_clone_insert_element(const struct collision_node *node,
+ const STORE_KEY_T key,
+ const STORE_VALUE_T value)
+{
+ STORE_NODE_ELEMENT_T elements[node->element_arity + 1];
+
+ memcpy(elements, node->content,
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity));
+ elements[node->element_arity].key = (STORE_KEY_T)key;
+ elements[node->element_arity].val = (STORE_VALUE_T)value;
+
+ return collision_node_new(elements, node->element_arity + 1);
+}
+
+static struct collision_node *
+collision_node_update(const struct collision_node *node,
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ const STORE_VALUE_T value, int *found)
+{
+ for (unsigned i = 0; i < node->element_arity; ++i) {
+ struct kv kv = node->content[i];
+ if (equals(kv.key, key)) {
+ *found = 1;
+
+ return collision_node_clone_update_element(node, i,
+ value);
+ }
+ }
+
+ return collision_node_clone_insert_element(node, key, value);
+}
+
+static struct node *node_update(const struct node *node, STORE_HASHFN_T(hashfn),
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ const STORE_VALUE_T value, uint32_t hash,
+ unsigned shift, int *found)
+{
+ if (shift >= HASH_TOTAL_WIDTH)
+ return (struct node *)collision_node_update(
+ (const struct collision_node *)node, equals, key, value,
+ found);
+
+ const uint32_t bitpos = 1u << store_mask(hash, shift);
+
+ if (node->branch_map & bitpos) {
+ const struct node *sub_node =
+ STORE_NODE_BRANCH_AT(node, bitpos);
+ struct node *new_sub_node =
+ node_update(sub_node, hashfn, equals, key, value, hash,
+ shift + HASH_PARTITION_WIDTH, found);
+ return node_clone_update_branch(node, bitpos, new_sub_node);
+
+ } else if (node->element_map & bitpos) {
+ const STORE_KEY_T current_key =
+ STORE_NODE_ELEMENT_AT(node, bitpos).key;
+
+ if (equals(current_key, key)) {
+ *found = 1;
+ return node_clone_update_element(node, bitpos, value);
+
+ } else {
+ const STORE_VALUE_T current_value =
+ STORE_NODE_ELEMENT_AT(node, bitpos).val;
+ struct node *sub_node =
+ node_merge(hashfn(current_key), current_key,
+ current_value, hashfn(key), key,
+ value, shift + HASH_PARTITION_WIDTH);
+ return node_clone_pushdown(node, bitpos, sub_node);
+ }
+
+ } else {
+ return node_clone_insert_element(node, bitpos, key, value);
+ }
+}
+
+static struct node *node_clone_remove_element(const struct node *node,
+ uint32_t bitpos)
+{
+ DEBUG_NOTICE("removing element with bit position 0x%x\n", bitpos);
+
+ STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH];
+ const unsigned index = store_index(node->element_map, bitpos);
+
+ memcpy(elements, STORE_NODE_ELEMENTS(node),
+ STORE_NODE_ELEMENTS_SIZE(index));
+ memcpy(&elements[index], &STORE_NODE_ELEMENTS(node)[index + 1],
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity - (index + 1)));
+
+ return node_new(node->element_map & ~bitpos, node->branch_map, elements,
+ node->element_arity - 1, STORE_NODE_BRANCHES(node),
+ node->branch_arity);
+}
+
+/*
+ * 'Pullup' is the inverse of pushdown.
+ * It's the process of 'pulling an entry up' from a branch, inlining it as an element instead.
+ */
+static struct node *node_clone_pullup(const struct node *node, uint32_t bitpos,
+ const struct kv element)
+{
+ STORE_NODE_BRANCH_T branches[1u << HASH_PARTITION_WIDTH];
+ STORE_NODE_ELEMENT_T elements[1u << HASH_PARTITION_WIDTH];
+ const unsigned branch_index = store_index(node->branch_map, bitpos);
+ const unsigned element_index = store_index(node->element_map, bitpos);
+
+ memcpy(branches, STORE_NODE_BRANCHES(node),
+ STORE_NODE_BRANCHES_SIZE(branch_index));
+ memcpy(&branches[branch_index],
+ &STORE_NODE_BRANCHES(node)[branch_index + 1],
+ STORE_NODE_BRANCHES_SIZE(node->branch_arity -
+ (branch_index + 1)));
+
+ memcpy(elements, STORE_NODE_ELEMENTS(node),
+ STORE_NODE_ELEMENTS_SIZE(element_index));
+ elements[element_index] = element;
+ memcpy(&elements[element_index + 1],
+ &STORE_NODE_ELEMENTS(node)[element_index],
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity - element_index));
+
+ return node_new(node->element_map | bitpos, node->branch_map & ~bitpos,
+ elements, node->element_arity + 1, branches,
+ node->branch_arity - 1);
+}
+
+static struct collision_node *
+collision_node_clone_remove_element(const struct collision_node *node,
+ unsigned index)
+{
+ STORE_NODE_ELEMENT_T elements[node->element_arity - 1];
+
+ memcpy(elements, node->content, STORE_NODE_ELEMENTS_SIZE(index));
+	memcpy(&elements[index], &node->content[index + 1],
+ STORE_NODE_ELEMENTS_SIZE(node->element_arity - (index + 1)));
+
+ return collision_node_new(elements, node->element_arity - 1);
+}
+
+/**
+ * If only one element remains, the returned node will be passed up the tree - to where knowledge of hash collision
+ * nodes is inappropriate. In that case, this will return a normal <code>struct node *</code> instead.
+ *
+ * Consider the only(!) place where this is called: at the start of node_del, if the hash is exhausted. The returned
+ * value is then immediately returned to the previous call of node_del, where it is evaluated as new_sub_node of
+ * type struct node, and its members branch_arity and element_arity are evaluated. This requires us to have those
+ * members be at the exact same place in both struct node and struct collision_node.
+ *
+ * @return
+ */
+static struct collision_node *
+collision_node_del(const struct collision_node *node, STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, int *modified)
+{
+ for (unsigned i = 0; i < node->element_arity; ++i) {
+ struct kv kv = node->content[i];
+ if (equals(kv.key, key)) {
+ *modified = 1;
+ if (node->element_arity == 2) {
+ STORE_NODE_ELEMENT_T elements[1] = {
+ node->content[i ? 0 : 1]
+ };
+ return (struct collision_node *)node_new(
+ 0, 0, elements, 1, NULL, 0);
+
+ } else {
+ return collision_node_clone_remove_element(node,
+ i);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static struct node *node_del(const struct node *node, STORE_EQUALSFN_T(equals),
+ const STORE_KEY_T key, uint32_t hash,
+ unsigned shift, int *modified)
+{
+ if (shift >= HASH_TOTAL_WIDTH)
+ return (struct node *)collision_node_del(
+ (const struct collision_node *)node, equals, key,
+ modified);
+
+ const uint32_t bitpos = 1u << store_mask(hash, shift);
+
+ if (node->element_map & bitpos) {
+ if (equals(STORE_NODE_ELEMENT_AT(node, bitpos).key, key)) {
+ *modified = 1;
+ if (node->element_arity + node->branch_arity ==
+ 1) // only possible for the root node
+ return (struct node *)&empty_node;
+ else
+ return node_clone_remove_element(node, bitpos);
+ } else {
+ return NULL; // returning from node_del with *modified == 0 means abort immediately
+ }
+
+ } else if (node->branch_map & bitpos) {
+ struct node *sub_node = STORE_NODE_BRANCH_AT(node, bitpos);
+ struct node *new_sub_node =
+ node_del(sub_node, equals, key, hash,
+ shift + HASH_PARTITION_WIDTH, modified);
+
+ if (!*modified)
+ return NULL; // returning from node_del with *modified == 0 means abort immediately
+
+ if (node->branch_arity + node->element_arity ==
+ 1) { // node is a 'passthrough'
+ if (new_sub_node->branch_arity * 2 +
+ new_sub_node->element_arity ==
+ 1) { // new_sub_node is non-canonical, propagate for inlining
+ new_sub_node->element_map = bitpos;
+ return new_sub_node;
+ } else { // canonical, bubble modified trie to the top
+ return node_clone_update_branch(node, bitpos,
+ new_sub_node);
+ }
+
+ } else if (new_sub_node->branch_arity * 2 +
+ new_sub_node->element_arity ==
+ 1) { // new_sub_node is non-canonical
+ const struct kv remaining_element =
+ STORE_NODE_ELEMENTS(new_sub_node)[0];
+ node_destroy(new_sub_node);
+ return node_clone_pullup(node, bitpos,
+ remaining_element);
+
+ } else { // both node and new_sub_node are canonical
+ return node_clone_update_branch(node, bitpos,
+ new_sub_node);
+ }
+
+ } else {
+ return NULL;
+ }
+}
+
+static struct collision_node *
+collision_node_assoc(const struct collision_node *node,
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ STORE_ASSOCFN_T(fn), const void *user_data, int *found)
+{
+ STORE_VALUE_T new_value;
+ for (unsigned i = 0; i < node->element_arity; ++i) {
+ struct kv kv = node->content[i];
+ if (equals(kv.key, key)) {
+ *found = 1;
+ STORE_VALUE_T old_value = kv.val;
+ new_value = fn(key, old_value, (void *)user_data);
+ return collision_node_clone_update_element(node, i,
+ new_value);
+ }
+ }
+
+ new_value = fn((STORE_KEY_T)0, (STORE_VALUE_T)0, (void *)user_data);
+ return collision_node_clone_insert_element(node, key, new_value);
+}
+
+static struct node *node_assoc(const struct node *node, STORE_HASHFN_T(hashfn),
+ STORE_EQUALSFN_T(equals), const STORE_KEY_T key,
+ STORE_ASSOCFN_T(fn), const void *user_data,
+ uint32_t hash, unsigned shift, int *found)
+{
+ if (shift >= HASH_TOTAL_WIDTH)
+ return (struct node *)collision_node_assoc(
+ (const struct collision_node *)node, equals, key, fn,
+ user_data, found);
+
+ const uint32_t bitpos = 1u << store_mask(hash, shift);
+
+ if (node->branch_map & bitpos) {
+ const struct node *sub_node =
+ STORE_NODE_BRANCH_AT(node, bitpos);
+ struct node *new_sub_node =
+ node_assoc(sub_node, hashfn, equals, key, fn, user_data,
+ hash, shift + HASH_PARTITION_WIDTH, found);
+ return node_clone_update_branch(node, bitpos, new_sub_node);
+
+ } else if (node->element_map & bitpos) {
+ const STORE_KEY_T current_key =
+ STORE_NODE_ELEMENT_AT(node, bitpos).key;
+
+ if (equals(current_key, key)) {
+ *found = 1;
+ const STORE_VALUE_T old_value =
+ STORE_NODE_ELEMENT_AT(node, bitpos).val;
+ STORE_VALUE_T new_value =
+ fn(key, old_value, (void *)user_data);
+ return node_clone_update_element(node, bitpos,
+ new_value);
+
+ } else {
+ const STORE_VALUE_T current_value =
+ STORE_NODE_ELEMENT_AT(node, bitpos).val;
+ const STORE_VALUE_T new_value =
+ fn((STORE_KEY_T)0, (STORE_VALUE_T)0,
+ (void *)user_data);
+ struct node *sub_node =
+ node_merge(hashfn(current_key), current_key,
+ current_value, hash, key, new_value,
+ shift + HASH_PARTITION_WIDTH);
+ return node_clone_pushdown(node, bitpos, sub_node);
+ }
+
+ } else {
+ const STORE_VALUE_T value =
+ fn((STORE_KEY_T)0, (STORE_VALUE_T)0, (void *)user_data);
+ return node_clone_insert_element(node, bitpos, key, value);
+ }
+}
+
+static int collision_node_equals(const struct collision_node *left,
+ const struct collision_node *right,
+ STORE_EQUALSFN_T(key_equals),
+ STORE_VALUE_EQUALSFN_T(value_equals))
+{
+ if (left == right)
+ return 1;
+ if (left->element_arity != right->element_arity)
+ return 0;
+
+ for (unsigned left_i = 0; left_i < left->element_arity; ++left_i) {
+ struct kv left_element = STORE_NODE_ELEMENTS(left)[left_i];
+
+ for (unsigned right_i = 0; right_i < right->element_arity;
+ ++right_i) {
+ struct kv right_element =
+ STORE_NODE_ELEMENTS(right)[right_i];
+
+ if (key_equals(left_element.key, right_element.key) &&
+ value_equals(left_element.val, right_element.val))
+ goto found_matching_element;
+ }
+ return 0; // compared left_element to all elements in right node, no match.
+
+ found_matching_element:
+ continue;
+ }
+ return 1; // compared all elements in left node, never had an element without match.
+}
+
+static int node_equals(const struct node *left, const struct node *right,
+ STORE_EQUALSFN_T(key_equals),
+ STORE_VALUE_EQUALSFN_T(value_equals), unsigned shift)
+{
+ if (shift >= HASH_TOTAL_WIDTH)
+ return collision_node_equals((struct collision_node *)left,
+ (struct collision_node *)right,
+ key_equals, value_equals);
+ if (left == right)
+ return 1;
+ if (left->element_map != right->element_map)
+ return 0;
+ if (left->branch_map != right->branch_map)
+ return 0;
+ for (unsigned i = 0; i < left->element_arity; ++i) {
+ struct kv left_element = STORE_NODE_ELEMENTS(left)[i];
+ struct kv right_element = STORE_NODE_ELEMENTS(right)[i];
+ if (!key_equals(left_element.key, right_element.key) ||
+ !value_equals(left_element.val, right_element.val))
+ return 0;
+ }
+ for (unsigned i = 0; i < left->branch_arity; ++i) {
+ struct node *left_branch = STORE_NODE_BRANCHES(left)[i];
+ struct node *right_branch = STORE_NODE_BRANCHES(right)[i];
+ if (!node_equals(left_branch, right_branch, key_equals,
+ value_equals, shift + HASH_PARTITION_WIDTH))
+ return 0;
+ }
+ return 1;
+}
+
+static struct store *store_from(struct node *root, unsigned length,
+ STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals))
+{
+ struct store *result = malloc(sizeof(*result));
+ result->ref_count = 0;
+ result->root = root;
+ result->length = length;
+ result->hash = hash;
+ result->equals = equals;
+ return result;
+}
+
+void store_destroy(struct store **store)
+{
+ DEBUG_NOTICE("destroying store@%p\n", (void *)*store);
+ store_node_release((*store)->root);
+ free(*store);
+ *store = NULL;
+}
+
+struct store *store_new(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals))
+{
+ return store_from((struct node *)&empty_node, 0, hash, equals);
+}
+
+struct store *store_acquire(const struct store *store)
+{
+ atomic_fetch_add((uint32_t *)&store->ref_count, 1u);
+ DEBUG_NOTICE("ACQ %p: %d\n", (void *)store, store->ref_count);
+ return (struct store *)store;
+}
+
+void store_release(struct store **store)
+{
+ DEBUG_NOTICE("REL %p: %d\n", (void *)*store, (*store)->ref_count - 1);
+ if (atomic_fetch_sub((uint32_t *)&((*store)->ref_count), 1u) == 1u)
+ store_destroy((struct store **)store);
+}
+
+struct store *store_of(STORE_HASHFN_T(hash), STORE_EQUALSFN_T(equals),
+ STORE_KEY_T *keys, STORE_VALUE_T *values, size_t length)
+{
+ struct store *result = store_new(hash, equals);
+ while (length--) {
+ struct store *tmp =
+ store_set(result, keys[length], values[length], NULL);
+ store_destroy(&result);
+ result = tmp;
+ }
+ return result;
+}
+
+unsigned store_length(const struct store *store)
+{
+ return store->length;
+}
+
+struct store *store_set(const struct store *store, const STORE_KEY_T key,
+ const STORE_VALUE_T value, int *replaced)
+{
+ const uint32_t hash = store->hash(key);
+ int found = 0;
+ int *found_p = replaced ? replaced : &found;
+ *found_p = 0;
+ struct node *new_root = store_node_acquire(
+ node_update(store->root, store->hash, store->equals, key, value,
+ hash, 0, found_p));
+ return store_from(new_root, store->length + (*found_p ? 0 : 1),
+ store->hash, store->equals);
+}
+
+STORE_VALUE_T store_get(const struct store *store, const STORE_KEY_T key,
+ int *found)
+{
+ uint32_t hash = store->hash(key);
+ int tmp = 0;
+ return node_get(store->root, store->equals, key, hash, 0,
+ found ? found : &tmp);
+}
+
+struct store *store_del(const struct store *store, const STORE_KEY_T key,
+ int *modified)
+{
+ const uint32_t hash = store->hash(key);
+ int found = 0;
+ int *found_p = modified ? modified : &found;
+ *found_p = 0;
+ struct node *new_root =
+ node_del(store->root, store->equals, key, hash, 0, found_p);
+ if (!*found_p)
+ return (struct store *)store;
+ return store_from(store_node_acquire(new_root), store->length - 1,
+ store->hash, store->equals);
+}
+
+struct store *store_assoc(const struct store *store, const STORE_KEY_T key,
+ STORE_ASSOCFN_T(fn), const void *user_data)
+{
+ const uint32_t hash = store->hash(key);
+ int found = 0;
+ struct node *new_root = store_node_acquire(
+ node_assoc(store->root, store->hash, store->equals, key, fn,
+ user_data, hash, 0, &found));
+ return store_from(new_root, store->length + (found ? 0 : 1),
+ store->hash, store->equals);
+}
+
+int store_equals(const struct store *left, const struct store *right,
+ STORE_VALUE_EQUALSFN_T(value_equals))
+{
+ if (left == right)
+ return 1;
+ else if (store_length(left) != store_length(right))
+ return 0;
+ else
+ return node_equals(left->root, right->root, left->equals,
+ value_equals, 0);
+}
+
+static const char *indent(unsigned level)
+{
+ const char *spaces =
+ " ";
+ return spaces + 4 * (20 - level);
+}
+
+#define iprintf(level, fmt, ...) printf("%s" fmt, indent(level), __VA_ARGS__)
+
+static char *format_binary(uint32_t value, char *buffer)
+{
+ for (char *pos = buffer + 31; pos >= buffer; --pos) {
+ if (value & 1u)
+ *pos = '1';
+ else
+ *pos = '0';
+ value = value >> 1u;
+ }
+ return buffer;
+}
+
+static void store_node_repr(const struct node *node, const char *kp,
+ const char *vp, unsigned shift, unsigned i_level)
+{
+ if (shift >= HASH_TOTAL_WIDTH) {
+ iprintf(i_level, "\"collision node (omitted)\"%s", "");
+ return;
+ }
+ char map_buf[33];
+ printf("{\n");
+ iprintf(i_level, "\"element_map\": 0b%.32s,\n",
+ format_binary(node->element_map, map_buf));
+ iprintf(i_level, "\"element_arity\": %u,\n", node->element_arity);
+ iprintf(i_level, "\"branch_map\": 0b%.32s,\n",
+ format_binary(node->branch_map, map_buf));
+ iprintf(i_level, "\"branch_arity\": %u,\n", node->branch_arity);
+ iprintf(i_level, "\"elements\": {\n%s", "");
+ for (unsigned i = 0; i < node->element_arity; ++i) {
+ STORE_NODE_ELEMENT_T el = STORE_NODE_ELEMENTS(node)[i];
+ iprintf(i_level + 1, "\"%s", "");
+ printf(kp, el.key);
+ printf("\": ");
+ printf(vp, el.val);
+ printf(",\n");
+ }
+ iprintf(i_level, "},\n%s", "");
+ iprintf(i_level, "\"nodes\": [\n%s", "");
+ for (unsigned i = 0; i < node->branch_arity; ++i) {
+ STORE_NODE_BRANCH_T n = STORE_NODE_BRANCHES(node)[i];
+ iprintf(i_level + 1, "%s", "");
+ store_node_repr(n, kp, vp, shift + HASH_PARTITION_WIDTH,
+ i_level + 2);
+ printf(",\n");
+ }
+ iprintf(i_level, "],\n%s", "");
+ iprintf(i_level - 1, "}%s", "");
+}
+
+void store_repr(const struct store *store, const char *key_prefix,
+ const char *value_prefix)
+{
+ printf("{\n");
+ iprintf(1, "\"length\": %d,\n", store->length);
+ iprintf(1, "\"root\": %s", "");
+ store_node_repr(store->root, key_prefix, value_prefix, 0, 2);
+ printf("\n}\n");
+}
+
+void store_iter_init(struct store_iter *iterator, const struct store *store)
+{
+ iterator->stack_level = 0;
+ iterator->element_cursor = 0;
+ iterator->element_arity = store->root->element_arity;
+ iterator->branch_cursor_stack[0] = 0;
+ iterator->branch_arity_stack[0] = store->root->branch_arity;
+ iterator->node_stack[0] = store->root;
+}
+
+static void iter_push(struct store_iter *iterator, const struct node *node)
+{
+ iterator->stack_level += 1;
+ iterator->element_cursor = 0;
+ iterator->element_arity = node->element_arity;
+ iterator->branch_cursor_stack[iterator->stack_level] = 0;
+ iterator->branch_arity_stack[iterator->stack_level] =
+ node->branch_arity;
+ iterator->node_stack[iterator->stack_level] = node;
+}
+
+static void iter_pop(struct store_iter *iterator)
+{
+ iterator->stack_level -= 1;
+}
+
+int store_iter_next(struct store_iter *iterator, STORE_KEY_T *key,
+ STORE_VALUE_T *value)
+{
+ if (iterator->stack_level == -1)
+ return 0;
+
+ const struct node *current_node =
+ iterator->node_stack[iterator->stack_level];
+ unsigned *branch_cursor =
+ iterator->branch_cursor_stack + iterator->stack_level;
+ if (*branch_cursor == 0 &&
+ iterator->element_cursor <
+ current_node->element_arity) { // todo: write test for this
+ *key = STORE_NODE_ELEMENTS(
+ current_node)[iterator->element_cursor]
+ .key;
+ *value = STORE_NODE_ELEMENTS(
+ current_node)[iterator->element_cursor]
+ .val;
+ ++iterator->element_cursor;
+ return 1;
+
+ } else {
+ if (*branch_cursor <
+ iterator->branch_arity_stack[iterator->stack_level]) {
+ iter_push(iterator,
+ STORE_NODE_BRANCHES(
+ current_node)[*branch_cursor]);
+ ++*branch_cursor;
+ return store_iter_next(iterator, key, value);
+
+ } else {
+ iter_pop(iterator);
+ return store_iter_next(iterator, key, value);
+ }
+ }
+}
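
The node lookups above all rely on the same bitmap trick (store_mask, store_index, STORE_NODE_ELEMENT_AT / STORE_NODE_BRANCH_AT): five hash bits select one of 32 conceptual slots, and a popcount over the map bits below that slot gives the position in the packed content array. A small standalone sketch of that computation, with made-up numbers and not part of the commit:

```c
#include <assert.h>
#include <stdint.h>

/* same popcount-based index computation as store_index() above
 * (__builtin_popcount is the GCC/Clang builtin already used in this repo) */
static unsigned compact_index(uint32_t bitmap, uint32_t bitpos)
{
	return (unsigned)__builtin_popcount(bitmap & (bitpos - 1));
}

int main(void)
{
	const unsigned partition_width = 5; /* HASH_PARTITION_WIDTH */
	uint32_t hash = 0x2a;               /* example hash, 0b101010 */
	unsigned shift = 0;                 /* root level of the trie */

	/* store_mask: take the next 5 hash bits -> conceptual slot 10 of 32 */
	uint32_t mask = (hash >> shift) & ((1u << partition_width) - 1);
	uint32_t bitpos = 1u << mask;

	/* a node holding elements in conceptual slots 3, 10 and 17 */
	uint32_t element_map = (1u << 3) | (1u << 10) | (1u << 17);
	assert(element_map & bitpos); /* slot 10 is populated */

	/* only populated slots are stored, so the entry for slot 10 sits at
	 * position popcount(bits below 10) = 1 of the packed content array */
	assert(compact_index(element_map, bitpos) == 1);

	/* descending one level would continue with the next 5 hash bits */
	shift += partition_width;
	(void)shift;
	return 0;
}
```

With five bits consumed per level and 32-bit hashes, the trie is at most seven levels deep before `shift >= HASH_TOTAL_WIDTH` and the collision-node path takes over.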