4 files changed, 678 insertions, 1 deletions
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..be8e369
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,135 @@
+//-----------------------------------------------------------------------------
+// xxHash Library
+// Copyright (c) 2012-2021 Yann Collet
+// Copyright (c) 2023 Marvin Borner
+// All rights reserved.
+//
+// BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+//
+// xxHash3
+//-----------------------------------------------------------------------------
+
+#include <string.h>
+
+#include <hash.h>
+
+#define XXH_PRIME_1 11400714785074694791ULL
+#define XXH_PRIME_2 14029467366897019727ULL
+#define XXH_PRIME_3 1609587929392839161ULL
+#define XXH_PRIME_4 9650029242287828579ULL
+#define XXH_PRIME_5 2870177450012600261ULL
+
+static uint64_t XXH_read64(void *memptr)
+{
+	uint64_t val;
+	memcpy(&val, memptr, sizeof(val));
+	return val;
+}
+
+static uint32_t XXH_read32(void *memptr)
+{
+	uint32_t val;
+	memcpy(&val, memptr, sizeof(val));
+	return val;
+}
+
+static uint64_t XXH_rotl64(uint64_t x, int r)
+{
+	return (x << r) | (x >> (64 - r));
+}
+
+hash_t hash(void *data, size_t len, uint64_t seed)
+{
+	uint8_t *p = (uint8_t *)data;
+	uint8_t *end = p + len;
+	uint64_t h64;
+
+	if (len >= 32) {
+		uint8_t *limit = end - 32;
+		uint64_t v1 = seed + XXH_PRIME_1 + XXH_PRIME_2;
+		uint64_t v2 = seed + XXH_PRIME_2;
+		uint64_t v3 = seed + 0;
+		uint64_t v4 = seed - XXH_PRIME_1;
+
+		do {
+			v1 += XXH_read64(p) * XXH_PRIME_2;
+			v1 = XXH_rotl64(v1, 31);
+			v1 *= XXH_PRIME_1;
+
+			v2 += XXH_read64(p + 8) * XXH_PRIME_2;
+			v2 = XXH_rotl64(v2, 31);
+			v2 *= XXH_PRIME_1;
+
+			v3 += XXH_read64(p + 16) * XXH_PRIME_2;
+			v3 = XXH_rotl64(v3, 31);
+			v3 *= XXH_PRIME_1;
+
+			v4 += XXH_read64(p + 24) * XXH_PRIME_2;
+			v4 = XXH_rotl64(v4, 31);
+			v4 *= XXH_PRIME_1;
+
+			p += 32;
+		} while (p <= limit);
+
+		h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) +
+		      XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+		v1 *= XXH_PRIME_2;
+		v1 = XXH_rotl64(v1, 31);
+		v1 *= XXH_PRIME_1;
+		h64 ^= v1;
+		h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+
+		v2 *= XXH_PRIME_2;
+		v2 = XXH_rotl64(v2, 31);
+		v2 *= XXH_PRIME_1;
+		h64 ^= v2;
+		h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+
+		v3 *= XXH_PRIME_2;
+		v3 = XXH_rotl64(v3, 31);
+		v3 *= XXH_PRIME_1;
+		h64 ^= v3;
+		h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+
+		v4 *= XXH_PRIME_2;
+		v4 = XXH_rotl64(v4, 31);
+		v4 *= XXH_PRIME_1;
+		h64 ^= v4;
+		h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+	} else {
+		h64 = seed + XXH_PRIME_5;
+	}
+
+	h64 += (uint64_t)len;
+
+	while (p + 8 <= end) {
+		uint64_t k1 = XXH_read64(p);
+		k1 *= XXH_PRIME_2;
+		k1 = XXH_rotl64(k1, 31);
+		k1 *= XXH_PRIME_1;
+		h64 ^= k1;
+		h64 = XXH_rotl64(h64, 27) * XXH_PRIME_1 + XXH_PRIME_4;
+		p += 8;
+	}
+
+	if (p + 4 <= end) {
+		h64 ^= (uint64_t)(XXH_read32(p)) * XXH_PRIME_1;
+		h64 = XXH_rotl64(h64, 23) * XXH_PRIME_2 + XXH_PRIME_3;
+		p += 4;
+	}
+
+	while (p < end) {
+		h64 ^= (*p) * XXH_PRIME_5;
+		h64 = XXH_rotl64(h64, 11) * XXH_PRIME_1;
+		p++;
+	}
+
+	h64 ^= h64 >> 33;
+	h64 *= XXH_PRIME_2;
+	h64 ^= h64 >> 29;
+	h64 *= XXH_PRIME_3;
+	h64 ^= h64 >> 32;
+
+	return h64;
+}
diff --git a/src/hashmap.c b/src/hashmap.c
new file mode 100644
index 0000000..94b170d
--- /dev/null
+++ b/src/hashmap.c
@@ -0,0 +1,327 @@
+// Copyright 2020 Joshua J Baker. All rights reserved.
+// Copyright 2023 Marvin Borner
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file.
+
+// This is a fork of tidwall's hashmap. It's heavily reduced and adapted.
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#include <hashmap.h>
+#include <hash.h>
+
+#define GROW_AT 0.60
+#define SHRINK_AT 0.10
+
+struct bucket {
+	uint64_t hash : 48;
+	uint64_t dib : 16;
+};
+
+struct hashmap {
+	size_t elsize;
+	size_t cap;
+	void (*elfree)(void *item);
+	size_t bucketsz;
+	size_t nbuckets;
+	size_t count;
+	size_t mask;
+	size_t growat;
+	size_t shrinkat;
+	uint8_t growpower;
+	bool oom;
+	void *buckets;
+	void *spare;
+	void *edata;
+};
+
+void hashmap_set_grow_by_power(struct hashmap *map, size_t power)
+{
+	map->growpower = power < 1 ? 1 : power > 16 ? 16 : power;
+}
+
+static struct bucket *bucket_at0(void *buckets, size_t bucketsz, size_t i)
+{
+	return (struct bucket *)(((char *)buckets) + (bucketsz * i));
+}
+
+static struct bucket *bucket_at(struct hashmap *map, size_t index)
+{
+	return bucket_at0(map->buckets, map->bucketsz, index);
+}
+
+static void *bucket_item(struct bucket *entry)
+{
+	return ((char *)entry) + sizeof(struct bucket);
+}
+
+static uint64_t clip_hash(uint64_t hash)
+{
+	return hash & 0xFFFFFFFFFFFF;
+}
+
+struct hashmap *hashmap_new(size_t elsize, size_t cap,
+			    void (*elfree)(void *item))
+{
+	size_t ncap = 16;
+	if (cap < ncap) {
+		cap = ncap;
+	} else {
+		while (ncap < cap) {
+			ncap *= 2;
+		}
+		cap = ncap;
+	}
+	size_t bucketsz = sizeof(struct bucket) + elsize;
+	while (bucketsz & (sizeof(uintptr_t) - 1)) {
+		bucketsz++;
+	}
+	// hashmap + spare + edata
+	size_t size = sizeof(struct hashmap) + bucketsz * 2;
+	struct hashmap *map = malloc(size);
+	if (!map) {
+		return NULL;
+	}
+	memset(map, 0, sizeof(struct hashmap));
+	map->elsize = elsize;
+	map->bucketsz = bucketsz;
+	map->elfree = elfree;
+	map->spare = ((char *)map) + sizeof(struct hashmap);
+	map->edata = (char *)map->spare + bucketsz;
+	map->cap = cap;
+	map->nbuckets = cap;
+	map->mask = map->nbuckets - 1;
+	map->buckets = malloc(map->bucketsz * map->nbuckets);
+	if (!map->buckets) {
+		free(map);
+		return NULL;
+	}
+	memset(map->buckets, 0, map->bucketsz * map->nbuckets);
+	map->growpower = 1;
+	map->growat = map->nbuckets * GROW_AT;
+	map->shrinkat = map->nbuckets * SHRINK_AT;
+	return map;
+}
+
+static void free_elements(struct hashmap *map)
+{
+	if (!map->elfree)
+		return;
+
+	for (size_t i = 0; i < map->nbuckets; i++) {
+		struct bucket *bucket = bucket_at(map, i);
+		if (bucket->dib)
+			map->elfree(bucket_item(bucket));
+	}
+}
+
+void hashmap_clear(struct hashmap *map, bool update_cap)
+{
+	map->count = 0;
+	free_elements(map);
+	if (update_cap) {
+		map->cap = map->nbuckets;
+	} else if (map->nbuckets != map->cap) {
+		void *new_buckets = malloc(map->bucketsz * map->cap);
+		if (new_buckets) {
+			free(map->buckets);
+			map->buckets = new_buckets;
+		}
+		map->nbuckets = map->cap;
+	}
+	memset(map->buckets, 0, map->bucketsz * map->nbuckets);
+	map->mask = map->nbuckets - 1;
+	map->growat = map->nbuckets * 0.75;
+	map->shrinkat = map->nbuckets * 0.10;
+}
+
+static bool resize0(struct hashmap *map, size_t new_cap)
+{
+	struct hashmap *map2 = hashmap_new(map->elsize, new_cap, map->elfree);
+	if (!map2)
+		return false;
+	for (size_t i = 0; i < map->nbuckets; i++) {
+		struct bucket *entry = bucket_at(map, i);
+		if (!entry->dib) {
+			continue;
+		}
+		entry->dib = 1;
+		size_t j = entry->hash & map2->mask;
+		while (1) {
+			struct bucket *bucket = bucket_at(map2, j);
+			if (bucket->dib == 0) {
+				memcpy(bucket, entry, map->bucketsz);
+				break;
+			}
+			if (bucket->dib < entry->dib) {
+				memcpy(map2->spare, bucket, map->bucketsz);
+				memcpy(bucket, entry, map->bucketsz);
+				memcpy(entry, map2->spare, map->bucketsz);
+			}
+			j = (j + 1) & map2->mask;
+			entry->dib += 1;
+		}
+	}
+	free(map->buckets);
+	map->buckets = map2->buckets;
+	map->nbuckets = map2->nbuckets;
+	map->mask = map2->mask;
+	map->growat = map2->growat;
+	map->shrinkat = map2->shrinkat;
+	free(map2);
+	return true;
+}
+
+static bool resize(struct hashmap *map, size_t new_cap)
+{
+	return resize0(map, new_cap);
+}
+
+void *hashmap_set(struct hashmap *map, void *item, uint64_t hash)
+{
+	hash = clip_hash(hash);
+	map->oom = false;
+	if (map->count == map->growat) {
+		if (!resize(map, map->nbuckets * (1 << map->growpower))) {
+			map->oom = true;
+			return NULL;
+		}
+	}
+
+	struct bucket *entry = map->edata;
+	entry->hash = hash;
+	entry->dib = 1;
+	void *eitem = bucket_item(entry);
+	memcpy(eitem, item, map->elsize);
+
+	void *bitem;
+	size_t i = entry->hash & map->mask;
+	while (1) {
+		struct bucket *bucket = bucket_at(map, i);
+		if (bucket->dib == 0) {
+			memcpy(bucket, entry, map->bucketsz);
+			map->count++;
+			return NULL;
+		}
+		bitem = bucket_item(bucket);
+		if (entry->hash == bucket->hash) {
+			memcpy(map->spare, bitem, map->elsize);
+			memcpy(bitem, eitem, map->elsize);
+			return map->spare;
+		}
+		if (bucket->dib < entry->dib) {
+			memcpy(map->spare, bucket, map->bucketsz);
+			memcpy(bucket, entry, map->bucketsz);
+			memcpy(entry, map->spare, map->bucketsz);
+			eitem = bucket_item(entry);
+		}
+		i = (i + 1) & map->mask;
+		entry->dib += 1;
+	}
+}
+
+void *hashmap_get(struct hashmap *map, uint64_t hash)
+{
+	hash = clip_hash(hash);
+	size_t i = hash & map->mask;
+	while (1) {
+		struct bucket *bucket = bucket_at(map, i);
+		if (!bucket->dib)
+			return NULL;
+		if (bucket->hash == hash)
+			return bucket_item(bucket);
+		i = (i + 1) & map->mask;
+	}
+}
+
+void *hashmap_probe(struct hashmap *map, uint64_t position)
+{
+	size_t i = position & map->mask;
+	struct bucket *bucket = bucket_at(map, i);
+	if (!bucket->dib) {
+		return NULL;
+	}
+	return bucket_item(bucket);
+}
+
+void *hashmap_delete(struct hashmap *map, uint64_t hash)
+{
+	hash = clip_hash(hash);
+	map->oom = false;
+	size_t i = hash & map->mask;
+	while (1) {
+		struct bucket *bucket = bucket_at(map, i);
+		if (!bucket->dib)
+			return NULL;
+		void *bitem = bucket_item(bucket);
+		if (bucket->hash == hash) {
+			memcpy(map->spare, bitem, map->elsize);
+			bucket->dib = 0;
+			while (1) {
+				struct bucket *prev = bucket;
+				i = (i + 1) & map->mask;
+				bucket = bucket_at(map, i);
+				if (bucket->dib <= 1) {
+					prev->dib = 0;
+					break;
+				}
+				memcpy(prev, bucket, map->bucketsz);
+				prev->dib--;
+			}
+			map->count--;
+			if (map->nbuckets > map->cap &&
+			    map->count <= map->shrinkat) {
+				resize(map, map->nbuckets / 2);
+			}
+			return map->spare;
+		}
+		i = (i + 1) & map->mask;
+	}
+}
+
+size_t hashmap_count(struct hashmap *map)
+{
+	return map->count;
+}
+
+void hashmap_free(struct hashmap *map)
+{
+	if (!map)
+		return;
+	free_elements(map);
+	free(map->buckets);
+	free(map);
+}
+
+bool hashmap_oom(struct hashmap *map)
+{
+	return map->oom;
+}
+
+bool hashmap_scan(struct hashmap *map, bool (*iter)(void *item))
+{
+	for (size_t i = 0; i < map->nbuckets; i++) {
+		struct bucket *bucket = bucket_at(map, i);
+		if (bucket->dib && !iter(bucket_item(bucket))) {
+			return false;
+		}
+	}
+	return true;
+}
+
+bool hashmap_iter(struct hashmap *map, size_t *i, void **item)
+{
+	struct bucket *bucket;
+	do {
+		if (*i >= map->nbuckets)
+			return false;
+		bucket = bucket_at(map, *i);
+		(*i)++;
+	} while (!bucket->dib);
+	*item = bucket_item(bucket);
+	return true;
+}
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..a34c8e8
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,40 @@
+// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include <log.h>
+
+static int debug_enabled = 0;
+
+void debug(const char *format, ...)
+{
+	if (!debug_enabled)
+		return;
+
+	fprintf(stderr, "[DEBUG] ");
+
+	va_list ap;
+	va_start(ap, format);
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+}
+
+void debug_enable(int enable)
+{
+	debug_enabled = enable;
+}
+
+void fatal(const char *format, ...)
+{
+	fprintf(stderr, "[FATAL] ");
+
+	va_list ap;
+	va_start(ap, format);
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+
+	abort();
+}
diff --git a/src/main.c b/src/main.c
index 5b74476..eb651bd 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,7 +1,182 @@
+// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
 #include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <log.h>
+#include <hash.h>
+#include <hashmap.h>
+
+struct hashmap *all_terms;
+
+typedef enum { INV, ABS, APP, VAR } term_type_t;
+
+struct term {
+	term_type_t type;
+	hash_t hash;
+	size_t refs;
+	union {
+		struct {
+			hash_t term;
+		} abs;
+		struct {
+			hash_t lhs;
+			hash_t rhs;
+		} app;
+		struct {
+			int index;
+		} var;
+	} u;
+};
+
+static void deref_term(struct term *term)
+{
+	if (term->type == ABS) {
+		deref_term(hashmap_get(all_terms, term->u.abs.term));
+	} else if (term->type == APP) {
+		deref_term(hashmap_get(all_terms, term->u.app.lhs));
+		deref_term(hashmap_get(all_terms, term->u.app.rhs));
+	}
+
+	// TODO: remove from hashmap?
+	if (--term->refs == 0)
+		free(term);
+}
+
+static void hashmap_free_term(void *item)
+{
+	struct term *term = *(struct term **)item;
+	free(term);
+}
+
+static struct term *new_term(term_type_t type)
+{
+	struct term *term = malloc(sizeof(*term));
+	if (!term)
+		fatal("out of memory!\n");
+	term->type = type;
+	return term;
+}
+
+// TODO: bit rec_bblc
+static hash_t rec_blc(char **term)
+{
+	hash_t res = 0;
+	struct term *res_term = 0;
+	term_type_t res_type = 0;
+
+	if (!**term) {
+		fatal("invalid parsing state!\n");
+	} else if (**term == '0' && *(*term + 1) == '0') {
+		(*term) += 2;
+		res_type = ABS;
+		hash_t inner_hash = rec_blc(term);
+		res = hash((uint8_t *)&res_type, sizeof(res_type), inner_hash);
+
+		struct term **handle = hashmap_get(all_terms, res);
+		if (handle) {
+			res_term = *handle;
+			res_term->refs++;
+		} else {
+			res_term = new_term(ABS);
+			res_term->refs = 1;
+			res_term->hash = res;
+			res_term->u.abs.term = inner_hash;
+			hashmap_set(all_terms, &res_term, res);
+		}
+	} else if (**term == '0' && *(*term + 1) == '1') {
+		(*term) += 2;
+		res_type = APP;
+		hash_t lhs_hash = rec_blc(term);
+		hash_t rhs_hash = rec_blc(term);
+		res = hash((uint8_t *)&res_type, sizeof(res_type), lhs_hash);
+		res = hash((uint8_t *)&res, sizeof(res), rhs_hash);
+
+		struct term **handle = hashmap_get(all_terms, res);
+		if (handle) {
+			res_term = *handle;
+			res_term->refs++;
+		} else {
+			res_term = new_term(res_type);
+			res_term->refs = 1;
+			res_term->hash = res;
+			res_term->u.app.lhs = lhs_hash;
+			res_term->u.app.rhs = rhs_hash;
+			hashmap_set(all_terms, &res_term, res);
+		}
+	} else if (**term == '1') {
+		res_type = VAR;
+		const char *cur = *term;
+		while (**term == '1')
+			(*term)++;
+		int index = *term - cur - 1;
+		res = hash((uint8_t *)&res_type, sizeof(res_type), index);
+
+		struct term **handle = hashmap_get(all_terms, res);
+		if (handle) {
+			res_term = *handle;
+			res_term->refs++;
+		} else {
+			res_term = new_term(res_type);
+			res_term->refs = 1;
+			res_term->hash = res;
+			res_term->u.var.index = index;
+			hashmap_set(all_terms, &res_term, res);
+		}
+		(*term)++;
+	} else {
+		(*term)++;
+		res = rec_blc(term);
+	}
+	return res;
+}
+
+static char *read_file(FILE *f)
+{
+	fseek(f, 0, SEEK_END);
+	long fsize = ftell(f);
+	fseek(f, 0, SEEK_SET);
+
+	char *string = malloc(fsize + 1);
+	if (!string)
+		fatal("out of memory!\n");
+	int ret = fread(string, fsize, 1, f);
+
+	if (ret != 1) {
+		free(string);
+		fatal("can't read file: %s\n", strerror(errno));
+	}
+
+	string[fsize] = 0;
+	return string;
+}
+
+static char *read_path(const char *path)
+{
+	debug("reading from %s\n", path);
+	FILE *f = fopen(path, "rb");
+	if (!f)
+		fatal("can't open file %s: %s\n", path, strerror(errno));
+	char *string = read_file(f);
+	fclose(f);
+	return string;
+}
 
 int main(int argc, char *argv[])
 {
-	printf("hello world!");
+	debug_enable(1);
+	if (argc != 2)
+		fatal("usage: %s <file>\n", argv[0]);
+
+	char *term = read_path(argv[1]);
+	char *orig_term = term;
+	all_terms = hashmap_new(sizeof(struct term *), 0, hashmap_free_term);
+	rec_blc(&term);
+
+	free(orig_term);
+	hashmap_free(all_terms);
 	return 0;
 }