aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/hash.c135
-rw-r--r--src/hashmap.c327
-rw-r--r--src/log.c40
-rw-r--r--src/main.c177
4 files changed, 678 insertions, 1 deletions
diff --git a/src/hash.c b/src/hash.c
new file mode 100644
index 0000000..be8e369
--- /dev/null
+++ b/src/hash.c
@@ -0,0 +1,135 @@
+//-----------------------------------------------------------------------------
+// xxHash Library
+// Copyright (c) 2012-2021 Yann Collet
+// Copyright (c) 2023 Marvin Borner
+// All rights reserved.
+//
+// BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
+//
+// xxHash3
+//-----------------------------------------------------------------------------
+
+#include <string.h>
+
+#include <hash.h>
+
+#define XXH_PRIME_1 11400714785074694791ULL
+#define XXH_PRIME_2 14029467366897019727ULL
+#define XXH_PRIME_3 1609587929392839161ULL
+#define XXH_PRIME_4 9650029242287828579ULL
+#define XXH_PRIME_5 2870177450012600261ULL
+
+static uint64_t XXH_read64(void *memptr)
+{
+ uint64_t val;
+ memcpy(&val, memptr, sizeof(val));
+ return val;
+}
+
+static uint32_t XXH_read32(void *memptr)
+{
+ uint32_t val;
+ memcpy(&val, memptr, sizeof(val));
+ return val;
+}
+
+static uint64_t XXH_rotl64(uint64_t x, int r)
+{
+ return (x << r) | (x >> (64 - r));
+}
+
+hash_t hash(void *data, size_t len, uint64_t seed)
+{
+ uint8_t *p = (uint8_t *)data;
+ uint8_t *end = p + len;
+ uint64_t h64;
+
+ if (len >= 32) {
+ uint8_t *limit = end - 32;
+ uint64_t v1 = seed + XXH_PRIME_1 + XXH_PRIME_2;
+ uint64_t v2 = seed + XXH_PRIME_2;
+ uint64_t v3 = seed + 0;
+ uint64_t v4 = seed - XXH_PRIME_1;
+
+ do {
+ v1 += XXH_read64(p) * XXH_PRIME_2;
+ v1 = XXH_rotl64(v1, 31);
+ v1 *= XXH_PRIME_1;
+
+ v2 += XXH_read64(p + 8) * XXH_PRIME_2;
+ v2 = XXH_rotl64(v2, 31);
+ v2 *= XXH_PRIME_1;
+
+ v3 += XXH_read64(p + 16) * XXH_PRIME_2;
+ v3 = XXH_rotl64(v3, 31);
+ v3 *= XXH_PRIME_1;
+
+ v4 += XXH_read64(p + 24) * XXH_PRIME_2;
+ v4 = XXH_rotl64(v4, 31);
+ v4 *= XXH_PRIME_1;
+
+ p += 32;
+ } while (p <= limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) +
+ XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+
+ v1 *= XXH_PRIME_2;
+ v1 = XXH_rotl64(v1, 31);
+ v1 *= XXH_PRIME_1;
+ h64 ^= v1;
+ h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+
+ v2 *= XXH_PRIME_2;
+ v2 = XXH_rotl64(v2, 31);
+ v2 *= XXH_PRIME_1;
+ h64 ^= v2;
+ h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+
+ v3 *= XXH_PRIME_2;
+ v3 = XXH_rotl64(v3, 31);
+ v3 *= XXH_PRIME_1;
+ h64 ^= v3;
+ h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+
+ v4 *= XXH_PRIME_2;
+ v4 = XXH_rotl64(v4, 31);
+ v4 *= XXH_PRIME_1;
+ h64 ^= v4;
+ h64 = h64 * XXH_PRIME_1 + XXH_PRIME_4;
+ } else {
+ h64 = seed + XXH_PRIME_5;
+ }
+
+ h64 += (uint64_t)len;
+
+ while (p + 8 <= end) {
+ uint64_t k1 = XXH_read64(p);
+ k1 *= XXH_PRIME_2;
+ k1 = XXH_rotl64(k1, 31);
+ k1 *= XXH_PRIME_1;
+ h64 ^= k1;
+ h64 = XXH_rotl64(h64, 27) * XXH_PRIME_1 + XXH_PRIME_4;
+ p += 8;
+ }
+
+ if (p + 4 <= end) {
+ h64 ^= (uint64_t)(XXH_read32(p)) * XXH_PRIME_1;
+ h64 = XXH_rotl64(h64, 23) * XXH_PRIME_2 + XXH_PRIME_3;
+ p += 4;
+ }
+
+ while (p < end) {
+ h64 ^= (*p) * XXH_PRIME_5;
+ h64 = XXH_rotl64(h64, 11) * XXH_PRIME_1;
+ p++;
+ }
+
+ h64 ^= h64 >> 33;
+ h64 *= XXH_PRIME_2;
+ h64 ^= h64 >> 29;
+ h64 *= XXH_PRIME_3;
+ h64 ^= h64 >> 32;
+
+ return h64;
+}
diff --git a/src/hashmap.c b/src/hashmap.c
new file mode 100644
index 0000000..94b170d
--- /dev/null
+++ b/src/hashmap.c
@@ -0,0 +1,327 @@
+// Copyright 2020 Joshua J Baker. All rights reserved.
+// Copyright 2023 Marvin Borner
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file.
+
+// This is a fork of tidwall's hashmap. It's heavily reduced and adapted.
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#include <hashmap.h>
+#include <hash.h>
+
+#define GROW_AT 0.60
+#define SHRINK_AT 0.10
+
+struct bucket {
+ uint64_t hash : 48;
+ uint64_t dib : 16;
+};
+
+struct hashmap {
+ size_t elsize;
+ size_t cap;
+ void (*elfree)(void *item);
+ size_t bucketsz;
+ size_t nbuckets;
+ size_t count;
+ size_t mask;
+ size_t growat;
+ size_t shrinkat;
+ uint8_t growpower;
+ bool oom;
+ void *buckets;
+ void *spare;
+ void *edata;
+};
+
+void hashmap_set_grow_by_power(struct hashmap *map, size_t power)
+{
+ map->growpower = power < 1 ? 1 : power > 16 ? 16 : power;
+}
+
+static struct bucket *bucket_at0(void *buckets, size_t bucketsz, size_t i)
+{
+ return (struct bucket *)(((char *)buckets) + (bucketsz * i));
+}
+
+static struct bucket *bucket_at(struct hashmap *map, size_t index)
+{
+ return bucket_at0(map->buckets, map->bucketsz, index);
+}
+
+static void *bucket_item(struct bucket *entry)
+{
+ return ((char *)entry) + sizeof(struct bucket);
+}
+
+static uint64_t clip_hash(uint64_t hash)
+{
+ return hash & 0xFFFFFFFFFFFF;
+}
+
+struct hashmap *hashmap_new(size_t elsize, size_t cap,
+ void (*elfree)(void *item))
+{
+ size_t ncap = 16;
+ if (cap < ncap) {
+ cap = ncap;
+ } else {
+ while (ncap < cap) {
+ ncap *= 2;
+ }
+ cap = ncap;
+ }
+ size_t bucketsz = sizeof(struct bucket) + elsize;
+ while (bucketsz & (sizeof(uintptr_t) - 1)) {
+ bucketsz++;
+ }
+ // hashmap + spare + edata
+ size_t size = sizeof(struct hashmap) + bucketsz * 2;
+ struct hashmap *map = malloc(size);
+ if (!map) {
+ return NULL;
+ }
+ memset(map, 0, sizeof(struct hashmap));
+ map->elsize = elsize;
+ map->bucketsz = bucketsz;
+ map->elfree = elfree;
+ map->spare = ((char *)map) + sizeof(struct hashmap);
+ map->edata = (char *)map->spare + bucketsz;
+ map->cap = cap;
+ map->nbuckets = cap;
+ map->mask = map->nbuckets - 1;
+ map->buckets = malloc(map->bucketsz * map->nbuckets);
+ if (!map->buckets) {
+ free(map);
+ return NULL;
+ }
+ memset(map->buckets, 0, map->bucketsz * map->nbuckets);
+ map->growpower = 1;
+ map->growat = map->nbuckets * GROW_AT;
+ map->shrinkat = map->nbuckets * SHRINK_AT;
+ return map;
+}
+
+static void free_elements(struct hashmap *map)
+{
+ if (!map->elfree)
+ return;
+
+ for (size_t i = 0; i < map->nbuckets; i++) {
+ struct bucket *bucket = bucket_at(map, i);
+ if (bucket->dib)
+ map->elfree(bucket_item(bucket));
+ }
+}
+
+void hashmap_clear(struct hashmap *map, bool update_cap)
+{
+ map->count = 0;
+ free_elements(map);
+ if (update_cap) {
+ map->cap = map->nbuckets;
+ } else if (map->nbuckets != map->cap) {
+ void *new_buckets = malloc(map->bucketsz * map->cap);
+ if (new_buckets) {
+ free(map->buckets);
+ map->buckets = new_buckets;
+ }
+ map->nbuckets = map->cap;
+ }
+ memset(map->buckets, 0, map->bucketsz * map->nbuckets);
+ map->mask = map->nbuckets - 1;
+ map->growat = map->nbuckets * 0.75;
+ map->shrinkat = map->nbuckets * 0.10;
+}
+
+static bool resize0(struct hashmap *map, size_t new_cap)
+{
+ struct hashmap *map2 = hashmap_new(map->elsize, new_cap, map->elfree);
+ if (!map2)
+ return false;
+ for (size_t i = 0; i < map->nbuckets; i++) {
+ struct bucket *entry = bucket_at(map, i);
+ if (!entry->dib) {
+ continue;
+ }
+ entry->dib = 1;
+ size_t j = entry->hash & map2->mask;
+ while (1) {
+ struct bucket *bucket = bucket_at(map2, j);
+ if (bucket->dib == 0) {
+ memcpy(bucket, entry, map->bucketsz);
+ break;
+ }
+ if (bucket->dib < entry->dib) {
+ memcpy(map2->spare, bucket, map->bucketsz);
+ memcpy(bucket, entry, map->bucketsz);
+ memcpy(entry, map2->spare, map->bucketsz);
+ }
+ j = (j + 1) & map2->mask;
+ entry->dib += 1;
+ }
+ }
+ free(map->buckets);
+ map->buckets = map2->buckets;
+ map->nbuckets = map2->nbuckets;
+ map->mask = map2->mask;
+ map->growat = map2->growat;
+ map->shrinkat = map2->shrinkat;
+ free(map2);
+ return true;
+}
+
+static bool resize(struct hashmap *map, size_t new_cap)
+{
+ return resize0(map, new_cap);
+}
+
+void *hashmap_set(struct hashmap *map, void *item, uint64_t hash)
+{
+ hash = clip_hash(hash);
+ map->oom = false;
+ if (map->count == map->growat) {
+ if (!resize(map, map->nbuckets * (1 << map->growpower))) {
+ map->oom = true;
+ return NULL;
+ }
+ }
+
+ struct bucket *entry = map->edata;
+ entry->hash = hash;
+ entry->dib = 1;
+ void *eitem = bucket_item(entry);
+ memcpy(eitem, item, map->elsize);
+
+ void *bitem;
+ size_t i = entry->hash & map->mask;
+ while (1) {
+ struct bucket *bucket = bucket_at(map, i);
+ if (bucket->dib == 0) {
+ memcpy(bucket, entry, map->bucketsz);
+ map->count++;
+ return NULL;
+ }
+ bitem = bucket_item(bucket);
+ if (entry->hash == bucket->hash) {
+ memcpy(map->spare, bitem, map->elsize);
+ memcpy(bitem, eitem, map->elsize);
+ return map->spare;
+ }
+ if (bucket->dib < entry->dib) {
+ memcpy(map->spare, bucket, map->bucketsz);
+ memcpy(bucket, entry, map->bucketsz);
+ memcpy(entry, map->spare, map->bucketsz);
+ eitem = bucket_item(entry);
+ }
+ i = (i + 1) & map->mask;
+ entry->dib += 1;
+ }
+}
+
+void *hashmap_get(struct hashmap *map, uint64_t hash)
+{
+ hash = clip_hash(hash);
+ size_t i = hash & map->mask;
+ while (1) {
+ struct bucket *bucket = bucket_at(map, i);
+ if (!bucket->dib)
+ return NULL;
+ if (bucket->hash == hash)
+ return bucket_item(bucket);
+ i = (i + 1) & map->mask;
+ }
+}
+
+void *hashmap_probe(struct hashmap *map, uint64_t position)
+{
+ size_t i = position & map->mask;
+ struct bucket *bucket = bucket_at(map, i);
+ if (!bucket->dib) {
+ return NULL;
+ }
+ return bucket_item(bucket);
+}
+
+void *hashmap_delete(struct hashmap *map, uint64_t hash)
+{
+ hash = clip_hash(hash);
+ map->oom = false;
+ size_t i = hash & map->mask;
+ while (1) {
+ struct bucket *bucket = bucket_at(map, i);
+ if (!bucket->dib)
+ return NULL;
+ void *bitem = bucket_item(bucket);
+ if (bucket->hash == hash) {
+ memcpy(map->spare, bitem, map->elsize);
+ bucket->dib = 0;
+ while (1) {
+ struct bucket *prev = bucket;
+ i = (i + 1) & map->mask;
+ bucket = bucket_at(map, i);
+ if (bucket->dib <= 1) {
+ prev->dib = 0;
+ break;
+ }
+ memcpy(prev, bucket, map->bucketsz);
+ prev->dib--;
+ }
+ map->count--;
+ if (map->nbuckets > map->cap &&
+ map->count <= map->shrinkat) {
+ resize(map, map->nbuckets / 2);
+ }
+ return map->spare;
+ }
+ i = (i + 1) & map->mask;
+ }
+}
+
+size_t hashmap_count(struct hashmap *map)
+{
+ return map->count;
+}
+
+void hashmap_free(struct hashmap *map)
+{
+ if (!map)
+ return;
+ free_elements(map);
+ free(map->buckets);
+ free(map);
+}
+
+bool hashmap_oom(struct hashmap *map)
+{
+ return map->oom;
+}
+
+bool hashmap_scan(struct hashmap *map, bool (*iter)(void *item))
+{
+ for (size_t i = 0; i < map->nbuckets; i++) {
+ struct bucket *bucket = bucket_at(map, i);
+ if (bucket->dib && !iter(bucket_item(bucket))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool hashmap_iter(struct hashmap *map, size_t *i, void **item)
+{
+ struct bucket *bucket;
+ do {
+ if (*i >= map->nbuckets)
+ return false;
+ bucket = bucket_at(map, *i);
+ (*i)++;
+ } while (!bucket->dib);
+ *item = bucket_item(bucket);
+ return true;
+}
diff --git a/src/log.c b/src/log.c
new file mode 100644
index 0000000..a34c8e8
--- /dev/null
+++ b/src/log.c
@@ -0,0 +1,40 @@
+// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+
+#include <log.h>
+
+static int debug_enabled = 0;
+
+void debug(const char *format, ...)
+{
+ if (!debug_enabled)
+ return;
+
+ fprintf(stderr, "[DEBUG] ");
+
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+}
+
+void debug_enable(int enable)
+{
+ debug_enabled = enable;
+}
+
+void fatal(const char *format, ...)
+{
+ fprintf(stderr, "[FATAL] ");
+
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+
+ abort();
+}
diff --git a/src/main.c b/src/main.c
index 5b74476..eb651bd 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,7 +1,182 @@
+// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de>
+// SPDX-License-Identifier: MIT
+
#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <log.h>
+#include <hash.h>
+#include <hashmap.h>
+
+struct hashmap *all_terms;
+
+typedef enum { INV, ABS, APP, VAR } term_type_t;
+
+struct term {
+ term_type_t type;
+ hash_t hash;
+ size_t refs;
+ union {
+ struct {
+ hash_t term;
+ } abs;
+ struct {
+ hash_t lhs;
+ hash_t rhs;
+ } app;
+ struct {
+ int index;
+ } var;
+ } u;
+};
+
+static void deref_term(struct term *term)
+{
+ if (term->type == ABS) {
+ deref_term(hashmap_get(all_terms, term->u.abs.term));
+ } else if (term->type == APP) {
+ deref_term(hashmap_get(all_terms, term->u.app.lhs));
+ deref_term(hashmap_get(all_terms, term->u.app.rhs));
+ }
+
+ // TODO: remove from hashmap?
+ if (--term->refs == 0)
+ free(term);
+}
+
+static void hashmap_free_term(void *item)
+{
+ struct term *term = *(struct term **)item;
+ free(term);
+}
+
+static struct term *new_term(term_type_t type)
+{
+ struct term *term = malloc(sizeof(*term));
+ if (!term)
+ fatal("out of memory!\n");
+ term->type = type;
+ return term;
+}
+
+// TODO: bit rec_bblc
+static hash_t rec_blc(char **term)
+{
+ hash_t res = 0;
+ struct term *res_term = 0;
+ term_type_t res_type = 0;
+
+ if (!**term) {
+ fatal("invalid parsing state!\n");
+ } else if (**term == '0' && *(*term + 1) == '0') {
+ (*term) += 2;
+ res_type = ABS;
+ hash_t inner_hash = rec_blc(term);
+ res = hash((uint8_t *)&res_type, sizeof(res_type), inner_hash);
+
+ struct term **handle = hashmap_get(all_terms, res);
+ if (handle) {
+ res_term = *handle;
+ res_term->refs++;
+ } else {
+ res_term = new_term(ABS);
+ res_term->refs = 1;
+ res_term->hash = res;
+ res_term->u.abs.term = inner_hash;
+ hashmap_set(all_terms, &res_term, res);
+ }
+ } else if (**term == '0' && *(*term + 1) == '1') {
+ (*term) += 2;
+ res_type = APP;
+ hash_t lhs_hash = rec_blc(term);
+ hash_t rhs_hash = rec_blc(term);
+ res = hash((uint8_t *)&res_type, sizeof(res_type), lhs_hash);
+ res = hash((uint8_t *)&res, sizeof(res), rhs_hash);
+
+ struct term **handle = hashmap_get(all_terms, res);
+ if (handle) {
+ res_term = *handle;
+ res_term->refs++;
+ } else {
+ res_term = new_term(res_type);
+ res_term->refs = 1;
+ res_term->hash = res;
+ res_term->u.app.lhs = lhs_hash;
+ res_term->u.app.rhs = rhs_hash;
+ hashmap_set(all_terms, &res_term, res);
+ }
+ } else if (**term == '1') {
+ res_type = VAR;
+ const char *cur = *term;
+ while (**term == '1')
+ (*term)++;
+ int index = *term - cur - 1;
+ res = hash((uint8_t *)&res_type, sizeof(res_type), index);
+
+ struct term **handle = hashmap_get(all_terms, res);
+ if (handle) {
+ res_term = *handle;
+ res_term->refs++;
+ } else {
+ res_term = new_term(res_type);
+ res_term->refs = 1;
+ res_term->hash = res;
+ res_term->u.var.index = index;
+ hashmap_set(all_terms, &res_term, res);
+ }
+ (*term)++;
+ } else {
+ (*term)++;
+ res = rec_blc(term);
+ }
+ return res;
+}
+
+static char *read_file(FILE *f)
+{
+ fseek(f, 0, SEEK_END);
+ long fsize = ftell(f);
+ fseek(f, 0, SEEK_SET);
+
+ char *string = malloc(fsize + 1);
+ if (!string)
+ fatal("out of memory!\n");
+ int ret = fread(string, fsize, 1, f);
+
+ if (ret != 1) {
+ free(string);
+ fatal("can't read file: %s\n", strerror(errno));
+ }
+
+ string[fsize] = 0;
+ return string;
+}
+
+static char *read_path(const char *path)
+{
+ debug("reading from %s\n", path);
+ FILE *f = fopen(path, "rb");
+ if (!f)
+ fatal("can't open file %s: %s\n", path, strerror(errno));
+ char *string = read_file(f);
+ fclose(f);
+ return string;
+}
int main(int argc, char *argv[])
{
- printf("hello world!");
+ debug_enable(1);
+ if (argc != 2)
+ fatal("usage: %s <file>\n", argv[0]);
+
+ char *term = read_path(argv[1]);
+ char *orig_term = term;
+ all_terms = hashmap_new(sizeof(struct term *), 0, hashmap_free_term);
+ rec_blc(&term);
+
+ free(orig_term);
+ hashmap_free(all_terms);
return 0;
}