aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--inc/hash.h4
-rw-r--r--readme.md17
-rw-r--r--src/hash.c80
3 files changed, 59 insertions, 42 deletions
diff --git a/inc/hash.h b/inc/hash.h
index 96a30dd..35da312 100644
--- a/inc/hash.h
+++ b/inc/hash.h
@@ -7,8 +7,8 @@
#include <stdint.h>
#include <stddef.h>
-typedef uint32_t hash_t;
+typedef uint64_t hash_t;
-hash_t hash(const uint8_t *key, size_t len, uint32_t seed);
+hash_t hash(const void *key, int len, uint64_t seed);
#endif
diff --git a/readme.md b/readme.md
index c03933d..2ce8f3d 100644
--- a/readme.md
+++ b/readme.md
@@ -10,12 +10,17 @@ BLoC and heavily reduces its size.
Before explaining the format or its optimization techniques, let me
first show you some results:
-1. The [bruijn](https://github.com/marvinborner/bruijn) expression
+1. x86 C compiler [8cc](https://github.com/rui314/8cc) translated [to
+ lambda calculus](https://github.com/woodrush/lambda-8cc):
+ - the original expression takes ~5M bytes in bit-encoded BLC
+ - the same expression in BLoC needs only ~650K bytes (which is
+ around 2x the original 8cc!)
+2. The [bruijn](https://github.com/marvinborner/bruijn) expression
`fac (+30)`, where `fac` is the factorial implementation from
`std/Math`:
- the original expression takes 1200 bytes in bit-encoded BLC
- the same expression in BLoC needs only 348 bytes
-2. [My
+3. [My
solution](https://github.com/marvinborner/bruijn/blob/main/samples/aoc/2022/01/solve.bruijn)
for the “Advent of Code” challenge
[2022/01](https://adventofcode.com/2022/day/1) in
@@ -94,11 +99,3 @@ As of right now, expressions **don’t** get beta-reduced or manipulated
in any other way. As an idea for the future, long expressions could get
reduced using different techniques/depths and then get replaced with the
shortest one (as fully reduced expressions aren’t necessarily shorter).
-
-## Improvements
-
-There seem to be problems with *very* big files:
-[8cc](https://github.com/woodrush/lambda-8cc) does not pass the bloc-blc
-comparison test. I’ve not been able to reproduce this bug with any other
-file and 8cc itself is too huge to comfortably debug the issue. If
-you’re reading this: Please help me :(
diff --git a/src/hash.c b/src/hash.c
index 6a17e09..8940d70 100644
--- a/src/hash.c
+++ b/src/hash.c
@@ -1,42 +1,62 @@
// murmur3 originally by Austin Appleby
+// Copyright (c) 2013, ksss
// Copyright (c) 2023, Marvin Borner <dev@marvinborner.de>
// SPDX-License-Identifier: MIT
#include <stdint.h>
-#include <string.h>
#include <hash.h>
-static inline uint32_t murmur_32_scramble(uint32_t k)
+hash_t hash(const void *key, int len, uint64_t seed)
{
- k *= 0xcc9e2d51;
- k = (k << 15) | (k >> 17);
- k *= 0x1b873593;
- return k;
-}
+ const uint64_t m = 0xc6a4a7935bd1e995ULL;
+ const int r = 47;
+
+ uint64_t h = seed ^ (len * m);
+
+ const uint64_t *data = (const uint64_t *)key;
+ const uint64_t *end = data + (len / 8);
+
+ while (data != end) {
+ uint64_t k = *data++;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ const unsigned char *data2 = (const unsigned char *)data;
+
+ int b = len & 7;
+ if (b >= 7) {
+ h ^= ((uint64_t)data2[6]) << 48;
+ }
+ if (b >= 6) {
+ h ^= ((uint64_t)data2[5]) << 40;
+ }
+ if (b >= 5) {
+ h ^= ((uint64_t)data2[4]) << 32;
+ }
+ if (b >= 4) {
+ h ^= ((uint64_t)data2[3]) << 24;
+ }
+ if (b >= 3) {
+ h ^= ((uint64_t)data2[2]) << 16;
+ }
+ if (b >= 2) {
+ h ^= ((uint64_t)data2[1]) << 8;
+ }
+ if (b >= 1) {
+ h ^= ((uint64_t)data2[0]);
+ h *= m;
+ }
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
-hash_t hash(const uint8_t *key, size_t len, uint32_t seed)
-{
- uint32_t h = seed;
- uint32_t k;
- for (size_t i = len >> 2; i; i--) {
- memcpy(&k, key, sizeof(uint32_t));
- key += sizeof(uint32_t);
- h ^= murmur_32_scramble(k);
- h = (h << 13) | (h >> 19);
- h = h * 5 + 0xe6546b64;
- }
- k = 0;
- for (size_t i = len & 3; i; i--) {
- k <<= 8;
- k |= key[i - 1];
- }
- h ^= murmur_32_scramble(k);
- h ^= len;
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
return h;
}