summaryrefslogtreecommitdiff
path: root/ocaml/lib
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-10-07 00:13:21 +0700
committerpolwex <polwex@sortug.com>2025-10-07 00:13:21 +0700
commit5242ee845cdb3a7e9780443bf5d7e534f72c3f28 (patch)
tree24270ef9d0ce356e92b1048394fec2020e2bec71 /ocaml/lib
parent5de3f7a3ad7b0cf63b4a6cbddfc1e26359dea161 (diff)
mugs in ocaml with murmur3 fork
Diffstat (limited to 'ocaml/lib')
-rw-r--r--ocaml/lib/dune2
-rw-r--r--ocaml/lib/noun.ml77
2 files changed, 52 insertions, 27 deletions
diff --git a/ocaml/lib/dune b/ocaml/lib/dune
index 97961d5..a0cb4b8 100644
--- a/ocaml/lib/dune
+++ b/ocaml/lib/dune
@@ -1,4 +1,4 @@
(library
(name nock_lib)
(modules noun nock bitstream serial eventlog state effects boot runtime domain_pool nock_parallel)
- (libraries zarith eio eio.unix domainslib))
+ (libraries zarith eio eio.unix domainslib murmur3))
diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml
index 0a666a4..f4c4f87 100644
--- a/ocaml/lib/noun.ml
+++ b/ocaml/lib/noun.ml
@@ -98,9 +98,56 @@ let inc = function
| Atom { z; _ } -> Atom { z = Z.succ z; mug = 0l }
| Cell _ -> raise Exit
+(** MurmurHash3 using external library *)
+
+(* Use the murmur3 library's hash function with seed parameter *)
+external murmur3_hash32_seed : string -> int32 -> int32 = "caml_murmur3_hash32_seed"
+
+(* u3r_mug_bytes: matches C implementation *)
+let mug_bytes_with_seed data seed =
+ let rec try_hash seed attempts =
+ if attempts >= 8 then 0x7fffl (* Fallback after 8 attempts *)
+ else begin
+ let hash = murmur3_hash32_seed data seed in
+ let mug = Int32.logxor (Int32.shift_right_logical hash 31)
+ (Int32.logand hash 0x7fffffffl) in
+ if mug = 0l then
+ try_hash (Int32.succ seed) (attempts + 1)
+ else
+ mug
+ end
+ in
+ try_hash seed 0
+
+(* c3_bits_word: count number of bits in a 32-bit word *)
+let c3_bits_word n =
+ let rec loop n bits =
+ if n = 0l then bits
+ else loop (Int32.shift_right_logical n 1) (bits + 1)
+ in
+ loop n 0
+
+(* u3r_mug_both: mix two mugs for cells *)
+let mug_both lef_mug rit_mug =
+ (* Calculate length based on bit width of rit_mug, matching C's c3_bits_word *)
+ let bits = c3_bits_word rit_mug in
+ let len = 4 + ((bits + 7) / 8) in
+ let buf = Bytes.create len in
+ (* Always write lef_mug (4 bytes) *)
+ Bytes.set buf 0 (Char.chr (Int32.to_int (Int32.logand lef_mug 0xffl)));
+ Bytes.set buf 1 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical lef_mug 8) 0xffl)));
+ Bytes.set buf 2 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical lef_mug 16) 0xffl)));
+ Bytes.set buf 3 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical lef_mug 24) 0xffl)));
+ (* Write rit_mug bytes only if len > 4 *)
+ if len > 4 then Bytes.set buf 4 (Char.chr (Int32.to_int (Int32.logand rit_mug 0xffl)));
+ if len > 5 then Bytes.set buf 5 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical rit_mug 8) 0xffl)));
+ if len > 6 then Bytes.set buf 6 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical rit_mug 16) 0xffl)));
+ if len > 7 then Bytes.set buf 7 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical rit_mug 24) 0xffl)));
+ mug_bytes_with_seed (Bytes.to_string buf) 0xdeadbeefl
+
(** Compute mug (31-bit hash) of a noun with inline caching
- This implements Urbit's mug hash function using FNV-1a.
+ This implements Urbit's mug hash function using MurmurHash3.
Mugs are stored inline in the noun structure (like C's u3 system).
The mug field is 0l when not yet computed, and computed lazily on first access.
@@ -126,7 +173,7 @@ let rec mug noun =
else begin
let h_mug = mug h in
let t_mug = mug t in
- let computed = mix_mugs h_mug t_mug in
+ let computed = mug_both h_mug t_mug in
(* Update inline cache *)
(match noun with
| Cell r -> r.mug <- computed
@@ -135,31 +182,9 @@ let rec mug noun =
end
and compute_mug_atom z =
- (* FNV-1a constants - using hex to avoid signed int32 overflow *)
- let fnv_prime = 16777619l in
- let fnv_basis = 0x811c9dc5l in (* 2166136261 in decimal *)
-
- (* Mask to 31 bits (Urbit uses 31-bit mugs) *)
- let mask31 x = Int32.logand x 0x7fffffffl in
-
- (* Convert atom to bytes and hash using FNV-1a *)
+ (* Convert to bytes (little-endian, strip trailing zeros) *)
let bytes = Z.to_bits z in
- let len = String.length bytes in
- let rec loop i hash =
- if i >= len then hash
- else
- let byte = Int32.of_int (Char.code bytes.[i]) in
- let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in
- loop (i + 1) hash'
- in
- mask31 (loop 0 fnv_basis)
-
-and mix_mugs a_mug b_mug =
- (* Mix two mugs together (for cells) *)
- let fnv_prime = 16777619l in
- let mask31 x = Int32.logand x 0x7fffffffl in
- let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in
- mask31 mixed
+ mug_bytes_with_seed bytes 0xcafebabel
(** Pretty-print a noun *)
let rec pp_noun fmt = function