diff options
Diffstat (limited to 'ocaml/lib/noun.ml')
-rw-r--r-- | ocaml/lib/noun.ml | 173 |
1 files changed, 76 insertions, 97 deletions
diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml index b87aff5..0a666a4 100644 --- a/ocaml/lib/noun.ml +++ b/ocaml/lib/noun.ml @@ -1,18 +1,31 @@ (** Noun type and basic operations *) +(** Atom record with inline mug cache *) +type atom_record = { + z: Z.t; (** The integer value *) + mutable mug: int32; (** Cached 31-bit hash (0l = not computed yet) *) +} + +(** Cell record with inline mug cache *) +type cell_record = { + h: noun; (** Head *) + t: noun; (** Tail *) + mutable mug: int32; (** Cached 31-bit hash (0l = not computed yet) *) +} + (** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *) -type noun = - | Atom of Z.t (** Arbitrary-precision integer using Zarith *) - | Cell of noun * noun (** Pair of nouns *) +and noun = + | Atom of atom_record (** Arbitrary-precision integer with mug cache *) + | Cell of cell_record (** Pair of nouns with mug cache *) (** Exception raised on nock evaluation errors *) exception Exit (** Create an atom from an int *) -let atom n = Atom (Z.of_int n) +let atom n = Atom { z = Z.of_int n; mug = 0l } (** Create a cell *) -let cell a b = Cell (a, b) +let cell a b = Cell { h = a; t = b; mug = 0l } (** Test if a noun is a cell *) let is_cell = function @@ -26,12 +39,12 @@ let is_atom = function (** Get head of a cell *) let head = function - | Cell (h, _) -> h + | Cell { h; _ } -> h | Atom _ -> raise Exit (** Get tail of a cell *) let tail = function - | Cell (_, t) -> t + | Cell { t; _ } -> t | Atom _ -> raise Exit (** Fragment/axis lookup: slot(n, noun) @@ -62,14 +75,12 @@ let rec slot n noun = let sub = slot parent noun in let result = if bit then tail sub else head sub in if debug then begin + let result_str = + if is_cell result then "cell" + else "atom" + in Printf.eprintf "[SLOT-DEBUG] slot(%s): taking %s, result is %s\n%!" - (Z.to_string n) (if bit then "tail" else "head") - (if is_cell result then - let h = head result in - if is_atom h then - (match h with Atom z -> "cell[" ^ Z.to_string z ^ " ...]" | _ -> "cell[? ...]") - else "cell[cell ...]" - else "atom") + (Z.to_string n) (if bit then "tail" else "head") result_str end; result end @@ -77,72 +88,53 @@ let rec slot n noun = (** Equality test for nouns *) let rec equal a b = match a, b with - | Atom x, Atom y -> Z.equal x y - | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt + | Atom { z = x; _ }, Atom { z = y; _ } -> Z.equal x y + | Cell { h = ah; t = at; _ }, Cell { h = bh; t = bt; _ } -> + equal ah bh && equal at bt | _, _ -> false (** Increment an atom *) let inc = function - | Atom n -> Atom (Z.succ n) + | Atom { z; _ } -> Atom { z = Z.succ z; mug = 0l } | Cell _ -> raise Exit -(** Mug cache using ephemeron (weak key-value table) - - This implements mug caching like C's u3 system: - - Uses physical equality (==) for lookups - - Weak keys allow GC to collect unused nouns - - Values (mugs) are int32, no GC needed - - Dramatically improves performance for repeated mug calls -*) -module MugCache = struct - (* Ephemeron table: weak keys, strong values *) - module E = Ephemeron.K1.Make(struct - type t = noun - let equal = (==) (* Physical equality - same noun object *) - let hash = Hashtbl.hash (* OCaml's built-in structural hash *) - end) - - let cache : int32 E.t = E.create 10000 (* Initial size *) - let hit_count = ref 0 - let miss_count = ref 0 - - let find noun = - match E.find_opt cache noun with - | Some mug -> - hit_count := !hit_count + 1; - Some mug - | None -> - miss_count := !miss_count + 1; - None - - let add noun mug = E.add cache noun mug - - let stats () = - let total = !hit_count + !miss_count in - if total > 0 then - Printf.eprintf "[MugCache] hits=%d misses=%d (%.1f%% hit rate)\n%!" - !hit_count !miss_count - (100.0 *. float_of_int !hit_count /. float_of_int total) -end - -(** Compute mug (31-bit hash) of a noun with caching +(** Compute mug (31-bit hash) of a noun with inline caching This implements Urbit's mug hash function using FNV-1a. - Mugs are cached to avoid recomputation (critical for performance). + Mugs are stored inline in the noun structure (like C's u3 system). + + The mug field is 0l when not yet computed, and computed lazily on first access. + This matches C's approach where u3r_mug() is just a memory read after first computation. For atoms: hash the bytes of the integer representation For cells: mix the mugs of head and tail *) let rec mug noun = - (* Check cache first (MugCache.find updates hit/miss counts) *) - match MugCache.find noun with - | Some cached_mug -> cached_mug - | None -> - let computed_mug = compute_mug_uncached noun in - MugCache.add noun computed_mug; - computed_mug - -and compute_mug_uncached noun = + match noun with + | Atom { z; mug = cached_mug } -> + if cached_mug <> 0l then cached_mug + else begin + let computed = compute_mug_atom z in + (* Update inline cache *) + (match noun with + | Atom r -> r.mug <- computed + | _ -> ()); + computed + end + | Cell { h; t; mug = cached_mug } -> + if cached_mug <> 0l then cached_mug + else begin + let h_mug = mug h in + let t_mug = mug t in + let computed = mix_mugs h_mug t_mug in + (* Update inline cache *) + (match noun with + | Cell r -> r.mug <- computed + | _ -> ()); + computed + end + +and compute_mug_atom z = (* FNV-1a constants - using hex to avoid signed int32 overflow *) let fnv_prime = 16777619l in let fnv_basis = 0x811c9dc5l in (* 2166136261 in decimal *) @@ -150,39 +142,26 @@ and compute_mug_uncached noun = (* Mask to 31 bits (Urbit uses 31-bit mugs) *) let mask31 x = Int32.logand x 0x7fffffffl in - (* Hash bytes using FNV-1a *) - let hash_bytes bytes = - let len = Bytes.length bytes in - let rec loop i hash = - if i >= len then hash - else - let byte = Int32.of_int (Bytes.get_uint8 bytes i) in - let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in - loop (i + 1) hash' - in - mask31 (loop 0 fnv_basis) + (* Convert atom to bytes and hash using FNV-1a *) + let bytes = Z.to_bits z in + let len = String.length bytes in + let rec loop i hash = + if i >= len then hash + else + let byte = Int32.of_int (Char.code bytes.[i]) in + let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in + loop (i + 1) hash' in + mask31 (loop 0 fnv_basis) +and mix_mugs a_mug b_mug = (* Mix two mugs together (for cells) *) - let mix_mugs a_mug b_mug = - (* Mix by XOR and multiply, then mask *) - let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in - mask31 mixed - in - - match noun with - | Atom z -> - (* Convert atom to bytes and hash *) - let bytes = Z.to_bits z in - hash_bytes (Bytes.of_string bytes) - - | Cell (h, t) -> - (* Mix the mugs of head and tail (recursively cached) *) - let h_mug = mug h in - let t_mug = mug t in - mix_mugs h_mug t_mug + let fnv_prime = 16777619l in + let mask31 x = Int32.logand x 0x7fffffffl in + let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in + mask31 mixed (** Pretty-print a noun *) let rec pp_noun fmt = function - | Atom n -> Format.fprintf fmt "%s" (Z.to_string n) - | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b + | Atom { z; _ } -> Format.fprintf fmt "%s" (Z.to_string z) + | Cell { h; t; _ } -> Format.fprintf fmt "[%a %a]" pp_noun h pp_noun t |