summaryrefslogtreecommitdiff
path: root/ocaml/lib/noun.ml
diff options
context:
space:
mode:
Diffstat (limited to 'ocaml/lib/noun.ml')
-rw-r--r--ocaml/lib/noun.ml173
1 files changed, 76 insertions, 97 deletions
diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml
index b87aff5..0a666a4 100644
--- a/ocaml/lib/noun.ml
+++ b/ocaml/lib/noun.ml
@@ -1,18 +1,31 @@
(** Noun type and basic operations *)
+(** Atom record with inline mug cache *)
+type atom_record = {
+ z: Z.t; (** The integer value *)
+ mutable mug: int32; (** Cached 31-bit hash (0l = not computed yet) *)
+}
+
+(** Cell record with inline mug cache *)
+type cell_record = {
+ h: noun; (** Head *)
+ t: noun; (** Tail *)
+ mutable mug: int32; (** Cached 31-bit hash (0l = not computed yet) *)
+}
+
(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *)
-type noun =
- | Atom of Z.t (** Arbitrary-precision integer using Zarith *)
- | Cell of noun * noun (** Pair of nouns *)
+and noun =
+ | Atom of atom_record (** Arbitrary-precision integer with mug cache *)
+ | Cell of cell_record (** Pair of nouns with mug cache *)
(** Exception raised on nock evaluation errors *)
exception Exit
(** Create an atom from an int *)
-let atom n = Atom (Z.of_int n)
+let atom n = Atom { z = Z.of_int n; mug = 0l }
(** Create a cell *)
-let cell a b = Cell (a, b)
+let cell a b = Cell { h = a; t = b; mug = 0l }
(** Test if a noun is a cell *)
let is_cell = function
@@ -26,12 +39,12 @@ let is_atom = function
(** Get head of a cell *)
let head = function
- | Cell (h, _) -> h
+ | Cell { h; _ } -> h
| Atom _ -> raise Exit
(** Get tail of a cell *)
let tail = function
- | Cell (_, t) -> t
+ | Cell { t; _ } -> t
| Atom _ -> raise Exit
(** Fragment/axis lookup: slot(n, noun)
@@ -62,14 +75,12 @@ let rec slot n noun =
let sub = slot parent noun in
let result = if bit then tail sub else head sub in
if debug then begin
+ let result_str =
+ if is_cell result then "cell"
+ else "atom"
+ in
Printf.eprintf "[SLOT-DEBUG] slot(%s): taking %s, result is %s\n%!"
- (Z.to_string n) (if bit then "tail" else "head")
- (if is_cell result then
- let h = head result in
- if is_atom h then
- (match h with Atom z -> "cell[" ^ Z.to_string z ^ " ...]" | _ -> "cell[? ...]")
- else "cell[cell ...]"
- else "atom")
+ (Z.to_string n) (if bit then "tail" else "head") result_str
end;
result
end
@@ -77,72 +88,53 @@ let rec slot n noun =
(** Equality test for nouns *)
let rec equal a b =
match a, b with
- | Atom x, Atom y -> Z.equal x y
- | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt
+ | Atom { z = x; _ }, Atom { z = y; _ } -> Z.equal x y
+ | Cell { h = ah; t = at; _ }, Cell { h = bh; t = bt; _ } ->
+ equal ah bh && equal at bt
| _, _ -> false
(** Increment an atom *)
let inc = function
- | Atom n -> Atom (Z.succ n)
+ | Atom { z; _ } -> Atom { z = Z.succ z; mug = 0l }
| Cell _ -> raise Exit
-(** Mug cache using ephemeron (weak key-value table)
-
- This implements mug caching like C's u3 system:
- - Uses physical equality (==) for lookups
- - Weak keys allow GC to collect unused nouns
- - Values (mugs) are int32, no GC needed
- - Dramatically improves performance for repeated mug calls
-*)
-module MugCache = struct
- (* Ephemeron table: weak keys, strong values *)
- module E = Ephemeron.K1.Make(struct
- type t = noun
- let equal = (==) (* Physical equality - same noun object *)
- let hash = Hashtbl.hash (* OCaml's built-in structural hash *)
- end)
-
- let cache : int32 E.t = E.create 10000 (* Initial size *)
- let hit_count = ref 0
- let miss_count = ref 0
-
- let find noun =
- match E.find_opt cache noun with
- | Some mug ->
- hit_count := !hit_count + 1;
- Some mug
- | None ->
- miss_count := !miss_count + 1;
- None
-
- let add noun mug = E.add cache noun mug
-
- let stats () =
- let total = !hit_count + !miss_count in
- if total > 0 then
- Printf.eprintf "[MugCache] hits=%d misses=%d (%.1f%% hit rate)\n%!"
- !hit_count !miss_count
- (100.0 *. float_of_int !hit_count /. float_of_int total)
-end
-
-(** Compute mug (31-bit hash) of a noun with caching
+(** Compute mug (31-bit hash) of a noun with inline caching
This implements Urbit's mug hash function using FNV-1a.
- Mugs are cached to avoid recomputation (critical for performance).
+ Mugs are stored inline in the noun structure (like C's u3 system).
+
+ The mug field is 0l when not yet computed, and computed lazily on first access.
+ This matches C's approach where u3r_mug() is just a memory read after first computation.
For atoms: hash the bytes of the integer representation
For cells: mix the mugs of head and tail
*)
let rec mug noun =
- (* Check cache first (MugCache.find updates hit/miss counts) *)
- match MugCache.find noun with
- | Some cached_mug -> cached_mug
- | None ->
- let computed_mug = compute_mug_uncached noun in
- MugCache.add noun computed_mug;
- computed_mug
-
-and compute_mug_uncached noun =
+ match noun with
+ | Atom { z; mug = cached_mug } ->
+ if cached_mug <> 0l then cached_mug
+ else begin
+ let computed = compute_mug_atom z in
+ (* Update inline cache *)
+ (match noun with
+ | Atom r -> r.mug <- computed
+ | _ -> ());
+ computed
+ end
+ | Cell { h; t; mug = cached_mug } ->
+ if cached_mug <> 0l then cached_mug
+ else begin
+ let h_mug = mug h in
+ let t_mug = mug t in
+ let computed = mix_mugs h_mug t_mug in
+ (* Update inline cache *)
+ (match noun with
+ | Cell r -> r.mug <- computed
+ | _ -> ());
+ computed
+ end
+
+and compute_mug_atom z =
(* FNV-1a constants - using hex to avoid signed int32 overflow *)
let fnv_prime = 16777619l in
let fnv_basis = 0x811c9dc5l in (* 2166136261 in decimal *)
@@ -150,39 +142,26 @@ and compute_mug_uncached noun =
(* Mask to 31 bits (Urbit uses 31-bit mugs) *)
let mask31 x = Int32.logand x 0x7fffffffl in
- (* Hash bytes using FNV-1a *)
- let hash_bytes bytes =
- let len = Bytes.length bytes in
- let rec loop i hash =
- if i >= len then hash
- else
- let byte = Int32.of_int (Bytes.get_uint8 bytes i) in
- let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in
- loop (i + 1) hash'
- in
- mask31 (loop 0 fnv_basis)
+ (* Convert atom to bytes and hash using FNV-1a *)
+ let bytes = Z.to_bits z in
+ let len = String.length bytes in
+ let rec loop i hash =
+ if i >= len then hash
+ else
+ let byte = Int32.of_int (Char.code bytes.[i]) in
+ let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in
+ loop (i + 1) hash'
in
+ mask31 (loop 0 fnv_basis)
+and mix_mugs a_mug b_mug =
(* Mix two mugs together (for cells) *)
- let mix_mugs a_mug b_mug =
- (* Mix by XOR and multiply, then mask *)
- let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in
- mask31 mixed
- in
-
- match noun with
- | Atom z ->
- (* Convert atom to bytes and hash *)
- let bytes = Z.to_bits z in
- hash_bytes (Bytes.of_string bytes)
-
- | Cell (h, t) ->
- (* Mix the mugs of head and tail (recursively cached) *)
- let h_mug = mug h in
- let t_mug = mug t in
- mix_mugs h_mug t_mug
+ let fnv_prime = 16777619l in
+ let mask31 x = Int32.logand x 0x7fffffffl in
+ let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in
+ mask31 mixed
(** Pretty-print a noun *)
let rec pp_noun fmt = function
- | Atom n -> Format.fprintf fmt "%s" (Z.to_string n)
- | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b
+ | Atom { z; _ } -> Format.fprintf fmt "%s" (Z.to_string z)
+ | Cell { h; t; _ } -> Format.fprintf fmt "[%a %a]" pp_noun h pp_noun t