From fdab65f6dac4ba85ed4749f61970660d1132d453 Mon Sep 17 00:00:00 2001 From: polwex Date: Mon, 6 Oct 2025 23:06:50 +0700 Subject: added mutable mugs --- ocaml/lib/boot.ml | 101 +++++++++++------------ ocaml/lib/effects.ml | 4 +- ocaml/lib/nock.ml | 16 ++-- ocaml/lib/nock_alt.ml | 12 +-- ocaml/lib/noun.ml | 173 ++++++++++++++++++---------------------- ocaml/lib/serial.ml | 8 +- vere/C_INSIGHTS.md | 41 ++++++++++ vere/pkg/vere/ivory_boot_test.c | 18 ++++- 8 files changed, 205 insertions(+), 168 deletions(-) diff --git a/ocaml/lib/boot.ml b/ocaml/lib/boot.ml index f8289ec..1aeafaf 100644 --- a/ocaml/lib/boot.ml +++ b/ocaml/lib/boot.ml @@ -152,7 +152,7 @@ let life eve = (* Check if eve is null (for ivory pill boot) *) let is_null = match eve with - | Noun.Atom z when Z.equal z Z.zero -> true + | Noun.Atom { z; _ } when Z.equal z Z.zero -> true | _ -> false in @@ -177,14 +177,14 @@ let life eve = (* Log eve structure before nock - matching C vortex.c:43-59 *) Printf.printf "[Boot] eve is %s\n%!" (if Noun.is_cell eve then "cell" else "atom"); (match eve with - | Noun.Cell (eve_h, eve_t) -> + | Noun.Cell { h = eve_h; t = eve_t; _ } -> Printf.printf "[Boot] eve head is %s\n%!" (if Noun.is_cell eve_h then "cell" else "atom"); Printf.printf "[Boot] eve tail is %s\n%!" (if Noun.is_cell eve_t then "cell" else "atom"); (match eve_h with - | Noun.Cell (eve_hh, eve_ht) -> + | Noun.Cell { h = eve_hh; t = eve_ht; _ } -> Printf.printf "[Boot] eve head.head is %s\n%!" (if Noun.is_atom eve_hh then - let z = match eve_hh with Noun.Atom z -> z | _ -> Z.zero in + let z = match eve_hh with Noun.Atom { z; _ } -> z | _ -> Z.zero in "atom(" ^ Z.to_string z ^ ")" else "cell"); Printf.printf "[Boot] eve head.tail is %s\n%!" @@ -247,7 +247,7 @@ let boot state eve_list = let rec count_events acc noun = match noun with | Noun.Atom _ -> acc - | Noun.Cell (_, rest) -> count_events (acc + 1) rest + | Noun.Cell { t = rest; _ } -> count_events (acc + 1) rest in let event_count = count_events 0 eve_list in @@ -279,7 +279,7 @@ let parse_solid_pill pil = (* Debug: print actual tag value *) (match tag with - | Noun.Atom z -> + | Noun.Atom { z; _ } -> Printf.printf "[Debug] Tag is atom: %s (hex: %s)\n%!" (Z.to_string z) (Z.format "x" z) | Noun.Cell _ -> @@ -289,7 +289,7 @@ let parse_solid_pill pil = let pill_tag = Z.of_string "1819044208" in (* "pill" = 0x6c6c6970 *) begin match tag with - | Noun.Atom z when Z.equal z pill_tag -> + | Noun.Atom { z; _ } when Z.equal z pill_tag -> (* rest should be [%solid bot mod use], extract all 4 *) if not (Noun.is_cell rest) then Error "Pill rest must be a cell" @@ -351,14 +351,14 @@ let string_to_atom s = let byte = Char.code s.[i] in loop (i - 1) (Z.add (Z.mul acc (Z.of_int 256)) (Z.of_int byte)) in - Noun.Atom (loop (len - 1) Z.zero) + Noun.Atom { z = loop (len - 1) Z.zero; mug = 0l } (* Synthesize the 4 MOD events - following C Vere mars.c:1763-1779 *) let synthesize_mod_events () = (* Wire for all mod events: [%$ %arvo ~] which is [0 'arvo' 0] *) let arvo_str = string_to_atom "arvo" in - let wir = Noun.cell (Noun.Atom Z.zero) - (Noun.cell arvo_str (Noun.Atom Z.zero)) in + let wir = Noun.cell (Noun.atom 0) + (Noun.cell arvo_str (Noun.atom 0)) in (* 1. wack: entropy card [%wack [16 random words]] *) let eny_words = Array.init 16 (fun _ -> @@ -367,15 +367,15 @@ let synthesize_mod_events () = let rec build_word_list i acc = if i < 0 then acc else build_word_list (i - 1) - (Noun.cell (Noun.Atom (Z.of_int64 eny_words.(i))) acc) + (Noun.cell (Noun.Atom { z = Z.of_int64 eny_words.(i); mug = 0l }) acc) in - let eny_noun = build_word_list 15 (Noun.Atom Z.zero) in + let eny_noun = build_word_list 15 (Noun.atom 0) in let wack_tag = string_to_atom "wack" in let wack_card = Noun.cell wack_tag eny_noun in let wack = Noun.cell (wir) wack_card in (* 2. whom: identity card [%whom ship] *) - let who = Noun.Atom Z.zero in (* ~zod = 0 *) + let who = Noun.atom 0 in (* ~zod = 0 *) let whom_tag = string_to_atom "whom" in let whom_card = Noun.cell whom_tag who in let whom = Noun.cell (wir) whom_card in @@ -383,36 +383,36 @@ let synthesize_mod_events () = (* 3. verb: verbose flag [%verb ~ 0] (0 = verbose off) *) let verb_tag = string_to_atom "verb" in let verb_card = Noun.cell verb_tag - (Noun.cell (Noun.Atom Z.zero) (Noun.Atom Z.zero)) in + (Noun.cell (Noun.atom 0) (Noun.atom 0)) in let verb = Noun.cell (wir) verb_card in (* 4. wyrd: version card - simplified for now *) (* TODO: Implement proper version card like _mars_wyrd_card *) let wyrd_tag = string_to_atom "wyrd" in - let wyrd_card = Noun.cell wyrd_tag (Noun.Atom Z.zero) in + let wyrd_card = Noun.cell wyrd_tag (Noun.atom 0) in let wyrd = Noun.cell wir wyrd_card in (* Build list: [wack whom verb wyrd ~] *) Noun.cell wack (Noun.cell whom (Noun.cell verb - (Noun.cell wyrd (Noun.Atom Z.zero)))) + (Noun.cell wyrd (Noun.atom 0)))) (* Synthesize legacy boot USE event - following C Vere mars.c:1785-1789 *) let synthesize_boot_event () = (* Wire: [%d %term '1' ~] which is ['d' 'term' 0x31 0] *) let d = string_to_atom "d" in let term = string_to_atom "term" in - let one = Noun.Atom (Z.of_int 0x31) in (* ASCII '1' *) + let one = Noun.Atom { z = Z.of_int 0x31; mug = 0l } in (* ASCII '1' *) let wir = Noun.cell d (Noun.cell term - (Noun.cell one (Noun.Atom Z.zero))) in + (Noun.cell one (Noun.atom 0))) in (* Card: [%boot lit ven] - simplified *) (* lit = 0 (c3n = false), ven will be filled in properly later *) let boot_tag = string_to_atom "boot" in - let lit = Noun.Atom Z.zero in - let ven = Noun.Atom Z.zero in (* Placeholder for now *) + let lit = Noun.atom 0 in + let ven = Noun.atom 0 in (* Placeholder for now *) let cad = Noun.cell boot_tag (Noun.cell lit ven) in Noun.cell wir cad @@ -426,8 +426,8 @@ let build_event_list bot mod_ use_ = (* Count events *) let rec count_list noun = match noun with - | Noun.Atom z when Z.equal z Z.zero -> 0 - | Noun.Cell (_, rest) -> 1 + count_list rest + | Noun.Atom { z; _ } when Z.equal z Z.zero -> 0 + | Noun.Cell { t = rest; _ } -> 1 + count_list rest | _ -> 0 in @@ -447,31 +447,31 @@ let build_event_list bot mod_ use_ = let now_us = Int64.of_float (now_timeval *. 1_000_000.0) in (* Urbit epoch is ~292 billion years before Unix epoch For now, use simplified timestamp *) - let now = Noun.Atom (Z.of_int64 now_us) in + let now = Noun.Atom { z = Z.of_int64 now_us; mug = 0l } in (* 1/2^16 seconds increment *) - let bit = Noun.Atom (Z.shift_left Z.one 48) in + let bit = Noun.Atom { z = Z.shift_left Z.one 48; mug = 0l } in (* Helper: flip a list *) let rec flip acc noun = match noun with - | Noun.Atom z when Z.equal z Z.zero -> acc - | Noun.Cell (h, t) -> flip (Noun.Cell (h, acc)) t + | Noun.Atom { z; _ } when Z.equal z Z.zero -> acc + | Noun.Cell { h; t; _ } -> flip (Noun.cell h acc) t | _ -> acc in (* Weld mod and use lists *) let rec weld l1 l2 = match l1 with - | Noun.Atom z when Z.equal z Z.zero -> l2 - | Noun.Cell (h, t) -> Noun.Cell (h, weld t l2) + | Noun.Atom { z; _ } when Z.equal z Z.zero -> l2 + | Noun.Cell { h; t; _ } -> Noun.cell h (weld t l2) | _ -> l2 in let lit = weld mod_ use_ in (* Start with flipped bot events (NO timestamp) *) - let eve = flip (Noun.Atom Z.zero) bot in + let eve = flip (Noun.atom 0) bot in (* Add timestamped mod/use events by consing to FRONT * This matches C Vere: eve = u3nc(u3nc(now, i), eve) @@ -482,18 +482,19 @@ let build_event_list bot mod_ use_ = *) let rec add_timestamped acc now_ref noun = match noun with - | Noun.Atom z when Z.equal z Z.zero -> acc - | Noun.Cell (event, rest) -> + | Noun.Atom { z; _ } when Z.equal z Z.zero -> acc + | Noun.Cell { h = event; t = rest; _ } -> (* Increment timestamp *) let new_now = match !now_ref, bit with - | Noun.Atom n, Noun.Atom b -> Noun.Atom (Z.add n b) + | Noun.Atom { z = n; _ }, Noun.Atom { z = b; _ } -> + Noun.Atom { z = Z.add n b; mug = 0l } | _ -> !now_ref in now_ref := new_now; (* Create [timestamp event] pair and cons to FRONT *) - let stamped = Noun.Cell (new_now, event) in - let new_acc = Noun.Cell (stamped, acc) in + let stamped = Noun.cell new_now event in + let new_acc = Noun.cell stamped acc in add_timestamped new_acc now_ref rest | _ -> acc @@ -503,21 +504,21 @@ let build_event_list bot mod_ use_ = let eve_with_stamped = add_timestamped eve now_ref lit in (* Flip final list to get: [bot1 bot2 bot3 [time mod1] ...] *) - let ova = flip (Noun.Atom Z.zero) eve_with_stamped in + let ova = flip (Noun.atom 0) eve_with_stamped in Printf.printf "[Boot] ✓ Event list built: %d events\n%!" (count_list ova); (* Debug: Check first few events *) Printf.printf "[Boot] First event structure:\n%!"; (match ova with - | Noun.Cell (first, _) -> + | Noun.Cell { h = first; _ } -> Printf.printf "[Boot] first = %s\n%!" (if Noun.is_cell first then "cell" else "atom"); (match first with - | Noun.Cell (h, t) -> + | Noun.Cell { h; t; _ } -> Printf.printf "[Boot] head = %s\n%!" (if Noun.is_atom h then - let z = match h with Noun.Atom z -> z | _ -> Z.zero in + let z = match h with Noun.Atom { z; _ } -> z | _ -> Z.zero in "atom(" ^ Z.to_string z ^ ")" else "cell"); Printf.printf "[Boot] tail = %s\n%!" @@ -565,7 +566,7 @@ let boot_lite ~fs state ivory_path = let rec lent n = match n with | Noun.Atom _ when Noun.equal n (Noun.atom 0) -> 0 - | Noun.Cell (_, t) -> 1 + lent t + | Noun.Cell { t; _ } -> 1 + lent t | _ -> 0 in let pil_len = lent pil in @@ -576,8 +577,8 @@ let boot_lite ~fs state ivory_path = let rec dump_noun depth max_depth n = if depth >= max_depth then "..." else match n with - | Noun.Atom z -> Z.to_string z - | Noun.Cell (h, t) -> + | Noun.Atom { z; _ } -> Z.to_string z + | Noun.Cell { h; t; _ } -> "[" ^ dump_noun (depth+1) max_depth h ^ " " ^ dump_noun (depth+1) max_depth t ^ "]" in @@ -664,8 +665,8 @@ let boot_solid ~fs state ivory_path solid_path = (* Count helper *) let rec count_list noun = match noun with - | Noun.Atom z when Z.equal z Z.zero -> 0 - | Noun.Cell (_, rest) -> 1 + count_list rest + | Noun.Atom { z; _ } when Z.equal z Z.zero -> 0 + | Noun.Cell { t = rest; _ } -> 1 + count_list rest | _ -> 0 in @@ -684,9 +685,9 @@ let boot_solid ~fs state ivory_path solid_path = Printf.printf " USE list details:\n%!"; let rec walk_list noun i = match noun with - | Noun.Atom z when Z.equal z Z.zero -> + | Noun.Atom { z; _ } when Z.equal z Z.zero -> Printf.printf " [%d] = ~ (list terminator)\n%!" i - | Noun.Cell (head, tail) -> + | Noun.Cell { h = head; t = tail; _ } -> Printf.printf " [%d] = %s\n%!" i (if Noun.is_cell head then "CELL" else "ATOM"); if i < 5 then walk_list tail (i + 1) @@ -702,14 +703,14 @@ let boot_solid ~fs state ivory_path solid_path = (* Debug: Check what's in first BOT event *) Printf.printf " [Debug] Checking first BOT event structure:\n%!"; (match bot with - | Noun.Cell (first_bot, _) -> + | Noun.Cell { h = first_bot; _ } -> Printf.printf " first_bot is %s\n%!" (if Noun.is_cell first_bot then "cell" else "atom"); (match first_bot with - | Noun.Cell (h, _) -> + | Noun.Cell { h; _ } -> Printf.printf " first_bot head is %s\n%!" (if Noun.is_atom h then - let z = match h with Noun.Atom z -> z | _ -> Z.zero in + let z = match h with Noun.Atom { z; _ } -> z | _ -> Z.zero in "atom(" ^ Z.to_string z ^ ")" else "cell") | _ -> ()) @@ -727,8 +728,8 @@ let boot_solid ~fs state ivory_path solid_path = (* Helper to prepend to a list *) let rec weld l1 l2 = match l1 with - | Noun.Atom z when Z.equal z Z.zero -> l2 - | Noun.Cell (h, t) -> Noun.Cell (h, weld t l2) + | Noun.Atom { z; _ } when Z.equal z Z.zero -> l2 + | Noun.Cell { h; t; _ } -> Noun.cell h (weld t l2) | _ -> l2 in diff --git a/ocaml/lib/effects.ml b/ocaml/lib/effects.ml index f0d9955..84e7602 100644 --- a/ocaml/lib/effects.ml +++ b/ocaml/lib/effects.ml @@ -77,10 +77,10 @@ let make_ovum ~wire ~card = { wire; card } (* Create a timer ovum (from behn) *) let timer_ovum ~id ~fire_time = { - wire = Noun.Atom (Z.of_int64 id); + wire = Noun.Atom { z = Z.of_int64 id; mug = 0l }; card = Noun.cell (Noun.atom 0) (* behn tag - simplified *) - (Noun.Atom (Z.of_float (fire_time *. 1000000.0))); (* microseconds *) + (Noun.Atom { z = Z.of_float (fire_time *. 1000000.0); mug = 0l }); (* microseconds *) } (* Create a log ovum *) diff --git a/ocaml/lib/nock.ml b/ocaml/lib/nock.ml index ac6a2ad..2d106b6 100644 --- a/ocaml/lib/nock.ml +++ b/ocaml/lib/nock.ml @@ -73,8 +73,8 @@ let rec nock_on bus fol = (* Log entry - shows opcode, depth, subject type, and mug at shallow depths *) if should_log then begin let opcode_str = match fol with - | Cell (Atom op, _) when Z.fits_int op -> opcode_name (Z.to_int op) - | Cell (Cell _, _) -> "CELL(dist)" + | Cell { h = Atom { z = op; _ }; _ } when Z.fits_int op -> opcode_name (Z.to_int op) + | Cell { h = Cell _; _ } -> "CELL(dist)" | _ -> "?" in (* Only compute mugs at shallow depths to avoid performance penalty *) @@ -97,13 +97,13 @@ let rec nock_on bus fol = try let result = match fol with - | Cell (hib, gal) when is_cell hib -> + | Cell { h = hib; t = gal; _ } when is_cell hib -> (* Distribution: [a b] -> compute both sides and cons *) let poz = nock_on bus hib in let riv = nock_on bus gal in cell poz riv - | Cell (Atom op, gal) -> + | Cell { h = Atom { z = op; _ }; t = gal; _ } -> (* Check if opcode fits in int *) if Z.compare op (Z.of_int max_int) > 0 then raise Exit; let opcode = Z.to_int op in @@ -112,7 +112,7 @@ let rec nock_on bus fol = | 0 -> (* Nock 0: /[axis subject] - slot/fragment lookup *) if not (is_atom gal) then raise Exit - else slot (match gal with Atom n -> n | _ -> raise Exit) bus + else slot (match gal with Atom { z = n; _ } -> n | _ -> raise Exit) bus | 1 -> (* Nock 1: constant - return gal as-is *) @@ -156,8 +156,8 @@ let rec nock_on bus fol = let d_gal = tail cd_gal in let tys = nock_on bus b_gal in let nex = match tys with - | Atom n when Z.equal n Z.zero -> c_gal - | Atom n when Z.equal n Z.one -> d_gal + | Atom { z = n; _ } when Z.equal n Z.zero -> c_gal + | Atom { z = n; _ } when Z.equal n Z.one -> d_gal | _ -> raise Exit in nock_on bus nex @@ -186,7 +186,7 @@ let rec nock_on bus fol = let c_gal = tail gal in if not (is_atom b_gal) then raise Exit; let seb = nock_on bus c_gal in - let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in + let nex = slot (match b_gal with Atom { z; _ } -> z | _ -> raise Exit) seb in nock_on seb nex | 10 -> diff --git a/ocaml/lib/nock_alt.ml b/ocaml/lib/nock_alt.ml index 037c0d0..7b38387 100644 --- a/ocaml/lib/nock_alt.ml +++ b/ocaml/lib/nock_alt.ml @@ -27,18 +27,18 @@ open Noun *) let rec nock_on bus fol = match fol with - | Cell (hib, gal) when is_cell hib -> + | Cell { h = hib; t = gal; _ } when is_cell hib -> (* [a b] -> compute both sides and cons *) let poz = nock_on bus hib in let riv = nock_on bus gal in cell poz riv - | Cell (Atom op, gal) -> + | Cell { h = Atom { z = op; _ }; t = gal; _ } -> (match Z.to_int op with | 0 -> (* /[axis subject] - slot/fragment lookup *) if not (is_atom gal) then raise Exit - else slot (match gal with Atom n -> n | _ -> raise Exit) bus + else slot (match gal with Atom { z = n; _ } -> n | _ -> raise Exit) bus | 1 -> (* =[constant subject] - return constant *) @@ -82,8 +82,8 @@ let rec nock_on bus fol = let tys = nock_on bus b_gal in let nex = match tys with - | Atom n when Z.equal n Z.zero -> c_gal - | Atom n when Z.equal n Z.one -> d_gal + | Atom { z = n; _ } when Z.equal n Z.zero -> c_gal + | Atom { z = n; _ } when Z.equal n Z.one -> d_gal | _ -> raise Exit in nock_on bus nex @@ -113,7 +113,7 @@ let rec nock_on bus fol = if not (is_atom b_gal) then raise Exit; let seb = nock_on bus c_gal in - let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in + let nex = slot (match b_gal with Atom { z = n; _ } -> n | _ -> raise Exit) seb in nock_on seb nex | 10 -> diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml index b87aff5..0a666a4 100644 --- a/ocaml/lib/noun.ml +++ b/ocaml/lib/noun.ml @@ -1,18 +1,31 @@ (** Noun type and basic operations *) +(** Atom record with inline mug cache *) +type atom_record = { + z: Z.t; (** The integer value *) + mutable mug: int32; (** Cached 31-bit hash (0l = not computed yet) *) +} + +(** Cell record with inline mug cache *) +type cell_record = { + h: noun; (** Head *) + t: noun; (** Tail *) + mutable mug: int32; (** Cached 31-bit hash (0l = not computed yet) *) +} + (** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *) -type noun = - | Atom of Z.t (** Arbitrary-precision integer using Zarith *) - | Cell of noun * noun (** Pair of nouns *) +and noun = + | Atom of atom_record (** Arbitrary-precision integer with mug cache *) + | Cell of cell_record (** Pair of nouns with mug cache *) (** Exception raised on nock evaluation errors *) exception Exit (** Create an atom from an int *) -let atom n = Atom (Z.of_int n) +let atom n = Atom { z = Z.of_int n; mug = 0l } (** Create a cell *) -let cell a b = Cell (a, b) +let cell a b = Cell { h = a; t = b; mug = 0l } (** Test if a noun is a cell *) let is_cell = function @@ -26,12 +39,12 @@ let is_atom = function (** Get head of a cell *) let head = function - | Cell (h, _) -> h + | Cell { h; _ } -> h | Atom _ -> raise Exit (** Get tail of a cell *) let tail = function - | Cell (_, t) -> t + | Cell { t; _ } -> t | Atom _ -> raise Exit (** Fragment/axis lookup: slot(n, noun) @@ -62,14 +75,12 @@ let rec slot n noun = let sub = slot parent noun in let result = if bit then tail sub else head sub in if debug then begin + let result_str = + if is_cell result then "cell" + else "atom" + in Printf.eprintf "[SLOT-DEBUG] slot(%s): taking %s, result is %s\n%!" - (Z.to_string n) (if bit then "tail" else "head") - (if is_cell result then - let h = head result in - if is_atom h then - (match h with Atom z -> "cell[" ^ Z.to_string z ^ " ...]" | _ -> "cell[? ...]") - else "cell[cell ...]" - else "atom") + (Z.to_string n) (if bit then "tail" else "head") result_str end; result end @@ -77,72 +88,53 @@ let rec slot n noun = (** Equality test for nouns *) let rec equal a b = match a, b with - | Atom x, Atom y -> Z.equal x y - | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt + | Atom { z = x; _ }, Atom { z = y; _ } -> Z.equal x y + | Cell { h = ah; t = at; _ }, Cell { h = bh; t = bt; _ } -> + equal ah bh && equal at bt | _, _ -> false (** Increment an atom *) let inc = function - | Atom n -> Atom (Z.succ n) + | Atom { z; _ } -> Atom { z = Z.succ z; mug = 0l } | Cell _ -> raise Exit -(** Mug cache using ephemeron (weak key-value table) - - This implements mug caching like C's u3 system: - - Uses physical equality (==) for lookups - - Weak keys allow GC to collect unused nouns - - Values (mugs) are int32, no GC needed - - Dramatically improves performance for repeated mug calls -*) -module MugCache = struct - (* Ephemeron table: weak keys, strong values *) - module E = Ephemeron.K1.Make(struct - type t = noun - let equal = (==) (* Physical equality - same noun object *) - let hash = Hashtbl.hash (* OCaml's built-in structural hash *) - end) - - let cache : int32 E.t = E.create 10000 (* Initial size *) - let hit_count = ref 0 - let miss_count = ref 0 - - let find noun = - match E.find_opt cache noun with - | Some mug -> - hit_count := !hit_count + 1; - Some mug - | None -> - miss_count := !miss_count + 1; - None - - let add noun mug = E.add cache noun mug - - let stats () = - let total = !hit_count + !miss_count in - if total > 0 then - Printf.eprintf "[MugCache] hits=%d misses=%d (%.1f%% hit rate)\n%!" - !hit_count !miss_count - (100.0 *. float_of_int !hit_count /. float_of_int total) -end - -(** Compute mug (31-bit hash) of a noun with caching +(** Compute mug (31-bit hash) of a noun with inline caching This implements Urbit's mug hash function using FNV-1a. - Mugs are cached to avoid recomputation (critical for performance). + Mugs are stored inline in the noun structure (like C's u3 system). + + The mug field is 0l when not yet computed, and computed lazily on first access. + This matches C's approach where u3r_mug() is just a memory read after first computation. For atoms: hash the bytes of the integer representation For cells: mix the mugs of head and tail *) let rec mug noun = - (* Check cache first (MugCache.find updates hit/miss counts) *) - match MugCache.find noun with - | Some cached_mug -> cached_mug - | None -> - let computed_mug = compute_mug_uncached noun in - MugCache.add noun computed_mug; - computed_mug - -and compute_mug_uncached noun = + match noun with + | Atom { z; mug = cached_mug } -> + if cached_mug <> 0l then cached_mug + else begin + let computed = compute_mug_atom z in + (* Update inline cache *) + (match noun with + | Atom r -> r.mug <- computed + | _ -> ()); + computed + end + | Cell { h; t; mug = cached_mug } -> + if cached_mug <> 0l then cached_mug + else begin + let h_mug = mug h in + let t_mug = mug t in + let computed = mix_mugs h_mug t_mug in + (* Update inline cache *) + (match noun with + | Cell r -> r.mug <- computed + | _ -> ()); + computed + end + +and compute_mug_atom z = (* FNV-1a constants - using hex to avoid signed int32 overflow *) let fnv_prime = 16777619l in let fnv_basis = 0x811c9dc5l in (* 2166136261 in decimal *) @@ -150,39 +142,26 @@ and compute_mug_uncached noun = (* Mask to 31 bits (Urbit uses 31-bit mugs) *) let mask31 x = Int32.logand x 0x7fffffffl in - (* Hash bytes using FNV-1a *) - let hash_bytes bytes = - let len = Bytes.length bytes in - let rec loop i hash = - if i >= len then hash - else - let byte = Int32.of_int (Bytes.get_uint8 bytes i) in - let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in - loop (i + 1) hash' - in - mask31 (loop 0 fnv_basis) + (* Convert atom to bytes and hash using FNV-1a *) + let bytes = Z.to_bits z in + let len = String.length bytes in + let rec loop i hash = + if i >= len then hash + else + let byte = Int32.of_int (Char.code bytes.[i]) in + let hash' = Int32.mul (Int32.logxor hash byte) fnv_prime in + loop (i + 1) hash' in + mask31 (loop 0 fnv_basis) +and mix_mugs a_mug b_mug = (* Mix two mugs together (for cells) *) - let mix_mugs a_mug b_mug = - (* Mix by XOR and multiply, then mask *) - let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in - mask31 mixed - in - - match noun with - | Atom z -> - (* Convert atom to bytes and hash *) - let bytes = Z.to_bits z in - hash_bytes (Bytes.of_string bytes) - - | Cell (h, t) -> - (* Mix the mugs of head and tail (recursively cached) *) - let h_mug = mug h in - let t_mug = mug t in - mix_mugs h_mug t_mug + let fnv_prime = 16777619l in + let mask31 x = Int32.logand x 0x7fffffffl in + let mixed = Int32.mul (Int32.logxor a_mug b_mug) fnv_prime in + mask31 mixed (** Pretty-print a noun *) let rec pp_noun fmt = function - | Atom n -> Format.fprintf fmt "%s" (Z.to_string n) - | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b + | Atom { z; _ } -> Format.fprintf fmt "%s" (Z.to_string z) + | Cell { h; t; _ } -> Format.fprintf fmt "[%a %a]" pp_noun h pp_noun t diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml index ac97421..457385e 100644 --- a/ocaml/lib/serial.ml +++ b/ocaml/lib/serial.ml @@ -92,7 +92,7 @@ let jam noun = let rec jam_noun n = match n with - | Atom a -> + | Atom { z = a; _ } -> (* Check if we've seen this atom before *) begin match Hashtbl.find_opt positions n with | Some pos -> @@ -117,7 +117,7 @@ let jam noun = mat_encode w a end - | Cell (head, tail) -> + | Cell { h = head; t = tail; _ } -> (* Check for backref *) begin match Hashtbl.find_opt positions n with | Some pos -> @@ -246,7 +246,7 @@ let cue ?progress ?(progress_interval = 200_000) ?inspect bytes = let cell_pos = pos_arr.(idx) in head_arr.(idx) <- None; stack_size := idx; - let cell = Cell (head, noun) in + let cell = Cell { h = head; t = noun; mug = 0l } in IntTable.replace backref_table cell_pos cell; emit cell end @@ -262,7 +262,7 @@ let cue ?progress ?(progress_interval = 200_000) ?inspect bytes = inspect_event (Cue_atom_begin { position = pos; value_bits = bits }) in let (value, total_bits, value_bits) = mat_decode ~on_value_bits r in - let atom = Atom value in + let atom = Atom { z = value; mug = 0l } in IntTable.replace backref_table pos atom; inspect_event (Cue_atom_end { position = pos; total_bits; value_bits }); emit atom diff --git a/vere/C_INSIGHTS.md b/vere/C_INSIGHTS.md index 016ee83..26d1e3b 100644 --- a/vere/C_INSIGHTS.md +++ b/vere/C_INSIGHTS.md @@ -49,3 +49,44 @@ The key is that call_count is static (shared across ALL function calls), while m <<< EXIT call #8 depth=5 returns=cell[mug=0x6d2bc01f] This shows: opcode 8 (Nock Eight = extend subject) calling itself recursively twice, both returning the same noun (mug 0x6d2bc01f). + + +# on vere mugs +CLAUDE: + +how can C output the mugs *so fast* even for huge nouns like the huge pill. You say it's caching them but it still has to compute them at first + +● Excellent question! The answer is that C doesn't compute mugs on-demand - mugs are computed at allocation time and stored inline in the noun structure itself! + + In C's u3 system, every noun (atom or cell) has a mug field built into its memory layout. When you allocate a noun with u3i_* or u3nc(), the mug is computed immediately and stored. So u3r_mug() is just a + memory read - instant! + + Here's the key difference: + + C (vere/pkg/noun/allocate.c): + // Nouns store mugs inline: + typedef struct { + c3_w mug_w; // ← Mug stored here! + c3_w len_w; + c3_w buf_w[]; + } u3a_atom; + + // u3r_mug() just reads the field: + c3_w u3r_mug(u3_noun veb) { + return u3a_to_pug(u3a_to_ptr(veb))->mug_w; // Just a memory read! + } + + OCaml (our current approach): + type noun = + | Atom of Z.t (* No mug field! *) + | Cell of noun * noun (* No mug field! *) + + (* We compute mugs on-demand with caching *) + let mug noun = (* ... expensive recursive computation ... *) + + To match C's performance, we'd need to change OCaml's noun type to: + type noun = + | Atom of { z: Z.t; mutable mug: int32 option } + | Cell of { h: noun; t: noun; mutable mug: int32 option } + + This would let us compute once and store inline like C. Should we implement this now, or continue with the current cache approach? diff --git a/vere/pkg/vere/ivory_boot_test.c b/vere/pkg/vere/ivory_boot_test.c index 0bd968b..cb531f9 100644 --- a/vere/pkg/vere/ivory_boot_test.c +++ b/vere/pkg/vere/ivory_boot_test.c @@ -21,14 +21,30 @@ _setup(void) // Don't free ivory_jammed - it's managed by u3m_file u3l_log("ivory_pil is_atom: %u", u3a_is_atom(pil)); + u3l_log("ivory_pil mug: 0x%x", u3r_mug(pil)); - // Boot with ivory pill + // Extract the Arvo core (tail of [%ivory core]) + u3_noun arvo_core = u3t(pil); + u3l_log("arvo_core mug: 0x%x", u3r_mug(arvo_core)); + + // Boot with ivory pill - this will call u3v_life() internally u3l_log("Booting with ivory.pill from OCaml..."); if ( c3n == u3v_boot_lite(pil) ) { printf("*** fail: ivory boot failed\n"); exit(1); } u3l_log("✓ Ivory boot completed!"); + + // Log kernel mugs after boot (u3A->roc is the kernel) + u3l_log("Kernel mug: 0x%x", u3r_mug(u3A->roc)); + + // Log mugs of key slots for verification + u3_noun slot2 = u3r_at(2, u3A->roc); + u3_noun slot3 = u3r_at(3, u3A->roc); + u3_noun slot23 = u3r_at(23, u3A->roc); + u3l_log("Kernel slot 2 (battery) mug: 0x%x", u3r_mug(slot2)); + u3l_log("Kernel slot 3 (payload) mug: 0x%x", u3r_mug(slot3)); + u3l_log("Kernel slot 23 (poke) mug: 0x%x", u3r_mug(slot23)); } /* _test_lily(): test small noun parsing. -- cgit v1.2.3