summaryrefslogtreecommitdiff
path: root/ocaml/lib/noun.ml
blob: 382c79d184df749f02523bdb25516af12a00d2b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
(** Noun type and basic operations *)

(** Atom record with inline mug cache *)
type atom_record = {
  z: Z.t;              (** The integer value *)
  mutable mug: int32;  (** Cached 31-bit hash (0l = not computed yet) *)
}

(** Cell record with inline mug cache *)
type cell_record = {
  h: noun;             (** Head *)
  t: noun;             (** Tail *)
  mutable mug: int32;  (** Cached 31-bit hash (0l = not computed yet) *)
}

(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *)
and noun =
  | Atom of atom_record  (** Arbitrary-precision integer with mug cache *)
  | Cell of cell_record  (** Pair of nouns with mug cache *)

(** Exception raised on nock evaluation errors *)
exception Exit

(** Create an atom from an int *)
let atom n = Atom { z = Z.of_int n; mug = 0l }

(** Create a cell *)
let cell a b = Cell { h = a; t = b; mug = 0l }

(** Test if a noun is a cell *)
let is_cell = function
  | Cell _ -> true
  | Atom _ -> false

(** Test if a noun is an atom *)
let is_atom = function
  | Atom _ -> true
  | Cell _ -> false

(** Get head of a cell *)
let head = function
  | Cell { h; _ } -> h
  | Atom _ -> raise Exit

(** Get tail of a cell *)
let tail = function
  | Cell { t; _ } -> t
  | Atom _ -> raise Exit

(** Fragment/axis lookup: slot(n, noun)
    This implements the tree-addressing scheme:
    - 1 is the root
    - 2 is head, 3 is tail
    - For n > 1: if even, go left; if odd, go right
*)
let slot_debug = ref false

let rec slot n noun =
  let debug = !slot_debug && (Z.equal n (Z.of_int 2) || Z.equal n (Z.of_int 3)) in
  if debug then
    Printf.eprintf "[SLOT-DEBUG] slot(%s, noun)\n%!" (Z.to_string n);

  if Z.equal n Z.one then begin
    if debug then Printf.eprintf "[SLOT-DEBUG] slot(%s) = identity\n%!" (Z.to_string n);
    noun
  end
  else if Z.equal n Z.zero then
    raise Exit
  else begin
    let bit = Z.testbit n 0 in  (* Check if odd *)
    let parent = Z.shift_right n 1 in
    if debug then
      Printf.eprintf "[SLOT-DEBUG] slot(%s): bit=%b parent=%s\n%!"
        (Z.to_string n) bit (Z.to_string parent);
    let sub = slot parent noun in
    let result = if bit then tail sub else head sub in
    if debug then begin
      let result_str =
        if is_cell result then "cell"
        else "atom"
      in
      Printf.eprintf "[SLOT-DEBUG] slot(%s): taking %s, result is %s\n%!"
        (Z.to_string n) (if bit then "tail" else "head") result_str
    end;
    result
  end

(** Equality test for nouns *)
let rec equal a b =
  match a, b with
  | Atom { z = x; _ }, Atom { z = y; _ } -> Z.equal x y
  | Cell { h = ah; t = at; _ }, Cell { h = bh; t = bt; _ } ->
      equal ah bh && equal at bt
  | _, _ -> false

(** Increment an atom *)
let inc = function
  | Atom { z; _ } -> Atom { z = Z.succ z; mug = 0l }
  | Cell _ -> raise Exit

(** MurmurHash3 using external library *)

(* Use the murmur3 library's hash function with seed parameter *)
external murmur3_hash32_seed : string -> int32 -> int32 = "caml_murmur3_hash32_seed"

(* u3r_mug_bytes: matches C implementation *)
let mug_bytes_with_seed data seed =
  let rec try_hash seed attempts =
    if attempts >= 8 then 0x7fffl  (* Fallback after 8 attempts *)
    else begin
      let hash = murmur3_hash32_seed data seed in
      let mug = Int32.logxor (Int32.shift_right_logical hash 31)
                             (Int32.logand hash 0x7fffffffl) in
      if mug = 0l then
        try_hash (Int32.succ seed) (attempts + 1)
      else
        mug
    end
  in
  try_hash seed 0

(* c3_bits_word: count number of bits in a 32-bit word *)
let c3_bits_word n =
  let rec loop n bits =
    if n = 0l then bits
    else loop (Int32.shift_right_logical n 1) (bits + 1)
  in
  loop n 0

(* u3r_mug_both: mix two mugs for cells *)
let mug_both lef_mug rit_mug =
  (* Calculate length based on bit width of rit_mug, matching C's c3_bits_word *)
  let bits = c3_bits_word rit_mug in
  let len = 4 + ((bits + 7) / 8) in
  let buf = Bytes.create len in
  (* Always write lef_mug (4 bytes) *)
  Bytes.set buf 0 (Char.chr (Int32.to_int (Int32.logand lef_mug 0xffl)));
  Bytes.set buf 1 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical lef_mug 8) 0xffl)));
  Bytes.set buf 2 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical lef_mug 16) 0xffl)));
  Bytes.set buf 3 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical lef_mug 24) 0xffl)));
  (* Write rit_mug bytes only if len > 4 *)
  if len > 4 then Bytes.set buf 4 (Char.chr (Int32.to_int (Int32.logand rit_mug 0xffl)));
  if len > 5 then Bytes.set buf 5 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical rit_mug 8) 0xffl)));
  if len > 6 then Bytes.set buf 6 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical rit_mug 16) 0xffl)));
  if len > 7 then Bytes.set buf 7 (Char.chr (Int32.to_int (Int32.logand (Int32.shift_right_logical rit_mug 24) 0xffl)));
  mug_bytes_with_seed (Bytes.to_string buf) 0xdeadbeefl

(** Compute mug (31-bit hash) of a noun with inline caching

    This implements Urbit's mug hash function using MurmurHash3.
    Mugs are stored inline in the noun structure (like C's u3 system).

    The mug field is 0l when not yet computed, and computed lazily on first access.
    This matches C's approach where u3r_mug() is just a memory read after first computation.

    For atoms: hash the bytes of the integer representation
    For cells: mix the mugs of head and tail
*)
let rec mug noun =
  match noun with
  | Atom { z; mug = cached_mug } ->
      if cached_mug <> 0l then cached_mug
      else begin
        let computed = compute_mug_atom z in
        (* Update inline cache *)
        (match noun with
         | Atom r -> r.mug <- computed
         | _ -> ());
        computed
      end
  | Cell { h; t; mug = cached_mug } ->
      if cached_mug <> 0l then cached_mug
      else begin
        let h_mug = mug h in
        let t_mug = mug t in
        let computed = mug_both h_mug t_mug in
        (* Update inline cache *)
        (match noun with
         | Cell r -> r.mug <- computed
         | _ -> ());
        computed
      end

and compute_mug_atom z =
  (* Convert to bytes (little-endian) *)
  let bytes = Z.to_bits z in
  (* Strip trailing zeros like C's u3r_mug_words does *)
  let len = ref (String.length bytes) in
  while !len > 0 && bytes.[!len - 1] = '\x00' do
    decr len
  done;
  let stripped = String.sub bytes 0 !len in
  mug_bytes_with_seed stripped 0xcafebabel

(** Pretty-print a noun *)
let rec pp_noun fmt = function
  | Atom { z; _ } -> Format.fprintf fmt "%s" (Z.to_string z)
  | Cell { h; t; _ } -> Format.fprintf fmt "[%a %a]" pp_noun h pp_noun t