(** Jam/cue serialization for nouns Based on the Vere implementation in pkg/noun/serial.c Jam encoding: - Atoms: tag bit 0, then mat-encoded value - Cells: tag bits 01, then recursively encode head and tail - Backrefs: tag bits 11, then mat-encoded position Mat encoding (length-prefixed): - For 0: just bit 1 - For n > 0: - Let a = bit-width of n - Let b = bit-width of a - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits] *) open Noun open Bitstream (** Mat-encode a number into the bitstream Mat encoding is a variable-length integer encoding: - 0 is encoded as a single 1 bit - For n > 0: - a = number of bits in n (met 0 n) - b = number of bits needed to represent a - Write b 1-bits, then a 0-bit - Write a-1 in b-1 bits - Write n in a bits *) let rec mat_encode w n = if Z.equal n Z.zero then write_bit w true else begin let a = Z.numbits n in (* bit-width of n *) let b = Z.numbits (Z.of_int a) in (* bit-width of a *) (* Write b 1-bits followed by a 0-bit *) for _i = 1 to b do write_bit w true done; write_bit w false; (* Write a-1 in b-1 bits *) write_bits w (Z.of_int (a - 1)) (b - 1); (* Write n in a bits *) write_bits w n a end (** Mat-decode from bitstream, returns (value, bits_read) *) let rec mat_decode r = let start_pos = reader_pos r in if not (read_bit r) then (Z.zero, reader_pos r - start_pos) else begin (* Count leading 1 bits *) let b = ref 1 in while read_bit r do b := !b + 1 done; let b = !b in if b = 1 then (* Special case: just "10" means 1 *) (Z.one, reader_pos r - start_pos) else begin (* Read a-1 in b-1 bits *) let a_minus_1 = read_bits r (b - 1) in let a = Z.to_int (Z.add a_minus_1 Z.one) in (* Read n in a bits *) let n = read_bits r a in (n, reader_pos r - start_pos) end end (** Jam: serialize a noun to bytes Uses a hash table to track positions for backreferences. Returns the serialized bytes. *) let jam noun = let w = writer_create () in let positions = Hashtbl.create 256 in (* noun -> bit position *) let rec jam_noun n = match n with | Atom a -> (* Check if we've seen this atom before *) begin match Hashtbl.find_opt positions n with | Some pos -> (* Backref might be smaller than re-encoding *) let atom_size = 1 + (Z.numbits a) in (* rough estimate *) let backref_size = 2 + (Z.numbits (Z.of_int pos)) in if backref_size < atom_size then begin (* Encode backref: tag bits 11 *) write_bit w true; write_bit w true; mat_encode w (Z.of_int pos) end else begin (* Encode atom *) write_bit w false; mat_encode w a end | None -> (* Record position and encode atom *) Hashtbl.add positions n w.bit_pos; write_bit w false; mat_encode w a end | Cell (head, tail) -> (* Check for backref *) begin match Hashtbl.find_opt positions n with | Some pos -> (* Encode backref: tag bits 11 *) write_bit w true; write_bit w true; mat_encode w (Z.of_int pos) | None -> (* Record position and encode cell *) Hashtbl.add positions n w.bit_pos; (* Tag bits 01 for cell *) write_bit w true; write_bit w false; (* Recursively encode head and tail *) jam_noun head; jam_noun tail end in jam_noun noun; writer_to_bytes w (** Cue: deserialize bytes to a noun Uses a hash table to store nouns by bit position for backreferences. *) let cue bytes = let r = reader_create bytes in let backref_table = Hashtbl.create 256 in (* bit position -> noun *) let rec cue_noun () = let pos = reader_pos r in (* Read tag bit *) let tag0 = read_bit r in if not tag0 then begin (* Atom: tag bit 0 *) let (value, _width) = mat_decode r in let result = Atom value in Hashtbl.add backref_table pos result; result end else begin (* Read second tag bit *) let tag1 = read_bit r in if tag1 then begin (* Backref: tag bits 11 *) let (ref_pos, _width) = mat_decode r in let ref_pos = Z.to_int ref_pos in match Hashtbl.find_opt backref_table ref_pos with | Some noun -> noun | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos)) end else begin (* Cell: tag bits 01 *) let head = cue_noun () in let tail = cue_noun () in let result = Cell (head, tail) in Hashtbl.add backref_table pos result; result end end in cue_noun () (** Convert bytes to a hex string for debugging *) let bytes_to_hex bytes = let len = Bytes.length bytes in let buf = Buffer.create (len * 2) in for i = 0 to len - 1 do Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i)) done; Buffer.contents buf