diff options
author | polwex <polwex@sortug.com> | 2025-10-05 22:57:55 +0700 |
---|---|---|
committer | polwex <polwex@sortug.com> | 2025-10-05 22:57:55 +0700 |
commit | c4b71435d9afdb67450f320f54fb7aa99dcae85e (patch) | |
tree | a08c4c2f7965a95fcfe6dda09629d3f103d25a0b /ocaml/serial.ml | |
parent | fcedfddf00b3f994e4f4e40332ac7fc192c63244 (diff) |
fixed jamcue
Diffstat (limited to 'ocaml/serial.ml')
-rw-r--r-- | ocaml/serial.ml | 191 |
1 files changed, 0 insertions, 191 deletions
diff --git a/ocaml/serial.ml b/ocaml/serial.ml deleted file mode 100644 index 039cd2f..0000000 --- a/ocaml/serial.ml +++ /dev/null @@ -1,191 +0,0 @@ -(** Jam/cue serialization for nouns - - Based on the Vere implementation in pkg/noun/serial.c - - Jam encoding: - - Atoms: tag bit 0, then mat-encoded value - - Cells: tag bits 01, then recursively encode head and tail - - Backrefs: tag bits 11, then mat-encoded position - - Mat encoding (length-prefixed): - - For 0: just bit 1 - - For n > 0: - - Let a = bit-width of n - - Let b = bit-width of a - - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits] -*) - -open Noun -open Bitstream - -(** Mat-encode a number into the bitstream - - Mat encoding is a variable-length integer encoding: - - 0 is encoded as a single 1 bit - - For n > 0: - - a = number of bits in n (met 0 n) - - b = number of bits needed to represent a - - Write b 1-bits, then a 0-bit - - Write a-1 in b-1 bits - - Write n in a bits -*) -let rec mat_encode w n = - if Z.equal n Z.zero then - write_bit w true - else begin - let a = Z.numbits n in (* bit-width of n *) - let b = Z.numbits (Z.of_int a) in (* bit-width of a *) - - (* Write b 1-bits followed by a 0-bit *) - for _i = 1 to b do - write_bit w true - done; - write_bit w false; - - (* Write a-1 in b-1 bits *) - write_bits w (Z.of_int (a - 1)) (b - 1); - - (* Write n in a bits *) - write_bits w n a - end - -(** Mat-decode from bitstream, returns (value, bits_read) *) -let rec mat_decode r = - let start_pos = reader_pos r in - - if not (read_bit r) then - (Z.zero, reader_pos r - start_pos) - else begin - (* Count leading 1 bits *) - let b = ref 1 in - while read_bit r do - b := !b + 1 - done; - - let b = !b in - - if b = 1 then - (* Special case: just "10" means 1 *) - (Z.one, reader_pos r - start_pos) - else begin - (* Read a-1 in b-1 bits *) - let a_minus_1 = read_bits r (b - 1) in - let a = Z.to_int (Z.add a_minus_1 Z.one) in - - (* Read n in a bits *) - let n = read_bits r a in - (n, reader_pos r - start_pos) - end - end - -(** Jam: serialize a noun to bytes - - Uses a hash table to track positions for backreferences. - Returns the serialized bytes. -*) -let jam noun = - let w = writer_create () in - let positions = Hashtbl.create 256 in (* noun -> bit position *) - - let rec jam_noun n = - match n with - | Atom a -> - (* Check if we've seen this atom before *) - begin match Hashtbl.find_opt positions n with - | Some pos -> - (* Backref might be smaller than re-encoding *) - let atom_size = 1 + (Z.numbits a) in (* rough estimate *) - let backref_size = 2 + (Z.numbits (Z.of_int pos)) in - - if backref_size < atom_size then begin - (* Encode backref: tag bits 11 *) - write_bit w true; - write_bit w true; - mat_encode w (Z.of_int pos) - end else begin - (* Encode atom *) - write_bit w false; - mat_encode w a - end - | None -> - (* Record position and encode atom *) - Hashtbl.add positions n w.bit_pos; - write_bit w false; - mat_encode w a - end - - | Cell (head, tail) -> - (* Check for backref *) - begin match Hashtbl.find_opt positions n with - | Some pos -> - (* Encode backref: tag bits 11 *) - write_bit w true; - write_bit w true; - mat_encode w (Z.of_int pos) - | None -> - (* Record position and encode cell *) - Hashtbl.add positions n w.bit_pos; - (* Tag bits 01 for cell *) - write_bit w true; - write_bit w false; - (* Recursively encode head and tail *) - jam_noun head; - jam_noun tail - end - in - - jam_noun noun; - writer_to_bytes w - -(** Cue: deserialize bytes to a noun - - Uses a hash table to store nouns by bit position for backreferences. -*) -let cue bytes = - let r = reader_create bytes in - let backref_table = Hashtbl.create 256 in (* bit position -> noun *) - - let rec cue_noun () = - let pos = reader_pos r in - - (* Read tag bit *) - let tag0 = read_bit r in - - if not tag0 then begin - (* Atom: tag bit 0 *) - let (value, _width) = mat_decode r in - let result = Atom value in - Hashtbl.add backref_table pos result; - result - end else begin - (* Read second tag bit *) - let tag1 = read_bit r in - - if tag1 then begin - (* Backref: tag bits 11 *) - let (ref_pos, _width) = mat_decode r in - let ref_pos = Z.to_int ref_pos in - match Hashtbl.find_opt backref_table ref_pos with - | Some noun -> noun - | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos)) - end else begin - (* Cell: tag bits 01 *) - let head = cue_noun () in - let tail = cue_noun () in - let result = Cell (head, tail) in - Hashtbl.add backref_table pos result; - result - end - end - in - - cue_noun () - -(** Convert bytes to a hex string for debugging *) -let bytes_to_hex bytes = - let len = Bytes.length bytes in - let buf = Buffer.create (len * 2) in - for i = 0 to len - 1 do - Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i)) - done; - Buffer.contents buf |