summaryrefslogtreecommitdiff
path: root/ocaml/serial.ml
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-10-05 22:57:55 +0700
committerpolwex <polwex@sortug.com>2025-10-05 22:57:55 +0700
commitc4b71435d9afdb67450f320f54fb7aa99dcae85e (patch)
treea08c4c2f7965a95fcfe6dda09629d3f103d25a0b /ocaml/serial.ml
parentfcedfddf00b3f994e4f4e40332ac7fc192c63244 (diff)
fixed jamcue
Diffstat (limited to 'ocaml/serial.ml')
-rw-r--r--ocaml/serial.ml191
1 files changed, 0 insertions, 191 deletions
diff --git a/ocaml/serial.ml b/ocaml/serial.ml
deleted file mode 100644
index 039cd2f..0000000
--- a/ocaml/serial.ml
+++ /dev/null
@@ -1,191 +0,0 @@
-(** Jam/cue serialization for nouns
-
- Based on the Vere implementation in pkg/noun/serial.c
-
- Jam encoding:
- - Atoms: tag bit 0, then mat-encoded value
- - Cells: tag bits 01, then recursively encode head and tail
- - Backrefs: tag bits 11, then mat-encoded position
-
- Mat encoding (length-prefixed):
- - For 0: just bit 1
- - For n > 0:
- - Let a = bit-width of n
- - Let b = bit-width of a
- - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits]
-*)
-
-open Noun
-open Bitstream
-
-(** Mat-encode a number into the bitstream
-
- Mat encoding is a variable-length integer encoding:
- - 0 is encoded as a single 1 bit
- - For n > 0:
- - a = number of bits in n (met 0 n)
- - b = number of bits needed to represent a
- - Write b 1-bits, then a 0-bit
- - Write a-1 in b-1 bits
- - Write n in a bits
-*)
-let rec mat_encode w n =
- if Z.equal n Z.zero then
- write_bit w true
- else begin
- let a = Z.numbits n in (* bit-width of n *)
- let b = Z.numbits (Z.of_int a) in (* bit-width of a *)
-
- (* Write b 1-bits followed by a 0-bit *)
- for _i = 1 to b do
- write_bit w true
- done;
- write_bit w false;
-
- (* Write a-1 in b-1 bits *)
- write_bits w (Z.of_int (a - 1)) (b - 1);
-
- (* Write n in a bits *)
- write_bits w n a
- end
-
-(** Mat-decode from bitstream, returns (value, bits_read) *)
-let rec mat_decode r =
- let start_pos = reader_pos r in
-
- if not (read_bit r) then
- (Z.zero, reader_pos r - start_pos)
- else begin
- (* Count leading 1 bits *)
- let b = ref 1 in
- while read_bit r do
- b := !b + 1
- done;
-
- let b = !b in
-
- if b = 1 then
- (* Special case: just "10" means 1 *)
- (Z.one, reader_pos r - start_pos)
- else begin
- (* Read a-1 in b-1 bits *)
- let a_minus_1 = read_bits r (b - 1) in
- let a = Z.to_int (Z.add a_minus_1 Z.one) in
-
- (* Read n in a bits *)
- let n = read_bits r a in
- (n, reader_pos r - start_pos)
- end
- end
-
-(** Jam: serialize a noun to bytes
-
- Uses a hash table to track positions for backreferences.
- Returns the serialized bytes.
-*)
-let jam noun =
- let w = writer_create () in
- let positions = Hashtbl.create 256 in (* noun -> bit position *)
-
- let rec jam_noun n =
- match n with
- | Atom a ->
- (* Check if we've seen this atom before *)
- begin match Hashtbl.find_opt positions n with
- | Some pos ->
- (* Backref might be smaller than re-encoding *)
- let atom_size = 1 + (Z.numbits a) in (* rough estimate *)
- let backref_size = 2 + (Z.numbits (Z.of_int pos)) in
-
- if backref_size < atom_size then begin
- (* Encode backref: tag bits 11 *)
- write_bit w true;
- write_bit w true;
- mat_encode w (Z.of_int pos)
- end else begin
- (* Encode atom *)
- write_bit w false;
- mat_encode w a
- end
- | None ->
- (* Record position and encode atom *)
- Hashtbl.add positions n w.bit_pos;
- write_bit w false;
- mat_encode w a
- end
-
- | Cell (head, tail) ->
- (* Check for backref *)
- begin match Hashtbl.find_opt positions n with
- | Some pos ->
- (* Encode backref: tag bits 11 *)
- write_bit w true;
- write_bit w true;
- mat_encode w (Z.of_int pos)
- | None ->
- (* Record position and encode cell *)
- Hashtbl.add positions n w.bit_pos;
- (* Tag bits 01 for cell *)
- write_bit w true;
- write_bit w false;
- (* Recursively encode head and tail *)
- jam_noun head;
- jam_noun tail
- end
- in
-
- jam_noun noun;
- writer_to_bytes w
-
-(** Cue: deserialize bytes to a noun
-
- Uses a hash table to store nouns by bit position for backreferences.
-*)
-let cue bytes =
- let r = reader_create bytes in
- let backref_table = Hashtbl.create 256 in (* bit position -> noun *)
-
- let rec cue_noun () =
- let pos = reader_pos r in
-
- (* Read tag bit *)
- let tag0 = read_bit r in
-
- if not tag0 then begin
- (* Atom: tag bit 0 *)
- let (value, _width) = mat_decode r in
- let result = Atom value in
- Hashtbl.add backref_table pos result;
- result
- end else begin
- (* Read second tag bit *)
- let tag1 = read_bit r in
-
- if tag1 then begin
- (* Backref: tag bits 11 *)
- let (ref_pos, _width) = mat_decode r in
- let ref_pos = Z.to_int ref_pos in
- match Hashtbl.find_opt backref_table ref_pos with
- | Some noun -> noun
- | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos))
- end else begin
- (* Cell: tag bits 01 *)
- let head = cue_noun () in
- let tail = cue_noun () in
- let result = Cell (head, tail) in
- Hashtbl.add backref_table pos result;
- result
- end
- end
- in
-
- cue_noun ()
-
-(** Convert bytes to a hex string for debugging *)
-let bytes_to_hex bytes =
- let len = Bytes.length bytes in
- let buf = Buffer.create (len * 2) in
- for i = 0 to len - 1 do
- Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i))
- done;
- Buffer.contents buf