summaryrefslogtreecommitdiff
path: root/ocaml/lib/serial.ml
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-10-20 13:13:39 +0700
committerpolwex <polwex@sortug.com>2025-10-20 13:13:39 +0700
commitd21900836f89b2bf9cd55ff1708a4619c8b89656 (patch)
treebb3a5842ae408ffa465814c6bbf27a5002866252 /ocaml/lib/serial.ml
neoinityes
Diffstat (limited to 'ocaml/lib/serial.ml')
-rw-r--r--ocaml/lib/serial.ml225
1 files changed, 225 insertions, 0 deletions
diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml
new file mode 100644
index 0000000..47e04d1
--- /dev/null
+++ b/ocaml/lib/serial.ml
@@ -0,0 +1,225 @@
+open Noun
+open Bitstream
+
+(* Jam hashtable: use physical equality first (fast path),
+ then fall back to structural equality for correctness.
+ Hash based on pointer value for O(1) performance. *)
+module NounTbl = Hashtbl.Make (struct
+ type t = noun
+ let equal a b = (a == b) || Noun.equal a b
+ let hash noun = Hashtbl.hash (Obj.magic noun : int)
+end)
+
+let mat_encode writer n =
+ if Z.equal n Z.zero then
+ write_bit writer true
+ else begin
+ let a = Z.numbits n in
+ let b = Z.numbits (Z.of_int a) in
+ for _ = 1 to b do
+ write_bit writer false
+ done;
+ write_bit writer true;
+ if b > 1 then
+ write_bits writer (Z.of_int a) (b - 1);
+ write_bits writer n a
+ end
+
+let mat_decode ?(verbose=false) reader =
+ let zeros = count_zero_bits_until_one reader in
+ if zeros = 0 then Z.zero
+ else
+ let len_bits =
+ if zeros = 1 then Z.zero else read_bits reader (zeros - 1)
+ in
+ let width_z = Z.add (Z.shift_left Z.one (zeros - 1)) len_bits in
+ let width =
+ try
+ let w = Z.to_int width_z in
+ if verbose && w > 1000000 then
+ Printf.eprintf "\nmat_decode: reading large atom with %d bits\n%!" w;
+ w
+ with Z.Overflow ->
+ Printf.eprintf "\nmat_decode: width overflow! zeros=%d\n%!" zeros;
+ raise Exit
+ in
+ read_bits reader width
+
+let jam ?(verbose=false) noun =
+ let writer = writer_create () in
+ (* Use polymorphic Hashtbl with custom hash/equal like ocaml-old *)
+ let positions = Hashtbl.create 1024 in
+ let counter = ref 0 in
+
+ let rec encode noun =
+ incr counter;
+ if verbose && !counter mod 10000 = 0 then
+ Printf.eprintf "jam: processed %d nodes, table size %d, bits written %d\r%!"
+ !counter (Hashtbl.length positions) (writer_pos writer);
+
+ match Hashtbl.find_opt positions noun with
+ | Some bit_pos ->
+ begin match noun with
+ | Atom z ->
+ (* if atom is smaller than backref, encode atom directly *)
+ let atom_bits = Z.numbits z in
+ let backref_bits = Z.numbits (Z.of_int bit_pos) in
+ if atom_bits <= backref_bits then begin
+ write_bit writer false;
+ mat_encode writer z
+ end else begin
+ write_bit writer true;
+ write_bit writer true;
+ mat_encode writer (Z.of_int bit_pos)
+ end
+ | Cell _ ->
+ (* always use backref for cells *)
+ write_bit writer true;
+ write_bit writer true;
+ mat_encode writer (Z.of_int bit_pos)
+ end
+ | None ->
+ let current_pos = writer_pos writer in
+ Hashtbl.add positions noun current_pos;
+ begin match noun with
+ | Atom z ->
+ write_bit writer false;
+ mat_encode writer z
+ | Cell (h, t) ->
+ write_bit writer true;
+ write_bit writer false;
+ encode h;
+ encode t
+ end
+ in
+
+ if verbose then Printf.eprintf "jam: starting...\n%!";
+ encode noun;
+ if verbose then Printf.eprintf "\njam: done! processed %d nodes\n%!" !counter;
+ writer_to_bytes writer
+
+module IntTbl = Hashtbl.Make (struct
+ type t = int
+ let equal = Int.equal
+ let hash = Hashtbl.hash
+end)
+
+let cue ?(verbose=false) bytes =
+ let reader = reader_create bytes in
+
+ (* Pre-size the backref table based on payload size to minimize rehashing *)
+ let estimated_nouns =
+ let approx = Bytes.length bytes / 8 in
+ if approx < 1024 then 1024 else approx
+ in
+ let backrefs = IntTbl.create estimated_nouns in
+
+ (* Manual stack to eliminate recursion and track unfinished cells *)
+ let stack_pos = ref (Array.make 1024 0) in
+ let stack_head = ref (Array.make 1024 None) in
+ let stack_size = ref 0 in
+
+ (* Progress tracking *)
+ let nouns_processed = ref 0 in
+ let next_report = ref 10000 in
+
+ let grow_stack () =
+ let old_pos = !stack_pos in
+ let old_head = !stack_head in
+ let old_len = Array.length old_pos in
+ let new_len = old_len * 2 in
+ let new_pos = Array.make new_len 0 in
+ let new_head = Array.make new_len None in
+ Array.blit old_pos 0 new_pos 0 old_len;
+ Array.blit old_head 0 new_head 0 old_len;
+ stack_pos := new_pos;
+ stack_head := new_head
+ in
+
+ let push_frame pos =
+ if !stack_size = Array.length !stack_pos then grow_stack ();
+ let idx = !stack_size in
+ let pos_arr = !stack_pos in
+ let head_arr = !stack_head in
+ pos_arr.(idx) <- pos;
+ head_arr.(idx) <- None;
+ stack_size := idx + 1
+ in
+
+ let result = ref None in
+
+ let rec emit noun =
+ incr nouns_processed;
+ if verbose && !nouns_processed >= !next_report then begin
+ Printf.eprintf "cue: processed %d nouns, bits read %d, stack depth %d\r%!"
+ !nouns_processed (reader_pos reader) !stack_size;
+ next_report := !nouns_processed + 10000
+ end;
+
+ if !stack_size = 0 then
+ result := Some noun
+ else begin
+ let idx = !stack_size - 1 in
+ let head_arr = !stack_head in
+ match head_arr.(idx) with
+ | None ->
+ head_arr.(idx) <- Some noun
+ | Some head ->
+ let pos_arr = !stack_pos in
+ let cell_pos = pos_arr.(idx) in
+ head_arr.(idx) <- None;
+ stack_size := idx;
+ let cell = cell head noun in
+ IntTbl.replace backrefs cell_pos cell;
+ emit cell
+ end
+ in
+
+ if verbose then Printf.eprintf "cue: starting, input size %d bytes\n%!" (Bytes.length bytes);
+
+ let last_progress = ref 0 in
+ let iterations = ref 0 in
+
+ while Option.is_none !result do
+ incr iterations;
+ let pos = reader_pos reader in
+
+ (* Check if we're stuck *)
+ if verbose && !iterations mod 100000 = 0 then begin
+ if pos = !last_progress then
+ Printf.eprintf "\nWARNING: no progress in last 100k iterations at bit %d\n%!" pos
+ else
+ last_progress := pos
+ end;
+
+ let tag0 = read_bit reader in
+
+ if not tag0 then begin
+ (* Atom: tag bit 0 *)
+ let value = mat_decode ~verbose reader in
+ let atom = atom value in
+ IntTbl.replace backrefs pos atom;
+ emit atom
+ end else begin
+ let tag1 = read_bit reader in
+ if tag1 then begin
+ (* Backref: tag bits 11 *)
+ let ref_pos = mat_decode ~verbose reader in
+ let ref_int =
+ if Z.fits_int ref_pos then Z.to_int ref_pos else raise Exit
+ in
+ match IntTbl.find_opt backrefs ref_int with
+ | Some noun -> emit noun
+ | None ->
+ Printf.eprintf "cue: invalid backref to position %d\n%!" ref_int;
+ raise Exit
+ end else begin
+ (* Cell: tag bits 10 - push frame and continue decoding head *)
+ push_frame pos
+ end
+ end
+ done;
+
+ if verbose then Printf.eprintf "\ncue: done! processed %d nouns\n%!" !nouns_processed;
+
+ Option.get !result