diff options
Diffstat (limited to 'ocaml/lib/serial.ml')
| -rw-r--r-- | ocaml/lib/serial.ml | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml new file mode 100644 index 0000000..47e04d1 --- /dev/null +++ b/ocaml/lib/serial.ml @@ -0,0 +1,225 @@ +open Noun +open Bitstream + +(* Jam hashtable: use physical equality first (fast path), + then fall back to structural equality for correctness. + Hash based on pointer value for O(1) performance. *) +module NounTbl = Hashtbl.Make (struct + type t = noun + let equal a b = (a == b) || Noun.equal a b + let hash noun = Hashtbl.hash (Obj.magic noun : int) +end) + +let mat_encode writer n = + if Z.equal n Z.zero then + write_bit writer true + else begin + let a = Z.numbits n in + let b = Z.numbits (Z.of_int a) in + for _ = 1 to b do + write_bit writer false + done; + write_bit writer true; + if b > 1 then + write_bits writer (Z.of_int a) (b - 1); + write_bits writer n a + end + +let mat_decode ?(verbose=false) reader = + let zeros = count_zero_bits_until_one reader in + if zeros = 0 then Z.zero + else + let len_bits = + if zeros = 1 then Z.zero else read_bits reader (zeros - 1) + in + let width_z = Z.add (Z.shift_left Z.one (zeros - 1)) len_bits in + let width = + try + let w = Z.to_int width_z in + if verbose && w > 1000000 then + Printf.eprintf "\nmat_decode: reading large atom with %d bits\n%!" w; + w + with Z.Overflow -> + Printf.eprintf "\nmat_decode: width overflow! zeros=%d\n%!" zeros; + raise Exit + in + read_bits reader width + +let jam ?(verbose=false) noun = + let writer = writer_create () in + (* Use polymorphic Hashtbl with custom hash/equal like ocaml-old *) + let positions = Hashtbl.create 1024 in + let counter = ref 0 in + + let rec encode noun = + incr counter; + if verbose && !counter mod 10000 = 0 then + Printf.eprintf "jam: processed %d nodes, table size %d, bits written %d\r%!" + !counter (Hashtbl.length positions) (writer_pos writer); + + match Hashtbl.find_opt positions noun with + | Some bit_pos -> + begin match noun with + | Atom z -> + (* if atom is smaller than backref, encode atom directly *) + let atom_bits = Z.numbits z in + let backref_bits = Z.numbits (Z.of_int bit_pos) in + if atom_bits <= backref_bits then begin + write_bit writer false; + mat_encode writer z + end else begin + write_bit writer true; + write_bit writer true; + mat_encode writer (Z.of_int bit_pos) + end + | Cell _ -> + (* always use backref for cells *) + write_bit writer true; + write_bit writer true; + mat_encode writer (Z.of_int bit_pos) + end + | None -> + let current_pos = writer_pos writer in + Hashtbl.add positions noun current_pos; + begin match noun with + | Atom z -> + write_bit writer false; + mat_encode writer z + | Cell (h, t) -> + write_bit writer true; + write_bit writer false; + encode h; + encode t + end + in + + if verbose then Printf.eprintf "jam: starting...\n%!"; + encode noun; + if verbose then Printf.eprintf "\njam: done! processed %d nodes\n%!" !counter; + writer_to_bytes writer + +module IntTbl = Hashtbl.Make (struct + type t = int + let equal = Int.equal + let hash = Hashtbl.hash +end) + +let cue ?(verbose=false) bytes = + let reader = reader_create bytes in + + (* Pre-size the backref table based on payload size to minimize rehashing *) + let estimated_nouns = + let approx = Bytes.length bytes / 8 in + if approx < 1024 then 1024 else approx + in + let backrefs = IntTbl.create estimated_nouns in + + (* Manual stack to eliminate recursion and track unfinished cells *) + let stack_pos = ref (Array.make 1024 0) in + let stack_head = ref (Array.make 1024 None) in + let stack_size = ref 0 in + + (* Progress tracking *) + let nouns_processed = ref 0 in + let next_report = ref 10000 in + + let grow_stack () = + let old_pos = !stack_pos in + let old_head = !stack_head in + let old_len = Array.length old_pos in + let new_len = old_len * 2 in + let new_pos = Array.make new_len 0 in + let new_head = Array.make new_len None in + Array.blit old_pos 0 new_pos 0 old_len; + Array.blit old_head 0 new_head 0 old_len; + stack_pos := new_pos; + stack_head := new_head + in + + let push_frame pos = + if !stack_size = Array.length !stack_pos then grow_stack (); + let idx = !stack_size in + let pos_arr = !stack_pos in + let head_arr = !stack_head in + pos_arr.(idx) <- pos; + head_arr.(idx) <- None; + stack_size := idx + 1 + in + + let result = ref None in + + let rec emit noun = + incr nouns_processed; + if verbose && !nouns_processed >= !next_report then begin + Printf.eprintf "cue: processed %d nouns, bits read %d, stack depth %d\r%!" + !nouns_processed (reader_pos reader) !stack_size; + next_report := !nouns_processed + 10000 + end; + + if !stack_size = 0 then + result := Some noun + else begin + let idx = !stack_size - 1 in + let head_arr = !stack_head in + match head_arr.(idx) with + | None -> + head_arr.(idx) <- Some noun + | Some head -> + let pos_arr = !stack_pos in + let cell_pos = pos_arr.(idx) in + head_arr.(idx) <- None; + stack_size := idx; + let cell = cell head noun in + IntTbl.replace backrefs cell_pos cell; + emit cell + end + in + + if verbose then Printf.eprintf "cue: starting, input size %d bytes\n%!" (Bytes.length bytes); + + let last_progress = ref 0 in + let iterations = ref 0 in + + while Option.is_none !result do + incr iterations; + let pos = reader_pos reader in + + (* Check if we're stuck *) + if verbose && !iterations mod 100000 = 0 then begin + if pos = !last_progress then + Printf.eprintf "\nWARNING: no progress in last 100k iterations at bit %d\n%!" pos + else + last_progress := pos + end; + + let tag0 = read_bit reader in + + if not tag0 then begin + (* Atom: tag bit 0 *) + let value = mat_decode ~verbose reader in + let atom = atom value in + IntTbl.replace backrefs pos atom; + emit atom + end else begin + let tag1 = read_bit reader in + if tag1 then begin + (* Backref: tag bits 11 *) + let ref_pos = mat_decode ~verbose reader in + let ref_int = + if Z.fits_int ref_pos then Z.to_int ref_pos else raise Exit + in + match IntTbl.find_opt backrefs ref_int with + | Some noun -> emit noun + | None -> + Printf.eprintf "cue: invalid backref to position %d\n%!" ref_int; + raise Exit + end else begin + (* Cell: tag bits 10 - push frame and continue decoding head *) + push_frame pos + end + end + done; + + if verbose then Printf.eprintf "\ncue: done! processed %d nouns\n%!" !nouns_processed; + + Option.get !result |
