open Noun open Bitstream (* Jam hashtable: use physical equality first (fast path), then fall back to structural equality for correctness. Hash based on pointer value for O(1) performance. *) module NounTbl = Hashtbl.Make (struct type t = noun let equal a b = (a == b) || Noun.equal a b let hash noun = Hashtbl.hash (Obj.magic noun : int) end) let mat_encode writer n = if Z.equal n Z.zero then write_bit writer true else begin let a = Z.numbits n in let b = Z.numbits (Z.of_int a) in for _ = 1 to b do write_bit writer false done; write_bit writer true; if b > 1 then write_bits writer (Z.of_int a) (b - 1); write_bits writer n a end let mat_decode ?(verbose=false) reader = let zeros = count_zero_bits_until_one reader in if zeros = 0 then Z.zero else let len_bits = if zeros = 1 then Z.zero else read_bits reader (zeros - 1) in let width_z = Z.add (Z.shift_left Z.one (zeros - 1)) len_bits in let width = try let w = Z.to_int width_z in if verbose && w > 1000000 then Printf.eprintf "\nmat_decode: reading large atom with %d bits\n%!" w; w with Z.Overflow -> Printf.eprintf "\nmat_decode: width overflow! zeros=%d\n%!" zeros; raise Exit in read_bits reader width let jam ?(verbose=false) noun = let writer = writer_create () in (* Use polymorphic Hashtbl with custom hash/equal like ocaml-old *) let positions = Hashtbl.create 1024 in let counter = ref 0 in let rec encode noun = incr counter; if verbose && !counter mod 10000 = 0 then Printf.eprintf "jam: processed %d nodes, table size %d, bits written %d\r%!" !counter (Hashtbl.length positions) (writer_pos writer); match Hashtbl.find_opt positions noun with | Some bit_pos -> begin match noun with | Atom z -> (* if atom is smaller than backref, encode atom directly *) let atom_bits = Z.numbits z in let backref_bits = Z.numbits (Z.of_int bit_pos) in if atom_bits <= backref_bits then begin write_bit writer false; mat_encode writer z end else begin write_bit writer true; write_bit writer true; mat_encode writer (Z.of_int bit_pos) end | Cell _ -> (* always use backref for cells *) write_bit writer true; write_bit writer true; mat_encode writer (Z.of_int bit_pos) end | None -> let current_pos = writer_pos writer in Hashtbl.add positions noun current_pos; begin match noun with | Atom z -> write_bit writer false; mat_encode writer z | Cell (h, t) -> write_bit writer true; write_bit writer false; encode h; encode t end in if verbose then Printf.eprintf "jam: starting...\n%!"; encode noun; if verbose then Printf.eprintf "\njam: done! processed %d nodes\n%!" !counter; writer_to_bytes writer module IntTbl = Hashtbl.Make (struct type t = int let equal = Int.equal let hash = Hashtbl.hash end) let cue ?(verbose=false) bytes = let reader = reader_create bytes in (* Pre-size the backref table based on payload size to minimize rehashing *) let estimated_nouns = let approx = Bytes.length bytes / 8 in if approx < 1024 then 1024 else approx in let backrefs = IntTbl.create estimated_nouns in (* Manual stack to eliminate recursion and track unfinished cells *) let stack_pos = ref (Array.make 1024 0) in let stack_head = ref (Array.make 1024 None) in let stack_size = ref 0 in (* Progress tracking *) let nouns_processed = ref 0 in let next_report = ref 10000 in let grow_stack () = let old_pos = !stack_pos in let old_head = !stack_head in let old_len = Array.length old_pos in let new_len = old_len * 2 in let new_pos = Array.make new_len 0 in let new_head = Array.make new_len None in Array.blit old_pos 0 new_pos 0 old_len; Array.blit old_head 0 new_head 0 old_len; stack_pos := new_pos; stack_head := new_head in let push_frame pos = if !stack_size = Array.length !stack_pos then grow_stack (); let idx = !stack_size in let pos_arr = !stack_pos in let head_arr = !stack_head in pos_arr.(idx) <- pos; head_arr.(idx) <- None; stack_size := idx + 1 in let result = ref None in let rec emit noun = incr nouns_processed; if verbose && !nouns_processed >= !next_report then begin Printf.eprintf "cue: processed %d nouns, bits read %d, stack depth %d\r%!" !nouns_processed (reader_pos reader) !stack_size; next_report := !nouns_processed + 10000 end; if !stack_size = 0 then result := Some noun else begin let idx = !stack_size - 1 in let head_arr = !stack_head in match head_arr.(idx) with | None -> head_arr.(idx) <- Some noun | Some head -> let pos_arr = !stack_pos in let cell_pos = pos_arr.(idx) in head_arr.(idx) <- None; stack_size := idx; let cell = cell head noun in IntTbl.replace backrefs cell_pos cell; emit cell end in if verbose then Printf.eprintf "cue: starting, input size %d bytes\n%!" (Bytes.length bytes); let last_progress = ref 0 in let iterations = ref 0 in while Option.is_none !result do incr iterations; let pos = reader_pos reader in (* Check if we're stuck *) if verbose && !iterations mod 100000 = 0 then begin if pos = !last_progress then Printf.eprintf "\nWARNING: no progress in last 100k iterations at bit %d\n%!" pos else last_progress := pos end; let tag0 = read_bit reader in if not tag0 then begin (* Atom: tag bit 0 *) let value = mat_decode ~verbose reader in let atom = atom value in IntTbl.replace backrefs pos atom; emit atom end else begin let tag1 = read_bit reader in if tag1 then begin (* Backref: tag bits 11 *) let ref_pos = mat_decode ~verbose reader in let ref_int = if Z.fits_int ref_pos then Z.to_int ref_pos else raise Exit in match IntTbl.find_opt backrefs ref_int with | Some noun -> emit noun | None -> Printf.eprintf "cue: invalid backref to position %d\n%!" ref_int; raise Exit end else begin (* Cell: tag bits 10 - push frame and continue decoding head *) push_frame pos end end done; if verbose then Printf.eprintf "\ncue: done! processed %d nouns\n%!" !nouns_processed; Option.get !result