diff options
Diffstat (limited to 'ocaml/lib/serial.ml')
-rw-r--r-- | ocaml/lib/serial.ml | 181 |
1 files changed, 147 insertions, 34 deletions
diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml index 9ededf1..ac97421 100644 --- a/ocaml/lib/serial.ml +++ b/ocaml/lib/serial.ml @@ -18,6 +18,15 @@ open Noun open Bitstream +type cue_progress = + nouns:int -> bits:int -> depth:int -> max_depth:int -> unit + +type cue_event = + | Cue_atom_begin of { position : int; value_bits : int } + | Cue_atom_end of { position : int; total_bits : int; value_bits : int } + | Cue_backref of { position : int; ref_pos : int } + | Cue_emit of { nouns : int; depth : int; max_depth : int } + (** Mat-encode a number into the bitstream Mat encoding is a variable-length integer encoding: @@ -50,28 +59,26 @@ let mat_encode w n = end (** Mat-decode from bitstream, returns (value, bits_read) *) -let mat_decode r = +let mat_decode ?on_value_bits r = let start_pos = reader_pos r in - (* Count leading 0 bits until we hit a 1 bit *) - let b = ref 0 in - while not (read_bit r) do - b := !b + 1 - done; - - let b = !b in + (* Count leading 0 bits until we hit the terminating 1 bit. *) + let b = Bitstream.count_zero_bits_until_one r in if b = 0 then (* Just a single 1 bit means 0 *) - (Z.zero, reader_pos r - start_pos) + (Option.iter (fun f -> f 0) on_value_bits; + (Z.zero, reader_pos r - start_pos, 0)) else begin (* Read the length bits and compute a = 2^(b-1) + bits_read *) let bits_val = read_bits r (b - 1) in let a = Z.to_int (Z.add (Z.shift_left Z.one (b - 1)) bits_val) in + Option.iter (fun f -> f a) on_value_bits; + (* Read n in a bits *) let n = read_bits r a in - (n, reader_pos r - start_pos) + (n, reader_pos r - start_pos, a) end (** Jam: serialize a noun to bytes @@ -137,45 +144,151 @@ let jam noun = Uses a hash table to store nouns by bit position for backreferences. *) -let cue bytes = +module IntTable = Hashtbl.Make (struct + type t = int + + let equal = Int.equal + let hash x = x land max_int +end) + +let cue ?progress ?(progress_interval = 200_000) ?inspect bytes = let r = reader_create bytes in - let backref_table = Hashtbl.create 256 in (* bit position -> noun *) - let rec cue_noun () = - let pos = reader_pos r in + (* Pre-size the backref table based on payload size to minimise rehashing. *) + let estimated_nouns = + let approx = Bytes.length bytes / 8 in + if approx < 1024 then 1024 else approx + in + let backref_table = IntTable.create estimated_nouns in + + (* Manual stack used to eliminate recursion while tracking unfinished cells. *) + let initial_stack_capacity = 1024 in + let stack_pos = ref (Array.make initial_stack_capacity 0) in + let stack_head = ref (Array.make initial_stack_capacity None) in + let stack_size = ref 0 in + let max_depth = ref 0 in + + (* Noun counter is used for periodic progress callbacks. *) + let nouns_processed = ref 0 in + + let report_tick, report_final = + match progress with + | None -> ( (fun ~nouns:_ -> ()), (fun ~nouns:_ -> ()) ) + | Some callback -> + let interval = if progress_interval <= 0 then 1 else progress_interval in + let next_report = ref interval in + + let call_callback nouns = + callback + ~nouns + ~bits:(reader_pos r) + ~depth:!stack_size + ~max_depth:!max_depth + in + + let tick ~nouns = + if nouns >= !next_report then begin + call_callback nouns; + next_report := nouns + interval + end + in + + let final ~nouns = + if nouns < !next_report then call_callback nouns + in + + (tick, final) + in - (* Read tag bit *) + let inspect_event = match inspect with Some f -> f | None -> fun _ -> () in + + let grow_stack () = + let old_pos = !stack_pos in + let old_head = !stack_head in + let old_len = Array.length old_pos in + let new_len = old_len * 2 in + let new_pos = Array.make new_len 0 in + let new_head = Array.make new_len None in + Array.blit old_pos 0 new_pos 0 old_len; + Array.blit old_head 0 new_head 0 old_len; + stack_pos := new_pos; + stack_head := new_head + in + + let push_frame pos = + if !stack_size = Array.length !stack_pos then grow_stack (); + let idx = !stack_size in + let pos_arr = !stack_pos in + let head_arr = !stack_head in + pos_arr.(idx) <- pos; + head_arr.(idx) <- None; + stack_size := idx + 1; + if !stack_size > !max_depth then max_depth := !stack_size + in + + let result = ref None in + + let rec emit noun = + incr nouns_processed; + report_tick ~nouns:!nouns_processed; + inspect_event (Cue_emit { nouns = !nouns_processed; depth = !stack_size; max_depth = !max_depth }); + + if !stack_size = 0 then + result := Some noun + else begin + let idx = !stack_size - 1 in + let head_arr = !stack_head in + match head_arr.(idx) with + | None -> + head_arr.(idx) <- Some noun + | Some head -> + let pos_arr = !stack_pos in + let cell_pos = pos_arr.(idx) in + head_arr.(idx) <- None; + stack_size := idx; + let cell = Cell (head, noun) in + IntTable.replace backref_table cell_pos cell; + emit cell + end + in + + while Option.is_none !result do + let pos = reader_pos r in let tag0 = read_bit r in if not tag0 then begin (* Atom: tag bit 0 *) - let (value, _width) = mat_decode r in - let result = Atom value in - Hashtbl.add backref_table pos result; - result + let on_value_bits bits = + inspect_event (Cue_atom_begin { position = pos; value_bits = bits }) + in + let (value, total_bits, value_bits) = mat_decode ~on_value_bits r in + let atom = Atom value in + IntTable.replace backref_table pos atom; + inspect_event (Cue_atom_end { position = pos; total_bits; value_bits }); + emit atom end else begin - (* Read second tag bit *) let tag1 = read_bit r in - if tag1 then begin (* Backref: tag bits 11 *) - let (ref_pos, _width) = mat_decode r in - let ref_pos = Z.to_int ref_pos in - match Hashtbl.find_opt backref_table ref_pos with - | Some noun -> noun - | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos)) + let (ref_pos, _width, _value_bits) = mat_decode r in + let ref_int = Z.to_int ref_pos in + inspect_event (Cue_backref { position = pos; ref_pos = ref_int }); + match IntTable.find_opt backref_table ref_int with + | Some noun -> emit noun + | None -> + raise + (Invalid_argument + (Printf.sprintf "cue: invalid backref to position %d" ref_int)) end else begin - (* Cell: tag bits 01 *) - let head = cue_noun () in - let tail = cue_noun () in - let result = Cell (head, tail) in - Hashtbl.add backref_table pos result; - result + (* Cell: tag bits 01 – push frame and continue decoding head. *) + push_frame pos end end - in + done; + + report_final ~nouns:!nouns_processed; - cue_noun () + Option.get !result (** Convert bytes to a hex string for debugging *) let bytes_to_hex bytes = |