summaryrefslogtreecommitdiff
path: root/ocaml/lib/serial.ml
blob: 47e04d1751d9b5a0b550c1996212e204598dc4fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
open Noun
open Bitstream

(* Jam hashtable: use physical equality first (fast path),
   then fall back to structural equality for correctness.
   Hash based on pointer value for O(1) performance. *)
module NounTbl = Hashtbl.Make (struct
  type t = noun
  let equal a b = (a == b) || Noun.equal a b
  let hash noun = Hashtbl.hash (Obj.magic noun : int)
end)

let mat_encode writer n =
  if Z.equal n Z.zero then
    write_bit writer true
  else begin
    let a = Z.numbits n in
    let b = Z.numbits (Z.of_int a) in
    for _ = 1 to b do
      write_bit writer false
    done;
    write_bit writer true;
    if b > 1 then
      write_bits writer (Z.of_int a) (b - 1);
    write_bits writer n a
  end

let mat_decode ?(verbose=false) reader =
  let zeros = count_zero_bits_until_one reader in
  if zeros = 0 then Z.zero
  else
    let len_bits =
      if zeros = 1 then Z.zero else read_bits reader (zeros - 1)
    in
    let width_z = Z.add (Z.shift_left Z.one (zeros - 1)) len_bits in
    let width =
      try
        let w = Z.to_int width_z in
        if verbose && w > 1000000 then
          Printf.eprintf "\nmat_decode: reading large atom with %d bits\n%!" w;
        w
      with Z.Overflow ->
        Printf.eprintf "\nmat_decode: width overflow! zeros=%d\n%!" zeros;
        raise Exit
    in
    read_bits reader width

let jam ?(verbose=false) noun =
  let writer = writer_create () in
  (* Use polymorphic Hashtbl with custom hash/equal like ocaml-old *)
  let positions = Hashtbl.create 1024 in
  let counter = ref 0 in

  let rec encode noun =
    incr counter;
    if verbose && !counter mod 10000 = 0 then
      Printf.eprintf "jam: processed %d nodes, table size %d, bits written %d\r%!"
        !counter (Hashtbl.length positions) (writer_pos writer);

    match Hashtbl.find_opt positions noun with
    | Some bit_pos ->
        begin match noun with
        | Atom z ->
            (* if atom is smaller than backref, encode atom directly *)
            let atom_bits = Z.numbits z in
            let backref_bits = Z.numbits (Z.of_int bit_pos) in
            if atom_bits <= backref_bits then begin
              write_bit writer false;
              mat_encode writer z
            end else begin
              write_bit writer true;
              write_bit writer true;
              mat_encode writer (Z.of_int bit_pos)
            end
        | Cell _ ->
            (* always use backref for cells *)
            write_bit writer true;
            write_bit writer true;
            mat_encode writer (Z.of_int bit_pos)
        end
    | None ->
        let current_pos = writer_pos writer in
        Hashtbl.add positions noun current_pos;
        begin match noun with
        | Atom z ->
            write_bit writer false;
            mat_encode writer z
        | Cell (h, t) ->
            write_bit writer true;
            write_bit writer false;
            encode h;
            encode t
        end
  in

  if verbose then Printf.eprintf "jam: starting...\n%!";
  encode noun;
  if verbose then Printf.eprintf "\njam: done! processed %d nodes\n%!" !counter;
  writer_to_bytes writer

module IntTbl = Hashtbl.Make (struct
  type t = int
  let equal = Int.equal
  let hash = Hashtbl.hash
end)

let cue ?(verbose=false) bytes =
  let reader = reader_create bytes in

  (* Pre-size the backref table based on payload size to minimize rehashing *)
  let estimated_nouns =
    let approx = Bytes.length bytes / 8 in
    if approx < 1024 then 1024 else approx
  in
  let backrefs = IntTbl.create estimated_nouns in

  (* Manual stack to eliminate recursion and track unfinished cells *)
  let stack_pos = ref (Array.make 1024 0) in
  let stack_head = ref (Array.make 1024 None) in
  let stack_size = ref 0 in

  (* Progress tracking *)
  let nouns_processed = ref 0 in
  let next_report = ref 10000 in

  let grow_stack () =
    let old_pos = !stack_pos in
    let old_head = !stack_head in
    let old_len = Array.length old_pos in
    let new_len = old_len * 2 in
    let new_pos = Array.make new_len 0 in
    let new_head = Array.make new_len None in
    Array.blit old_pos 0 new_pos 0 old_len;
    Array.blit old_head 0 new_head 0 old_len;
    stack_pos := new_pos;
    stack_head := new_head
  in

  let push_frame pos =
    if !stack_size = Array.length !stack_pos then grow_stack ();
    let idx = !stack_size in
    let pos_arr = !stack_pos in
    let head_arr = !stack_head in
    pos_arr.(idx) <- pos;
    head_arr.(idx) <- None;
    stack_size := idx + 1
  in

  let result = ref None in

  let rec emit noun =
    incr nouns_processed;
    if verbose && !nouns_processed >= !next_report then begin
      Printf.eprintf "cue: processed %d nouns, bits read %d, stack depth %d\r%!"
        !nouns_processed (reader_pos reader) !stack_size;
      next_report := !nouns_processed + 10000
    end;

    if !stack_size = 0 then
      result := Some noun
    else begin
      let idx = !stack_size - 1 in
      let head_arr = !stack_head in
      match head_arr.(idx) with
      | None ->
          head_arr.(idx) <- Some noun
      | Some head ->
          let pos_arr = !stack_pos in
          let cell_pos = pos_arr.(idx) in
          head_arr.(idx) <- None;
          stack_size := idx;
          let cell = cell head noun in
          IntTbl.replace backrefs cell_pos cell;
          emit cell
    end
  in

  if verbose then Printf.eprintf "cue: starting, input size %d bytes\n%!" (Bytes.length bytes);

  let last_progress = ref 0 in
  let iterations = ref 0 in

  while Option.is_none !result do
    incr iterations;
    let pos = reader_pos reader in

    (* Check if we're stuck *)
    if verbose && !iterations mod 100000 = 0 then begin
      if pos = !last_progress then
        Printf.eprintf "\nWARNING: no progress in last 100k iterations at bit %d\n%!" pos
      else
        last_progress := pos
    end;

    let tag0 = read_bit reader in

    if not tag0 then begin
      (* Atom: tag bit 0 *)
      let value = mat_decode ~verbose reader in
      let atom = atom value in
      IntTbl.replace backrefs pos atom;
      emit atom
    end else begin
      let tag1 = read_bit reader in
      if tag1 then begin
        (* Backref: tag bits 11 *)
        let ref_pos = mat_decode ~verbose reader in
        let ref_int =
          if Z.fits_int ref_pos then Z.to_int ref_pos else raise Exit
        in
        match IntTbl.find_opt backrefs ref_int with
        | Some noun -> emit noun
        | None ->
            Printf.eprintf "cue: invalid backref to position %d\n%!" ref_int;
            raise Exit
      end else begin
        (* Cell: tag bits 10 - push frame and continue decoding head *)
        push_frame pos
      end
    end
  done;

  if verbose then Printf.eprintf "\ncue: done! processed %d nouns\n%!" !nouns_processed;

  Option.get !result