blob: 47e04d1751d9b5a0b550c1996212e204598dc4fa (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
open Noun
open Bitstream
(* Jam hashtable: use physical equality first (fast path),
then fall back to structural equality for correctness.
Hash based on pointer value for O(1) performance. *)
module NounTbl = Hashtbl.Make (struct
type t = noun
let equal a b = (a == b) || Noun.equal a b
let hash noun = Hashtbl.hash (Obj.magic noun : int)
end)
let mat_encode writer n =
if Z.equal n Z.zero then
write_bit writer true
else begin
let a = Z.numbits n in
let b = Z.numbits (Z.of_int a) in
for _ = 1 to b do
write_bit writer false
done;
write_bit writer true;
if b > 1 then
write_bits writer (Z.of_int a) (b - 1);
write_bits writer n a
end
let mat_decode ?(verbose=false) reader =
let zeros = count_zero_bits_until_one reader in
if zeros = 0 then Z.zero
else
let len_bits =
if zeros = 1 then Z.zero else read_bits reader (zeros - 1)
in
let width_z = Z.add (Z.shift_left Z.one (zeros - 1)) len_bits in
let width =
try
let w = Z.to_int width_z in
if verbose && w > 1000000 then
Printf.eprintf "\nmat_decode: reading large atom with %d bits\n%!" w;
w
with Z.Overflow ->
Printf.eprintf "\nmat_decode: width overflow! zeros=%d\n%!" zeros;
raise Exit
in
read_bits reader width
let jam ?(verbose=false) noun =
let writer = writer_create () in
(* Use polymorphic Hashtbl with custom hash/equal like ocaml-old *)
let positions = Hashtbl.create 1024 in
let counter = ref 0 in
let rec encode noun =
incr counter;
if verbose && !counter mod 10000 = 0 then
Printf.eprintf "jam: processed %d nodes, table size %d, bits written %d\r%!"
!counter (Hashtbl.length positions) (writer_pos writer);
match Hashtbl.find_opt positions noun with
| Some bit_pos ->
begin match noun with
| Atom z ->
(* if atom is smaller than backref, encode atom directly *)
let atom_bits = Z.numbits z in
let backref_bits = Z.numbits (Z.of_int bit_pos) in
if atom_bits <= backref_bits then begin
write_bit writer false;
mat_encode writer z
end else begin
write_bit writer true;
write_bit writer true;
mat_encode writer (Z.of_int bit_pos)
end
| Cell _ ->
(* always use backref for cells *)
write_bit writer true;
write_bit writer true;
mat_encode writer (Z.of_int bit_pos)
end
| None ->
let current_pos = writer_pos writer in
Hashtbl.add positions noun current_pos;
begin match noun with
| Atom z ->
write_bit writer false;
mat_encode writer z
| Cell (h, t) ->
write_bit writer true;
write_bit writer false;
encode h;
encode t
end
in
if verbose then Printf.eprintf "jam: starting...\n%!";
encode noun;
if verbose then Printf.eprintf "\njam: done! processed %d nodes\n%!" !counter;
writer_to_bytes writer
module IntTbl = Hashtbl.Make (struct
type t = int
let equal = Int.equal
let hash = Hashtbl.hash
end)
let cue ?(verbose=false) bytes =
let reader = reader_create bytes in
(* Pre-size the backref table based on payload size to minimize rehashing *)
let estimated_nouns =
let approx = Bytes.length bytes / 8 in
if approx < 1024 then 1024 else approx
in
let backrefs = IntTbl.create estimated_nouns in
(* Manual stack to eliminate recursion and track unfinished cells *)
let stack_pos = ref (Array.make 1024 0) in
let stack_head = ref (Array.make 1024 None) in
let stack_size = ref 0 in
(* Progress tracking *)
let nouns_processed = ref 0 in
let next_report = ref 10000 in
let grow_stack () =
let old_pos = !stack_pos in
let old_head = !stack_head in
let old_len = Array.length old_pos in
let new_len = old_len * 2 in
let new_pos = Array.make new_len 0 in
let new_head = Array.make new_len None in
Array.blit old_pos 0 new_pos 0 old_len;
Array.blit old_head 0 new_head 0 old_len;
stack_pos := new_pos;
stack_head := new_head
in
let push_frame pos =
if !stack_size = Array.length !stack_pos then grow_stack ();
let idx = !stack_size in
let pos_arr = !stack_pos in
let head_arr = !stack_head in
pos_arr.(idx) <- pos;
head_arr.(idx) <- None;
stack_size := idx + 1
in
let result = ref None in
let rec emit noun =
incr nouns_processed;
if verbose && !nouns_processed >= !next_report then begin
Printf.eprintf "cue: processed %d nouns, bits read %d, stack depth %d\r%!"
!nouns_processed (reader_pos reader) !stack_size;
next_report := !nouns_processed + 10000
end;
if !stack_size = 0 then
result := Some noun
else begin
let idx = !stack_size - 1 in
let head_arr = !stack_head in
match head_arr.(idx) with
| None ->
head_arr.(idx) <- Some noun
| Some head ->
let pos_arr = !stack_pos in
let cell_pos = pos_arr.(idx) in
head_arr.(idx) <- None;
stack_size := idx;
let cell = cell head noun in
IntTbl.replace backrefs cell_pos cell;
emit cell
end
in
if verbose then Printf.eprintf "cue: starting, input size %d bytes\n%!" (Bytes.length bytes);
let last_progress = ref 0 in
let iterations = ref 0 in
while Option.is_none !result do
incr iterations;
let pos = reader_pos reader in
(* Check if we're stuck *)
if verbose && !iterations mod 100000 = 0 then begin
if pos = !last_progress then
Printf.eprintf "\nWARNING: no progress in last 100k iterations at bit %d\n%!" pos
else
last_progress := pos
end;
let tag0 = read_bit reader in
if not tag0 then begin
(* Atom: tag bit 0 *)
let value = mat_decode ~verbose reader in
let atom = atom value in
IntTbl.replace backrefs pos atom;
emit atom
end else begin
let tag1 = read_bit reader in
if tag1 then begin
(* Backref: tag bits 11 *)
let ref_pos = mat_decode ~verbose reader in
let ref_int =
if Z.fits_int ref_pos then Z.to_int ref_pos else raise Exit
in
match IntTbl.find_opt backrefs ref_int with
| Some noun -> emit noun
| None ->
Printf.eprintf "cue: invalid backref to position %d\n%!" ref_int;
raise Exit
end else begin
(* Cell: tag bits 10 - push frame and continue decoding head *)
push_frame pos
end
end
done;
if verbose then Printf.eprintf "\ncue: done! processed %d nouns\n%!" !nouns_processed;
Option.get !result
|