summaryrefslogtreecommitdiff
path: root/ocaml/test/bench_cue_pill.ml
blob: 064f0c5d599ecd35aad4e889bf539e3c01a81015 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
open Nock_lib

exception Stop of string

type limits = {
  max_nouns : int;
  max_bytes : int option;
  progress_interval : int;
  pill_path : string;
  verbose : bool;
  stop_atom_bits : int option;
}

let read_file_bytes path =
  let ic = open_in_bin path in
  let len = in_channel_length ic in
  let data = really_input_string ic len in
  close_in ic;
  Bytes.of_string data

let mib_of_bytes bytes = float_of_int bytes /. (1024. *. 1024.)

let run limits =
  let pill_bytes = read_file_bytes limits.pill_path in
  let total_bytes = Bytes.length pill_bytes in

  Printf.printf "Benchmarking cue on %s (%.2f MiB)\n" limits.pill_path
    (mib_of_bytes total_bytes);
  Stdlib.flush Stdlib.stdout;

  let start = Unix.gettimeofday () in
  let last_print = ref start in
  let processed_nouns = ref 0 in
  let processed_bytes = ref 0 in
  let current_depth = ref 0 in
  let peak_depth = ref 0 in
  let stop_reason = ref None in
  let largest_atom_bits = ref 0 in
  let largest_atom_total = ref 0 in
  let longest_emit_gap = ref 0.0 in
  let last_emit_time = ref start in

  let needs_inspect = limits.verbose || Option.is_some limits.stop_atom_bits in
  let maybe_inspect =
    if not needs_inspect then None
    else
      let atom_log_threshold_bits = 1024 in
      Some
        (fun event ->
          match event with
          | Serial.Cue_atom_begin { position; value_bits } ->
              (match limits.stop_atom_bits with
              | Some threshold when value_bits >= threshold ->
                  stop_reason :=
                    Some
                      (Printf.sprintf
                         "encountered large atom (%d bits) at position %d"
                         value_bits position);
                  raise (Stop "atom limit")
              | _ -> ());
              if limits.verbose && value_bits >= atom_log_threshold_bits then
                Printf.printf
                  "  [atom-start] pos=%d value_bits=%d (~%.2f KiB)\n%!"
                  position value_bits
                  (float_of_int value_bits /. 8. /. 1024.)
          | Serial.Cue_atom_end { position; total_bits; value_bits } ->
              if value_bits > !largest_atom_bits then begin
                largest_atom_bits := value_bits;
                largest_atom_total := total_bits;
                if limits.verbose then
                  Printf.printf
                    "  [atom-end] pos=%d value_bits=%d total_bits=%d (~%.2f KiB)\n%!"
                    position value_bits total_bits
                    (float_of_int value_bits /. 8. /. 1024.)
              end;
          | Serial.Cue_emit { nouns; depth; max_depth } ->
              let now = Unix.gettimeofday () in
              let gap = now -. !last_emit_time in
              if gap > !longest_emit_gap then longest_emit_gap := gap;
              if limits.verbose && gap > 0.2 then
                Printf.printf
                  "  [emit] gap %.3fs before noun %d (depth=%d peak=%d)\n%!"
                  gap nouns depth max_depth;
              last_emit_time := now
          | Serial.Cue_backref _ -> ())
  in

  let progress ~nouns ~bits ~depth ~max_depth =
    processed_nouns := nouns;
    processed_bytes := bits / 8;
    current_depth := depth;
    peak_depth := max_depth;

    let now = Unix.gettimeofday () in
    if now -. !last_print >= 0.5 then begin
      let elapsed = now -. start in
      let mib = mib_of_bytes !processed_bytes in
      let nouns_rate = float_of_int nouns /. elapsed in
      let mib_rate = mib /. elapsed in
      Printf.printf
        "  %.1fs | nouns=%d (%.0f/s) | %.2f MiB (%.2f MiB/s) | depth=%d/%d\n%!"
        elapsed nouns nouns_rate mib mib_rate depth max_depth;
      last_print := now
    end;

    if limits.max_nouns > 0 && nouns >= limits.max_nouns then begin
      stop_reason := Some (Printf.sprintf "reached noun limit %d" limits.max_nouns);
      raise (Stop "noun limit")
    end;

    begin
      match limits.max_bytes with
      | None -> ()
      | Some byte_limit ->
          if !processed_bytes >= byte_limit then begin
            stop_reason :=
              Some
                (Printf.sprintf "reached byte limit %.2f MiB"
                   (mib_of_bytes byte_limit));
            raise (Stop "byte limit")
          end
    end
  in

  let result =
    try
      let _ =
        Serial.cue ~progress
          ~progress_interval:limits.progress_interval
          ?inspect:maybe_inspect
          pill_bytes
      in
      `Finished
    with
    | Stop _ -> `Stopped
  in

  let elapsed = Unix.gettimeofday () -. start in
  let nouns = !processed_nouns in
  let bytes = !processed_bytes in
  let mib = mib_of_bytes bytes in
  let nouns_rate = float_of_int nouns /. elapsed in
  let mib_rate = mib /. elapsed in
  let depth_now = !current_depth in
  let depth_max = !peak_depth in

  begin
    match result with
    | `Finished -> Printf.printf "Completed full cue.\n"
    | `Stopped ->
        Printf.printf "Stopped early (%s).\n"
          (Option.value ~default:"manual stop" !stop_reason)
  end;

  Printf.printf
    "Processed %d nouns in %.2fs (%.2f MiB) \n  -> %.0f nouns/s, %.2f MiB/s\n  Depth: current=%d peak=%d\n"
    nouns elapsed mib nouns_rate mib_rate depth_now depth_max;
  if limits.verbose then
    Printf.printf
      "  Largest atom: value_bits=%d (~%.2f KiB), total_bits=%d\n  Longest emit gap: %.3fs\n"
      !largest_atom_bits
      (float_of_int !largest_atom_bits /. 8. /. 1024.)
      !largest_atom_total
      !longest_emit_gap;
  Stdlib.flush Stdlib.stdout

let parse_limits () =
  let pill_path = ref "solid.pill" in
  let noun_limit = ref 200_000 in
  let mib_limit = ref 0.0 in
  let progress_interval = ref 50_000 in
  let verbose = ref false in
  let stop_atom_bits = ref 0 in

  let spec =
    [ ( "--pill",
        Arg.String (fun s -> pill_path := s),
        "Path to pill (default: solid.pill)" );
      ( "--limit-nouns",
        Arg.Int (fun n -> noun_limit := n),
        "Stop after decoding at least this many nouns (default 200000, 0 to disable)"
      );
      ( "--limit-mib",
        Arg.Float (fun f -> mib_limit := f),
        "Stop after reading this many MiB of data (default 0 = disable)" );
      ( "--progress-interval",
        Arg.Int (fun i -> progress_interval := i),
        "Number of nouns between progress callbacks (default 50000)" )
    ; ( "--verbose",
        Arg.Unit (fun () -> verbose := true),
        "Enable detailed logging of atoms and emit gaps" )
    ; ( "--stop-atom-bits",
        Arg.Int (fun b -> stop_atom_bits := b),
        "Abort when encountering an atom with this many value bits or more" )
    ]
  in

  let usage = "bench_cue_pill [options]" in
  Arg.parse spec (fun _ -> ()) usage;

  let max_nouns = if !noun_limit <= 0 then 0 else !noun_limit in
  let max_bytes =
    if !mib_limit <= 0. then None
    else
      let bytes = int_of_float (!mib_limit *. 1024. *. 1024.) in
      Some bytes
  in

  {
    max_nouns;
    max_bytes;
    progress_interval = if !progress_interval <= 0 then 1 else !progress_interval;
    pill_path = !pill_path;
    verbose = !verbose;
    stop_atom_bits = if !stop_atom_bits <= 0 then None else Some !stop_atom_bits;
  }

let () = run (parse_limits ())