summaryrefslogtreecommitdiff
path: root/ocaml/lib
diff options
context:
space:
mode:
Diffstat (limited to 'ocaml/lib')
-rw-r--r--ocaml/lib/bitstream.ml102
-rw-r--r--ocaml/lib/dune4
-rw-r--r--ocaml/lib/nock.ml164
-rw-r--r--ocaml/lib/noun.ml69
-rw-r--r--ocaml/lib/serial.ml187
5 files changed, 526 insertions, 0 deletions
diff --git a/ocaml/lib/bitstream.ml b/ocaml/lib/bitstream.ml
new file mode 100644
index 0000000..cfe094c
--- /dev/null
+++ b/ocaml/lib/bitstream.ml
@@ -0,0 +1,102 @@
+(** Bitstream utilities for jam/cue serialization *)
+
+(** A bitstream writer *)
+type writer = {
+ buf: bytes ref; (** Buffer for bits *)
+ mutable bit_pos: int; (** Current bit position *)
+}
+
+(** A bitstream reader *)
+type reader = {
+ buf: bytes; (** Buffer to read from *)
+ mutable bit_pos: int; (** Current bit position *)
+ len: int; (** Length in bits *)
+}
+
+(** Create a new bitstream writer *)
+let writer_create () = {
+ buf = ref (Bytes.create 1024);
+ bit_pos = 0;
+}
+
+(** Grow the writer buffer if needed *)
+let writer_ensure (w : writer) (bits_needed : int) : unit =
+ let bytes_needed : int = (w.bit_pos + bits_needed + 7) / 8 in
+ let buf_ref : bytes ref = w.buf in
+ let current_buf : bytes = !buf_ref in
+ if bytes_needed > (Bytes.length current_buf) then begin
+ let old_buf : bytes = current_buf in
+ let new_size : int = max (bytes_needed * 2) (Bytes.length old_buf * 2) in
+ let new_buf : bytes = Bytes.create new_size in
+ Bytes.blit old_buf 0 new_buf 0 (Bytes.length old_buf);
+ buf_ref := new_buf
+ end
+
+(** Write a single bit *)
+let write_bit w bit =
+ writer_ensure w 1;
+ let byte_pos = w.bit_pos / 8 in
+ let bit_off = w.bit_pos mod 8 in
+ if bit then begin
+ let buf = !(w.buf) in
+ let old_byte = Bytes.get_uint8 buf byte_pos in
+ Bytes.set_uint8 buf byte_pos (old_byte lor (1 lsl bit_off))
+ end;
+ w.bit_pos <- w.bit_pos + 1
+
+(** Write multiple bits from a Z.t value *)
+let write_bits w value nbits =
+ writer_ensure w nbits;
+ for i = 0 to nbits - 1 do
+ let bit = Z.testbit value i in
+ write_bit w bit
+ done
+
+(** Get the final bytes from a writer *)
+let writer_to_bytes (w : writer) : bytes =
+ let byte_len = (w.bit_pos + 7) / 8 in
+ let buf_ref : bytes ref = w.buf in
+ let buf : bytes = !buf_ref in
+ Bytes.sub buf 0 byte_len
+
+(** Create a bitstream reader *)
+let reader_create buf =
+ {
+ buf;
+ bit_pos = 0;
+ len = Bytes.length buf * 8;
+ }
+
+(** Read a single bit *)
+let read_bit r =
+ if r.bit_pos >= r.len then
+ raise (Invalid_argument "read_bit: end of stream");
+ let byte_pos = r.bit_pos / 8 in
+ let bit_off = r.bit_pos mod 8 in
+ let byte_val = Bytes.get_uint8 r.buf byte_pos in
+ r.bit_pos <- r.bit_pos + 1;
+ (byte_val lsr bit_off) land 1 = 1
+
+(** Read multiple bits as a Z.t *)
+let read_bits r nbits =
+ let result = ref Z.zero in
+ for i = 0 to nbits - 1 do
+ if read_bit r then
+ result := Z.logor !result (Z.shift_left Z.one i)
+ done;
+ !result
+
+(** Peek at a bit without advancing *)
+let peek_bit r =
+ if r.bit_pos >= r.len then
+ raise (Invalid_argument "peek_bit: end of stream");
+ let byte_pos = r.bit_pos / 8 in
+ let bit_off = r.bit_pos mod 8 in
+ let byte_val = Bytes.get_uint8 r.buf byte_pos in
+ (byte_val lsr bit_off) land 1 = 1
+
+(** Get current bit position *)
+let reader_pos r = r.bit_pos
+
+(** Check if at end of stream *)
+let reader_at_end r = r.bit_pos >= r.len
diff --git a/ocaml/lib/dune b/ocaml/lib/dune
new file mode 100644
index 0000000..008de33
--- /dev/null
+++ b/ocaml/lib/dune
@@ -0,0 +1,4 @@
+(library
+ (name nock_lib)
+ (modules noun nock bitstream serial)
+ (libraries zarith))
diff --git a/ocaml/lib/nock.ml b/ocaml/lib/nock.ml
new file mode 100644
index 0000000..34065b8
--- /dev/null
+++ b/ocaml/lib/nock.ml
@@ -0,0 +1,164 @@
+open Noun
+
+(** Nock interpreter
+
+ Based on the reference implementation from vere/pkg/noun/nock.c
+
+ The Nock spec has 12 opcodes (0-11):
+ - 0: slot/fragment lookup
+ - 1: constant
+ - 2: nock (recursion)
+ - 3: is-cell test
+ - 4: increment
+ - 5: equality test
+ - 6: if-then-else
+ - 7: composition
+ - 8: push
+ - 9: call with axis
+ - 10: hint (ignored in reference implementation)
+ - 11: scry (errors in reference implementation)
+*)
+
+(** Main nock evaluation function: nock(subject, formula)
+
+ In Nock notation: *[subject formula]
+
+ This is a direct port of _n_nock_on from nock.c:157-396
+*)
+let rec nock_on bus fol =
+ match fol with
+ | Cell (hib, gal) when is_cell hib ->
+ (* [a b] -> compute both sides and cons *)
+ let poz = nock_on bus hib in
+ let riv = nock_on bus gal in
+ cell poz riv
+
+ | Cell (Atom op, gal) ->
+ (match Z.to_int op with
+ | 0 ->
+ (* /[axis subject] - slot/fragment lookup *)
+ if not (is_atom gal) then raise Exit
+ else slot (match gal with Atom n -> n | _ -> raise Exit) bus
+
+ | 1 ->
+ (* =[constant subject] - return constant *)
+ gal
+
+ | 2 ->
+ (* *[subject formula new_subject] - evaluate with new subject *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ let seb = nock_on bus b_gal in
+ let nex = nock_on bus c_gal in
+ nock_on seb nex
+
+ | 3 ->
+ (* ?[subject formula] - is-cell test *)
+ let gof = nock_on bus gal in
+ if is_cell gof then atom 0 else atom 1
+
+ | 4 ->
+ (* +[subject formula] - increment *)
+ let gof = nock_on bus gal in
+ inc gof
+
+ | 5 ->
+ (* =[subject formula] - equality test *)
+ let wim = nock_on bus gal in
+ if not (is_cell wim) then raise Exit;
+ let a = head wim in
+ let b = tail wim in
+ if equal a b then atom 0 else atom 1
+
+ | 6 ->
+ (* if-then-else *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let cd_gal = tail gal in
+ if not (is_cell cd_gal) then raise Exit;
+ let c_gal = head cd_gal in
+ let d_gal = tail cd_gal in
+
+ let tys = nock_on bus b_gal in
+ let nex = match tys with
+ | Atom n when Z.equal n Z.zero -> c_gal
+ | Atom n when Z.equal n Z.one -> d_gal
+ | _ -> raise Exit
+ in
+ nock_on bus nex
+
+ | 7 ->
+ (* composition: *[*[subject b] c] *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ let bod = nock_on bus b_gal in
+ nock_on bod c_gal
+
+ | 8 ->
+ (* push: *[[*[subject b] subject] c] *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ let heb = nock_on bus b_gal in
+ let bod = cell heb bus in
+ nock_on bod c_gal
+
+ | 9 ->
+ (* call: *[*[subject c] axis] *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ if not (is_atom b_gal) then raise Exit;
+
+ let seb = nock_on bus c_gal in
+ let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in
+ nock_on seb nex
+
+ | 10 ->
+ (* hint - in reference implementation, hints are mostly ignored *)
+ let nex =
+ if is_cell gal then
+ (* [[hint-tag hint-value] formula] *)
+ tail gal
+ else
+ (* [hint-tag formula] where hint-value is implicit *)
+ gal
+ in
+ nock_on bus nex
+
+ | 11 ->
+ (* scry - not implemented in reference nock, raises error *)
+ raise Exit
+
+ | _ ->
+ (* Invalid opcode *)
+ raise Exit
+ )
+
+ | _ ->
+ (* Invalid formula structure *)
+ raise Exit
+
+(** Convenience function: nock(subject, formula) *)
+let nock subject formula =
+ nock_on subject formula
+
+(** slam: apply gate to sample
+ slam(gate, sample) = *[gate [9 2 [0 1] [0 6] [1 sample] [0 7]]]
+
+ In practice this evaluates the gate (which is a core with a formula at axis 2)
+ with a modified sample (at axis 6).
+*)
+let slam gat sam =
+ let cor = cell (head gat) (cell sam (tail (tail gat))) in
+ let formula = slot (Z.of_int 2) cor in
+ nock_on cor formula
+
+(** kick: fire gate without changing sample
+ kick(gate) = *[gate 9 2 0 1]
+*)
+let kick gat =
+ let formula = slot (Z.of_int 2) gat in
+ nock_on gat formula
diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml
new file mode 100644
index 0000000..c59ec80
--- /dev/null
+++ b/ocaml/lib/noun.ml
@@ -0,0 +1,69 @@
+(** Noun type and basic operations *)
+
+(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *)
+type noun =
+ | Atom of Z.t (** Arbitrary-precision integer using Zarith *)
+ | Cell of noun * noun (** Pair of nouns *)
+
+(** Exception raised on nock evaluation errors *)
+exception Exit
+
+(** Create an atom from an int *)
+let atom n = Atom (Z.of_int n)
+
+(** Create a cell *)
+let cell a b = Cell (a, b)
+
+(** Test if a noun is a cell *)
+let is_cell = function
+ | Cell _ -> true
+ | Atom _ -> false
+
+(** Test if a noun is an atom *)
+let is_atom = function
+ | Atom _ -> true
+ | Cell _ -> false
+
+(** Get head of a cell *)
+let head = function
+ | Cell (h, _) -> h
+ | Atom _ -> raise Exit
+
+(** Get tail of a cell *)
+let tail = function
+ | Cell (_, t) -> t
+ | Atom _ -> raise Exit
+
+(** Fragment/axis lookup: slot(n, noun)
+ This implements the tree-addressing scheme:
+ - 1 is the root
+ - 2 is head, 3 is tail
+ - For n > 1: if even, go left; if odd, go right
+*)
+let rec slot n noun =
+ if Z.equal n Z.one then
+ noun
+ else if Z.equal n Z.zero then
+ raise Exit
+ else
+ let bit = Z.testbit n 0 in (* Check if odd *)
+ let parent = Z.shift_right n 1 in
+ let sub = slot parent noun in
+ if bit then tail sub else head sub
+
+(** Equality test for nouns *)
+let rec equal a b =
+ match a, b with
+ | Atom x, Atom y -> Z.equal x y
+ | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt
+ | _, _ -> false
+
+(** Increment an atom *)
+let inc = function
+ | Atom n -> Atom (Z.succ n)
+ | Cell _ -> raise Exit
+
+(** Pretty-print a noun *)
+let rec pp_noun fmt = function
+ | Atom n -> Format.fprintf fmt "%s" (Z.to_string n)
+ | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b
diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml
new file mode 100644
index 0000000..9ededf1
--- /dev/null
+++ b/ocaml/lib/serial.ml
@@ -0,0 +1,187 @@
+(** Jam/cue serialization for nouns
+
+ Based on the Vere implementation in pkg/noun/serial.c
+
+ Jam encoding:
+ - Atoms: tag bit 0, then mat-encoded value
+ - Cells: tag bits 01, then recursively encode head and tail
+ - Backrefs: tag bits 11, then mat-encoded position
+
+ Mat encoding (length-prefixed):
+ - For 0: just bit 1
+ - For n > 0:
+ - Let a = bit-width of n
+ - Let b = bit-width of a
+ - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits]
+*)
+
+open Noun
+open Bitstream
+
+(** Mat-encode a number into the bitstream
+
+ Mat encoding is a variable-length integer encoding:
+ - 0 is encoded as a single 1 bit
+ - For n > 0:
+ - a = number of bits in n (met 0 n)
+ - b = number of bits needed to represent a
+ - Write b 0-bits, then one 1-bit
+ - Write a in b-1 bits
+ - Write n in a bits
+*)
+let mat_encode w n =
+ if Z.equal n Z.zero then
+ write_bit w true
+ else begin
+ let a = Z.numbits n in (* bit-width of n *)
+ let b = Z.numbits (Z.of_int a) in (* bit-width of a *)
+
+ (* Write b 0-bits followed by one 1-bit *)
+ for _i = 1 to b do
+ write_bit w false
+ done;
+ write_bit w true;
+
+ (* Write a in b-1 bits *)
+ write_bits w (Z.of_int a) (b - 1);
+
+ (* Write n in a bits *)
+ write_bits w n a
+ end
+
+(** Mat-decode from bitstream, returns (value, bits_read) *)
+let mat_decode r =
+ let start_pos = reader_pos r in
+
+ (* Count leading 0 bits until we hit a 1 bit *)
+ let b = ref 0 in
+ while not (read_bit r) do
+ b := !b + 1
+ done;
+
+ let b = !b in
+
+ if b = 0 then
+ (* Just a single 1 bit means 0 *)
+ (Z.zero, reader_pos r - start_pos)
+ else begin
+ (* Read the length bits and compute a = 2^(b-1) + bits_read *)
+ let bits_val = read_bits r (b - 1) in
+ let a = Z.to_int (Z.add (Z.shift_left Z.one (b - 1)) bits_val) in
+
+ (* Read n in a bits *)
+ let n = read_bits r a in
+ (n, reader_pos r - start_pos)
+ end
+
+(** Jam: serialize a noun to bytes
+
+ Uses a hash table to track positions for backreferences.
+ Returns the serialized bytes.
+*)
+let jam noun =
+ let w = writer_create () in
+ let positions = Hashtbl.create 256 in (* noun -> bit position *)
+
+ let rec jam_noun n =
+ match n with
+ | Atom a ->
+ (* Check if we've seen this atom before *)
+ begin match Hashtbl.find_opt positions n with
+ | Some pos ->
+ (* Backref might be smaller than re-encoding *)
+ let atom_size = 1 + (Z.numbits a) in (* rough estimate *)
+ let backref_size = 2 + (Z.numbits (Z.of_int pos)) in
+
+ if backref_size < atom_size then begin
+ (* Encode backref: tag bits 11 *)
+ write_bit w true;
+ write_bit w true;
+ mat_encode w (Z.of_int pos)
+ end else begin
+ (* Encode atom *)
+ write_bit w false;
+ mat_encode w a
+ end
+ | None ->
+ (* Record position and encode atom *)
+ Hashtbl.add positions n w.bit_pos;
+ write_bit w false;
+ mat_encode w a
+ end
+
+ | Cell (head, tail) ->
+ (* Check for backref *)
+ begin match Hashtbl.find_opt positions n with
+ | Some pos ->
+ (* Encode backref: tag bits 11 *)
+ write_bit w true;
+ write_bit w true;
+ mat_encode w (Z.of_int pos)
+ | None ->
+ (* Record position and encode cell *)
+ Hashtbl.add positions n w.bit_pos;
+ (* Tag bits 01 for cell *)
+ write_bit w true;
+ write_bit w false;
+ (* Recursively encode head and tail *)
+ jam_noun head;
+ jam_noun tail
+ end
+ in
+
+ jam_noun noun;
+ writer_to_bytes w
+
+(** Cue: deserialize bytes to a noun
+
+ Uses a hash table to store nouns by bit position for backreferences.
+*)
+let cue bytes =
+ let r = reader_create bytes in
+ let backref_table = Hashtbl.create 256 in (* bit position -> noun *)
+
+ let rec cue_noun () =
+ let pos = reader_pos r in
+
+ (* Read tag bit *)
+ let tag0 = read_bit r in
+
+ if not tag0 then begin
+ (* Atom: tag bit 0 *)
+ let (value, _width) = mat_decode r in
+ let result = Atom value in
+ Hashtbl.add backref_table pos result;
+ result
+ end else begin
+ (* Read second tag bit *)
+ let tag1 = read_bit r in
+
+ if tag1 then begin
+ (* Backref: tag bits 11 *)
+ let (ref_pos, _width) = mat_decode r in
+ let ref_pos = Z.to_int ref_pos in
+ match Hashtbl.find_opt backref_table ref_pos with
+ | Some noun -> noun
+ | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos))
+ end else begin
+ (* Cell: tag bits 01 *)
+ let head = cue_noun () in
+ let tail = cue_noun () in
+ let result = Cell (head, tail) in
+ Hashtbl.add backref_table pos result;
+ result
+ end
+ end
+ in
+
+ cue_noun ()
+
+(** Convert bytes to a hex string for debugging *)
+let bytes_to_hex bytes =
+ let len = Bytes.length bytes in
+ let buf = Buffer.create (len * 2) in
+ for i = 0 to len - 1 do
+ Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i))
+ done;
+ Buffer.contents buf