diff options
Diffstat (limited to 'ocaml/lib')
-rw-r--r-- | ocaml/lib/bitstream.ml | 102 | ||||
-rw-r--r-- | ocaml/lib/dune | 4 | ||||
-rw-r--r-- | ocaml/lib/nock.ml | 164 | ||||
-rw-r--r-- | ocaml/lib/noun.ml | 69 | ||||
-rw-r--r-- | ocaml/lib/serial.ml | 187 |
5 files changed, 526 insertions, 0 deletions
diff --git a/ocaml/lib/bitstream.ml b/ocaml/lib/bitstream.ml new file mode 100644 index 0000000..cfe094c --- /dev/null +++ b/ocaml/lib/bitstream.ml @@ -0,0 +1,102 @@ +(** Bitstream utilities for jam/cue serialization *) + +(** A bitstream writer *) +type writer = { + buf: bytes ref; (** Buffer for bits *) + mutable bit_pos: int; (** Current bit position *) +} + +(** A bitstream reader *) +type reader = { + buf: bytes; (** Buffer to read from *) + mutable bit_pos: int; (** Current bit position *) + len: int; (** Length in bits *) +} + +(** Create a new bitstream writer *) +let writer_create () = { + buf = ref (Bytes.create 1024); + bit_pos = 0; +} + +(** Grow the writer buffer if needed *) +let writer_ensure (w : writer) (bits_needed : int) : unit = + let bytes_needed : int = (w.bit_pos + bits_needed + 7) / 8 in + let buf_ref : bytes ref = w.buf in + let current_buf : bytes = !buf_ref in + if bytes_needed > (Bytes.length current_buf) then begin + let old_buf : bytes = current_buf in + let new_size : int = max (bytes_needed * 2) (Bytes.length old_buf * 2) in + let new_buf : bytes = Bytes.create new_size in + Bytes.blit old_buf 0 new_buf 0 (Bytes.length old_buf); + buf_ref := new_buf + end + +(** Write a single bit *) +let write_bit w bit = + writer_ensure w 1; + let byte_pos = w.bit_pos / 8 in + let bit_off = w.bit_pos mod 8 in + if bit then begin + let buf = !(w.buf) in + let old_byte = Bytes.get_uint8 buf byte_pos in + Bytes.set_uint8 buf byte_pos (old_byte lor (1 lsl bit_off)) + end; + w.bit_pos <- w.bit_pos + 1 + +(** Write multiple bits from a Z.t value *) +let write_bits w value nbits = + writer_ensure w nbits; + for i = 0 to nbits - 1 do + let bit = Z.testbit value i in + write_bit w bit + done + +(** Get the final bytes from a writer *) +let writer_to_bytes (w : writer) : bytes = + let byte_len = (w.bit_pos + 7) / 8 in + let buf_ref : bytes ref = w.buf in + let buf : bytes = !buf_ref in + Bytes.sub buf 0 byte_len + +(** Create a bitstream reader *) +let reader_create buf = + { + buf; + bit_pos = 0; + len = Bytes.length buf * 8; + } + +(** Read a single bit *) +let read_bit r = + if r.bit_pos >= r.len then + raise (Invalid_argument "read_bit: end of stream"); + let byte_pos = r.bit_pos / 8 in + let bit_off = r.bit_pos mod 8 in + let byte_val = Bytes.get_uint8 r.buf byte_pos in + r.bit_pos <- r.bit_pos + 1; + (byte_val lsr bit_off) land 1 = 1 + +(** Read multiple bits as a Z.t *) +let read_bits r nbits = + let result = ref Z.zero in + for i = 0 to nbits - 1 do + if read_bit r then + result := Z.logor !result (Z.shift_left Z.one i) + done; + !result + +(** Peek at a bit without advancing *) +let peek_bit r = + if r.bit_pos >= r.len then + raise (Invalid_argument "peek_bit: end of stream"); + let byte_pos = r.bit_pos / 8 in + let bit_off = r.bit_pos mod 8 in + let byte_val = Bytes.get_uint8 r.buf byte_pos in + (byte_val lsr bit_off) land 1 = 1 + +(** Get current bit position *) +let reader_pos r = r.bit_pos + +(** Check if at end of stream *) +let reader_at_end r = r.bit_pos >= r.len diff --git a/ocaml/lib/dune b/ocaml/lib/dune new file mode 100644 index 0000000..008de33 --- /dev/null +++ b/ocaml/lib/dune @@ -0,0 +1,4 @@ +(library + (name nock_lib) + (modules noun nock bitstream serial) + (libraries zarith)) diff --git a/ocaml/lib/nock.ml b/ocaml/lib/nock.ml new file mode 100644 index 0000000..34065b8 --- /dev/null +++ b/ocaml/lib/nock.ml @@ -0,0 +1,164 @@ +open Noun + +(** Nock interpreter + + Based on the reference implementation from vere/pkg/noun/nock.c + + The Nock spec has 12 opcodes (0-11): + - 0: slot/fragment lookup + - 1: constant + - 2: nock (recursion) + - 3: is-cell test + - 4: increment + - 5: equality test + - 6: if-then-else + - 7: composition + - 8: push + - 9: call with axis + - 10: hint (ignored in reference implementation) + - 11: scry (errors in reference implementation) +*) + +(** Main nock evaluation function: nock(subject, formula) + + In Nock notation: *[subject formula] + + This is a direct port of _n_nock_on from nock.c:157-396 +*) +let rec nock_on bus fol = + match fol with + | Cell (hib, gal) when is_cell hib -> + (* [a b] -> compute both sides and cons *) + let poz = nock_on bus hib in + let riv = nock_on bus gal in + cell poz riv + + | Cell (Atom op, gal) -> + (match Z.to_int op with + | 0 -> + (* /[axis subject] - slot/fragment lookup *) + if not (is_atom gal) then raise Exit + else slot (match gal with Atom n -> n | _ -> raise Exit) bus + + | 1 -> + (* =[constant subject] - return constant *) + gal + + | 2 -> + (* *[subject formula new_subject] - evaluate with new subject *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + let seb = nock_on bus b_gal in + let nex = nock_on bus c_gal in + nock_on seb nex + + | 3 -> + (* ?[subject formula] - is-cell test *) + let gof = nock_on bus gal in + if is_cell gof then atom 0 else atom 1 + + | 4 -> + (* +[subject formula] - increment *) + let gof = nock_on bus gal in + inc gof + + | 5 -> + (* =[subject formula] - equality test *) + let wim = nock_on bus gal in + if not (is_cell wim) then raise Exit; + let a = head wim in + let b = tail wim in + if equal a b then atom 0 else atom 1 + + | 6 -> + (* if-then-else *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let cd_gal = tail gal in + if not (is_cell cd_gal) then raise Exit; + let c_gal = head cd_gal in + let d_gal = tail cd_gal in + + let tys = nock_on bus b_gal in + let nex = match tys with + | Atom n when Z.equal n Z.zero -> c_gal + | Atom n when Z.equal n Z.one -> d_gal + | _ -> raise Exit + in + nock_on bus nex + + | 7 -> + (* composition: *[*[subject b] c] *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + let bod = nock_on bus b_gal in + nock_on bod c_gal + + | 8 -> + (* push: *[[*[subject b] subject] c] *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + let heb = nock_on bus b_gal in + let bod = cell heb bus in + nock_on bod c_gal + + | 9 -> + (* call: *[*[subject c] axis] *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + if not (is_atom b_gal) then raise Exit; + + let seb = nock_on bus c_gal in + let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in + nock_on seb nex + + | 10 -> + (* hint - in reference implementation, hints are mostly ignored *) + let nex = + if is_cell gal then + (* [[hint-tag hint-value] formula] *) + tail gal + else + (* [hint-tag formula] where hint-value is implicit *) + gal + in + nock_on bus nex + + | 11 -> + (* scry - not implemented in reference nock, raises error *) + raise Exit + + | _ -> + (* Invalid opcode *) + raise Exit + ) + + | _ -> + (* Invalid formula structure *) + raise Exit + +(** Convenience function: nock(subject, formula) *) +let nock subject formula = + nock_on subject formula + +(** slam: apply gate to sample + slam(gate, sample) = *[gate [9 2 [0 1] [0 6] [1 sample] [0 7]]] + + In practice this evaluates the gate (which is a core with a formula at axis 2) + with a modified sample (at axis 6). +*) +let slam gat sam = + let cor = cell (head gat) (cell sam (tail (tail gat))) in + let formula = slot (Z.of_int 2) cor in + nock_on cor formula + +(** kick: fire gate without changing sample + kick(gate) = *[gate 9 2 0 1] +*) +let kick gat = + let formula = slot (Z.of_int 2) gat in + nock_on gat formula diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml new file mode 100644 index 0000000..c59ec80 --- /dev/null +++ b/ocaml/lib/noun.ml @@ -0,0 +1,69 @@ +(** Noun type and basic operations *) + +(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *) +type noun = + | Atom of Z.t (** Arbitrary-precision integer using Zarith *) + | Cell of noun * noun (** Pair of nouns *) + +(** Exception raised on nock evaluation errors *) +exception Exit + +(** Create an atom from an int *) +let atom n = Atom (Z.of_int n) + +(** Create a cell *) +let cell a b = Cell (a, b) + +(** Test if a noun is a cell *) +let is_cell = function + | Cell _ -> true + | Atom _ -> false + +(** Test if a noun is an atom *) +let is_atom = function + | Atom _ -> true + | Cell _ -> false + +(** Get head of a cell *) +let head = function + | Cell (h, _) -> h + | Atom _ -> raise Exit + +(** Get tail of a cell *) +let tail = function + | Cell (_, t) -> t + | Atom _ -> raise Exit + +(** Fragment/axis lookup: slot(n, noun) + This implements the tree-addressing scheme: + - 1 is the root + - 2 is head, 3 is tail + - For n > 1: if even, go left; if odd, go right +*) +let rec slot n noun = + if Z.equal n Z.one then + noun + else if Z.equal n Z.zero then + raise Exit + else + let bit = Z.testbit n 0 in (* Check if odd *) + let parent = Z.shift_right n 1 in + let sub = slot parent noun in + if bit then tail sub else head sub + +(** Equality test for nouns *) +let rec equal a b = + match a, b with + | Atom x, Atom y -> Z.equal x y + | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt + | _, _ -> false + +(** Increment an atom *) +let inc = function + | Atom n -> Atom (Z.succ n) + | Cell _ -> raise Exit + +(** Pretty-print a noun *) +let rec pp_noun fmt = function + | Atom n -> Format.fprintf fmt "%s" (Z.to_string n) + | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml new file mode 100644 index 0000000..9ededf1 --- /dev/null +++ b/ocaml/lib/serial.ml @@ -0,0 +1,187 @@ +(** Jam/cue serialization for nouns + + Based on the Vere implementation in pkg/noun/serial.c + + Jam encoding: + - Atoms: tag bit 0, then mat-encoded value + - Cells: tag bits 01, then recursively encode head and tail + - Backrefs: tag bits 11, then mat-encoded position + + Mat encoding (length-prefixed): + - For 0: just bit 1 + - For n > 0: + - Let a = bit-width of n + - Let b = bit-width of a + - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits] +*) + +open Noun +open Bitstream + +(** Mat-encode a number into the bitstream + + Mat encoding is a variable-length integer encoding: + - 0 is encoded as a single 1 bit + - For n > 0: + - a = number of bits in n (met 0 n) + - b = number of bits needed to represent a + - Write b 0-bits, then one 1-bit + - Write a in b-1 bits + - Write n in a bits +*) +let mat_encode w n = + if Z.equal n Z.zero then + write_bit w true + else begin + let a = Z.numbits n in (* bit-width of n *) + let b = Z.numbits (Z.of_int a) in (* bit-width of a *) + + (* Write b 0-bits followed by one 1-bit *) + for _i = 1 to b do + write_bit w false + done; + write_bit w true; + + (* Write a in b-1 bits *) + write_bits w (Z.of_int a) (b - 1); + + (* Write n in a bits *) + write_bits w n a + end + +(** Mat-decode from bitstream, returns (value, bits_read) *) +let mat_decode r = + let start_pos = reader_pos r in + + (* Count leading 0 bits until we hit a 1 bit *) + let b = ref 0 in + while not (read_bit r) do + b := !b + 1 + done; + + let b = !b in + + if b = 0 then + (* Just a single 1 bit means 0 *) + (Z.zero, reader_pos r - start_pos) + else begin + (* Read the length bits and compute a = 2^(b-1) + bits_read *) + let bits_val = read_bits r (b - 1) in + let a = Z.to_int (Z.add (Z.shift_left Z.one (b - 1)) bits_val) in + + (* Read n in a bits *) + let n = read_bits r a in + (n, reader_pos r - start_pos) + end + +(** Jam: serialize a noun to bytes + + Uses a hash table to track positions for backreferences. + Returns the serialized bytes. +*) +let jam noun = + let w = writer_create () in + let positions = Hashtbl.create 256 in (* noun -> bit position *) + + let rec jam_noun n = + match n with + | Atom a -> + (* Check if we've seen this atom before *) + begin match Hashtbl.find_opt positions n with + | Some pos -> + (* Backref might be smaller than re-encoding *) + let atom_size = 1 + (Z.numbits a) in (* rough estimate *) + let backref_size = 2 + (Z.numbits (Z.of_int pos)) in + + if backref_size < atom_size then begin + (* Encode backref: tag bits 11 *) + write_bit w true; + write_bit w true; + mat_encode w (Z.of_int pos) + end else begin + (* Encode atom *) + write_bit w false; + mat_encode w a + end + | None -> + (* Record position and encode atom *) + Hashtbl.add positions n w.bit_pos; + write_bit w false; + mat_encode w a + end + + | Cell (head, tail) -> + (* Check for backref *) + begin match Hashtbl.find_opt positions n with + | Some pos -> + (* Encode backref: tag bits 11 *) + write_bit w true; + write_bit w true; + mat_encode w (Z.of_int pos) + | None -> + (* Record position and encode cell *) + Hashtbl.add positions n w.bit_pos; + (* Tag bits 01 for cell *) + write_bit w true; + write_bit w false; + (* Recursively encode head and tail *) + jam_noun head; + jam_noun tail + end + in + + jam_noun noun; + writer_to_bytes w + +(** Cue: deserialize bytes to a noun + + Uses a hash table to store nouns by bit position for backreferences. +*) +let cue bytes = + let r = reader_create bytes in + let backref_table = Hashtbl.create 256 in (* bit position -> noun *) + + let rec cue_noun () = + let pos = reader_pos r in + + (* Read tag bit *) + let tag0 = read_bit r in + + if not tag0 then begin + (* Atom: tag bit 0 *) + let (value, _width) = mat_decode r in + let result = Atom value in + Hashtbl.add backref_table pos result; + result + end else begin + (* Read second tag bit *) + let tag1 = read_bit r in + + if tag1 then begin + (* Backref: tag bits 11 *) + let (ref_pos, _width) = mat_decode r in + let ref_pos = Z.to_int ref_pos in + match Hashtbl.find_opt backref_table ref_pos with + | Some noun -> noun + | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos)) + end else begin + (* Cell: tag bits 01 *) + let head = cue_noun () in + let tail = cue_noun () in + let result = Cell (head, tail) in + Hashtbl.add backref_table pos result; + result + end + end + in + + cue_noun () + +(** Convert bytes to a hex string for debugging *) +let bytes_to_hex bytes = + let len = Bytes.length bytes in + let buf = Buffer.create (len * 2) in + for i = 0 to len - 1 do + Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i)) + done; + Buffer.contents buf |