From c4b71435d9afdb67450f320f54fb7aa99dcae85e Mon Sep 17 00:00:00 2001 From: polwex Date: Sun, 5 Oct 2025 22:57:55 +0700 Subject: fixed jamcue --- ocaml/CLAUDE.md | 278 +++++++++++++++++++++++++++++ ocaml/bench_nock.ml | 132 -------------- ocaml/bitstream.ml | 97 ----------- ocaml/dune | 19 -- ocaml/dune-project | 11 +- ocaml/lib/bitstream.ml | 102 +++++++++++ ocaml/lib/dune | 4 + ocaml/lib/nock.ml | 164 ++++++++++++++++++ ocaml/lib/noun.ml | 69 ++++++++ ocaml/lib/serial.ml | 187 ++++++++++++++++++++ ocaml/nock.ml | 164 ------------------ ocaml/noun.ml | 69 -------- ocaml/serial.ml | 191 -------------------- ocaml/test/bench_nock.ml | 132 ++++++++++++++ ocaml/test/dune | 23 +++ ocaml/test/test_hex.ml | 26 +++ ocaml/test/test_jam_debug.ml | 20 +++ ocaml/test/test_nock.ml | 284 ++++++++++++++++++++++++++++++ ocaml/test/test_serial.ml | 185 ++++++++++++++++++++ ocaml/test_nock.ml | 284 ------------------------------ ocaml/test_serial.ml | 185 -------------------- ocaml/tmp/get_jam_vectors.ml | 10 ++ ocaml/tmp/quick_test.ml | 6 + ocaml/tmp/test_hex.ml | 9 + ocaml/tmp/test_jam.ml | 17 ++ ocaml/tmp/test_prec.ml | 5 + ocaml/tmp/test_ref.ml | 7 + ocaml/urb.py | 405 +++++++++++++++++++++++++++++++++++++++++++ 28 files changed, 1942 insertions(+), 1143 deletions(-) create mode 100644 ocaml/CLAUDE.md delete mode 100644 ocaml/bench_nock.ml delete mode 100644 ocaml/bitstream.ml delete mode 100644 ocaml/dune create mode 100644 ocaml/lib/bitstream.ml create mode 100644 ocaml/lib/dune create mode 100644 ocaml/lib/nock.ml create mode 100644 ocaml/lib/noun.ml create mode 100644 ocaml/lib/serial.ml delete mode 100644 ocaml/nock.ml delete mode 100644 ocaml/noun.ml delete mode 100644 ocaml/serial.ml create mode 100644 ocaml/test/bench_nock.ml create mode 100644 ocaml/test/dune create mode 100644 ocaml/test/test_hex.ml create mode 100644 ocaml/test/test_jam_debug.ml create mode 100644 ocaml/test/test_nock.ml create mode 100644 ocaml/test/test_serial.ml delete mode 100644 ocaml/test_nock.ml delete mode 100644 ocaml/test_serial.ml create mode 100644 ocaml/tmp/get_jam_vectors.ml create mode 100644 ocaml/tmp/quick_test.ml create mode 100644 ocaml/tmp/test_hex.ml create mode 100644 ocaml/tmp/test_jam.ml create mode 100644 ocaml/tmp/test_prec.ml create mode 100644 ocaml/tmp/test_ref.ml create mode 100644 ocaml/urb.py diff --git a/ocaml/CLAUDE.md b/ocaml/CLAUDE.md new file mode 100644 index 0000000..2577dee --- /dev/null +++ b/ocaml/CLAUDE.md @@ -0,0 +1,278 @@ +# Overe - OCaml Port of Vere (Urbit Runtime) + +## Project Goal + +Port the Vere (Urbit C runtime) to OCaml piece by piece, starting with core components and comparing performance against the original C implementation. + +## Completed Work + +### 1. Nock Interpreter (`lib/nock.ml`) + +**Status**: ✅ Complete and tested + +- Implemented complete Nock interpreter with all 12 opcodes +- Direct port of `_n_nock_on` from C implementation in `pkg/noun/nock.c` +- Uses Zarith (GMP-backed) for arbitrary-precision integers +- All test cases pass (`test/test_nock.ml`) + +**Key Functions**: +- `nock_on`: Main interpreter loop +- `slot`: Tree addressing using bit notation +- All 12 opcodes: 0 (slot), 1 (constant), 2 (nock), 3 (cell test), 4 (increment), 5 (equality), 6 (if-then-else), 7 (compose), 8 (push), 9 (call), 10 (hint), 11 (wish/deprecated) + +### 2. Performance Benchmarking + +**Status**: ✅ Complete with detailed results + +Created comprehensive benchmark suite comparing C vs OCaml: +- `bench_nock.ml`: OCaml benchmarks +- `bench_simple.c`: Standalone C implementation for comparison +- `compare.sh`: Comparison script +- `BENCHMARKS.md`: Full results documentation + +**Key Findings**: +- C is 2-5x faster on average +- OCaml is competitive or faster on allocation-heavy operations +- OCaml shows more consistent performance (less variance) +- For decrement: C 1.39x faster +- For tree navigation: C 4.87x faster +- For nock(decrement): OCaml 1.12x faster +- For deep recursion: C 2.41x faster + +### 3. Noun Type System (`lib/noun.ml`) + +**Status**: ✅ Complete + +```ocaml +type noun = + | Atom of Z.t + | Cell of noun * noun +``` + +Helper functions: +- `atom`, `cell`: Constructors +- `is_atom`, `is_cell`: Type predicates +- `head`, `tail`: Cell accessors +- `slot`: Tree addressing (axis addressing) +- `atom_to_int`, `noun_to_string`: Conversions + +### 4. Bitstream Utilities (`lib/bitstream.ml`) + +**Status**: ✅ Complete + +Implements bit-level reading/writing for jam/cue serialization: + +**Writer**: +```ocaml +type writer = { + buf: bytes ref; + mutable bit_pos: int; +} +``` +- `writer_create()`: Initialize writer +- `write_bit`: Write single bit +- `write_bits`: Write multiple bits from a Z.t value +- `writer_to_bytes`: Extract final bytes +- `writer_ensure`: Dynamic buffer resizing + +**Reader**: +```ocaml +type reader = { + bytes: bytes; + mutable bit_pos: int; +} +``` +- `reader_create`: Initialize from bytes +- `read_bit`: Read single bit +- `read_bits`: Read multiple bits into Z.t +- `reader_pos`: Get current bit position + +**Critical Fix**: OCaml operator precedence issue +- Problem: `!(w.buf)` parsed as `(!w).buf` +- Solution: Use intermediate variable pattern: + ```ocaml + let buf_ref : bytes ref = w.buf in + let buf : bytes = !buf_ref in + ``` + +### 5. Jam/Cue Serialization (`lib/serial.ml`) + +**Status**: 🔴 IN PROGRESS - Has bugs + +Implementation of noun serialization with backreference compression. + +**Jam Encoding Format**: +- Atoms: tag bit `0`, then mat-encoded value +- Cells: tag bits `01`, then recursively encode head and tail +- Backrefs: tag bits `11`, then mat-encoded position + +**Mat Encoding** (variable-length integers): +- For 0: single `0` bit +- For n > 0: + - a = bit-width of n (`Z.numbits n`) + - b = bit-width of a + - Write b `1`-bits, then one `0`-bit + - Write a in b-1 bits + - Write n in a bits + +**Current Implementation**: +```ocaml +let mat_encode w n = + if Z.equal n Z.zero then + write_bit w false + else begin + let a = Z.numbits n in + let b = Z.numbits (Z.of_int a) in + for _i = 1 to b do write_bit w true done; + write_bit w false; + write_bits w (Z.of_int a) (b - 1); (* Write a, not a-1 *) + write_bits w n a + end +``` + +**Known Bugs**: +1. ✅ Fixed: mat_encode for 0 wrote `true` instead of `false` +2. ✅ Fixed: Was writing `a-1` in `b-1` bits, changed to `a` based on C code analysis +3. 🔴 **CURRENT BUG**: Roundtrip test passes for 0,1,2,3 but fails at 4 + - Test shows: `FAIL: 4 -> 0` + - Need to compare against actual Urbit jam output to find discrepancy + +**Test Status** (`test/test_serial.ml`): +- Atom roundtrip: FAILS at value 4 +- Basic jam/cue: Not yet fully validated + +### 6. Python Urbit Interface (`urb.py`) + +**Status**: ✅ Updated to Python 3 + +Script to communicate with running Urbit ship via lens port. Can be used to get correct jam/cue test vectors. + +**Changes Made**: +- Shebang: `#!/usr/bin/env python3` +- String decoding: `.encode().decode('unicode_escape')` +- File write: mode `'wb'` for binary +- Logging: `warn` → `warning` +- Idiom: `'X' not in dict` + +**Usage** (needs running Urbit ship): +```bash +python3 urb.py -d "(jam 42)" # Get jam encoding of 42 +python3 urb.py -d "(cue 42)" # Decode jam encoding +``` + +## Project Structure + +``` +ocaml/ +├── dune-project # Project configuration (package: overe) +├── lib/ +│ ├── dune # Library stanza +│ ├── noun.ml # Noun type system +│ ├── nock.ml # Nock interpreter +│ ├── bitstream.ml # Bit-level I/O +│ └── serial.ml # Jam/cue serialization +├── test/ +│ ├── dune # Test executables +│ ├── test_nock.ml # Nock tests (all passing) +│ └── test_serial.ml # Jam/cue tests (failing at 4) +├── tmp/ # Temporary test files +├── urb.py # Python script to query Urbit ship +├── BENCHMARKS.md # Performance comparison results +└── CLAUDE.md # This file +``` + +## Next Steps + +### Immediate Priority: Fix Jam/Cue Bug + +1. **Get test vectors from Urbit**: + ```bash + python3 urb.py -d "(jam 0)" + python3 urb.py -d "(jam 1)" + python3 urb.py -d "(jam 2)" + python3 urb.py -d "(jam 3)" + python3 urb.py -d "(jam 4)" # This is where OCaml fails + ``` + +2. **Compare OCaml output**: + - Create test program to print hex output of jam for 0-10 + - Compare against Urbit's output byte-by-byte + - Identify exact discrepancy in mat encoding + +3. **Fix mat_decode**: + - Ensure it matches the corrected mat_encode formula + - Currently may be reading `a-1` when it should read `a` + +### Future Work + +1. **Complete Jam/Cue**: + - Fix current bugs + - Add comprehensive tests + - Benchmark performance vs C + +2. **Memory Management**: + - Implement noun hash-consing/deduplication + - Add reference counting or GC integration + - Study u3 memory system from C implementation + +3. **Jets**: + - Port jet dashboard system + - Implement critical jets for performance + - Add jet registration and lookup + +4. **Persistence**: + - Implement snapshot system + - Port event log handling + - Add checkpoint/restore functionality + +## Build Instructions + +```bash +# Build library and tests +dune build + +# Run Nock tests (all passing) +dune exec test/test_nock.exe + +# Run jam/cue tests (currently failing at 4) +dune exec test/test_serial.exe + +# Query running Urbit (requires ship running on port 12323 or with .http.ports) +python urb.py -d "(jam 42)" +``` + +## Technical Notes + +### OCaml Challenges Encountered + +1. **Operator precedence with refs**: `!(r.field)` doesn't work, need intermediate variable +2. **Zarith numbits**: Returns actual bit count, not bit index (e.g., 4 needs 3 bits, `numbits` returns 3) +3. **Mutable fields**: Need to use `mutable` keyword and assignment operator `:=` +4. **Bytes are mutable**: Use `Bytes.sub` to copy, not share + +### Mat Encoding Details from C Code + +From `pkg/noun/serial.c` line 126: +```c +// Write a_w (the width) in b_w-1 bits +u3r_chop(0, 0, b_w - 1, 0, &a_w, &bits); +``` + +This writes `a` (the width), NOT `a-1`. The OCaml implementation has been updated to match this. + +### References + +- Vere C implementation: `/home/y/code/urbit/vere/pkg/` +- Nock specification: https://urbit.org/docs/nock/reference/ +- Jam/cue format: See official Urbit docs (provided in conversation) +- u3 system architecture: See docs on king/serf, jets, persistence + +## Performance Philosophy + +Goal is not to beat C in raw speed, but to: +- Provide maintainable, type-safe implementation +- Enable experimentation with runtime features +- Achieve competitive performance on real workloads +- Leverage OCaml's strengths (allocation, GC, pattern matching) + +Current results show this is achievable: OCaml is within 2-5x of C, and actually faster for some allocation-heavy operations. diff --git a/ocaml/bench_nock.ml b/ocaml/bench_nock.ml deleted file mode 100644 index a71b3da..0000000 --- a/ocaml/bench_nock.ml +++ /dev/null @@ -1,132 +0,0 @@ -open Nock_lib.Noun -open Nock_lib.Nock - -(** Benchmark utilities *) - -let time_ms () = - Unix.gettimeofday () *. 1000.0 - -let bench_nock name subject formula iterations = - (* Warmup *) - for _i = 1 to 100 do - let _ = nock subject formula in () - done; - - (* Actual benchmark *) - Gc.compact (); - let start = time_ms () in - - for _i = 1 to iterations do - let _result = nock subject formula in () - done; - - let finish = time_ms () in - let total = finish -. start in - let per_iter = total /. (float_of_int iterations) in - let ops_per_sec = 1000.0 /. per_iter in - - Printf.printf "%-30s %8d iterations in %10.2f ms (%10.6f ms/iter, %10.0f ops/sec)\n" - name iterations total per_iter ops_per_sec - -(** Benchmarks *) - -let () = - Printf.printf "Nock Benchmark - OCaml Implementation\n"; - Printf.printf "======================================\n\n"; - - let iterations = 1_000_000 in (* 1M iterations for fast ops *) - let slow_iters = 100_000 in (* 100K for slower ops *) - - (* Benchmark 0: slot lookup *) - begin - let subject = cell (atom 42) (atom 99) in - let formula = cell (atom 0) (atom 2) in (* [0 2] - get head *) - bench_nock "Opcode 0: slot/fragment" subject formula iterations - end; - - (* Benchmark 1: constant *) - begin - let subject = atom 0 in - let formula = cell (atom 1) (atom 42) in (* [1 42] *) - bench_nock "Opcode 1: constant" subject formula iterations - end; - - (* Benchmark 3: is-cell *) - begin - let subject = atom 0 in - let formula = cell (atom 3) (cell (atom 1) (atom 42)) in (* [3 [1 42]] *) - bench_nock "Opcode 3: is-cell (atom)" subject formula iterations - end; - - (* Benchmark 4: increment *) - begin - let subject = atom 0 in - let formula = cell (atom 4) (cell (atom 1) (atom 1000)) in (* [4 [1 1000]] *) - bench_nock "Opcode 4: increment" subject formula iterations - end; - - (* Benchmark 5: equality *) - begin - let subject = atom 0 in - (* [5 [1 42] [1 42]] *) - let formula = cell (atom 5) (cell (cell (atom 1) (atom 42)) (cell (atom 1) (atom 42))) in - bench_nock "Opcode 5: equality (equal)" subject formula iterations - end; - - (* Benchmark 6: if-then-else *) - begin - let subject = atom 0 in - (* [6 [1 0] [1 11] [1 22]] *) - let formula = cell (atom 6) - (cell (cell (atom 1) (atom 0)) - (cell (cell (atom 1) (atom 11)) - (cell (atom 1) (atom 22)))) in - bench_nock "Opcode 6: if-then-else" subject formula iterations - end; - - (* Benchmark 7: composition *) - begin - let subject = atom 42 in - (* [7 [1 99] [0 1]] *) - let formula = cell (atom 7) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))) in - bench_nock "Opcode 7: composition" subject formula iterations - end; - - (* Benchmark 8: push *) - begin - let subject = atom 42 in - (* [8 [1 99] [0 1]] *) - let formula = cell (atom 8) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))) in - bench_nock "Opcode 8: push" subject formula iterations - end; - - (* Benchmark: Decrement-like operation (slower) *) - begin - (* [6 [5 [0 1] [1 0]] [1 0] [8 [1 0] [4 [0 3]]]] *) - (* This is: if(subject == 0) 0 else subject+1 (simplified) *) - let dec_fol = cell (atom 6) - (cell (cell (atom 5) (cell (cell (atom 0) (atom 1)) (cell (atom 1) (atom 0)))) - (cell (cell (atom 1) (atom 0)) - (cell (atom 8) (cell (cell (atom 1) (atom 0)) (cell (atom 4) (cell (atom 0) (atom 3))))))) in - - let subject = atom 10 in - bench_nock "Complex: decrement loop" subject dec_fol slow_iters - end; - - (* Benchmark: Tree construction *) - begin - let subject = atom 0 in - (* [[1 1] [1 2]] - constructs a cell *) - let formula = cell (cell (atom 1) (atom 1)) (cell (atom 1) (atom 2)) in - bench_nock "Cell construction" subject formula iterations - end; - - (* Benchmark: Deep slot lookup *) - begin - (* Build a deep tree: [[[[1 2] 3] 4] 5] *) - let subject = cell (cell (cell (cell (atom 1) (atom 2)) (atom 3)) (atom 4)) (atom 5) in - let formula = cell (atom 0) (atom 16) in (* slot 16 = deepest left (1) *) - bench_nock "Deep slot lookup (depth 4)" subject formula iterations - end; - - Printf.printf "\n" diff --git a/ocaml/bitstream.ml b/ocaml/bitstream.ml deleted file mode 100644 index 73eda36..0000000 --- a/ocaml/bitstream.ml +++ /dev/null @@ -1,97 +0,0 @@ -(** Bitstream utilities for jam/cue serialization *) - -(** A bitstream writer *) -type writer = { - buf: bytes ref; (** Buffer for bits *) - mutable bit_pos: int; (** Current bit position *) -} - -(** A bitstream reader *) -type reader = { - buf: bytes; (** Buffer to read from *) - mutable bit_pos: int; (** Current bit position *) - len: int; (** Length in bits *) -} - -(** Create a new bitstream writer *) -let writer_create () = { - buf = ref (Bytes.create 1024); - bit_pos = 0; -} - -(** Grow the writer buffer if needed *) -let writer_ensure w bits_needed = - let bytes_needed = (w.bit_pos + bits_needed + 7) / 8 in - if bytes_needed > Bytes.length !(w.buf) then begin - let old_buf = !(w.buf) in - let new_size = max (bytes_needed * 2) (Bytes.length old_buf * 2) in - let new_buf = Bytes.create new_size in - Bytes.blit old_buf 0 new_buf 0 (Bytes.length old_buf); - w.buf := new_buf - end - -(** Write a single bit *) -let write_bit w bit = - writer_ensure w 1; - let byte_pos = w.bit_pos / 8 in - let bit_off = w.bit_pos mod 8 in - if bit then begin - let old_byte = Bytes.get_uint8 !(w.buf) byte_pos in - Bytes.set_uint8 !(w.buf) byte_pos (old_byte lor (1 lsl bit_off)) - end; - w.bit_pos <- w.bit_pos + 1 - -(** Write multiple bits from a Z.t value *) -let write_bits w value nbits = - writer_ensure w nbits; - for i = 0 to nbits - 1 do - let bit = Z.testbit value i in - write_bit w bit - done - -(** Get the final bytes from a writer *) -let writer_to_bytes w = - let byte_len = (w.bit_pos + 7) / 8 in - Bytes.sub !(w.buf) 0 byte_len - -(** Create a bitstream reader *) -let reader_create buf = - { - buf; - bit_pos = 0; - len = Bytes.length buf * 8; - } - -(** Read a single bit *) -let read_bit r = - if r.bit_pos >= r.len then - raise (Invalid_argument "read_bit: end of stream"); - let byte_pos = r.bit_pos / 8 in - let bit_off = r.bit_pos mod 8 in - let byte_val = Bytes.get_uint8 r.buf byte_pos in - r.bit_pos <- r.bit_pos + 1; - (byte_val lsr bit_off) land 1 = 1 - -(** Read multiple bits as a Z.t *) -let read_bits r nbits = - let result = ref Z.zero in - for i = 0 to nbits - 1 do - if read_bit r then - result := Z.logor !result (Z.shift_left Z.one i) - done; - !result - -(** Peek at a bit without advancing *) -let peek_bit r = - if r.bit_pos >= r.len then - raise (Invalid_argument "peek_bit: end of stream"); - let byte_pos = r.bit_pos / 8 in - let bit_off = r.bit_pos mod 8 in - let byte_val = Bytes.get_uint8 r.buf byte_pos in - (byte_val lsr bit_off) land 1 = 1 - -(** Get current bit position *) -let reader_pos r = r.bit_pos - -(** Check if at end of stream *) -let reader_at_end r = r.bit_pos >= r.len diff --git a/ocaml/dune b/ocaml/dune deleted file mode 100644 index 3943b7b..0000000 --- a/ocaml/dune +++ /dev/null @@ -1,19 +0,0 @@ -(library - (name nock_lib) - (modules noun nock bitstream serial) - (libraries zarith)) - -(executable - (name test_nock) - (modules test_nock) - (libraries nock_lib zarith)) - -(executable - (name test_serial) - (modules test_serial) - (libraries nock_lib zarith)) - -(executable - (name bench_nock) - (modules bench_nock) - (libraries nock_lib zarith unix)) diff --git a/ocaml/dune-project b/ocaml/dune-project index 4a478ee..ec486dc 100644 --- a/ocaml/dune-project +++ b/ocaml/dune-project @@ -1,2 +1,9 @@ -(lang dune 3.0) -(name vere_nock) +(lang dune 3.20) + +(name overe) + +(package + (name overe) + (allow_empty) + (synopsis "Urbit on OCaml") + (description "With OCaml 5.3+ and Eio Multicore!")) diff --git a/ocaml/lib/bitstream.ml b/ocaml/lib/bitstream.ml new file mode 100644 index 0000000..cfe094c --- /dev/null +++ b/ocaml/lib/bitstream.ml @@ -0,0 +1,102 @@ +(** Bitstream utilities for jam/cue serialization *) + +(** A bitstream writer *) +type writer = { + buf: bytes ref; (** Buffer for bits *) + mutable bit_pos: int; (** Current bit position *) +} + +(** A bitstream reader *) +type reader = { + buf: bytes; (** Buffer to read from *) + mutable bit_pos: int; (** Current bit position *) + len: int; (** Length in bits *) +} + +(** Create a new bitstream writer *) +let writer_create () = { + buf = ref (Bytes.create 1024); + bit_pos = 0; +} + +(** Grow the writer buffer if needed *) +let writer_ensure (w : writer) (bits_needed : int) : unit = + let bytes_needed : int = (w.bit_pos + bits_needed + 7) / 8 in + let buf_ref : bytes ref = w.buf in + let current_buf : bytes = !buf_ref in + if bytes_needed > (Bytes.length current_buf) then begin + let old_buf : bytes = current_buf in + let new_size : int = max (bytes_needed * 2) (Bytes.length old_buf * 2) in + let new_buf : bytes = Bytes.create new_size in + Bytes.blit old_buf 0 new_buf 0 (Bytes.length old_buf); + buf_ref := new_buf + end + +(** Write a single bit *) +let write_bit w bit = + writer_ensure w 1; + let byte_pos = w.bit_pos / 8 in + let bit_off = w.bit_pos mod 8 in + if bit then begin + let buf = !(w.buf) in + let old_byte = Bytes.get_uint8 buf byte_pos in + Bytes.set_uint8 buf byte_pos (old_byte lor (1 lsl bit_off)) + end; + w.bit_pos <- w.bit_pos + 1 + +(** Write multiple bits from a Z.t value *) +let write_bits w value nbits = + writer_ensure w nbits; + for i = 0 to nbits - 1 do + let bit = Z.testbit value i in + write_bit w bit + done + +(** Get the final bytes from a writer *) +let writer_to_bytes (w : writer) : bytes = + let byte_len = (w.bit_pos + 7) / 8 in + let buf_ref : bytes ref = w.buf in + let buf : bytes = !buf_ref in + Bytes.sub buf 0 byte_len + +(** Create a bitstream reader *) +let reader_create buf = + { + buf; + bit_pos = 0; + len = Bytes.length buf * 8; + } + +(** Read a single bit *) +let read_bit r = + if r.bit_pos >= r.len then + raise (Invalid_argument "read_bit: end of stream"); + let byte_pos = r.bit_pos / 8 in + let bit_off = r.bit_pos mod 8 in + let byte_val = Bytes.get_uint8 r.buf byte_pos in + r.bit_pos <- r.bit_pos + 1; + (byte_val lsr bit_off) land 1 = 1 + +(** Read multiple bits as a Z.t *) +let read_bits r nbits = + let result = ref Z.zero in + for i = 0 to nbits - 1 do + if read_bit r then + result := Z.logor !result (Z.shift_left Z.one i) + done; + !result + +(** Peek at a bit without advancing *) +let peek_bit r = + if r.bit_pos >= r.len then + raise (Invalid_argument "peek_bit: end of stream"); + let byte_pos = r.bit_pos / 8 in + let bit_off = r.bit_pos mod 8 in + let byte_val = Bytes.get_uint8 r.buf byte_pos in + (byte_val lsr bit_off) land 1 = 1 + +(** Get current bit position *) +let reader_pos r = r.bit_pos + +(** Check if at end of stream *) +let reader_at_end r = r.bit_pos >= r.len diff --git a/ocaml/lib/dune b/ocaml/lib/dune new file mode 100644 index 0000000..008de33 --- /dev/null +++ b/ocaml/lib/dune @@ -0,0 +1,4 @@ +(library + (name nock_lib) + (modules noun nock bitstream serial) + (libraries zarith)) diff --git a/ocaml/lib/nock.ml b/ocaml/lib/nock.ml new file mode 100644 index 0000000..34065b8 --- /dev/null +++ b/ocaml/lib/nock.ml @@ -0,0 +1,164 @@ +open Noun + +(** Nock interpreter + + Based on the reference implementation from vere/pkg/noun/nock.c + + The Nock spec has 12 opcodes (0-11): + - 0: slot/fragment lookup + - 1: constant + - 2: nock (recursion) + - 3: is-cell test + - 4: increment + - 5: equality test + - 6: if-then-else + - 7: composition + - 8: push + - 9: call with axis + - 10: hint (ignored in reference implementation) + - 11: scry (errors in reference implementation) +*) + +(** Main nock evaluation function: nock(subject, formula) + + In Nock notation: *[subject formula] + + This is a direct port of _n_nock_on from nock.c:157-396 +*) +let rec nock_on bus fol = + match fol with + | Cell (hib, gal) when is_cell hib -> + (* [a b] -> compute both sides and cons *) + let poz = nock_on bus hib in + let riv = nock_on bus gal in + cell poz riv + + | Cell (Atom op, gal) -> + (match Z.to_int op with + | 0 -> + (* /[axis subject] - slot/fragment lookup *) + if not (is_atom gal) then raise Exit + else slot (match gal with Atom n -> n | _ -> raise Exit) bus + + | 1 -> + (* =[constant subject] - return constant *) + gal + + | 2 -> + (* *[subject formula new_subject] - evaluate with new subject *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + let seb = nock_on bus b_gal in + let nex = nock_on bus c_gal in + nock_on seb nex + + | 3 -> + (* ?[subject formula] - is-cell test *) + let gof = nock_on bus gal in + if is_cell gof then atom 0 else atom 1 + + | 4 -> + (* +[subject formula] - increment *) + let gof = nock_on bus gal in + inc gof + + | 5 -> + (* =[subject formula] - equality test *) + let wim = nock_on bus gal in + if not (is_cell wim) then raise Exit; + let a = head wim in + let b = tail wim in + if equal a b then atom 0 else atom 1 + + | 6 -> + (* if-then-else *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let cd_gal = tail gal in + if not (is_cell cd_gal) then raise Exit; + let c_gal = head cd_gal in + let d_gal = tail cd_gal in + + let tys = nock_on bus b_gal in + let nex = match tys with + | Atom n when Z.equal n Z.zero -> c_gal + | Atom n when Z.equal n Z.one -> d_gal + | _ -> raise Exit + in + nock_on bus nex + + | 7 -> + (* composition: *[*[subject b] c] *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + let bod = nock_on bus b_gal in + nock_on bod c_gal + + | 8 -> + (* push: *[[*[subject b] subject] c] *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + let heb = nock_on bus b_gal in + let bod = cell heb bus in + nock_on bod c_gal + + | 9 -> + (* call: *[*[subject c] axis] *) + if not (is_cell gal) then raise Exit; + let b_gal = head gal in + let c_gal = tail gal in + if not (is_atom b_gal) then raise Exit; + + let seb = nock_on bus c_gal in + let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in + nock_on seb nex + + | 10 -> + (* hint - in reference implementation, hints are mostly ignored *) + let nex = + if is_cell gal then + (* [[hint-tag hint-value] formula] *) + tail gal + else + (* [hint-tag formula] where hint-value is implicit *) + gal + in + nock_on bus nex + + | 11 -> + (* scry - not implemented in reference nock, raises error *) + raise Exit + + | _ -> + (* Invalid opcode *) + raise Exit + ) + + | _ -> + (* Invalid formula structure *) + raise Exit + +(** Convenience function: nock(subject, formula) *) +let nock subject formula = + nock_on subject formula + +(** slam: apply gate to sample + slam(gate, sample) = *[gate [9 2 [0 1] [0 6] [1 sample] [0 7]]] + + In practice this evaluates the gate (which is a core with a formula at axis 2) + with a modified sample (at axis 6). +*) +let slam gat sam = + let cor = cell (head gat) (cell sam (tail (tail gat))) in + let formula = slot (Z.of_int 2) cor in + nock_on cor formula + +(** kick: fire gate without changing sample + kick(gate) = *[gate 9 2 0 1] +*) +let kick gat = + let formula = slot (Z.of_int 2) gat in + nock_on gat formula diff --git a/ocaml/lib/noun.ml b/ocaml/lib/noun.ml new file mode 100644 index 0000000..c59ec80 --- /dev/null +++ b/ocaml/lib/noun.ml @@ -0,0 +1,69 @@ +(** Noun type and basic operations *) + +(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *) +type noun = + | Atom of Z.t (** Arbitrary-precision integer using Zarith *) + | Cell of noun * noun (** Pair of nouns *) + +(** Exception raised on nock evaluation errors *) +exception Exit + +(** Create an atom from an int *) +let atom n = Atom (Z.of_int n) + +(** Create a cell *) +let cell a b = Cell (a, b) + +(** Test if a noun is a cell *) +let is_cell = function + | Cell _ -> true + | Atom _ -> false + +(** Test if a noun is an atom *) +let is_atom = function + | Atom _ -> true + | Cell _ -> false + +(** Get head of a cell *) +let head = function + | Cell (h, _) -> h + | Atom _ -> raise Exit + +(** Get tail of a cell *) +let tail = function + | Cell (_, t) -> t + | Atom _ -> raise Exit + +(** Fragment/axis lookup: slot(n, noun) + This implements the tree-addressing scheme: + - 1 is the root + - 2 is head, 3 is tail + - For n > 1: if even, go left; if odd, go right +*) +let rec slot n noun = + if Z.equal n Z.one then + noun + else if Z.equal n Z.zero then + raise Exit + else + let bit = Z.testbit n 0 in (* Check if odd *) + let parent = Z.shift_right n 1 in + let sub = slot parent noun in + if bit then tail sub else head sub + +(** Equality test for nouns *) +let rec equal a b = + match a, b with + | Atom x, Atom y -> Z.equal x y + | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt + | _, _ -> false + +(** Increment an atom *) +let inc = function + | Atom n -> Atom (Z.succ n) + | Cell _ -> raise Exit + +(** Pretty-print a noun *) +let rec pp_noun fmt = function + | Atom n -> Format.fprintf fmt "%s" (Z.to_string n) + | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b diff --git a/ocaml/lib/serial.ml b/ocaml/lib/serial.ml new file mode 100644 index 0000000..9ededf1 --- /dev/null +++ b/ocaml/lib/serial.ml @@ -0,0 +1,187 @@ +(** Jam/cue serialization for nouns + + Based on the Vere implementation in pkg/noun/serial.c + + Jam encoding: + - Atoms: tag bit 0, then mat-encoded value + - Cells: tag bits 01, then recursively encode head and tail + - Backrefs: tag bits 11, then mat-encoded position + + Mat encoding (length-prefixed): + - For 0: just bit 1 + - For n > 0: + - Let a = bit-width of n + - Let b = bit-width of a + - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits] +*) + +open Noun +open Bitstream + +(** Mat-encode a number into the bitstream + + Mat encoding is a variable-length integer encoding: + - 0 is encoded as a single 1 bit + - For n > 0: + - a = number of bits in n (met 0 n) + - b = number of bits needed to represent a + - Write b 0-bits, then one 1-bit + - Write a in b-1 bits + - Write n in a bits +*) +let mat_encode w n = + if Z.equal n Z.zero then + write_bit w true + else begin + let a = Z.numbits n in (* bit-width of n *) + let b = Z.numbits (Z.of_int a) in (* bit-width of a *) + + (* Write b 0-bits followed by one 1-bit *) + for _i = 1 to b do + write_bit w false + done; + write_bit w true; + + (* Write a in b-1 bits *) + write_bits w (Z.of_int a) (b - 1); + + (* Write n in a bits *) + write_bits w n a + end + +(** Mat-decode from bitstream, returns (value, bits_read) *) +let mat_decode r = + let start_pos = reader_pos r in + + (* Count leading 0 bits until we hit a 1 bit *) + let b = ref 0 in + while not (read_bit r) do + b := !b + 1 + done; + + let b = !b in + + if b = 0 then + (* Just a single 1 bit means 0 *) + (Z.zero, reader_pos r - start_pos) + else begin + (* Read the length bits and compute a = 2^(b-1) + bits_read *) + let bits_val = read_bits r (b - 1) in + let a = Z.to_int (Z.add (Z.shift_left Z.one (b - 1)) bits_val) in + + (* Read n in a bits *) + let n = read_bits r a in + (n, reader_pos r - start_pos) + end + +(** Jam: serialize a noun to bytes + + Uses a hash table to track positions for backreferences. + Returns the serialized bytes. +*) +let jam noun = + let w = writer_create () in + let positions = Hashtbl.create 256 in (* noun -> bit position *) + + let rec jam_noun n = + match n with + | Atom a -> + (* Check if we've seen this atom before *) + begin match Hashtbl.find_opt positions n with + | Some pos -> + (* Backref might be smaller than re-encoding *) + let atom_size = 1 + (Z.numbits a) in (* rough estimate *) + let backref_size = 2 + (Z.numbits (Z.of_int pos)) in + + if backref_size < atom_size then begin + (* Encode backref: tag bits 11 *) + write_bit w true; + write_bit w true; + mat_encode w (Z.of_int pos) + end else begin + (* Encode atom *) + write_bit w false; + mat_encode w a + end + | None -> + (* Record position and encode atom *) + Hashtbl.add positions n w.bit_pos; + write_bit w false; + mat_encode w a + end + + | Cell (head, tail) -> + (* Check for backref *) + begin match Hashtbl.find_opt positions n with + | Some pos -> + (* Encode backref: tag bits 11 *) + write_bit w true; + write_bit w true; + mat_encode w (Z.of_int pos) + | None -> + (* Record position and encode cell *) + Hashtbl.add positions n w.bit_pos; + (* Tag bits 01 for cell *) + write_bit w true; + write_bit w false; + (* Recursively encode head and tail *) + jam_noun head; + jam_noun tail + end + in + + jam_noun noun; + writer_to_bytes w + +(** Cue: deserialize bytes to a noun + + Uses a hash table to store nouns by bit position for backreferences. +*) +let cue bytes = + let r = reader_create bytes in + let backref_table = Hashtbl.create 256 in (* bit position -> noun *) + + let rec cue_noun () = + let pos = reader_pos r in + + (* Read tag bit *) + let tag0 = read_bit r in + + if not tag0 then begin + (* Atom: tag bit 0 *) + let (value, _width) = mat_decode r in + let result = Atom value in + Hashtbl.add backref_table pos result; + result + end else begin + (* Read second tag bit *) + let tag1 = read_bit r in + + if tag1 then begin + (* Backref: tag bits 11 *) + let (ref_pos, _width) = mat_decode r in + let ref_pos = Z.to_int ref_pos in + match Hashtbl.find_opt backref_table ref_pos with + | Some noun -> noun + | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos)) + end else begin + (* Cell: tag bits 01 *) + let head = cue_noun () in + let tail = cue_noun () in + let result = Cell (head, tail) in + Hashtbl.add backref_table pos result; + result + end + end + in + + cue_noun () + +(** Convert bytes to a hex string for debugging *) +let bytes_to_hex bytes = + let len = Bytes.length bytes in + let buf = Buffer.create (len * 2) in + for i = 0 to len - 1 do + Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i)) + done; + Buffer.contents buf diff --git a/ocaml/nock.ml b/ocaml/nock.ml deleted file mode 100644 index 34065b8..0000000 --- a/ocaml/nock.ml +++ /dev/null @@ -1,164 +0,0 @@ -open Noun - -(** Nock interpreter - - Based on the reference implementation from vere/pkg/noun/nock.c - - The Nock spec has 12 opcodes (0-11): - - 0: slot/fragment lookup - - 1: constant - - 2: nock (recursion) - - 3: is-cell test - - 4: increment - - 5: equality test - - 6: if-then-else - - 7: composition - - 8: push - - 9: call with axis - - 10: hint (ignored in reference implementation) - - 11: scry (errors in reference implementation) -*) - -(** Main nock evaluation function: nock(subject, formula) - - In Nock notation: *[subject formula] - - This is a direct port of _n_nock_on from nock.c:157-396 -*) -let rec nock_on bus fol = - match fol with - | Cell (hib, gal) when is_cell hib -> - (* [a b] -> compute both sides and cons *) - let poz = nock_on bus hib in - let riv = nock_on bus gal in - cell poz riv - - | Cell (Atom op, gal) -> - (match Z.to_int op with - | 0 -> - (* /[axis subject] - slot/fragment lookup *) - if not (is_atom gal) then raise Exit - else slot (match gal with Atom n -> n | _ -> raise Exit) bus - - | 1 -> - (* =[constant subject] - return constant *) - gal - - | 2 -> - (* *[subject formula new_subject] - evaluate with new subject *) - if not (is_cell gal) then raise Exit; - let b_gal = head gal in - let c_gal = tail gal in - let seb = nock_on bus b_gal in - let nex = nock_on bus c_gal in - nock_on seb nex - - | 3 -> - (* ?[subject formula] - is-cell test *) - let gof = nock_on bus gal in - if is_cell gof then atom 0 else atom 1 - - | 4 -> - (* +[subject formula] - increment *) - let gof = nock_on bus gal in - inc gof - - | 5 -> - (* =[subject formula] - equality test *) - let wim = nock_on bus gal in - if not (is_cell wim) then raise Exit; - let a = head wim in - let b = tail wim in - if equal a b then atom 0 else atom 1 - - | 6 -> - (* if-then-else *) - if not (is_cell gal) then raise Exit; - let b_gal = head gal in - let cd_gal = tail gal in - if not (is_cell cd_gal) then raise Exit; - let c_gal = head cd_gal in - let d_gal = tail cd_gal in - - let tys = nock_on bus b_gal in - let nex = match tys with - | Atom n when Z.equal n Z.zero -> c_gal - | Atom n when Z.equal n Z.one -> d_gal - | _ -> raise Exit - in - nock_on bus nex - - | 7 -> - (* composition: *[*[subject b] c] *) - if not (is_cell gal) then raise Exit; - let b_gal = head gal in - let c_gal = tail gal in - let bod = nock_on bus b_gal in - nock_on bod c_gal - - | 8 -> - (* push: *[[*[subject b] subject] c] *) - if not (is_cell gal) then raise Exit; - let b_gal = head gal in - let c_gal = tail gal in - let heb = nock_on bus b_gal in - let bod = cell heb bus in - nock_on bod c_gal - - | 9 -> - (* call: *[*[subject c] axis] *) - if not (is_cell gal) then raise Exit; - let b_gal = head gal in - let c_gal = tail gal in - if not (is_atom b_gal) then raise Exit; - - let seb = nock_on bus c_gal in - let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in - nock_on seb nex - - | 10 -> - (* hint - in reference implementation, hints are mostly ignored *) - let nex = - if is_cell gal then - (* [[hint-tag hint-value] formula] *) - tail gal - else - (* [hint-tag formula] where hint-value is implicit *) - gal - in - nock_on bus nex - - | 11 -> - (* scry - not implemented in reference nock, raises error *) - raise Exit - - | _ -> - (* Invalid opcode *) - raise Exit - ) - - | _ -> - (* Invalid formula structure *) - raise Exit - -(** Convenience function: nock(subject, formula) *) -let nock subject formula = - nock_on subject formula - -(** slam: apply gate to sample - slam(gate, sample) = *[gate [9 2 [0 1] [0 6] [1 sample] [0 7]]] - - In practice this evaluates the gate (which is a core with a formula at axis 2) - with a modified sample (at axis 6). -*) -let slam gat sam = - let cor = cell (head gat) (cell sam (tail (tail gat))) in - let formula = slot (Z.of_int 2) cor in - nock_on cor formula - -(** kick: fire gate without changing sample - kick(gate) = *[gate 9 2 0 1] -*) -let kick gat = - let formula = slot (Z.of_int 2) gat in - nock_on gat formula diff --git a/ocaml/noun.ml b/ocaml/noun.ml deleted file mode 100644 index c59ec80..0000000 --- a/ocaml/noun.ml +++ /dev/null @@ -1,69 +0,0 @@ -(** Noun type and basic operations *) - -(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *) -type noun = - | Atom of Z.t (** Arbitrary-precision integer using Zarith *) - | Cell of noun * noun (** Pair of nouns *) - -(** Exception raised on nock evaluation errors *) -exception Exit - -(** Create an atom from an int *) -let atom n = Atom (Z.of_int n) - -(** Create a cell *) -let cell a b = Cell (a, b) - -(** Test if a noun is a cell *) -let is_cell = function - | Cell _ -> true - | Atom _ -> false - -(** Test if a noun is an atom *) -let is_atom = function - | Atom _ -> true - | Cell _ -> false - -(** Get head of a cell *) -let head = function - | Cell (h, _) -> h - | Atom _ -> raise Exit - -(** Get tail of a cell *) -let tail = function - | Cell (_, t) -> t - | Atom _ -> raise Exit - -(** Fragment/axis lookup: slot(n, noun) - This implements the tree-addressing scheme: - - 1 is the root - - 2 is head, 3 is tail - - For n > 1: if even, go left; if odd, go right -*) -let rec slot n noun = - if Z.equal n Z.one then - noun - else if Z.equal n Z.zero then - raise Exit - else - let bit = Z.testbit n 0 in (* Check if odd *) - let parent = Z.shift_right n 1 in - let sub = slot parent noun in - if bit then tail sub else head sub - -(** Equality test for nouns *) -let rec equal a b = - match a, b with - | Atom x, Atom y -> Z.equal x y - | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt - | _, _ -> false - -(** Increment an atom *) -let inc = function - | Atom n -> Atom (Z.succ n) - | Cell _ -> raise Exit - -(** Pretty-print a noun *) -let rec pp_noun fmt = function - | Atom n -> Format.fprintf fmt "%s" (Z.to_string n) - | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b diff --git a/ocaml/serial.ml b/ocaml/serial.ml deleted file mode 100644 index 039cd2f..0000000 --- a/ocaml/serial.ml +++ /dev/null @@ -1,191 +0,0 @@ -(** Jam/cue serialization for nouns - - Based on the Vere implementation in pkg/noun/serial.c - - Jam encoding: - - Atoms: tag bit 0, then mat-encoded value - - Cells: tag bits 01, then recursively encode head and tail - - Backrefs: tag bits 11, then mat-encoded position - - Mat encoding (length-prefixed): - - For 0: just bit 1 - - For n > 0: - - Let a = bit-width of n - - Let b = bit-width of a - - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits] -*) - -open Noun -open Bitstream - -(** Mat-encode a number into the bitstream - - Mat encoding is a variable-length integer encoding: - - 0 is encoded as a single 1 bit - - For n > 0: - - a = number of bits in n (met 0 n) - - b = number of bits needed to represent a - - Write b 1-bits, then a 0-bit - - Write a-1 in b-1 bits - - Write n in a bits -*) -let rec mat_encode w n = - if Z.equal n Z.zero then - write_bit w true - else begin - let a = Z.numbits n in (* bit-width of n *) - let b = Z.numbits (Z.of_int a) in (* bit-width of a *) - - (* Write b 1-bits followed by a 0-bit *) - for _i = 1 to b do - write_bit w true - done; - write_bit w false; - - (* Write a-1 in b-1 bits *) - write_bits w (Z.of_int (a - 1)) (b - 1); - - (* Write n in a bits *) - write_bits w n a - end - -(** Mat-decode from bitstream, returns (value, bits_read) *) -let rec mat_decode r = - let start_pos = reader_pos r in - - if not (read_bit r) then - (Z.zero, reader_pos r - start_pos) - else begin - (* Count leading 1 bits *) - let b = ref 1 in - while read_bit r do - b := !b + 1 - done; - - let b = !b in - - if b = 1 then - (* Special case: just "10" means 1 *) - (Z.one, reader_pos r - start_pos) - else begin - (* Read a-1 in b-1 bits *) - let a_minus_1 = read_bits r (b - 1) in - let a = Z.to_int (Z.add a_minus_1 Z.one) in - - (* Read n in a bits *) - let n = read_bits r a in - (n, reader_pos r - start_pos) - end - end - -(** Jam: serialize a noun to bytes - - Uses a hash table to track positions for backreferences. - Returns the serialized bytes. -*) -let jam noun = - let w = writer_create () in - let positions = Hashtbl.create 256 in (* noun -> bit position *) - - let rec jam_noun n = - match n with - | Atom a -> - (* Check if we've seen this atom before *) - begin match Hashtbl.find_opt positions n with - | Some pos -> - (* Backref might be smaller than re-encoding *) - let atom_size = 1 + (Z.numbits a) in (* rough estimate *) - let backref_size = 2 + (Z.numbits (Z.of_int pos)) in - - if backref_size < atom_size then begin - (* Encode backref: tag bits 11 *) - write_bit w true; - write_bit w true; - mat_encode w (Z.of_int pos) - end else begin - (* Encode atom *) - write_bit w false; - mat_encode w a - end - | None -> - (* Record position and encode atom *) - Hashtbl.add positions n w.bit_pos; - write_bit w false; - mat_encode w a - end - - | Cell (head, tail) -> - (* Check for backref *) - begin match Hashtbl.find_opt positions n with - | Some pos -> - (* Encode backref: tag bits 11 *) - write_bit w true; - write_bit w true; - mat_encode w (Z.of_int pos) - | None -> - (* Record position and encode cell *) - Hashtbl.add positions n w.bit_pos; - (* Tag bits 01 for cell *) - write_bit w true; - write_bit w false; - (* Recursively encode head and tail *) - jam_noun head; - jam_noun tail - end - in - - jam_noun noun; - writer_to_bytes w - -(** Cue: deserialize bytes to a noun - - Uses a hash table to store nouns by bit position for backreferences. -*) -let cue bytes = - let r = reader_create bytes in - let backref_table = Hashtbl.create 256 in (* bit position -> noun *) - - let rec cue_noun () = - let pos = reader_pos r in - - (* Read tag bit *) - let tag0 = read_bit r in - - if not tag0 then begin - (* Atom: tag bit 0 *) - let (value, _width) = mat_decode r in - let result = Atom value in - Hashtbl.add backref_table pos result; - result - end else begin - (* Read second tag bit *) - let tag1 = read_bit r in - - if tag1 then begin - (* Backref: tag bits 11 *) - let (ref_pos, _width) = mat_decode r in - let ref_pos = Z.to_int ref_pos in - match Hashtbl.find_opt backref_table ref_pos with - | Some noun -> noun - | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos)) - end else begin - (* Cell: tag bits 01 *) - let head = cue_noun () in - let tail = cue_noun () in - let result = Cell (head, tail) in - Hashtbl.add backref_table pos result; - result - end - end - in - - cue_noun () - -(** Convert bytes to a hex string for debugging *) -let bytes_to_hex bytes = - let len = Bytes.length bytes in - let buf = Buffer.create (len * 2) in - for i = 0 to len - 1 do - Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i)) - done; - Buffer.contents buf diff --git a/ocaml/test/bench_nock.ml b/ocaml/test/bench_nock.ml new file mode 100644 index 0000000..a71b3da --- /dev/null +++ b/ocaml/test/bench_nock.ml @@ -0,0 +1,132 @@ +open Nock_lib.Noun +open Nock_lib.Nock + +(** Benchmark utilities *) + +let time_ms () = + Unix.gettimeofday () *. 1000.0 + +let bench_nock name subject formula iterations = + (* Warmup *) + for _i = 1 to 100 do + let _ = nock subject formula in () + done; + + (* Actual benchmark *) + Gc.compact (); + let start = time_ms () in + + for _i = 1 to iterations do + let _result = nock subject formula in () + done; + + let finish = time_ms () in + let total = finish -. start in + let per_iter = total /. (float_of_int iterations) in + let ops_per_sec = 1000.0 /. per_iter in + + Printf.printf "%-30s %8d iterations in %10.2f ms (%10.6f ms/iter, %10.0f ops/sec)\n" + name iterations total per_iter ops_per_sec + +(** Benchmarks *) + +let () = + Printf.printf "Nock Benchmark - OCaml Implementation\n"; + Printf.printf "======================================\n\n"; + + let iterations = 1_000_000 in (* 1M iterations for fast ops *) + let slow_iters = 100_000 in (* 100K for slower ops *) + + (* Benchmark 0: slot lookup *) + begin + let subject = cell (atom 42) (atom 99) in + let formula = cell (atom 0) (atom 2) in (* [0 2] - get head *) + bench_nock "Opcode 0: slot/fragment" subject formula iterations + end; + + (* Benchmark 1: constant *) + begin + let subject = atom 0 in + let formula = cell (atom 1) (atom 42) in (* [1 42] *) + bench_nock "Opcode 1: constant" subject formula iterations + end; + + (* Benchmark 3: is-cell *) + begin + let subject = atom 0 in + let formula = cell (atom 3) (cell (atom 1) (atom 42)) in (* [3 [1 42]] *) + bench_nock "Opcode 3: is-cell (atom)" subject formula iterations + end; + + (* Benchmark 4: increment *) + begin + let subject = atom 0 in + let formula = cell (atom 4) (cell (atom 1) (atom 1000)) in (* [4 [1 1000]] *) + bench_nock "Opcode 4: increment" subject formula iterations + end; + + (* Benchmark 5: equality *) + begin + let subject = atom 0 in + (* [5 [1 42] [1 42]] *) + let formula = cell (atom 5) (cell (cell (atom 1) (atom 42)) (cell (atom 1) (atom 42))) in + bench_nock "Opcode 5: equality (equal)" subject formula iterations + end; + + (* Benchmark 6: if-then-else *) + begin + let subject = atom 0 in + (* [6 [1 0] [1 11] [1 22]] *) + let formula = cell (atom 6) + (cell (cell (atom 1) (atom 0)) + (cell (cell (atom 1) (atom 11)) + (cell (atom 1) (atom 22)))) in + bench_nock "Opcode 6: if-then-else" subject formula iterations + end; + + (* Benchmark 7: composition *) + begin + let subject = atom 42 in + (* [7 [1 99] [0 1]] *) + let formula = cell (atom 7) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))) in + bench_nock "Opcode 7: composition" subject formula iterations + end; + + (* Benchmark 8: push *) + begin + let subject = atom 42 in + (* [8 [1 99] [0 1]] *) + let formula = cell (atom 8) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))) in + bench_nock "Opcode 8: push" subject formula iterations + end; + + (* Benchmark: Decrement-like operation (slower) *) + begin + (* [6 [5 [0 1] [1 0]] [1 0] [8 [1 0] [4 [0 3]]]] *) + (* This is: if(subject == 0) 0 else subject+1 (simplified) *) + let dec_fol = cell (atom 6) + (cell (cell (atom 5) (cell (cell (atom 0) (atom 1)) (cell (atom 1) (atom 0)))) + (cell (cell (atom 1) (atom 0)) + (cell (atom 8) (cell (cell (atom 1) (atom 0)) (cell (atom 4) (cell (atom 0) (atom 3))))))) in + + let subject = atom 10 in + bench_nock "Complex: decrement loop" subject dec_fol slow_iters + end; + + (* Benchmark: Tree construction *) + begin + let subject = atom 0 in + (* [[1 1] [1 2]] - constructs a cell *) + let formula = cell (cell (atom 1) (atom 1)) (cell (atom 1) (atom 2)) in + bench_nock "Cell construction" subject formula iterations + end; + + (* Benchmark: Deep slot lookup *) + begin + (* Build a deep tree: [[[[1 2] 3] 4] 5] *) + let subject = cell (cell (cell (cell (atom 1) (atom 2)) (atom 3)) (atom 4)) (atom 5) in + let formula = cell (atom 0) (atom 16) in (* slot 16 = deepest left (1) *) + bench_nock "Deep slot lookup (depth 4)" subject formula iterations + end; + + Printf.printf "\n" diff --git a/ocaml/test/dune b/ocaml/test/dune new file mode 100644 index 0000000..b0ad51d --- /dev/null +++ b/ocaml/test/dune @@ -0,0 +1,23 @@ +(test + (name test_nock) + (modules test_nock) + (libraries nock_lib zarith)) + +(test + (name test_serial) + (modules test_serial) + (libraries nock_lib zarith)) + +(test + (name test_jam_debug) + (modules test_jam_debug) + (libraries nock_lib zarith)) + +(test + (name bench_nock) + (modules bench_nock) + (libraries nock_lib zarith unix)) + +(executable + (name test_hex) + (libraries nock_lib)) diff --git a/ocaml/test/test_hex.ml b/ocaml/test/test_hex.ml new file mode 100644 index 0000000..a228682 --- /dev/null +++ b/ocaml/test/test_hex.ml @@ -0,0 +1,26 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + for i = 0 to 10 do + let n = atom i in + let jammed = jam n in + Printf.printf "jam(%d) = %s (%d bytes)\n" i (bytes_to_hex jammed) (Bytes.length jammed) + done + +let () = + Printf.printf "\nRound-trip tests:\n"; + for i = 0 to 50 do + let n = atom i in + let jammed = jam n in + let cued = cue jammed in + match cued with + | Atom a when Z.equal a (Z.of_int i) -> + Printf.printf "OK: %d\n" i + | Atom a -> + Printf.printf "FAIL: %d -> %s\n" i (Z.to_string a); + exit 1 + | Cell _ -> + Printf.printf "FAIL: %d -> cell\n" i; + exit 1 + done diff --git a/ocaml/test/test_jam_debug.ml b/ocaml/test/test_jam_debug.ml new file mode 100644 index 0000000..cad3ee9 --- /dev/null +++ b/ocaml/test/test_jam_debug.ml @@ -0,0 +1,20 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + Printf.printf "Testing jam encoding:\n"; + + (* Test 0 *) + let n0 = atom 0 in + let j0 = jam n0 in + Printf.printf "jam(0) = %s\n" (bytes_to_hex j0); + + (* Test 1 *) + let n1 = atom 1 in + let j1 = jam n1 in + Printf.printf "jam(1) = %s\n" (bytes_to_hex j1); + + (* Test 2 *) + let n2 = atom 2 in + let j2 = jam n2 in + Printf.printf "jam(2) = %s\n" (bytes_to_hex j2); diff --git a/ocaml/test/test_nock.ml b/ocaml/test/test_nock.ml new file mode 100644 index 0000000..73f2ce2 --- /dev/null +++ b/ocaml/test/test_nock.ml @@ -0,0 +1,284 @@ +open Nock_lib.Noun +open Nock_lib.Nock + +(** Test utilities *) + +let assert_equal expected actual msg = + if not (equal expected actual) then begin + Printf.printf "FAIL: %s\n" msg; + Format.printf " Expected: %a@." pp_noun expected; + Format.printf " Actual: %a@." pp_noun actual; + exit 1 + end else + Printf.printf "PASS: %s\n" msg + +let _assert_raises_exit f msg = + try + let _ = f () in + Printf.printf "FAIL: %s (expected Exit exception)\n" msg; + exit 1 + with Exit -> + Printf.printf "PASS: %s\n" msg + +(** Basic noun tests *) + +let test_noun_basics () = + Printf.printf "\n=== Testing basic noun operations ===\n"; + + (* Test atom creation *) + let a = atom 42 in + assert_equal (atom 42) a "atom creation"; + + (* Test cell creation *) + let c = cell (atom 1) (atom 2) in + assert_equal (atom 1) (head c) "cell head"; + assert_equal (atom 2) (tail c) "cell tail"; + + (* Test is_cell and is_atom *) + if not (is_atom a) then Printf.printf "FAIL: is_atom on atom\n" else Printf.printf "PASS: is_atom on atom\n"; + if not (is_cell c) then Printf.printf "FAIL: is_cell on cell\n" else Printf.printf "PASS: is_cell on cell\n"; + if is_atom c then Printf.printf "FAIL: not is_atom on cell\n" else Printf.printf "PASS: not is_atom on cell\n"; + if is_cell a then Printf.printf "FAIL: not is_cell on atom\n" else Printf.printf "PASS: not is_cell on atom\n" + +(** Test slot/fragment addressing *) +let test_slots () = + Printf.printf "\n=== Testing slot/fragment addressing ===\n"; + + (* Build tree: [[1 2] [3 4]] *) + let tree = cell (cell (atom 1) (atom 2)) (cell (atom 3) (atom 4)) in + + (* Test slot addressing + 1 = whole tree + 2 = head = [1 2] + 3 = tail = [3 4] + 4 = head of head = 1 + 5 = tail of head = 2 + 6 = head of tail = 3 + 7 = tail of tail = 4 + *) + assert_equal tree (slot Z.one tree) "slot 1 (root)"; + assert_equal (cell (atom 1) (atom 2)) (slot (Z.of_int 2) tree) "slot 2 (head)"; + assert_equal (cell (atom 3) (atom 4)) (slot (Z.of_int 3) tree) "slot 3 (tail)"; + assert_equal (atom 1) (slot (Z.of_int 4) tree) "slot 4"; + assert_equal (atom 2) (slot (Z.of_int 5) tree) "slot 5"; + assert_equal (atom 3) (slot (Z.of_int 6) tree) "slot 6"; + assert_equal (atom 4) (slot (Z.of_int 7) tree) "slot 7" + +(** Test Nock opcode 0: slot lookup *) +let test_nock_0 () = + Printf.printf "\n=== Testing Nock opcode 0 (slot) ===\n"; + + let subject = cell (atom 4) (atom 5) in + + (* *[subject [0 1]] = subject *) + assert_equal subject (nock subject (cell (atom 0) (atom 1))) "nock 0: axis 1"; + + (* *[[4 5] [0 2]] = 4 *) + assert_equal (atom 4) (nock subject (cell (atom 0) (atom 2))) "nock 0: axis 2"; + + (* *[[4 5] [0 3]] = 5 *) + assert_equal (atom 5) (nock subject (cell (atom 0) (atom 3))) "nock 0: axis 3" + +(** Test Nock opcode 1: constant *) +let test_nock_1 () = + Printf.printf "\n=== Testing Nock opcode 1 (constant) ===\n"; + + let subject = atom 99 in + + (* *[subject [1 42]] = 42 *) + assert_equal (atom 42) (nock subject (cell (atom 1) (atom 42))) "nock 1: return constant"; + + (* *[subject [1 [1 2]]] = [1 2] *) + assert_equal + (cell (atom 1) (atom 2)) + (nock subject (cell (atom 1) (cell (atom 1) (atom 2)))) + "nock 1: return constant cell" + +(** Test Nock opcode 2: recursion *) +let test_nock_2 () = + Printf.printf "\n=== Testing Nock opcode 2 (nock) ===\n"; + + (* *[42 [2 [0 1] [1 0]]] = *[42 0] = crash *) + (* *[42 [2 [1 99] [1 0 1]]] = *[99 [0 1]] = 99 *) + let subject = atom 42 in + let formula = cell (atom 2) (cell (cell (atom 1) (atom 99)) (cell (atom 1) (cell (atom 0) (atom 1)))) in + assert_equal (atom 99) (nock subject formula) "nock 2: evaluate with new subject" + +(** Test Nock opcode 3: is-cell *) +let test_nock_3 () = + Printf.printf "\n=== Testing Nock opcode 3 (is-cell) ===\n"; + + (* *[42 [3 1 42]] = 1 (atom) *) + assert_equal (atom 1) (nock (atom 42) (cell (atom 3) (cell (atom 1) (atom 42)))) "nock 3: is-cell of atom"; + + (* *[42 [3 1 [1 2]]] = 0 (cell) *) + assert_equal + (atom 0) + (nock (atom 42) (cell (atom 3) (cell (atom 1) (cell (atom 1) (atom 2))))) + "nock 3: is-cell of cell" + +(** Test Nock opcode 4: increment *) +let test_nock_4 () = + Printf.printf "\n=== Testing Nock opcode 4 (increment) ===\n"; + + (* *[42 [4 1 41]] = 42 *) + assert_equal (atom 42) (nock (atom 0) (cell (atom 4) (cell (atom 1) (atom 41)))) "nock 4: increment"; + + (* *[0 [4 0 1]] = 1 *) + assert_equal (atom 1) (nock (atom 0) (cell (atom 4) (cell (atom 0) (atom 1)))) "nock 4: increment subject" + +(** Test Nock opcode 5: equality *) +let test_nock_5 () = + Printf.printf "\n=== Testing Nock opcode 5 (equality) ===\n"; + + (* *[0 [5 [1 4] [1 5]]] = 1 (not equal) *) + assert_equal + (atom 1) + (nock (atom 0) (cell (atom 5) (cell (cell (atom 1) (atom 4)) (cell (atom 1) (atom 5))))) + "nock 5: not equal"; + + (* *[0 [5 [1 4] [1 4]]] = 0 (equal) *) + assert_equal + (atom 0) + (nock (atom 0) (cell (atom 5) (cell (cell (atom 1) (atom 4)) (cell (atom 1) (atom 4))))) + "nock 5: equal" + +(** Test Nock opcode 6: if-then-else *) +let test_nock_6 () = + Printf.printf "\n=== Testing Nock opcode 6 (if-then-else) ===\n"; + + (* *[42 [6 [1 0] [1 11] [1 22]]] = 11 (if 0 then 11 else 22) *) + assert_equal + (atom 11) + (nock (atom 42) (cell (atom 6) (cell (cell (atom 1) (atom 0)) (cell (cell (atom 1) (atom 11)) (cell (atom 1) (atom 22)))))) + "nock 6: if true"; + + (* *[42 [6 [1 1] [1 11] [1 22]]] = 22 (if 1 then 11 else 22) *) + assert_equal + (atom 22) + (nock (atom 42) (cell (atom 6) (cell (cell (atom 1) (atom 1)) (cell (cell (atom 1) (atom 11)) (cell (atom 1) (atom 22)))))) + "nock 6: if false" + +(** Test Nock opcode 7: composition *) +let test_nock_7 () = + Printf.printf "\n=== Testing Nock opcode 7 (composition) ===\n"; + + (* *[42 [7 [1 99] [0 1]]] = *[99 [0 1]] = 99 *) + assert_equal + (atom 99) + (nock (atom 42) (cell (atom 7) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))))) + "nock 7: composition" + +(** Test Nock opcode 8: push *) +let test_nock_8 () = + Printf.printf "\n=== Testing Nock opcode 8 (push) ===\n"; + + (* *[42 [8 [1 99] [0 1]]] = *[[99 42] [0 1]] = [99 42] *) + assert_equal + (cell (atom 99) (atom 42)) + (nock (atom 42) (cell (atom 8) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))))) + "nock 8: push" + +(** Test Nock opcode 9: call *) +let test_nock_9 () = + Printf.printf "\n=== Testing Nock opcode 9 (call) ===\n"; + + (* Simplest test: *[42 [9 1 [0 1]]] + = evaluate [0 1] with 42 -> 42 + = slot 1 of 42 -> 42 + = *[42 42] -> should crash since 42 is not a valid formula + + Better test: create subject with formula at position 3 + *[[formula value] [9 2 [0 1]]] + where formula = [0 3] (get tail) + + Actually: *[[1 2] [9 2 [1 [0 3]]]] + = evaluate [1 [0 3]] with [1 2] -> [0 3] + = slot 2 of [1 2] -> 1 + + Wait, that's wrong. Let me think about what 9 does: + *[subject [9 axis formula]] + = *[subject *[*[subject formula] axis]] + + So: *[[1 2] [9 3 [0 1]]] + = *[*[[1 2] [0 1]] 3] + = *[[1 2] 3] + = slot 3 of [1 2] + = 2 + + But that's not right either. Let me re-read the spec. + + Actually from the C code: + seb = nock(bus, c_gal) + nex = slot(b_gal, seb) + result = nock(seb, nex) + + So for [9 b c]: + 1. Evaluate c with subject -> seb + 2. Get slot b from seb -> nex (this is the formula) + 3. Evaluate nex with seb as subject + + Test: *[[99 [4 [0 2]]] [9 3 [0 1]]] + 1. seb = *[[99 [4 [0 2]]] [0 1]] = [99 [4 [0 2]]] + 2. nex = slot 3 of [99 [4 [0 2]]] = [4 [0 2]] + 3. result = *[[99 [4 [0 2]]] [4 [0 2]]] + = increment of *[[99 [4 [0 2]]] [0 2]] + = increment of 99 + = 100 + *) + let subj = cell (atom 99) (cell (atom 4) (cell (atom 0) (atom 2))) in + assert_equal + (atom 100) + (nock subj (cell (atom 9) (cell (atom 3) (cell (atom 0) (atom 1))))) + "nock 9: call formula at axis 3" + +(** Test Nock opcode 10: hint *) +let test_nock_10 () = + Printf.printf "\n=== Testing Nock opcode 10 (hint) ===\n"; + + (* *[42 [10 99 [1 11]]] = 11 (hint ignored) *) + assert_equal + (atom 11) + (nock (atom 42) (cell (atom 10) (cell (atom 99) (cell (atom 1) (atom 11))))) + "nock 10: hint with value (ignored)"; + + (* *[42 [10 [99 [1 88]] [1 11]]] = 11 (hint ignored) *) + assert_equal + (atom 11) + (nock (atom 42) (cell (atom 10) (cell (cell (atom 99) (cell (atom 1) (atom 88))) (cell (atom 1) (atom 11))))) + "nock 10: hint with computed value (ignored)" + +(** Test Nock cell constructor shorthand *) +let test_nock_cons () = + Printf.printf "\n=== Testing Nock cons (cell auto-construction) ===\n"; + + (* *[42 [[1 6] [1 7]]] = [6 7] *) + assert_equal + (cell (atom 6) (atom 7)) + (nock (atom 42) (cell (cell (atom 1) (atom 6)) (cell (atom 1) (atom 7)))) + "nock cons: [[1 6] [1 7]]" + +(** Run all tests *) +let () = + Printf.printf "=================================\n"; + Printf.printf "Nock OCaml Test Suite\n"; + Printf.printf "=================================\n"; + + test_noun_basics (); + test_slots (); + test_nock_0 (); + test_nock_1 (); + test_nock_2 (); + test_nock_3 (); + test_nock_4 (); + test_nock_5 (); + test_nock_6 (); + test_nock_7 (); + test_nock_8 (); + test_nock_9 (); + test_nock_10 (); + test_nock_cons (); + + Printf.printf "\n=================================\n"; + Printf.printf "All tests passed!\n"; + Printf.printf "=================================\n" diff --git a/ocaml/test/test_serial.ml b/ocaml/test/test_serial.ml new file mode 100644 index 0000000..fca30f8 --- /dev/null +++ b/ocaml/test/test_serial.ml @@ -0,0 +1,185 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +(** Test utilities *) + +let assert_equal expected actual msg = + if not (equal expected actual) then begin + Printf.printf "FAIL: %s\n" msg; + Format.printf " Expected: %a@." pp_noun expected; + Format.printf " Actual: %a@." pp_noun actual; + exit 1 + end else + Printf.printf "PASS: %s\n" msg + +let _assert_bytes_equal expected actual msg = + if expected <> actual then begin + Printf.printf "FAIL: %s\n" msg; + Printf.printf " Expected: %s\n" (bytes_to_hex expected); + Printf.printf " Actual: %s\n" (bytes_to_hex actual); + exit 1 + end else + Printf.printf "PASS: %s\n" msg + +(** Round-trip test: jam then cue should give original *) +let test_roundtrip noun msg = + let jammed = jam noun in + let cued = cue jammed in + assert_equal noun cued msg + +(** Test basic atoms *) +let test_atoms () = + Printf.printf "\n=== Testing atom serialization ===\n"; + + (* Test 0 *) + let n = atom 0 in + test_roundtrip n "atom 0 roundtrip"; + + (* Test small atoms *) + test_roundtrip (atom 1) "atom 1 roundtrip"; + test_roundtrip (atom 2) "atom 2 roundtrip"; + test_roundtrip (atom 42) "atom 42 roundtrip"; + test_roundtrip (atom 255) "atom 255 roundtrip"; + test_roundtrip (atom 256) "atom 256 roundtrip"; + + (* Test larger atoms *) + test_roundtrip (atom 65535) "atom 65535 roundtrip"; + test_roundtrip (atom 1000000) "atom 1000000 roundtrip" + +(** Test basic cells *) +let test_cells () = + Printf.printf "\n=== Testing cell serialization ===\n"; + + (* Simple cell [1 2] *) + let c = cell (atom 1) (atom 2) in + test_roundtrip c "cell [1 2] roundtrip"; + + (* Nested cells [[1 2] 3] *) + let c = cell (cell (atom 1) (atom 2)) (atom 3) in + test_roundtrip c "cell [[1 2] 3] roundtrip"; + + (* Deep nesting *) + let c = cell (cell (cell (atom 1) (atom 2)) (atom 3)) (atom 4) in + test_roundtrip c "cell [[[1 2] 3] 4] roundtrip"; + + (* Larger values *) + let c = cell (atom 1000) (atom 2000) in + test_roundtrip c "cell [1000 2000] roundtrip" + +(** Test trees *) +let test_trees () = + Printf.printf "\n=== Testing tree serialization ===\n"; + + (* Binary tree *) + let tree = cell (cell (atom 1) (atom 2)) (cell (atom 3) (atom 4)) in + test_roundtrip tree "binary tree roundtrip"; + + (* Unbalanced tree *) + let tree = cell (atom 1) (cell (atom 2) (cell (atom 3) (atom 4))) in + test_roundtrip tree "right-leaning tree roundtrip"; + + (* List-like structure [1 [2 [3 0]]] *) + let list = cell (atom 1) (cell (atom 2) (cell (atom 3) (atom 0))) in + test_roundtrip list "list-like structure roundtrip" + +(** Test backreferences + + When the same sub-noun appears multiple times, jam should use backreferences +*) +let test_backrefs () = + Printf.printf "\n=== Testing backreferences ===\n"; + + (* Create a noun with shared structure: [42 42] + The second 42 should be a backref to the first *) + let shared = atom 42 in + let n = cell shared shared in + test_roundtrip n "shared atom [42 42] roundtrip"; + + (* More complex sharing: [[1 2] [1 2]] + Second cell should backref to first *) + let sub = cell (atom 1) (atom 2) in + let n = cell sub sub in + test_roundtrip n "shared cell [[1 2] [1 2]] roundtrip"; + + (* Test that backrefs actually save space *) + let sub = cell (atom 100) (atom 200) in + let with_backref = cell sub sub in + let without_backref = cell (cell (atom 100) (atom 200)) (cell (atom 100) (atom 200)) in + + let jammed_with = jam with_backref in + let jammed_without = jam without_backref in + + Printf.printf " Shared structure size: %d bytes\n" (Bytes.length jammed_with); + Printf.printf " Duplicated structure size: %d bytes\n" (Bytes.length jammed_without); + + (* Note: Due to how OCaml constructs values, physical equality might not work as expected, + but logical equality should still work for roundtrip *) + test_roundtrip with_backref "backref optimization roundtrip" + +(** Test known encodings + + These test vectors would ideally come from the Vere test suite or Urbit dojo +*) +let test_known_encodings () = + Printf.printf "\n=== Testing known encodings ===\n"; + + (* We can generate these from Urbit with (jam 0), (jam 1), etc. *) + + (* jam of 0 should be simple *) + let n = atom 0 in + let jammed = jam n in + Printf.printf " jam(0) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed); + test_roundtrip n "known encoding: 0"; + + (* jam of 1 *) + let n = atom 1 in + let jammed = jam n in + Printf.printf " jam(1) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed); + test_roundtrip n "known encoding: 1"; + + (* jam of [0 0] *) + let n = cell (atom 0) (atom 0) in + let jammed = jam n in + Printf.printf " jam([0 0]) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed); + test_roundtrip n "known encoding: [0 0]" + +(** Test edge cases *) +let test_edge_cases () = + Printf.printf "\n=== Testing edge cases ===\n"; + + (* Very large atom *) + let big = Atom (Z.of_string "123456789012345678901234567890") in + test_roundtrip big "very large atom roundtrip"; + + (* Deep nesting *) + let rec make_deep n = + if n = 0 then atom 0 + else cell (atom n) (make_deep (n - 1)) + in + let deep = make_deep 50 in + test_roundtrip deep "deeply nested structure (50 levels) roundtrip"; + + (* Wide tree *) + let rec make_wide n = + if n = 0 then atom 0 + else cell (make_wide (n - 1)) (make_wide (n - 1)) + in + let wide = make_wide 6 in (* 2^6 = 64 leaves *) + test_roundtrip wide "wide binary tree (6 levels) roundtrip" + +(** Run all tests *) +let () = + Printf.printf "=================================\n"; + Printf.printf "Jam/Cue Serialization Test Suite\n"; + Printf.printf "=================================\n"; + + test_atoms (); + test_cells (); + test_trees (); + test_backrefs (); + test_known_encodings (); + test_edge_cases (); + + Printf.printf "\n=================================\n"; + Printf.printf "All tests passed!\n"; + Printf.printf "=================================\n" diff --git a/ocaml/test_nock.ml b/ocaml/test_nock.ml deleted file mode 100644 index 73f2ce2..0000000 --- a/ocaml/test_nock.ml +++ /dev/null @@ -1,284 +0,0 @@ -open Nock_lib.Noun -open Nock_lib.Nock - -(** Test utilities *) - -let assert_equal expected actual msg = - if not (equal expected actual) then begin - Printf.printf "FAIL: %s\n" msg; - Format.printf " Expected: %a@." pp_noun expected; - Format.printf " Actual: %a@." pp_noun actual; - exit 1 - end else - Printf.printf "PASS: %s\n" msg - -let _assert_raises_exit f msg = - try - let _ = f () in - Printf.printf "FAIL: %s (expected Exit exception)\n" msg; - exit 1 - with Exit -> - Printf.printf "PASS: %s\n" msg - -(** Basic noun tests *) - -let test_noun_basics () = - Printf.printf "\n=== Testing basic noun operations ===\n"; - - (* Test atom creation *) - let a = atom 42 in - assert_equal (atom 42) a "atom creation"; - - (* Test cell creation *) - let c = cell (atom 1) (atom 2) in - assert_equal (atom 1) (head c) "cell head"; - assert_equal (atom 2) (tail c) "cell tail"; - - (* Test is_cell and is_atom *) - if not (is_atom a) then Printf.printf "FAIL: is_atom on atom\n" else Printf.printf "PASS: is_atom on atom\n"; - if not (is_cell c) then Printf.printf "FAIL: is_cell on cell\n" else Printf.printf "PASS: is_cell on cell\n"; - if is_atom c then Printf.printf "FAIL: not is_atom on cell\n" else Printf.printf "PASS: not is_atom on cell\n"; - if is_cell a then Printf.printf "FAIL: not is_cell on atom\n" else Printf.printf "PASS: not is_cell on atom\n" - -(** Test slot/fragment addressing *) -let test_slots () = - Printf.printf "\n=== Testing slot/fragment addressing ===\n"; - - (* Build tree: [[1 2] [3 4]] *) - let tree = cell (cell (atom 1) (atom 2)) (cell (atom 3) (atom 4)) in - - (* Test slot addressing - 1 = whole tree - 2 = head = [1 2] - 3 = tail = [3 4] - 4 = head of head = 1 - 5 = tail of head = 2 - 6 = head of tail = 3 - 7 = tail of tail = 4 - *) - assert_equal tree (slot Z.one tree) "slot 1 (root)"; - assert_equal (cell (atom 1) (atom 2)) (slot (Z.of_int 2) tree) "slot 2 (head)"; - assert_equal (cell (atom 3) (atom 4)) (slot (Z.of_int 3) tree) "slot 3 (tail)"; - assert_equal (atom 1) (slot (Z.of_int 4) tree) "slot 4"; - assert_equal (atom 2) (slot (Z.of_int 5) tree) "slot 5"; - assert_equal (atom 3) (slot (Z.of_int 6) tree) "slot 6"; - assert_equal (atom 4) (slot (Z.of_int 7) tree) "slot 7" - -(** Test Nock opcode 0: slot lookup *) -let test_nock_0 () = - Printf.printf "\n=== Testing Nock opcode 0 (slot) ===\n"; - - let subject = cell (atom 4) (atom 5) in - - (* *[subject [0 1]] = subject *) - assert_equal subject (nock subject (cell (atom 0) (atom 1))) "nock 0: axis 1"; - - (* *[[4 5] [0 2]] = 4 *) - assert_equal (atom 4) (nock subject (cell (atom 0) (atom 2))) "nock 0: axis 2"; - - (* *[[4 5] [0 3]] = 5 *) - assert_equal (atom 5) (nock subject (cell (atom 0) (atom 3))) "nock 0: axis 3" - -(** Test Nock opcode 1: constant *) -let test_nock_1 () = - Printf.printf "\n=== Testing Nock opcode 1 (constant) ===\n"; - - let subject = atom 99 in - - (* *[subject [1 42]] = 42 *) - assert_equal (atom 42) (nock subject (cell (atom 1) (atom 42))) "nock 1: return constant"; - - (* *[subject [1 [1 2]]] = [1 2] *) - assert_equal - (cell (atom 1) (atom 2)) - (nock subject (cell (atom 1) (cell (atom 1) (atom 2)))) - "nock 1: return constant cell" - -(** Test Nock opcode 2: recursion *) -let test_nock_2 () = - Printf.printf "\n=== Testing Nock opcode 2 (nock) ===\n"; - - (* *[42 [2 [0 1] [1 0]]] = *[42 0] = crash *) - (* *[42 [2 [1 99] [1 0 1]]] = *[99 [0 1]] = 99 *) - let subject = atom 42 in - let formula = cell (atom 2) (cell (cell (atom 1) (atom 99)) (cell (atom 1) (cell (atom 0) (atom 1)))) in - assert_equal (atom 99) (nock subject formula) "nock 2: evaluate with new subject" - -(** Test Nock opcode 3: is-cell *) -let test_nock_3 () = - Printf.printf "\n=== Testing Nock opcode 3 (is-cell) ===\n"; - - (* *[42 [3 1 42]] = 1 (atom) *) - assert_equal (atom 1) (nock (atom 42) (cell (atom 3) (cell (atom 1) (atom 42)))) "nock 3: is-cell of atom"; - - (* *[42 [3 1 [1 2]]] = 0 (cell) *) - assert_equal - (atom 0) - (nock (atom 42) (cell (atom 3) (cell (atom 1) (cell (atom 1) (atom 2))))) - "nock 3: is-cell of cell" - -(** Test Nock opcode 4: increment *) -let test_nock_4 () = - Printf.printf "\n=== Testing Nock opcode 4 (increment) ===\n"; - - (* *[42 [4 1 41]] = 42 *) - assert_equal (atom 42) (nock (atom 0) (cell (atom 4) (cell (atom 1) (atom 41)))) "nock 4: increment"; - - (* *[0 [4 0 1]] = 1 *) - assert_equal (atom 1) (nock (atom 0) (cell (atom 4) (cell (atom 0) (atom 1)))) "nock 4: increment subject" - -(** Test Nock opcode 5: equality *) -let test_nock_5 () = - Printf.printf "\n=== Testing Nock opcode 5 (equality) ===\n"; - - (* *[0 [5 [1 4] [1 5]]] = 1 (not equal) *) - assert_equal - (atom 1) - (nock (atom 0) (cell (atom 5) (cell (cell (atom 1) (atom 4)) (cell (atom 1) (atom 5))))) - "nock 5: not equal"; - - (* *[0 [5 [1 4] [1 4]]] = 0 (equal) *) - assert_equal - (atom 0) - (nock (atom 0) (cell (atom 5) (cell (cell (atom 1) (atom 4)) (cell (atom 1) (atom 4))))) - "nock 5: equal" - -(** Test Nock opcode 6: if-then-else *) -let test_nock_6 () = - Printf.printf "\n=== Testing Nock opcode 6 (if-then-else) ===\n"; - - (* *[42 [6 [1 0] [1 11] [1 22]]] = 11 (if 0 then 11 else 22) *) - assert_equal - (atom 11) - (nock (atom 42) (cell (atom 6) (cell (cell (atom 1) (atom 0)) (cell (cell (atom 1) (atom 11)) (cell (atom 1) (atom 22)))))) - "nock 6: if true"; - - (* *[42 [6 [1 1] [1 11] [1 22]]] = 22 (if 1 then 11 else 22) *) - assert_equal - (atom 22) - (nock (atom 42) (cell (atom 6) (cell (cell (atom 1) (atom 1)) (cell (cell (atom 1) (atom 11)) (cell (atom 1) (atom 22)))))) - "nock 6: if false" - -(** Test Nock opcode 7: composition *) -let test_nock_7 () = - Printf.printf "\n=== Testing Nock opcode 7 (composition) ===\n"; - - (* *[42 [7 [1 99] [0 1]]] = *[99 [0 1]] = 99 *) - assert_equal - (atom 99) - (nock (atom 42) (cell (atom 7) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))))) - "nock 7: composition" - -(** Test Nock opcode 8: push *) -let test_nock_8 () = - Printf.printf "\n=== Testing Nock opcode 8 (push) ===\n"; - - (* *[42 [8 [1 99] [0 1]]] = *[[99 42] [0 1]] = [99 42] *) - assert_equal - (cell (atom 99) (atom 42)) - (nock (atom 42) (cell (atom 8) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))))) - "nock 8: push" - -(** Test Nock opcode 9: call *) -let test_nock_9 () = - Printf.printf "\n=== Testing Nock opcode 9 (call) ===\n"; - - (* Simplest test: *[42 [9 1 [0 1]]] - = evaluate [0 1] with 42 -> 42 - = slot 1 of 42 -> 42 - = *[42 42] -> should crash since 42 is not a valid formula - - Better test: create subject with formula at position 3 - *[[formula value] [9 2 [0 1]]] - where formula = [0 3] (get tail) - - Actually: *[[1 2] [9 2 [1 [0 3]]]] - = evaluate [1 [0 3]] with [1 2] -> [0 3] - = slot 2 of [1 2] -> 1 - - Wait, that's wrong. Let me think about what 9 does: - *[subject [9 axis formula]] - = *[subject *[*[subject formula] axis]] - - So: *[[1 2] [9 3 [0 1]]] - = *[*[[1 2] [0 1]] 3] - = *[[1 2] 3] - = slot 3 of [1 2] - = 2 - - But that's not right either. Let me re-read the spec. - - Actually from the C code: - seb = nock(bus, c_gal) - nex = slot(b_gal, seb) - result = nock(seb, nex) - - So for [9 b c]: - 1. Evaluate c with subject -> seb - 2. Get slot b from seb -> nex (this is the formula) - 3. Evaluate nex with seb as subject - - Test: *[[99 [4 [0 2]]] [9 3 [0 1]]] - 1. seb = *[[99 [4 [0 2]]] [0 1]] = [99 [4 [0 2]]] - 2. nex = slot 3 of [99 [4 [0 2]]] = [4 [0 2]] - 3. result = *[[99 [4 [0 2]]] [4 [0 2]]] - = increment of *[[99 [4 [0 2]]] [0 2]] - = increment of 99 - = 100 - *) - let subj = cell (atom 99) (cell (atom 4) (cell (atom 0) (atom 2))) in - assert_equal - (atom 100) - (nock subj (cell (atom 9) (cell (atom 3) (cell (atom 0) (atom 1))))) - "nock 9: call formula at axis 3" - -(** Test Nock opcode 10: hint *) -let test_nock_10 () = - Printf.printf "\n=== Testing Nock opcode 10 (hint) ===\n"; - - (* *[42 [10 99 [1 11]]] = 11 (hint ignored) *) - assert_equal - (atom 11) - (nock (atom 42) (cell (atom 10) (cell (atom 99) (cell (atom 1) (atom 11))))) - "nock 10: hint with value (ignored)"; - - (* *[42 [10 [99 [1 88]] [1 11]]] = 11 (hint ignored) *) - assert_equal - (atom 11) - (nock (atom 42) (cell (atom 10) (cell (cell (atom 99) (cell (atom 1) (atom 88))) (cell (atom 1) (atom 11))))) - "nock 10: hint with computed value (ignored)" - -(** Test Nock cell constructor shorthand *) -let test_nock_cons () = - Printf.printf "\n=== Testing Nock cons (cell auto-construction) ===\n"; - - (* *[42 [[1 6] [1 7]]] = [6 7] *) - assert_equal - (cell (atom 6) (atom 7)) - (nock (atom 42) (cell (cell (atom 1) (atom 6)) (cell (atom 1) (atom 7)))) - "nock cons: [[1 6] [1 7]]" - -(** Run all tests *) -let () = - Printf.printf "=================================\n"; - Printf.printf "Nock OCaml Test Suite\n"; - Printf.printf "=================================\n"; - - test_noun_basics (); - test_slots (); - test_nock_0 (); - test_nock_1 (); - test_nock_2 (); - test_nock_3 (); - test_nock_4 (); - test_nock_5 (); - test_nock_6 (); - test_nock_7 (); - test_nock_8 (); - test_nock_9 (); - test_nock_10 (); - test_nock_cons (); - - Printf.printf "\n=================================\n"; - Printf.printf "All tests passed!\n"; - Printf.printf "=================================\n" diff --git a/ocaml/test_serial.ml b/ocaml/test_serial.ml deleted file mode 100644 index 69887c5..0000000 --- a/ocaml/test_serial.ml +++ /dev/null @@ -1,185 +0,0 @@ -open Nock_lib.Noun -open Nock_lib.Serial - -(** Test utilities *) - -let assert_equal expected actual msg = - if not (equal expected actual) then begin - Printf.printf "FAIL: %s\n" msg; - Format.printf " Expected: %a@." pp_noun expected; - Format.printf " Actual: %a@." pp_noun actual; - exit 1 - end else - Printf.printf "PASS: %s\n" msg - -let assert_bytes_equal expected actual msg = - if expected <> actual then begin - Printf.printf "FAIL: %s\n" msg; - Printf.printf " Expected: %s\n" (bytes_to_hex expected); - Printf.printf " Actual: %s\n" (bytes_to_hex actual); - exit 1 - end else - Printf.printf "PASS: %s\n" msg - -(** Round-trip test: jam then cue should give original *) -let test_roundtrip noun msg = - let jammed = jam noun in - let cued = cue jammed in - assert_equal noun cued msg - -(** Test basic atoms *) -let test_atoms () = - Printf.printf "\n=== Testing atom serialization ===\n"; - - (* Test 0 *) - let n = atom 0 in - test_roundtrip n "atom 0 roundtrip"; - - (* Test small atoms *) - test_roundtrip (atom 1) "atom 1 roundtrip"; - test_roundtrip (atom 2) "atom 2 roundtrip"; - test_roundtrip (atom 42) "atom 42 roundtrip"; - test_roundtrip (atom 255) "atom 255 roundtrip"; - test_roundtrip (atom 256) "atom 256 roundtrip"; - - (* Test larger atoms *) - test_roundtrip (atom 65535) "atom 65535 roundtrip"; - test_roundtrip (atom 1000000) "atom 1000000 roundtrip" - -(** Test basic cells *) -let test_cells () = - Printf.printf "\n=== Testing cell serialization ===\n"; - - (* Simple cell [1 2] *) - let c = cell (atom 1) (atom 2) in - test_roundtrip c "cell [1 2] roundtrip"; - - (* Nested cells [[1 2] 3] *) - let c = cell (cell (atom 1) (atom 2)) (atom 3) in - test_roundtrip c "cell [[1 2] 3] roundtrip"; - - (* Deep nesting *) - let c = cell (cell (cell (atom 1) (atom 2)) (atom 3)) (atom 4) in - test_roundtrip c "cell [[[1 2] 3] 4] roundtrip"; - - (* Larger values *) - let c = cell (atom 1000) (atom 2000) in - test_roundtrip c "cell [1000 2000] roundtrip" - -(** Test trees *) -let test_trees () = - Printf.printf "\n=== Testing tree serialization ===\n"; - - (* Binary tree *) - let tree = cell (cell (atom 1) (atom 2)) (cell (atom 3) (atom 4)) in - test_roundtrip tree "binary tree roundtrip"; - - (* Unbalanced tree *) - let tree = cell (atom 1) (cell (atom 2) (cell (atom 3) (atom 4))) in - test_roundtrip tree "right-leaning tree roundtrip"; - - (* List-like structure [1 [2 [3 0]]] *) - let list = cell (atom 1) (cell (atom 2) (cell (atom 3) (atom 0))) in - test_roundtrip list "list-like structure roundtrip" - -(** Test backreferences - - When the same sub-noun appears multiple times, jam should use backreferences -*) -let test_backrefs () = - Printf.printf "\n=== Testing backreferences ===\n"; - - (* Create a noun with shared structure: [42 42] - The second 42 should be a backref to the first *) - let shared = atom 42 in - let n = cell shared shared in - test_roundtrip n "shared atom [42 42] roundtrip"; - - (* More complex sharing: [[1 2] [1 2]] - Second cell should backref to first *) - let sub = cell (atom 1) (atom 2) in - let n = cell sub sub in - test_roundtrip n "shared cell [[1 2] [1 2]] roundtrip"; - - (* Test that backrefs actually save space *) - let sub = cell (atom 100) (atom 200) in - let with_backref = cell sub sub in - let without_backref = cell (cell (atom 100) (atom 200)) (cell (atom 100) (atom 200)) in - - let jammed_with = jam with_backref in - let jammed_without = jam without_backref in - - Printf.printf " Shared structure size: %d bytes\n" (Bytes.length jammed_with); - Printf.printf " Duplicated structure size: %d bytes\n" (Bytes.length jammed_without); - - (* Note: Due to how OCaml constructs values, physical equality might not work as expected, - but logical equality should still work for roundtrip *) - test_roundtrip with_backref "backref optimization roundtrip" - -(** Test known encodings - - These test vectors would ideally come from the Vere test suite or Urbit dojo -*) -let test_known_encodings () = - Printf.printf "\n=== Testing known encodings ===\n"; - - (* We can generate these from Urbit with (jam 0), (jam 1), etc. *) - - (* jam of 0 should be simple *) - let n = atom 0 in - let jammed = jam n in - Printf.printf " jam(0) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed); - test_roundtrip n "known encoding: 0"; - - (* jam of 1 *) - let n = atom 1 in - let jammed = jam n in - Printf.printf " jam(1) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed); - test_roundtrip n "known encoding: 1"; - - (* jam of [0 0] *) - let n = cell (atom 0) (atom 0) in - let jammed = jam n in - Printf.printf " jam([0 0]) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed); - test_roundtrip n "known encoding: [0 0]" - -(** Test edge cases *) -let test_edge_cases () = - Printf.printf "\n=== Testing edge cases ===\n"; - - (* Very large atom *) - let big = Atom (Z.of_string "123456789012345678901234567890") in - test_roundtrip big "very large atom roundtrip"; - - (* Deep nesting *) - let rec make_deep n = - if n = 0 then atom 0 - else cell (atom n) (make_deep (n - 1)) - in - let deep = make_deep 50 in - test_roundtrip deep "deeply nested structure (50 levels) roundtrip"; - - (* Wide tree *) - let rec make_wide n = - if n = 0 then atom 0 - else cell (make_wide (n - 1)) (make_wide (n - 1)) - in - let wide = make_wide 6 in (* 2^6 = 64 leaves *) - test_roundtrip wide "wide binary tree (6 levels) roundtrip" - -(** Run all tests *) -let () = - Printf.printf "=================================\n"; - Printf.printf "Jam/Cue Serialization Test Suite\n"; - Printf.printf "=================================\n"; - - test_atoms (); - test_cells (); - test_trees (); - test_backrefs (); - test_known_encodings (); - test_edge_cases (); - - Printf.printf "\n=================================\n"; - Printf.printf "All tests passed!\n"; - Printf.printf "=================================\n" diff --git a/ocaml/tmp/get_jam_vectors.ml b/ocaml/tmp/get_jam_vectors.ml new file mode 100644 index 0000000..1266f21 --- /dev/null +++ b/ocaml/tmp/get_jam_vectors.ml @@ -0,0 +1,10 @@ +(* Quick test to see what our jam produces for small numbers *) +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + for i = 0 to 10 do + let n = atom i in + let jammed = jam n in + Printf.printf "%d: %s\n" i (bytes_to_hex jammed) + done diff --git a/ocaml/tmp/quick_test.ml b/ocaml/tmp/quick_test.ml new file mode 100644 index 0000000..9ce168a --- /dev/null +++ b/ocaml/tmp/quick_test.ml @@ -0,0 +1,6 @@ +let () = + Printf.printf "Z.numbits 0 = %d\n" (Z.numbits Z.zero); + Printf.printf "Z.numbits 1 = %d\n" (Z.numbits Z.one); + Printf.printf "Z.numbits 2 = %d\n" (Z.numbits (Z.of_int 2)); + Printf.printf "Z.numbits 42 = %d\n" (Z.numbits (Z.of_int 42)); + Printf.printf "Z.numbits 6 = %d\n" (Z.numbits (Z.of_int 6)); diff --git a/ocaml/tmp/test_hex.ml b/ocaml/tmp/test_hex.ml new file mode 100644 index 0000000..6504640 --- /dev/null +++ b/ocaml/tmp/test_hex.ml @@ -0,0 +1,9 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + for i = 0 to 10 do + let n = atom i in + let jammed = jam n in + Printf.printf "jam(%d) = %s (%d bytes)\n" i (bytes_to_hex jammed) (Bytes.length jammed) + done diff --git a/ocaml/tmp/test_jam.ml b/ocaml/tmp/test_jam.ml new file mode 100644 index 0000000..e616e3e --- /dev/null +++ b/ocaml/tmp/test_jam.ml @@ -0,0 +1,17 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + let test n_int = + let n = atom n_int in + let jammed = jam n in + Printf.printf "jam(%d) = %s\n" n_int (bytes_to_hex jammed); + let cued = cue jammed in + match cued with + | Atom a -> Printf.printf "cue -> %s\n" (Z.to_string a) + | Cell _ -> Printf.printf "cue -> cell?!\n" + in + test 0; + test 1; + test 2; + test 42; diff --git a/ocaml/tmp/test_prec.ml b/ocaml/tmp/test_prec.ml new file mode 100644 index 0000000..2a9134c --- /dev/null +++ b/ocaml/tmp/test_prec.ml @@ -0,0 +1,5 @@ +type t = { x : int ref } +let r = { x = ref 42 } +let test () = + let y = !(r.x) in + y diff --git a/ocaml/tmp/test_ref.ml b/ocaml/tmp/test_ref.ml new file mode 100644 index 0000000..8bff6e7 --- /dev/null +++ b/ocaml/tmp/test_ref.ml @@ -0,0 +1,7 @@ +type writer = { + buf: bytes ref; + mutable bit_pos: int; +} + +let test w = + Bytes.length !(w.buf) diff --git a/ocaml/urb.py b/ocaml/urb.py new file mode 100644 index 0000000..904233e --- /dev/null +++ b/ocaml/urb.py @@ -0,0 +1,405 @@ + +#!/usr/bin/env python3 + +# TODO: +# - -h text +# - output to clay + +import os +import sys +import logging +import json +import requests +import argparse +import base64 + +logging.basicConfig( + level=logging.WARNING, + format='%(levelname)s %(funcName)s %(lineno)s - %(message)s', + stream=sys.stderr, +) + +logging.debug(['sys.argv', sys.argv]) + +def preprocess_args(old_args): + """Split out [] + + We use [] to delimit tuples. The following syntaxes are all equivalent: + + -- --open --open a b --close --open c d --close --close + -- [ [ a b ] [ c d ] ] + -- [ [a b] [c d] ] + -- [[a b] [c d]] + -- etc + + We don't allow [[a b][c d]]. The rule is that we accept zero or more [ at + the beginning of a token and zero or more ] at the end of a token. + + In this function, we convert all legal syntaxes to as if they were entered + as in the first example above. This allows them to be parsed by a + relatively sane argparse system. + """ + + if old_args == []: + return [] + if old_args[0][0] == '[': + if len(old_args[0]) > 1: + r = preprocess_args([old_args[0][1:]] + old_args[1:]) + return ['--open'] + r + else: + return ['--open'] + preprocess_args(old_args[1:]) + if old_args[0][-1] == ']': + if len(old_args[0]) > 1: + return preprocess_args([old_args[0][:-1]]) + \ + ['--close'] + preprocess_args(old_args[1:]) + else: + return ['--close'] + preprocess_args(old_args[1:]) + return [old_args[0]] + preprocess_args(old_args[1:]) + +args = preprocess_args(sys.argv[1:]) + +logging.debug(['preprocessed', args]) + + +class sourceAction(argparse.Action): + """Handle source flag. + + This is all the 'primitive' source flags -- no nesting, no tuple stuff, + just one flag with one argument. + + Besides the normal argparse.Action arguments, we require the following named + argument: + + -- which='foo'. Since all source flags use res.source, this specifies the + key of the entry for this flag. + """ + + def __init__(self, option_strings, dest, **kwargs): + self.which = kwargs['which'] + del kwargs['which'] + super(sourceAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + logging.debug('%r %r' % (new_value, option_string)) + logging.debug('source %s' % res.source) + logging.debug('level %s' % res.level) + + if res.source is not None: + def help(source, level): + logging.debug('source %s' % source) + logging.debug('level %s' % level) + if not isinstance(source, list): + raise ValueError('Already specified one source') + elif level == 0: + msg = 'Already specified a source %r %s' % (source, level) + raise ValueError(msg) + elif level == 1: + return source + [self.construct_value(new_value)] + else: + return source[:-1] + [help(source[-1], level - 1)] + res.source = help(res.source, res.level) + else: + res.source = \ + self.construct_value(new_value) + + logging.debug(res.source) + + def construct_value(self, new_value): + if new_value == '-': + return self.construct_value(''.join(sys.stdin.readlines())) + elif new_value[0:2] == '@@': + with open(new_value[2:]) as f: + content = f.readlines() + return self.construct_value(''.join(content)) + else: + return {self.which: new_value} + +class transformerAction(argparse.Action): + """Handle transformer flag. + + This is all the tranformer flags. Each flag takes one argument and + transforms the previous source. + + Besides the normal argparse.Action arguments, we require the following named + arguments: + + -- which='foo'. Since all source flags use res.source, this specifies the + key of the entry for this flag. + + -- nesting='foo'. The key for the argument is 'foo'. + """ + + def __init__(self, option_strings, dest, **kwargs): + self.which = kwargs['which'] + self.nesting = kwargs['nesting'] + del kwargs['which'] + del kwargs['nesting'] + super(transformerAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + logging.debug('%r %r' % (new_value, option_string)) + logging.debug('source %s' % res.source) + logging.debug('level %s' % res.level) + + if res.source is None: + raise ValueError('Need source before transformer') + else: + def help(source, level): + logging.debug('source %s' % source) + logging.debug('level %s' % level) + if level == 0 or level is None: + res = {self.nesting: new_value, "next": source} + return {self.which: res} + elif not isinstance(source, list): + raise ValueError('Already specified one source') + else: + return source[:-1] + [help(source[-1], level - 1)] + res.source = help(res.source, res.level) + + logging.debug(res.source) + +class openAction(argparse.Action): + """Handle open tuple. + + Opens a source tuple. Can only exist in the same places as any other + source. + """ + + def __init__(self, option_strings, dest, **kwargs): + super(openAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + if res.level is None: + res.level = 0 + + logging.debug('source %s' % res.source) + logging.debug('level %s' % res.level) + + if res.source is None: + res.source = [] + res.level = 1 + return + + def help(source, level): + if not isinstance(source, list): + raise ValueError('Starting tuple after source is finished') + if level == 1: + return (source + [[]], level + 1) + elif level > 1: + rsource, rlevel = help(source[-1], level - 1) + return (source[:-1] + [rsource], rlevel + 1) + else: + msg = 'opening strange level %r %s' % (source, level) + raise ValueError(msg) + + res.source, res.level = help(res.source, res.level) + +class closeAction(argparse.Action): + """Handle close tuple. + + Closes a source tuple. Can only exist when a tuple is already open. + """ + + def __init__(self, option_strings, dest, **kwargs): + super(closeAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + if res.level is None: + raise ValueError('Ending tuple before starting one') + + logging.debug('level %s' % res.level) + + if res.source is None: + raise ValueError('Ending tuple with empty source') + + def help(source, level): + if not isinstance(source, list): + raise ValueError('Ending tuple that isn\'t a tuple') + if level == 1: + return level - 1 + elif level > 1: + return help(source[-1], level - 1) + 1 + else: + msg = 'closing strange level %r %s' % (source, level) + raise ValueError(msg) + + res.level = help(res.source, res.level) + + logging.debug('level %s' % res.level) + +class sinkAction(argparse.Action): + """Handle sink flag. + + We expect only one sinkAction to ever be executed. We recommend using + mutually_exclusive_group's. + + Besides the normal action flags, we require the following named argument: + + -- which='foo'. Since all sink flags use res.sink, this specifies the key + of the entry for this flag. + """ + + def __init__(self, option_strings, dest, **kwargs): + self.which = kwargs['which'] + del kwargs['which'] + super(sinkAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + res.sink = self.construct_value(new_value) + + logging.debug(res.sink) + + def construct_value(self, new_value): + if self.which == 'output-file': + return {self.which: new_value[::-1].replace('.','/',1)[::-1]} + elif self.which == 'output-pill': + return {self.which: new_value[::-1].replace('.','/',1)[::-1]} + else: + return {self.which: new_value} + +class FullPaths(argparse.Action): + """Expand user- and relative-paths""" + def __call__(self, parser, namespace, values, option_string=None): + if values != None: + path = os.path.abspath(os.path.expanduser(values)) + setattr(namespace, self.dest, path) + +def is_dir(dirname): + """Checks if a path is an actual directory""" + if not os.path.isdir(dirname): + msg = "{0} is not a directory".format(dirname) + raise argparse.ArgumentTypeError(msg) + else: + return dirname + +def get_args(): + """Get CLI arguments and options""" + parser = argparse.ArgumentParser(description="""do something""") + + parser.add_argument('alignments', help="The folder of alignments", + action=FullPaths, type=is_dir) + +parser = argparse.ArgumentParser(description='headless urbit') +parser.add_argument('pier', nargs='?', + help='target urbit directory', + action=FullPaths, type=is_dir) +parser.add_argument('-d', '--dojo', which='dojo', + metavar='command-line', + help='run dojo command', + action=sourceAction, dest='source') +parser.add_argument('-D', '--data', which='data', + metavar='text', + help='literal text data', + action=sourceAction) +parser.add_argument('-c', '--clay', which='clay', + metavar='clay-path', + help='load data from clay', + action=sourceAction) +parser.add_argument('-u', '--url', which='url', + metavar='url', + help='pull data from url', + action=sourceAction) +parser.add_argument('-a', '--api', which='api', + metavar='command', + help='get data from api', + action=sourceAction) +parser.add_argument('-g', '--get-api', which='get-api', + metavar='api:endpoint', + help='get data from api endpoint', + action=sourceAction) +parser.add_argument('-l', '--listen-api', which='listen-api', + metavar='api:event', + help='listen to event from api', + action=sourceAction) +parser.add_argument('-m', '--mark', which='as', + metavar='mark', + help='transform a source to another mark', + nesting='mark', + action=transformerAction) +parser.add_argument('-H', '--hoon', which='hoon', + metavar='code', + help='transform a source by hoon code', + nesting='code', + action=transformerAction) +parser.add_argument('--open', + nargs=0, + help='start tuple', + action=openAction, dest='level') +parser.add_argument('--close', + nargs=0, + help='stop tuple', + action=closeAction) + +sinks = parser.add_mutually_exclusive_group() +sinks.add_argument('-s', '--stdout', const={'stdout': None}, + default={'stdout': None}, + action='store_const', dest='sink') +sinks.add_argument('-f', '--output-file', which='output-file', + metavar='path', + action=sinkAction) +sinks.add_argument('-P', '--output-pill', which='output-pill', + metavar='path', + action=sinkAction) +sinks.add_argument('-C', '--output-clay', which='output-clay', + metavar='clay-path', + action=sinkAction) +sinks.add_argument('-U', '--output-url', which='url', + metavar='url', + action=sinkAction) +sinks.add_argument('-t', '--to-api', which='to-api', + metavar='api-command', + action=sinkAction) +sinks.add_argument('-n', '--send-api', which='send-api', + metavar='api:endpoint', + action=sinkAction) +sinks.add_argument('-x', '--command', which='command', + metavar='command', + action=sinkAction) +sinks.add_argument('-p', '--app', which='app', + metavar='app', + action=sinkAction) + + +args = parser.parse_args(args) + + +if args.source is None: + args.source = {"data": ''.join(sys.stdin)} + +payload = {"source": args.source, "sink": args.sink} +logging.debug(['payload', json.dumps(payload)]) + +PORT = "" + +if args.pier is None: + PORT = os.environ.get('LENS_PORT', 12323) + # if 'LENS_PORT' not in os.environ: + # logging.warning("No pier or port specified, looking on port " + str(PORT)) +else: + with open(os.path.join(args.pier, ".http.ports")) as ports: + for line in ports: + if -1 != line.find("loopback"): + PORT = line.split()[0] + logging.info("Found port %s" % PORT) + break + + if not PORT: + logging.error("Error reading port from .http.ports file") + sys.exit(1) + +url = "http://localhost:%s" % PORT + +r = requests.post(url, data=json.dumps(payload)) + +if r.text[0] == '"': + # Python 3: decode unicode escape sequences + print(r.text[1:-1].encode().decode('unicode_escape')) +elif r.text[0] == '{': + # print(r.text) + json_data = json.loads(r.text) + logging.debug(json_data) + with open(json_data['file'][:0:-1].replace('/','.',1)[::-1], 'wb') as f: + f.write(base64.b64decode(json_data['data'])) +else: + logging.warning("unrecognized response") + print(r.text) -- cgit v1.2.3