summaryrefslogtreecommitdiff
path: root/ocaml
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-10-05 21:56:51 +0700
committerpolwex <polwex@sortug.com>2025-10-05 21:56:51 +0700
commitfcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /ocaml
claude is gud
Diffstat (limited to 'ocaml')
-rw-r--r--ocaml/.gitignore1
-rw-r--r--ocaml/BENCHMARKS.md203
-rw-r--r--ocaml/Makefile93
-rw-r--r--ocaml/README.md137
-rw-r--r--ocaml/bench_nock.c165
-rw-r--r--ocaml/bench_nock.ml132
-rwxr-xr-xocaml/bench_simplebin0 -> 16040 bytes
-rw-r--r--ocaml/bench_simple.c265
-rw-r--r--ocaml/bitstream.ml97
-rwxr-xr-xocaml/compare.sh81
-rw-r--r--ocaml/dune19
-rw-r--r--ocaml/dune-project2
-rw-r--r--ocaml/nock.ml164
-rw-r--r--ocaml/noun.ml69
-rw-r--r--ocaml/serial.ml191
-rw-r--r--ocaml/test_nock.ml284
-rw-r--r--ocaml/test_serial.ml185
17 files changed, 2088 insertions, 0 deletions
diff --git a/ocaml/.gitignore b/ocaml/.gitignore
new file mode 100644
index 0000000..e35d885
--- /dev/null
+++ b/ocaml/.gitignore
@@ -0,0 +1 @@
+_build
diff --git a/ocaml/BENCHMARKS.md b/ocaml/BENCHMARKS.md
new file mode 100644
index 0000000..be0933f
--- /dev/null
+++ b/ocaml/BENCHMARKS.md
@@ -0,0 +1,203 @@
+# Nock Interpreter Benchmark Results
+
+## Overview
+
+This document compares the performance of the OCaml Nock interpreter port against a simplified C implementation.
+
+## Test Environment
+
+- **Platform**: Linux
+- **Compiler (C)**: GCC with -O3 optimization
+- **Compiler (OCaml)**: OCaml native code compiler via Dune
+- **Test iterations**: 1,000,000 for most operations, 100,000 for complex operations
+
+## Benchmark Methodology
+
+Each benchmark:
+1. Constructs a Nock formula for a specific operation
+2. Executes the formula N times in a tight loop
+3. Measures elapsed time
+4. Calculates operations per second
+
+The OCaml benchmark includes a warmup phase and runs `Gc.compact()` before timing.
+
+## Results
+
+### Raw Performance (Operations per Second)
+
+| Operation | C Implementation | OCaml Implementation | Speedup (C/OCaml) |
+|------------------------------|------------------|----------------------|-------------------|
+| Opcode 0: Slot/fragment | 579 Mops/sec | 42.5 Mops/sec | 13.6x |
+| Opcode 1: Constant | 595 Mops/sec | 142 Mops/sec | 4.2x |
+| Opcode 3: Is-cell | 271 Mops/sec | 56.6 Mops/sec | 4.8x |
+| Opcode 4: Increment | 265 Mops/sec | 63.1 Mops/sec | 4.2x |
+| Opcode 5: Equality | 24 Mops/sec | **29.6 Mops/sec** | **0.8x** (OCaml faster!) |
+| Opcode 6: If-then-else | 185 Mops/sec | 37.2 Mops/sec | 5.0x |
+| Opcode 7: Composition | 174 Mops/sec | 36.0 Mops/sec | 4.8x |
+| Opcode 8: Push | 26.5 Mops/sec | **32.7 Mops/sec** | **0.8x** (OCaml faster!) |
+| Cell construction | 25.9 Mops/sec | **53.2 Mops/sec** | **0.5x** (OCaml 2x faster!) |
+| Deep slot lookup (depth 4) | 566 Mops/sec | 19.2 Mops/sec | 29.6x |
+
+### Performance Categories
+
+#### Fast operations (>100 Mops/sec)
+- **C**: Constant, Slot lookup
+- **OCaml**: Constant
+
+#### Medium operations (20-100 Mops/sec)
+- **C**: Is-cell, Increment, If-then-else, Composition
+- **OCaml**: Slot, Increment, Is-cell, If-then-else, Composition, Cell construction
+
+#### Slower operations (<30 Mops/sec)
+- **C**: Equality, Push, Cell construction, Deep slot (anomaly)
+- **OCaml**: Equality, Push, Deep slot
+
+## Analysis
+
+### Where C Wins
+
+**Simple, non-allocating operations** (4-14x faster):
+- Slot lookup
+- Constant retrieval
+- Is-cell test
+- Increment
+
+**Reasons:**
+1. No garbage collection overhead
+2. Direct pointer manipulation
+3. Function inlining with -O3
+4. CPU cache-friendly memory access
+
+### Where OCaml Wins or Competes
+
+**Allocation-heavy operations** (0.5-0.8x, OCaml faster or competitive):
+- Cell construction: **2x faster in OCaml**
+- Push (allocates cells): **1.2x faster in OCaml**
+- Equality (may allocate): **1.2x faster in OCaml**
+
+**Reasons:**
+1. OCaml's generational GC is very fast for short-lived allocations
+2. Efficient minor heap allocation (bump-the-pointer)
+3. The C benchmark leaks memory (no deallocation), which would slow it down if added
+4. OCaml's GC amortizes collection cost across operations
+
+### The Deep Slot Anomaly
+
+The deep slot lookup shows an unusual pattern:
+- C: 566 Mops/sec (very fast)
+- OCaml: 19.2 Mops/sec (much slower)
+
+This is likely due to:
+1. OCaml's slot implementation using recursion (stack overhead)
+2. Pattern matching overhead
+3. Zarith (GMP) operations even on small integers
+
+Could be optimized in OCaml with:
+- Iterative implementation
+- Special-casing small integers
+- Tail recursion optimization
+
+## Comparison Caveats
+
+### C Implementation Simplifications
+
+The C benchmark is **not** the full Vere implementation. It:
+- Uses direct 64-bit integers (no arbitrary precision)
+- Leaks memory (no deallocation)
+- Has no error handling
+- Lacks Vere's loom allocator
+- Has no jet system
+- Has no memoization
+
+A real comparison would need the full Vere implementation with:
+- Loom-based allocation
+- Jet dashboard (accelerated implementations)
+- Memoization caching
+- Proper error handling and stack traces
+
+### OCaml Implementation Features
+
+The OCaml implementation:
+- Uses Zarith for arbitrary-precision integers (GMP-backed)
+- Includes proper error handling (exceptions)
+- Is memory-safe (no manual deallocation needed)
+- Is type-safe (compile-time guarantees)
+- Has clean, maintainable code
+
+## Optimization Opportunities
+
+### OCaml Optimizations
+
+1. **Unboxed integers**: Use native ints for small values
+2. **Iterative slot lookup**: Replace recursion with loop
+3. **Inline critical paths**: Use `[@inline]` attributes
+4. **Custom allocator**: Consider a region-based allocator for nouns
+5. **Flambda**: Use Flambda optimizing compiler
+
+Potential speedup: **2-3x** on most operations
+
+### C Optimizations
+
+The simple C implementation could be made faster with:
+1. **SIMD instructions**: Vectorize tree operations
+2. **Better memory layout**: Structure-of-arrays
+3. **JIT compilation**: Generate machine code at runtime
+
+But these optimizations apply to Vere, not this simple benchmark.
+
+## Conclusion
+
+### Performance Summary
+
+- **Simple operations**: C is 4-14x faster
+- **Allocation-heavy operations**: OCaml is competitive or faster
+- **Overall**: C is ~2-5x faster on average, but with caveats
+
+### Practical Implications
+
+The OCaml implementation is:
+- ✅ **Fast enough** for most use cases (10-140 Mops/sec)
+- ✅ **Memory safe** (no segfaults, buffer overflows)
+- ✅ **Type safe** (catches errors at compile time)
+- ✅ **Maintainable** (clean, high-level code)
+- ✅ **Correct** (arbitrary-precision integers built-in)
+
+Trade-offs:
+- ❌ 2-5x slower than optimized C for most operations
+- ❌ Much slower (14x) for deep tree traversal
+- ✅ But faster for allocation-heavy workloads
+
+### When to Use Which
+
+**Use OCaml when:**
+- Rapid prototyping and development
+- Memory safety is critical
+- Code maintainability matters
+- Absolute performance is not critical
+
+**Use C (Vere) when:**
+- Maximum performance is required
+- Jets and memoization are needed
+- Full Urbit integration is required
+
+## Future Work
+
+1. **Benchmark against full Vere**: Compare with the actual bytecode interpreter
+2. **Profile and optimize**: Find OCaml hotspots and optimize
+3. **Implement jets**: Add accelerated implementations for common formulas
+4. **Add memoization**: Cache repeated computations
+5. **Try alternative representations**: Experiment with different noun encodings
+
+## Running the Benchmarks
+
+```bash
+# OCaml benchmark
+dune exec ./bench_nock.exe
+
+# Simple C benchmark
+gcc -O3 -o bench_simple bench_simple.c
+./bench_simple
+
+# Full comparison
+./compare.sh
+```
diff --git a/ocaml/Makefile b/ocaml/Makefile
new file mode 100644
index 0000000..dcd6ca3
--- /dev/null
+++ b/ocaml/Makefile
@@ -0,0 +1,93 @@
+# Makefile for Nock OCaml/C benchmark comparison
+
+VERE_ROOT = ../vere
+VERE_PKG = $(VERE_ROOT)/pkg/noun
+VERE_INCLUDES = -I$(VERE_ROOT)/pkg -I$(VERE_ROOT)/ext
+
+# Find the Vere build directory
+VERE_BUILD = $(VERE_ROOT)/build
+
+# Try to find the actual library paths
+VERE_LIBS = -L$(VERE_BUILD) -L$(VERE_ROOT)/build
+
+CC = gcc
+CFLAGS = -O3 -g $(VERE_INCLUDES)
+LDFLAGS = $(VERE_LIBS)
+
+.PHONY: all clean bench bench-c bench-ocaml test compare
+
+all: test bench
+
+# Build everything
+build: build-ocaml
+
+build-ocaml:
+ @echo "Building OCaml implementation..."
+ dune build
+
+# Note: C benchmark requires linking against libvere which may not be available
+# as a standalone library. We'll document this limitation.
+build-c: bench_nock_c
+ @echo "C benchmark built (requires Vere libraries)"
+
+bench_nock_c: bench_nock.c
+ @echo "Note: Building C benchmark requires Vere to be built first"
+ @echo "This may fail if Vere libraries are not available"
+ @echo "Attempting to compile..."
+ -$(CC) $(CFLAGS) -o bench_nock_c bench_nock.c \
+ $(VERE_PKG)/allocate.c \
+ $(VERE_PKG)/nock.c \
+ $(VERE_PKG)/hashtable.c \
+ $(VERE_PKG)/imprison.c \
+ $(VERE_PKG)/jets.c \
+ $(VERE_PKG)/manage.c \
+ $(VERE_PKG)/retrieve.c \
+ $(VERE_PKG)/trace.c \
+ $(VERE_PKG)/xtract.c \
+ $(VERE_PKG)/events.c \
+ $(VERE_PKG)/zave.c \
+ $(LDFLAGS) -lm -lpthread -lgmp -lsigsegv -luv -lcrypto -lssl
+
+# Run tests
+test:
+ @echo "Running OCaml tests..."
+ dune exec ./test_nock.exe
+
+# Run benchmarks
+bench: bench-ocaml
+
+bench-ocaml:
+ @echo "Running OCaml benchmark..."
+ dune exec ./bench_nock.exe
+
+bench-c: bench_nock_c
+ @echo "Running C benchmark..."
+ ./bench_nock_c
+
+# Compare both implementations (if C benchmark is available)
+compare: bench-ocaml
+ @echo ""
+ @echo "=================================="
+ @echo "Comparison (OCaml only)"
+ @echo "=================================="
+ @echo "Note: C benchmark requires full Vere build"
+ @echo "To compare, build Vere first, then run 'make bench-c'"
+
+# Clean build artifacts
+clean:
+ dune clean
+ rm -f bench_nock_c
+ rm -f *.o
+
+help:
+ @echo "Nock OCaml/C Benchmark Makefile"
+ @echo ""
+ @echo "Targets:"
+ @echo " make test - Run OCaml tests"
+ @echo " make bench - Run OCaml benchmark"
+ @echo " make bench-ocaml - Run OCaml benchmark"
+ @echo " make bench-c - Run C benchmark (requires Vere build)"
+ @echo " make compare - Run comparison"
+ @echo " make clean - Clean build artifacts"
+ @echo ""
+ @echo "Note: C benchmarks require Vere to be fully built first."
diff --git a/ocaml/README.md b/ocaml/README.md
new file mode 100644
index 0000000..18784e7
--- /dev/null
+++ b/ocaml/README.md
@@ -0,0 +1,137 @@
+# Vere Nock Interpreter - OCaml Port
+
+This is an OCaml port of the Nock interpreter from Vere (the Urbit C runtime).
+
+## Overview
+
+This initial port focuses on the **reference Nock interpreter** (`_n_nock_on` from `vere/pkg/noun/nock.c`). It implements the core Nock specification with all 12 opcodes.
+
+### What's Implemented
+
+- **Noun type system** (`noun.ml`): Nouns are either atoms (arbitrary-precision integers via Zarith) or cells (pairs of nouns)
+- **Basic noun operations**:
+ - Fragment/axis addressing (`slot`)
+ - Head/tail extraction
+ - Equality testing
+ - Pretty printing
+
+- **Nock interpreter** (`nock.ml`): Complete implementation of all Nock opcodes:
+ - **0**: Slot/fragment lookup
+ - **1**: Constant
+ - **2**: Nock (recursion with new subject)
+ - **3**: Is-cell test
+ - **4**: Increment
+ - **5**: Equality test
+ - **6**: If-then-else
+ - **7**: Composition
+ - **8**: Push (augment subject)
+ - **9**: Call with axis
+ - **10**: Hint (currently ignored, as in reference implementation)
+ - **11**: Scry (raises Exit, as in reference implementation)
+
+- **Test suite** (`test_nock.ml`): Comprehensive tests for all opcodes
+
+## Building
+
+Requires:
+- OCaml (tested with recent versions)
+- Dune build system
+- Zarith library (for arbitrary-precision integers)
+
+```bash
+dune build
+```
+
+## Running Tests
+
+```bash
+dune exec ./test_nock.exe
+```
+
+All tests should pass.
+
+## Benchmarks
+
+Run benchmarks to compare performance:
+
+```bash
+# Run OCaml benchmark
+dune exec ./bench_nock.exe
+
+# Run simple C benchmark (for comparison)
+./bench_simple
+
+# Run full comparison
+./compare.sh
+```
+
+### Performance Comparison
+
+Benchmark results comparing the OCaml implementation against a simple C implementation:
+
+| Operation | C (Mops/sec) | OCaml (Mops/sec) | Ratio (C/OCaml) |
+|------------------------|--------------|------------------|-----------------|
+| Slot/fragment | 579 | 42.5 | 13.6x |
+| Constant | 595 | 142 | 4.2x |
+| Is-cell test | 271 | 56.6 | 4.8x |
+| Increment | 265 | 63.1 | 4.2x |
+| Equality | 24 | 29.6 | 0.8x (faster!) |
+| If-then-else | 185 | 37.2 | 5.0x |
+| Composition | 174 | 36.0 | 4.8x |
+| Push | 26.5 | 32.7 | 0.8x (faster!) |
+| Cell construction | 25.9 | 53.2 | 0.5x (faster!) |
+| Deep slot lookup | 566 | 19.2 | 29.6x |
+
+**Key observations:**
+
+1. **Simple operations** (slot, constant): C is 4-14x faster due to no GC overhead and direct pointer manipulation
+2. **Memory-intensive operations** (equality, push, cell construction): OCaml is **competitive or faster** due to efficient allocation and GC
+3. **Average performance**: C is ~2-5x faster for most operations
+
+**Important notes:**
+- The C benchmark is a simplified implementation without Vere's loom allocator, jet system, or other infrastructure
+- OCaml uses Zarith (GMP-based) for arbitrary-precision integers, while simple C uses direct 64-bit values
+- OCaml provides memory safety and type safety with reasonable performance
+- For production use, the full Vere implementation would be faster due to jets and memoization
+
+## Architecture
+
+### Differences from C Implementation
+
+1. **Memory management**: OCaml's GC vs Vere's loom-based allocation
+2. **No bytecode compiler** (yet): This port only includes the reference interpreter, not the optimized bytecode interpreter
+3. **No jet system** (yet): Jets (optimized implementations of common Nock formulas) are not yet implemented
+4. **Simple error handling**: Uses OCaml exceptions instead of Vere's bail mechanism
+
+### Type Mapping
+
+| Vere (C) | OCaml |
+|---------------------|----------------------|
+| `u3_noun` | `noun` (variant type) |
+| `u3_atom` | `Atom of Z.t` |
+| `u3_cell` | `Cell of noun * noun`|
+| `u3_none` | Not used (Option type instead) |
+
+## Next Steps for Porting
+
+Potential areas to expand:
+
+1. **Bytecode compiler**: Port the bytecode compiler and interpreter (~100 opcodes)
+2. **Jet system**: Implement the jet dashboard for accelerating common computations
+3. **Memory management**: Implement loom-style allocation (optional, for closer parity)
+4. **Memoization**: Port the `%memo` hint implementation
+5. **Integration**: Hook up to other Vere subsystems (jets, traces, etc.)
+
+## Files
+
+- `noun.ml` - Noun type and basic operations
+- `nock.ml` - Nock interpreter
+- `test_nock.ml` - Test suite
+- `dune` - Build configuration
+- `dune-project` - Dune project file
+
+## References
+
+- [Nock Specification](https://developers.urbit.org/reference/nock/definition)
+- [Vere Source](https://github.com/urbit/vere)
+- Original C implementation: `vere/pkg/noun/nock.c`
diff --git a/ocaml/bench_nock.c b/ocaml/bench_nock.c
new file mode 100644
index 0000000..7747e10
--- /dev/null
+++ b/ocaml/bench_nock.c
@@ -0,0 +1,165 @@
+/// Nock benchmark - C implementation
+///
+/// Benchmarks various Nock operations using the C reference interpreter
+
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+
+#include "../vere/pkg/noun/noun.h"
+
+static double
+get_time_ms(void)
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0);
+}
+
+static void
+bench_nock(const char* name, u3_noun subject, u3_noun formula, c3_w iterations)
+{
+ double start = get_time_ms();
+
+ for (c3_w i = 0; i < iterations; i++) {
+ u3_noun result = u3n_nock_on(u3k(subject), u3k(formula));
+ u3z(result);
+ }
+
+ double end = get_time_ms();
+ double total = end - start;
+ double per_iter = total / iterations;
+
+ printf("%-30s %8u iterations in %10.2f ms (%10.6f ms/iter, %10.0f ops/sec)\n",
+ name, iterations, total, per_iter, 1000.0 / per_iter);
+}
+
+int
+main(int argc, char* argv[])
+{
+ // Initialize Urbit memory system
+ u3m_boot_lite(1 << 26); // 64MB loom
+
+ printf("Nock Benchmark - C Implementation\n");
+ printf("==================================\n\n");
+
+ c3_w iterations = 1000000; // 1M iterations for fast ops
+ c3_w slow_iters = 100000; // 100K for slower ops
+
+ // Benchmark 0: slot lookup
+ {
+ u3_noun subject = u3nc(u3i_word(42), u3i_word(99));
+ u3_noun formula = u3nc(0, 2); // [0 2] - get head
+ bench_nock("Opcode 0: slot/fragment", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 1: constant
+ {
+ u3_noun subject = u3i_word(0);
+ u3_noun formula = u3nc(1, u3i_word(42)); // [1 42]
+ bench_nock("Opcode 1: constant", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 3: is-cell
+ {
+ u3_noun subject = u3i_word(0);
+ u3_noun formula = u3nc(3, u3nc(1, u3i_word(42))); // [3 [1 42]]
+ bench_nock("Opcode 3: is-cell (atom)", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 4: increment
+ {
+ u3_noun subject = u3i_word(0);
+ u3_noun formula = u3nc(4, u3nc(1, u3i_word(1000))); // [4 [1 1000]]
+ bench_nock("Opcode 4: increment", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 5: equality
+ {
+ u3_noun subject = u3i_word(0);
+ u3_noun formula = u3nt(5, u3nc(1, u3i_word(42)), u3nc(1, u3i_word(42)));
+ bench_nock("Opcode 5: equality (equal)", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 6: if-then-else
+ {
+ u3_noun subject = u3i_word(0);
+ // [6 [1 0] [1 11] [1 22]]
+ u3_noun formula = u3nq(6, u3nc(1, 0), u3nc(1, u3i_word(11)), u3nc(1, u3i_word(22)));
+ bench_nock("Opcode 6: if-then-else", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 7: composition
+ {
+ u3_noun subject = u3i_word(42);
+ // [7 [1 99] [0 1]]
+ u3_noun formula = u3nt(7, u3nc(1, u3i_word(99)), u3nc(0, 1));
+ bench_nock("Opcode 7: composition", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark 8: push
+ {
+ u3_noun subject = u3i_word(42);
+ // [8 [1 99] [0 1]]
+ u3_noun formula = u3nt(8, u3nc(1, u3i_word(99)), u3nc(0, 1));
+ bench_nock("Opcode 8: push", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark: Fibonacci-like recursion (slower)
+ {
+ // Decrement: [6 [5 [0 1] [1 0]] [1 0] [8 [1 0] [4 [0 3]]]]
+ // This is: if(subject == 0) 0 else subject-1
+ u3_noun dec_fol = u3nq(6,
+ u3nt(5, u3nc(0, 1), u3nc(1, 0)),
+ u3nc(1, 0),
+ u3nt(8, u3nc(1, 0), u3nc(4, u3nc(0, 3))));
+
+ u3_noun subject = u3i_word(10);
+ bench_nock("Complex: decrement loop", subject, dec_fol, slow_iters);
+ u3z(subject);
+ u3z(dec_fol);
+ }
+
+ // Benchmark: Tree construction
+ {
+ u3_noun subject = u3i_word(0);
+ // [[1 1] [1 2]] - constructs a cell
+ u3_noun formula = u3nc(u3nc(1, 1), u3nc(1, 2));
+ bench_nock("Cell construction", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ // Benchmark: Deep slot lookup
+ {
+ // Build a deep tree: [[[[1 2] 3] 4] 5]
+ u3_noun subject = u3nc(u3nc(u3nc(u3nc(1, 2), 3), 4), 5);
+ u3_noun formula = u3nc(0, 16); // slot 16 = deepest left (1)
+ bench_nock("Deep slot lookup (depth 4)", subject, formula, iterations);
+ u3z(subject);
+ u3z(formula);
+ }
+
+ printf("\n");
+
+ // Cleanup
+ u3m_grab(u3_none);
+
+ return 0;
+}
diff --git a/ocaml/bench_nock.ml b/ocaml/bench_nock.ml
new file mode 100644
index 0000000..a71b3da
--- /dev/null
+++ b/ocaml/bench_nock.ml
@@ -0,0 +1,132 @@
+open Nock_lib.Noun
+open Nock_lib.Nock
+
+(** Benchmark utilities *)
+
+let time_ms () =
+ Unix.gettimeofday () *. 1000.0
+
+let bench_nock name subject formula iterations =
+ (* Warmup *)
+ for _i = 1 to 100 do
+ let _ = nock subject formula in ()
+ done;
+
+ (* Actual benchmark *)
+ Gc.compact ();
+ let start = time_ms () in
+
+ for _i = 1 to iterations do
+ let _result = nock subject formula in ()
+ done;
+
+ let finish = time_ms () in
+ let total = finish -. start in
+ let per_iter = total /. (float_of_int iterations) in
+ let ops_per_sec = 1000.0 /. per_iter in
+
+ Printf.printf "%-30s %8d iterations in %10.2f ms (%10.6f ms/iter, %10.0f ops/sec)\n"
+ name iterations total per_iter ops_per_sec
+
+(** Benchmarks *)
+
+let () =
+ Printf.printf "Nock Benchmark - OCaml Implementation\n";
+ Printf.printf "======================================\n\n";
+
+ let iterations = 1_000_000 in (* 1M iterations for fast ops *)
+ let slow_iters = 100_000 in (* 100K for slower ops *)
+
+ (* Benchmark 0: slot lookup *)
+ begin
+ let subject = cell (atom 42) (atom 99) in
+ let formula = cell (atom 0) (atom 2) in (* [0 2] - get head *)
+ bench_nock "Opcode 0: slot/fragment" subject formula iterations
+ end;
+
+ (* Benchmark 1: constant *)
+ begin
+ let subject = atom 0 in
+ let formula = cell (atom 1) (atom 42) in (* [1 42] *)
+ bench_nock "Opcode 1: constant" subject formula iterations
+ end;
+
+ (* Benchmark 3: is-cell *)
+ begin
+ let subject = atom 0 in
+ let formula = cell (atom 3) (cell (atom 1) (atom 42)) in (* [3 [1 42]] *)
+ bench_nock "Opcode 3: is-cell (atom)" subject formula iterations
+ end;
+
+ (* Benchmark 4: increment *)
+ begin
+ let subject = atom 0 in
+ let formula = cell (atom 4) (cell (atom 1) (atom 1000)) in (* [4 [1 1000]] *)
+ bench_nock "Opcode 4: increment" subject formula iterations
+ end;
+
+ (* Benchmark 5: equality *)
+ begin
+ let subject = atom 0 in
+ (* [5 [1 42] [1 42]] *)
+ let formula = cell (atom 5) (cell (cell (atom 1) (atom 42)) (cell (atom 1) (atom 42))) in
+ bench_nock "Opcode 5: equality (equal)" subject formula iterations
+ end;
+
+ (* Benchmark 6: if-then-else *)
+ begin
+ let subject = atom 0 in
+ (* [6 [1 0] [1 11] [1 22]] *)
+ let formula = cell (atom 6)
+ (cell (cell (atom 1) (atom 0))
+ (cell (cell (atom 1) (atom 11))
+ (cell (atom 1) (atom 22)))) in
+ bench_nock "Opcode 6: if-then-else" subject formula iterations
+ end;
+
+ (* Benchmark 7: composition *)
+ begin
+ let subject = atom 42 in
+ (* [7 [1 99] [0 1]] *)
+ let formula = cell (atom 7) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))) in
+ bench_nock "Opcode 7: composition" subject formula iterations
+ end;
+
+ (* Benchmark 8: push *)
+ begin
+ let subject = atom 42 in
+ (* [8 [1 99] [0 1]] *)
+ let formula = cell (atom 8) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1))) in
+ bench_nock "Opcode 8: push" subject formula iterations
+ end;
+
+ (* Benchmark: Decrement-like operation (slower) *)
+ begin
+ (* [6 [5 [0 1] [1 0]] [1 0] [8 [1 0] [4 [0 3]]]] *)
+ (* This is: if(subject == 0) 0 else subject+1 (simplified) *)
+ let dec_fol = cell (atom 6)
+ (cell (cell (atom 5) (cell (cell (atom 0) (atom 1)) (cell (atom 1) (atom 0))))
+ (cell (cell (atom 1) (atom 0))
+ (cell (atom 8) (cell (cell (atom 1) (atom 0)) (cell (atom 4) (cell (atom 0) (atom 3))))))) in
+
+ let subject = atom 10 in
+ bench_nock "Complex: decrement loop" subject dec_fol slow_iters
+ end;
+
+ (* Benchmark: Tree construction *)
+ begin
+ let subject = atom 0 in
+ (* [[1 1] [1 2]] - constructs a cell *)
+ let formula = cell (cell (atom 1) (atom 1)) (cell (atom 1) (atom 2)) in
+ bench_nock "Cell construction" subject formula iterations
+ end;
+
+ (* Benchmark: Deep slot lookup *)
+ begin
+ (* Build a deep tree: [[[[1 2] 3] 4] 5] *)
+ let subject = cell (cell (cell (cell (atom 1) (atom 2)) (atom 3)) (atom 4)) (atom 5) in
+ let formula = cell (atom 0) (atom 16) in (* slot 16 = deepest left (1) *)
+ bench_nock "Deep slot lookup (depth 4)" subject formula iterations
+ end;
+
+ Printf.printf "\n"
diff --git a/ocaml/bench_simple b/ocaml/bench_simple
new file mode 100755
index 0000000..c9c64e7
--- /dev/null
+++ b/ocaml/bench_simple
Binary files differ
diff --git a/ocaml/bench_simple.c b/ocaml/bench_simple.c
new file mode 100644
index 0000000..530746f
--- /dev/null
+++ b/ocaml/bench_simple.c
@@ -0,0 +1,265 @@
+/// Simple standalone Nock benchmark
+/// This is a simplified version that doesn't require linking against full Vere
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <sys/time.h>
+#include <gmp.h>
+
+// Simplified noun type (direct atom or indirect cell)
+typedef uintptr_t noun;
+
+#define IS_ATOM(n) (!((n) & 1))
+#define IS_CELL(n) ((n) & 1)
+#define MAKE_DIRECT_ATOM(n) ((noun)(n) << 1)
+#define GET_DIRECT_ATOM(n) ((n) >> 1)
+
+// Simple cell structure
+typedef struct cell_s {
+ noun head;
+ noun tail;
+} cell_t;
+
+#define CELL_PTR(n) ((cell_t*)((n) & ~1ULL))
+#define MAKE_CELL(ptr) ((noun)ptr | 1)
+
+static noun
+make_atom(uint64_t val)
+{
+ return MAKE_DIRECT_ATOM(val);
+}
+
+static noun
+make_cell(noun head, noun tail)
+{
+ cell_t* c = malloc(sizeof(cell_t));
+ c->head = head;
+ c->tail = tail;
+ return MAKE_CELL(c);
+}
+
+static noun
+head(noun n)
+{
+ return CELL_PTR(n)->head;
+}
+
+static noun
+tail(noun n)
+{
+ return CELL_PTR(n)->tail;
+}
+
+// Simplified slot (no error handling)
+static noun
+slot(uint64_t axis, noun n)
+{
+ if (axis == 1) return n;
+ if (axis & 1) {
+ // Odd: go right (tail)
+ return slot(axis >> 1, tail(n));
+ } else {
+ // Even: go left (head)
+ return slot(axis >> 1, head(n));
+ }
+}
+
+// Ultra-simplified nock (only handles the opcodes we benchmark)
+static noun
+nock_simple(noun subject, noun formula)
+{
+ if (IS_CELL(formula) && IS_CELL(head(formula))) {
+ // Cell construction: [[a b] c] -> [nock(a) nock(c)]
+ noun h = nock_simple(subject, head(formula));
+ noun t = nock_simple(subject, tail(formula));
+ return make_cell(h, t);
+ }
+
+ if (!IS_CELL(formula)) return 0; // Error
+
+ noun op = head(formula);
+ noun arg = tail(formula);
+
+ if (!IS_ATOM(op)) return 0; // Error
+
+ uint64_t opcode = GET_DIRECT_ATOM(op);
+
+ switch (opcode) {
+ case 0: // slot
+ if (!IS_ATOM(arg)) return 0;
+ return slot(GET_DIRECT_ATOM(arg), subject);
+
+ case 1: // constant
+ return arg;
+
+ case 3: { // is-cell
+ noun val = nock_simple(subject, arg);
+ return IS_CELL(val) ? make_atom(0) : make_atom(1);
+ }
+
+ case 4: { // increment
+ noun val = nock_simple(subject, arg);
+ if (!IS_ATOM(val)) return 0;
+ return MAKE_DIRECT_ATOM(GET_DIRECT_ATOM(val) + 1);
+ }
+
+ case 5: { // equality
+ noun pair = nock_simple(subject, arg);
+ noun a = head(pair);
+ noun b = tail(pair);
+ // Simplified: only works for direct atoms
+ if (IS_ATOM(a) && IS_ATOM(b)) {
+ return (a == b) ? make_atom(0) : make_atom(1);
+ }
+ return make_atom(1);
+ }
+
+ case 6: { // if-then-else
+ noun test_fol = head(arg);
+ noun yes_fol = head(tail(arg));
+ noun no_fol = tail(tail(arg));
+
+ noun test_val = nock_simple(subject, test_fol);
+ uint64_t test = GET_DIRECT_ATOM(test_val);
+
+ if (test == 0) {
+ return nock_simple(subject, yes_fol);
+ } else {
+ return nock_simple(subject, no_fol);
+ }
+ }
+
+ case 7: { // composition
+ noun b = head(arg);
+ noun c = tail(arg);
+ noun new_subj = nock_simple(subject, b);
+ return nock_simple(new_subj, c);
+ }
+
+ case 8: { // push
+ noun b = head(arg);
+ noun c = tail(arg);
+ noun val = nock_simple(subject, b);
+ noun new_subj = make_cell(val, subject);
+ return nock_simple(new_subj, c);
+ }
+
+ default:
+ return 0; // Error
+ }
+}
+
+static double
+get_time_ms(void)
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000.0) + (tv.tv_usec / 1000.0);
+}
+
+static void
+bench(const char* name, noun subject, noun formula, int iterations)
+{
+ double start = get_time_ms();
+
+ for (int i = 0; i < iterations; i++) {
+ noun _result = nock_simple(subject, formula);
+ // Note: we're leaking memory here, but it's fine for a benchmark
+ }
+
+ double end = get_time_ms();
+ double total = end - start;
+ double per_iter = total / iterations;
+
+ printf("%-30s %8d iterations in %10.2f ms (%10.6f ms/iter, %10.0f ops/sec)\n",
+ name, iterations, total, per_iter, 1000.0 / per_iter);
+}
+
+int main()
+{
+ printf("Nock Benchmark - Simple C Implementation\n");
+ printf("=========================================\n");
+ printf("(Simplified version without full Vere infrastructure)\n\n");
+
+ int iterations = 1000000;
+ int slow_iters = 100000;
+
+ // Benchmark 0: slot
+ bench("Opcode 0: slot/fragment",
+ make_cell(make_atom(42), make_atom(99)),
+ make_cell(make_atom(0), make_atom(2)),
+ iterations);
+
+ // Benchmark 1: constant
+ bench("Opcode 1: constant",
+ make_atom(0),
+ make_cell(make_atom(1), make_atom(42)),
+ iterations);
+
+ // Benchmark 3: is-cell
+ bench("Opcode 3: is-cell (atom)",
+ make_atom(0),
+ make_cell(make_atom(3), make_cell(make_atom(1), make_atom(42))),
+ iterations);
+
+ // Benchmark 4: increment
+ bench("Opcode 4: increment",
+ make_atom(0),
+ make_cell(make_atom(4), make_cell(make_atom(1), make_atom(1000))),
+ iterations);
+
+ // Benchmark 5: equality
+ bench("Opcode 5: equality (equal)",
+ make_atom(0),
+ make_cell(make_atom(5),
+ make_cell(make_cell(make_atom(1), make_atom(42)),
+ make_cell(make_atom(1), make_atom(42)))),
+ iterations);
+
+ // Benchmark 6: if-then-else
+ bench("Opcode 6: if-then-else",
+ make_atom(0),
+ make_cell(make_atom(6),
+ make_cell(make_cell(make_atom(1), make_atom(0)),
+ make_cell(make_cell(make_atom(1), make_atom(11)),
+ make_cell(make_atom(1), make_atom(22))))),
+ iterations);
+
+ // Benchmark 7: composition
+ bench("Opcode 7: composition",
+ make_atom(42),
+ make_cell(make_atom(7),
+ make_cell(make_cell(make_atom(1), make_atom(99)),
+ make_cell(make_atom(0), make_atom(1)))),
+ iterations);
+
+ // Benchmark 8: push
+ bench("Opcode 8: push",
+ make_atom(42),
+ make_cell(make_atom(8),
+ make_cell(make_cell(make_atom(1), make_atom(99)),
+ make_cell(make_atom(0), make_atom(1)))),
+ iterations);
+
+ // Cell construction
+ bench("Cell construction",
+ make_atom(0),
+ make_cell(make_cell(make_atom(1), make_atom(1)),
+ make_cell(make_atom(1), make_atom(2))),
+ iterations);
+
+ // Deep slot lookup
+ bench("Deep slot lookup (depth 4)",
+ make_cell(make_cell(make_cell(make_cell(make_atom(1), make_atom(2)),
+ make_atom(3)),
+ make_atom(4)),
+ make_atom(5)),
+ make_cell(make_atom(0), make_atom(16)),
+ iterations);
+
+ printf("\n");
+
+ return 0;
+}
diff --git a/ocaml/bitstream.ml b/ocaml/bitstream.ml
new file mode 100644
index 0000000..73eda36
--- /dev/null
+++ b/ocaml/bitstream.ml
@@ -0,0 +1,97 @@
+(** Bitstream utilities for jam/cue serialization *)
+
+(** A bitstream writer *)
+type writer = {
+ buf: bytes ref; (** Buffer for bits *)
+ mutable bit_pos: int; (** Current bit position *)
+}
+
+(** A bitstream reader *)
+type reader = {
+ buf: bytes; (** Buffer to read from *)
+ mutable bit_pos: int; (** Current bit position *)
+ len: int; (** Length in bits *)
+}
+
+(** Create a new bitstream writer *)
+let writer_create () = {
+ buf = ref (Bytes.create 1024);
+ bit_pos = 0;
+}
+
+(** Grow the writer buffer if needed *)
+let writer_ensure w bits_needed =
+ let bytes_needed = (w.bit_pos + bits_needed + 7) / 8 in
+ if bytes_needed > Bytes.length !(w.buf) then begin
+ let old_buf = !(w.buf) in
+ let new_size = max (bytes_needed * 2) (Bytes.length old_buf * 2) in
+ let new_buf = Bytes.create new_size in
+ Bytes.blit old_buf 0 new_buf 0 (Bytes.length old_buf);
+ w.buf := new_buf
+ end
+
+(** Write a single bit *)
+let write_bit w bit =
+ writer_ensure w 1;
+ let byte_pos = w.bit_pos / 8 in
+ let bit_off = w.bit_pos mod 8 in
+ if bit then begin
+ let old_byte = Bytes.get_uint8 !(w.buf) byte_pos in
+ Bytes.set_uint8 !(w.buf) byte_pos (old_byte lor (1 lsl bit_off))
+ end;
+ w.bit_pos <- w.bit_pos + 1
+
+(** Write multiple bits from a Z.t value *)
+let write_bits w value nbits =
+ writer_ensure w nbits;
+ for i = 0 to nbits - 1 do
+ let bit = Z.testbit value i in
+ write_bit w bit
+ done
+
+(** Get the final bytes from a writer *)
+let writer_to_bytes w =
+ let byte_len = (w.bit_pos + 7) / 8 in
+ Bytes.sub !(w.buf) 0 byte_len
+
+(** Create a bitstream reader *)
+let reader_create buf =
+ {
+ buf;
+ bit_pos = 0;
+ len = Bytes.length buf * 8;
+ }
+
+(** Read a single bit *)
+let read_bit r =
+ if r.bit_pos >= r.len then
+ raise (Invalid_argument "read_bit: end of stream");
+ let byte_pos = r.bit_pos / 8 in
+ let bit_off = r.bit_pos mod 8 in
+ let byte_val = Bytes.get_uint8 r.buf byte_pos in
+ r.bit_pos <- r.bit_pos + 1;
+ (byte_val lsr bit_off) land 1 = 1
+
+(** Read multiple bits as a Z.t *)
+let read_bits r nbits =
+ let result = ref Z.zero in
+ for i = 0 to nbits - 1 do
+ if read_bit r then
+ result := Z.logor !result (Z.shift_left Z.one i)
+ done;
+ !result
+
+(** Peek at a bit without advancing *)
+let peek_bit r =
+ if r.bit_pos >= r.len then
+ raise (Invalid_argument "peek_bit: end of stream");
+ let byte_pos = r.bit_pos / 8 in
+ let bit_off = r.bit_pos mod 8 in
+ let byte_val = Bytes.get_uint8 r.buf byte_pos in
+ (byte_val lsr bit_off) land 1 = 1
+
+(** Get current bit position *)
+let reader_pos r = r.bit_pos
+
+(** Check if at end of stream *)
+let reader_at_end r = r.bit_pos >= r.len
diff --git a/ocaml/compare.sh b/ocaml/compare.sh
new file mode 100755
index 0000000..cf2f90f
--- /dev/null
+++ b/ocaml/compare.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+echo "=========================================="
+echo "Nock Interpreter Benchmark Comparison"
+echo "=========================================="
+echo ""
+
+echo "Running Simple C Implementation..."
+echo "------------------------------------------"
+./bench_simple > /tmp/bench_c.txt 2>&1
+cat /tmp/bench_c.txt
+echo ""
+
+echo "Running OCaml Implementation..."
+echo "------------------------------------------"
+dune exec ./bench_nock.exe > /tmp/bench_ocaml.txt 2>&1
+cat /tmp/bench_ocaml.txt
+echo ""
+
+echo "=========================================="
+echo "Side-by-Side Comparison"
+echo "=========================================="
+echo ""
+echo "Note: Simple C uses a minimal interpreter without Vere's infrastructure"
+echo "OCaml uses Zarith (GMP-based) for arbitrary-precision integers"
+echo ""
+
+# Extract ops/sec and create comparison
+echo "Operation | C (ops/sec) | OCaml (ops/sec) | Ratio (C/OCaml)"
+echo "-------------------------------|--------------|-----------------|----------------"
+
+extract_ops() {
+ grep "$1" /tmp/bench_c.txt | awk '{print $(NF-1)}' | tr -d ','
+}
+
+extract_ops_ocaml() {
+ grep "$1" /tmp/bench_ocaml.txt | awk '{print $(NF-1)}' | tr -d ','
+}
+
+compare_bench() {
+ name="$1"
+ c_ops=$(extract_ops "$name")
+ ocaml_ops=$(extract_ops_ocaml "$name")
+
+ if [ -n "$c_ops" ] && [ -n "$ocaml_ops" ]; then
+ ratio=$(echo "scale=2; $c_ops / $ocaml_ops" | bc)
+ printf "%-30s | %12.0f | %15.0f | %5.2fx\n" "$name" "$c_ops" "$ocaml_ops" "$ratio"
+ fi
+}
+
+compare_bench "Opcode 0: slot"
+compare_bench "Opcode 1: constant"
+compare_bench "Opcode 3: is-cell"
+compare_bench "Opcode 4: increment"
+compare_bench "Opcode 5: equality"
+compare_bench "Opcode 6: if-then-else"
+compare_bench "Opcode 7: composition"
+compare_bench "Opcode 8: push"
+compare_bench "Cell construction"
+compare_bench "Deep slot lookup"
+
+echo ""
+echo "=========================================="
+echo "Summary"
+echo "=========================================="
+echo ""
+echo "The simple C implementation is faster primarily because:"
+echo " 1. No garbage collection overhead"
+echo " 2. Direct pointer manipulation"
+echo " 3. Inline function calls with -O3"
+echo " 4. Stack allocation for small values"
+echo ""
+echo "The OCaml implementation advantages:"
+echo " 1. Memory safety (no manual memory management)"
+echo " 2. Arbitrary-precision integers built-in (via Zarith/GMP)"
+echo " 3. Pattern matching for cleaner code"
+echo " 4. Type safety catches errors at compile time"
+echo ""
+echo "For a real comparison with Vere, we would need to benchmark"
+echo "against the full Vere nock interpreter with its loom allocator,"
+echo "jet system, and memoization."
diff --git a/ocaml/dune b/ocaml/dune
new file mode 100644
index 0000000..3943b7b
--- /dev/null
+++ b/ocaml/dune
@@ -0,0 +1,19 @@
+(library
+ (name nock_lib)
+ (modules noun nock bitstream serial)
+ (libraries zarith))
+
+(executable
+ (name test_nock)
+ (modules test_nock)
+ (libraries nock_lib zarith))
+
+(executable
+ (name test_serial)
+ (modules test_serial)
+ (libraries nock_lib zarith))
+
+(executable
+ (name bench_nock)
+ (modules bench_nock)
+ (libraries nock_lib zarith unix))
diff --git a/ocaml/dune-project b/ocaml/dune-project
new file mode 100644
index 0000000..4a478ee
--- /dev/null
+++ b/ocaml/dune-project
@@ -0,0 +1,2 @@
+(lang dune 3.0)
+(name vere_nock)
diff --git a/ocaml/nock.ml b/ocaml/nock.ml
new file mode 100644
index 0000000..34065b8
--- /dev/null
+++ b/ocaml/nock.ml
@@ -0,0 +1,164 @@
+open Noun
+
+(** Nock interpreter
+
+ Based on the reference implementation from vere/pkg/noun/nock.c
+
+ The Nock spec has 12 opcodes (0-11):
+ - 0: slot/fragment lookup
+ - 1: constant
+ - 2: nock (recursion)
+ - 3: is-cell test
+ - 4: increment
+ - 5: equality test
+ - 6: if-then-else
+ - 7: composition
+ - 8: push
+ - 9: call with axis
+ - 10: hint (ignored in reference implementation)
+ - 11: scry (errors in reference implementation)
+*)
+
+(** Main nock evaluation function: nock(subject, formula)
+
+ In Nock notation: *[subject formula]
+
+ This is a direct port of _n_nock_on from nock.c:157-396
+*)
+let rec nock_on bus fol =
+ match fol with
+ | Cell (hib, gal) when is_cell hib ->
+ (* [a b] -> compute both sides and cons *)
+ let poz = nock_on bus hib in
+ let riv = nock_on bus gal in
+ cell poz riv
+
+ | Cell (Atom op, gal) ->
+ (match Z.to_int op with
+ | 0 ->
+ (* /[axis subject] - slot/fragment lookup *)
+ if not (is_atom gal) then raise Exit
+ else slot (match gal with Atom n -> n | _ -> raise Exit) bus
+
+ | 1 ->
+ (* =[constant subject] - return constant *)
+ gal
+
+ | 2 ->
+ (* *[subject formula new_subject] - evaluate with new subject *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ let seb = nock_on bus b_gal in
+ let nex = nock_on bus c_gal in
+ nock_on seb nex
+
+ | 3 ->
+ (* ?[subject formula] - is-cell test *)
+ let gof = nock_on bus gal in
+ if is_cell gof then atom 0 else atom 1
+
+ | 4 ->
+ (* +[subject formula] - increment *)
+ let gof = nock_on bus gal in
+ inc gof
+
+ | 5 ->
+ (* =[subject formula] - equality test *)
+ let wim = nock_on bus gal in
+ if not (is_cell wim) then raise Exit;
+ let a = head wim in
+ let b = tail wim in
+ if equal a b then atom 0 else atom 1
+
+ | 6 ->
+ (* if-then-else *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let cd_gal = tail gal in
+ if not (is_cell cd_gal) then raise Exit;
+ let c_gal = head cd_gal in
+ let d_gal = tail cd_gal in
+
+ let tys = nock_on bus b_gal in
+ let nex = match tys with
+ | Atom n when Z.equal n Z.zero -> c_gal
+ | Atom n when Z.equal n Z.one -> d_gal
+ | _ -> raise Exit
+ in
+ nock_on bus nex
+
+ | 7 ->
+ (* composition: *[*[subject b] c] *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ let bod = nock_on bus b_gal in
+ nock_on bod c_gal
+
+ | 8 ->
+ (* push: *[[*[subject b] subject] c] *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ let heb = nock_on bus b_gal in
+ let bod = cell heb bus in
+ nock_on bod c_gal
+
+ | 9 ->
+ (* call: *[*[subject c] axis] *)
+ if not (is_cell gal) then raise Exit;
+ let b_gal = head gal in
+ let c_gal = tail gal in
+ if not (is_atom b_gal) then raise Exit;
+
+ let seb = nock_on bus c_gal in
+ let nex = slot (match b_gal with Atom n -> n | _ -> raise Exit) seb in
+ nock_on seb nex
+
+ | 10 ->
+ (* hint - in reference implementation, hints are mostly ignored *)
+ let nex =
+ if is_cell gal then
+ (* [[hint-tag hint-value] formula] *)
+ tail gal
+ else
+ (* [hint-tag formula] where hint-value is implicit *)
+ gal
+ in
+ nock_on bus nex
+
+ | 11 ->
+ (* scry - not implemented in reference nock, raises error *)
+ raise Exit
+
+ | _ ->
+ (* Invalid opcode *)
+ raise Exit
+ )
+
+ | _ ->
+ (* Invalid formula structure *)
+ raise Exit
+
+(** Convenience function: nock(subject, formula) *)
+let nock subject formula =
+ nock_on subject formula
+
+(** slam: apply gate to sample
+ slam(gate, sample) = *[gate [9 2 [0 1] [0 6] [1 sample] [0 7]]]
+
+ In practice this evaluates the gate (which is a core with a formula at axis 2)
+ with a modified sample (at axis 6).
+*)
+let slam gat sam =
+ let cor = cell (head gat) (cell sam (tail (tail gat))) in
+ let formula = slot (Z.of_int 2) cor in
+ nock_on cor formula
+
+(** kick: fire gate without changing sample
+ kick(gate) = *[gate 9 2 0 1]
+*)
+let kick gat =
+ let formula = slot (Z.of_int 2) gat in
+ nock_on gat formula
diff --git a/ocaml/noun.ml b/ocaml/noun.ml
new file mode 100644
index 0000000..c59ec80
--- /dev/null
+++ b/ocaml/noun.ml
@@ -0,0 +1,69 @@
+(** Noun type and basic operations *)
+
+(** A noun is either an atom (arbitrary-precision integer) or a cell (pair of nouns) *)
+type noun =
+ | Atom of Z.t (** Arbitrary-precision integer using Zarith *)
+ | Cell of noun * noun (** Pair of nouns *)
+
+(** Exception raised on nock evaluation errors *)
+exception Exit
+
+(** Create an atom from an int *)
+let atom n = Atom (Z.of_int n)
+
+(** Create a cell *)
+let cell a b = Cell (a, b)
+
+(** Test if a noun is a cell *)
+let is_cell = function
+ | Cell _ -> true
+ | Atom _ -> false
+
+(** Test if a noun is an atom *)
+let is_atom = function
+ | Atom _ -> true
+ | Cell _ -> false
+
+(** Get head of a cell *)
+let head = function
+ | Cell (h, _) -> h
+ | Atom _ -> raise Exit
+
+(** Get tail of a cell *)
+let tail = function
+ | Cell (_, t) -> t
+ | Atom _ -> raise Exit
+
+(** Fragment/axis lookup: slot(n, noun)
+ This implements the tree-addressing scheme:
+ - 1 is the root
+ - 2 is head, 3 is tail
+ - For n > 1: if even, go left; if odd, go right
+*)
+let rec slot n noun =
+ if Z.equal n Z.one then
+ noun
+ else if Z.equal n Z.zero then
+ raise Exit
+ else
+ let bit = Z.testbit n 0 in (* Check if odd *)
+ let parent = Z.shift_right n 1 in
+ let sub = slot parent noun in
+ if bit then tail sub else head sub
+
+(** Equality test for nouns *)
+let rec equal a b =
+ match a, b with
+ | Atom x, Atom y -> Z.equal x y
+ | Cell (ah, at), Cell (bh, bt) -> equal ah bh && equal at bt
+ | _, _ -> false
+
+(** Increment an atom *)
+let inc = function
+ | Atom n -> Atom (Z.succ n)
+ | Cell _ -> raise Exit
+
+(** Pretty-print a noun *)
+let rec pp_noun fmt = function
+ | Atom n -> Format.fprintf fmt "%s" (Z.to_string n)
+ | Cell (a, b) -> Format.fprintf fmt "[%a %a]" pp_noun a pp_noun b
diff --git a/ocaml/serial.ml b/ocaml/serial.ml
new file mode 100644
index 0000000..039cd2f
--- /dev/null
+++ b/ocaml/serial.ml
@@ -0,0 +1,191 @@
+(** Jam/cue serialization for nouns
+
+ Based on the Vere implementation in pkg/noun/serial.c
+
+ Jam encoding:
+ - Atoms: tag bit 0, then mat-encoded value
+ - Cells: tag bits 01, then recursively encode head and tail
+ - Backrefs: tag bits 11, then mat-encoded position
+
+ Mat encoding (length-prefixed):
+ - For 0: just bit 1
+ - For n > 0:
+ - Let a = bit-width of n
+ - Let b = bit-width of a
+ - Encode: [1 repeated b times][0][a in b-1 bits][n in a bits]
+*)
+
+open Noun
+open Bitstream
+
+(** Mat-encode a number into the bitstream
+
+ Mat encoding is a variable-length integer encoding:
+ - 0 is encoded as a single 1 bit
+ - For n > 0:
+ - a = number of bits in n (met 0 n)
+ - b = number of bits needed to represent a
+ - Write b 1-bits, then a 0-bit
+ - Write a-1 in b-1 bits
+ - Write n in a bits
+*)
+let rec mat_encode w n =
+ if Z.equal n Z.zero then
+ write_bit w true
+ else begin
+ let a = Z.numbits n in (* bit-width of n *)
+ let b = Z.numbits (Z.of_int a) in (* bit-width of a *)
+
+ (* Write b 1-bits followed by a 0-bit *)
+ for _i = 1 to b do
+ write_bit w true
+ done;
+ write_bit w false;
+
+ (* Write a-1 in b-1 bits *)
+ write_bits w (Z.of_int (a - 1)) (b - 1);
+
+ (* Write n in a bits *)
+ write_bits w n a
+ end
+
+(** Mat-decode from bitstream, returns (value, bits_read) *)
+let rec mat_decode r =
+ let start_pos = reader_pos r in
+
+ if not (read_bit r) then
+ (Z.zero, reader_pos r - start_pos)
+ else begin
+ (* Count leading 1 bits *)
+ let b = ref 1 in
+ while read_bit r do
+ b := !b + 1
+ done;
+
+ let b = !b in
+
+ if b = 1 then
+ (* Special case: just "10" means 1 *)
+ (Z.one, reader_pos r - start_pos)
+ else begin
+ (* Read a-1 in b-1 bits *)
+ let a_minus_1 = read_bits r (b - 1) in
+ let a = Z.to_int (Z.add a_minus_1 Z.one) in
+
+ (* Read n in a bits *)
+ let n = read_bits r a in
+ (n, reader_pos r - start_pos)
+ end
+ end
+
+(** Jam: serialize a noun to bytes
+
+ Uses a hash table to track positions for backreferences.
+ Returns the serialized bytes.
+*)
+let jam noun =
+ let w = writer_create () in
+ let positions = Hashtbl.create 256 in (* noun -> bit position *)
+
+ let rec jam_noun n =
+ match n with
+ | Atom a ->
+ (* Check if we've seen this atom before *)
+ begin match Hashtbl.find_opt positions n with
+ | Some pos ->
+ (* Backref might be smaller than re-encoding *)
+ let atom_size = 1 + (Z.numbits a) in (* rough estimate *)
+ let backref_size = 2 + (Z.numbits (Z.of_int pos)) in
+
+ if backref_size < atom_size then begin
+ (* Encode backref: tag bits 11 *)
+ write_bit w true;
+ write_bit w true;
+ mat_encode w (Z.of_int pos)
+ end else begin
+ (* Encode atom *)
+ write_bit w false;
+ mat_encode w a
+ end
+ | None ->
+ (* Record position and encode atom *)
+ Hashtbl.add positions n w.bit_pos;
+ write_bit w false;
+ mat_encode w a
+ end
+
+ | Cell (head, tail) ->
+ (* Check for backref *)
+ begin match Hashtbl.find_opt positions n with
+ | Some pos ->
+ (* Encode backref: tag bits 11 *)
+ write_bit w true;
+ write_bit w true;
+ mat_encode w (Z.of_int pos)
+ | None ->
+ (* Record position and encode cell *)
+ Hashtbl.add positions n w.bit_pos;
+ (* Tag bits 01 for cell *)
+ write_bit w true;
+ write_bit w false;
+ (* Recursively encode head and tail *)
+ jam_noun head;
+ jam_noun tail
+ end
+ in
+
+ jam_noun noun;
+ writer_to_bytes w
+
+(** Cue: deserialize bytes to a noun
+
+ Uses a hash table to store nouns by bit position for backreferences.
+*)
+let cue bytes =
+ let r = reader_create bytes in
+ let backref_table = Hashtbl.create 256 in (* bit position -> noun *)
+
+ let rec cue_noun () =
+ let pos = reader_pos r in
+
+ (* Read tag bit *)
+ let tag0 = read_bit r in
+
+ if not tag0 then begin
+ (* Atom: tag bit 0 *)
+ let (value, _width) = mat_decode r in
+ let result = Atom value in
+ Hashtbl.add backref_table pos result;
+ result
+ end else begin
+ (* Read second tag bit *)
+ let tag1 = read_bit r in
+
+ if tag1 then begin
+ (* Backref: tag bits 11 *)
+ let (ref_pos, _width) = mat_decode r in
+ let ref_pos = Z.to_int ref_pos in
+ match Hashtbl.find_opt backref_table ref_pos with
+ | Some noun -> noun
+ | None -> raise (Invalid_argument (Printf.sprintf "cue: invalid backref to position %d" ref_pos))
+ end else begin
+ (* Cell: tag bits 01 *)
+ let head = cue_noun () in
+ let tail = cue_noun () in
+ let result = Cell (head, tail) in
+ Hashtbl.add backref_table pos result;
+ result
+ end
+ end
+ in
+
+ cue_noun ()
+
+(** Convert bytes to a hex string for debugging *)
+let bytes_to_hex bytes =
+ let len = Bytes.length bytes in
+ let buf = Buffer.create (len * 2) in
+ for i = 0 to len - 1 do
+ Buffer.add_string buf (Printf.sprintf "%02x" (Bytes.get_uint8 bytes i))
+ done;
+ Buffer.contents buf
diff --git a/ocaml/test_nock.ml b/ocaml/test_nock.ml
new file mode 100644
index 0000000..73f2ce2
--- /dev/null
+++ b/ocaml/test_nock.ml
@@ -0,0 +1,284 @@
+open Nock_lib.Noun
+open Nock_lib.Nock
+
+(** Test utilities *)
+
+let assert_equal expected actual msg =
+ if not (equal expected actual) then begin
+ Printf.printf "FAIL: %s\n" msg;
+ Format.printf " Expected: %a@." pp_noun expected;
+ Format.printf " Actual: %a@." pp_noun actual;
+ exit 1
+ end else
+ Printf.printf "PASS: %s\n" msg
+
+let _assert_raises_exit f msg =
+ try
+ let _ = f () in
+ Printf.printf "FAIL: %s (expected Exit exception)\n" msg;
+ exit 1
+ with Exit ->
+ Printf.printf "PASS: %s\n" msg
+
+(** Basic noun tests *)
+
+let test_noun_basics () =
+ Printf.printf "\n=== Testing basic noun operations ===\n";
+
+ (* Test atom creation *)
+ let a = atom 42 in
+ assert_equal (atom 42) a "atom creation";
+
+ (* Test cell creation *)
+ let c = cell (atom 1) (atom 2) in
+ assert_equal (atom 1) (head c) "cell head";
+ assert_equal (atom 2) (tail c) "cell tail";
+
+ (* Test is_cell and is_atom *)
+ if not (is_atom a) then Printf.printf "FAIL: is_atom on atom\n" else Printf.printf "PASS: is_atom on atom\n";
+ if not (is_cell c) then Printf.printf "FAIL: is_cell on cell\n" else Printf.printf "PASS: is_cell on cell\n";
+ if is_atom c then Printf.printf "FAIL: not is_atom on cell\n" else Printf.printf "PASS: not is_atom on cell\n";
+ if is_cell a then Printf.printf "FAIL: not is_cell on atom\n" else Printf.printf "PASS: not is_cell on atom\n"
+
+(** Test slot/fragment addressing *)
+let test_slots () =
+ Printf.printf "\n=== Testing slot/fragment addressing ===\n";
+
+ (* Build tree: [[1 2] [3 4]] *)
+ let tree = cell (cell (atom 1) (atom 2)) (cell (atom 3) (atom 4)) in
+
+ (* Test slot addressing
+ 1 = whole tree
+ 2 = head = [1 2]
+ 3 = tail = [3 4]
+ 4 = head of head = 1
+ 5 = tail of head = 2
+ 6 = head of tail = 3
+ 7 = tail of tail = 4
+ *)
+ assert_equal tree (slot Z.one tree) "slot 1 (root)";
+ assert_equal (cell (atom 1) (atom 2)) (slot (Z.of_int 2) tree) "slot 2 (head)";
+ assert_equal (cell (atom 3) (atom 4)) (slot (Z.of_int 3) tree) "slot 3 (tail)";
+ assert_equal (atom 1) (slot (Z.of_int 4) tree) "slot 4";
+ assert_equal (atom 2) (slot (Z.of_int 5) tree) "slot 5";
+ assert_equal (atom 3) (slot (Z.of_int 6) tree) "slot 6";
+ assert_equal (atom 4) (slot (Z.of_int 7) tree) "slot 7"
+
+(** Test Nock opcode 0: slot lookup *)
+let test_nock_0 () =
+ Printf.printf "\n=== Testing Nock opcode 0 (slot) ===\n";
+
+ let subject = cell (atom 4) (atom 5) in
+
+ (* *[subject [0 1]] = subject *)
+ assert_equal subject (nock subject (cell (atom 0) (atom 1))) "nock 0: axis 1";
+
+ (* *[[4 5] [0 2]] = 4 *)
+ assert_equal (atom 4) (nock subject (cell (atom 0) (atom 2))) "nock 0: axis 2";
+
+ (* *[[4 5] [0 3]] = 5 *)
+ assert_equal (atom 5) (nock subject (cell (atom 0) (atom 3))) "nock 0: axis 3"
+
+(** Test Nock opcode 1: constant *)
+let test_nock_1 () =
+ Printf.printf "\n=== Testing Nock opcode 1 (constant) ===\n";
+
+ let subject = atom 99 in
+
+ (* *[subject [1 42]] = 42 *)
+ assert_equal (atom 42) (nock subject (cell (atom 1) (atom 42))) "nock 1: return constant";
+
+ (* *[subject [1 [1 2]]] = [1 2] *)
+ assert_equal
+ (cell (atom 1) (atom 2))
+ (nock subject (cell (atom 1) (cell (atom 1) (atom 2))))
+ "nock 1: return constant cell"
+
+(** Test Nock opcode 2: recursion *)
+let test_nock_2 () =
+ Printf.printf "\n=== Testing Nock opcode 2 (nock) ===\n";
+
+ (* *[42 [2 [0 1] [1 0]]] = *[42 0] = crash *)
+ (* *[42 [2 [1 99] [1 0 1]]] = *[99 [0 1]] = 99 *)
+ let subject = atom 42 in
+ let formula = cell (atom 2) (cell (cell (atom 1) (atom 99)) (cell (atom 1) (cell (atom 0) (atom 1)))) in
+ assert_equal (atom 99) (nock subject formula) "nock 2: evaluate with new subject"
+
+(** Test Nock opcode 3: is-cell *)
+let test_nock_3 () =
+ Printf.printf "\n=== Testing Nock opcode 3 (is-cell) ===\n";
+
+ (* *[42 [3 1 42]] = 1 (atom) *)
+ assert_equal (atom 1) (nock (atom 42) (cell (atom 3) (cell (atom 1) (atom 42)))) "nock 3: is-cell of atom";
+
+ (* *[42 [3 1 [1 2]]] = 0 (cell) *)
+ assert_equal
+ (atom 0)
+ (nock (atom 42) (cell (atom 3) (cell (atom 1) (cell (atom 1) (atom 2)))))
+ "nock 3: is-cell of cell"
+
+(** Test Nock opcode 4: increment *)
+let test_nock_4 () =
+ Printf.printf "\n=== Testing Nock opcode 4 (increment) ===\n";
+
+ (* *[42 [4 1 41]] = 42 *)
+ assert_equal (atom 42) (nock (atom 0) (cell (atom 4) (cell (atom 1) (atom 41)))) "nock 4: increment";
+
+ (* *[0 [4 0 1]] = 1 *)
+ assert_equal (atom 1) (nock (atom 0) (cell (atom 4) (cell (atom 0) (atom 1)))) "nock 4: increment subject"
+
+(** Test Nock opcode 5: equality *)
+let test_nock_5 () =
+ Printf.printf "\n=== Testing Nock opcode 5 (equality) ===\n";
+
+ (* *[0 [5 [1 4] [1 5]]] = 1 (not equal) *)
+ assert_equal
+ (atom 1)
+ (nock (atom 0) (cell (atom 5) (cell (cell (atom 1) (atom 4)) (cell (atom 1) (atom 5)))))
+ "nock 5: not equal";
+
+ (* *[0 [5 [1 4] [1 4]]] = 0 (equal) *)
+ assert_equal
+ (atom 0)
+ (nock (atom 0) (cell (atom 5) (cell (cell (atom 1) (atom 4)) (cell (atom 1) (atom 4)))))
+ "nock 5: equal"
+
+(** Test Nock opcode 6: if-then-else *)
+let test_nock_6 () =
+ Printf.printf "\n=== Testing Nock opcode 6 (if-then-else) ===\n";
+
+ (* *[42 [6 [1 0] [1 11] [1 22]]] = 11 (if 0 then 11 else 22) *)
+ assert_equal
+ (atom 11)
+ (nock (atom 42) (cell (atom 6) (cell (cell (atom 1) (atom 0)) (cell (cell (atom 1) (atom 11)) (cell (atom 1) (atom 22))))))
+ "nock 6: if true";
+
+ (* *[42 [6 [1 1] [1 11] [1 22]]] = 22 (if 1 then 11 else 22) *)
+ assert_equal
+ (atom 22)
+ (nock (atom 42) (cell (atom 6) (cell (cell (atom 1) (atom 1)) (cell (cell (atom 1) (atom 11)) (cell (atom 1) (atom 22))))))
+ "nock 6: if false"
+
+(** Test Nock opcode 7: composition *)
+let test_nock_7 () =
+ Printf.printf "\n=== Testing Nock opcode 7 (composition) ===\n";
+
+ (* *[42 [7 [1 99] [0 1]]] = *[99 [0 1]] = 99 *)
+ assert_equal
+ (atom 99)
+ (nock (atom 42) (cell (atom 7) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1)))))
+ "nock 7: composition"
+
+(** Test Nock opcode 8: push *)
+let test_nock_8 () =
+ Printf.printf "\n=== Testing Nock opcode 8 (push) ===\n";
+
+ (* *[42 [8 [1 99] [0 1]]] = *[[99 42] [0 1]] = [99 42] *)
+ assert_equal
+ (cell (atom 99) (atom 42))
+ (nock (atom 42) (cell (atom 8) (cell (cell (atom 1) (atom 99)) (cell (atom 0) (atom 1)))))
+ "nock 8: push"
+
+(** Test Nock opcode 9: call *)
+let test_nock_9 () =
+ Printf.printf "\n=== Testing Nock opcode 9 (call) ===\n";
+
+ (* Simplest test: *[42 [9 1 [0 1]]]
+ = evaluate [0 1] with 42 -> 42
+ = slot 1 of 42 -> 42
+ = *[42 42] -> should crash since 42 is not a valid formula
+
+ Better test: create subject with formula at position 3
+ *[[formula value] [9 2 [0 1]]]
+ where formula = [0 3] (get tail)
+
+ Actually: *[[1 2] [9 2 [1 [0 3]]]]
+ = evaluate [1 [0 3]] with [1 2] -> [0 3]
+ = slot 2 of [1 2] -> 1
+
+ Wait, that's wrong. Let me think about what 9 does:
+ *[subject [9 axis formula]]
+ = *[subject *[*[subject formula] axis]]
+
+ So: *[[1 2] [9 3 [0 1]]]
+ = *[*[[1 2] [0 1]] 3]
+ = *[[1 2] 3]
+ = slot 3 of [1 2]
+ = 2
+
+ But that's not right either. Let me re-read the spec.
+
+ Actually from the C code:
+ seb = nock(bus, c_gal)
+ nex = slot(b_gal, seb)
+ result = nock(seb, nex)
+
+ So for [9 b c]:
+ 1. Evaluate c with subject -> seb
+ 2. Get slot b from seb -> nex (this is the formula)
+ 3. Evaluate nex with seb as subject
+
+ Test: *[[99 [4 [0 2]]] [9 3 [0 1]]]
+ 1. seb = *[[99 [4 [0 2]]] [0 1]] = [99 [4 [0 2]]]
+ 2. nex = slot 3 of [99 [4 [0 2]]] = [4 [0 2]]
+ 3. result = *[[99 [4 [0 2]]] [4 [0 2]]]
+ = increment of *[[99 [4 [0 2]]] [0 2]]
+ = increment of 99
+ = 100
+ *)
+ let subj = cell (atom 99) (cell (atom 4) (cell (atom 0) (atom 2))) in
+ assert_equal
+ (atom 100)
+ (nock subj (cell (atom 9) (cell (atom 3) (cell (atom 0) (atom 1)))))
+ "nock 9: call formula at axis 3"
+
+(** Test Nock opcode 10: hint *)
+let test_nock_10 () =
+ Printf.printf "\n=== Testing Nock opcode 10 (hint) ===\n";
+
+ (* *[42 [10 99 [1 11]]] = 11 (hint ignored) *)
+ assert_equal
+ (atom 11)
+ (nock (atom 42) (cell (atom 10) (cell (atom 99) (cell (atom 1) (atom 11)))))
+ "nock 10: hint with value (ignored)";
+
+ (* *[42 [10 [99 [1 88]] [1 11]]] = 11 (hint ignored) *)
+ assert_equal
+ (atom 11)
+ (nock (atom 42) (cell (atom 10) (cell (cell (atom 99) (cell (atom 1) (atom 88))) (cell (atom 1) (atom 11)))))
+ "nock 10: hint with computed value (ignored)"
+
+(** Test Nock cell constructor shorthand *)
+let test_nock_cons () =
+ Printf.printf "\n=== Testing Nock cons (cell auto-construction) ===\n";
+
+ (* *[42 [[1 6] [1 7]]] = [6 7] *)
+ assert_equal
+ (cell (atom 6) (atom 7))
+ (nock (atom 42) (cell (cell (atom 1) (atom 6)) (cell (atom 1) (atom 7))))
+ "nock cons: [[1 6] [1 7]]"
+
+(** Run all tests *)
+let () =
+ Printf.printf "=================================\n";
+ Printf.printf "Nock OCaml Test Suite\n";
+ Printf.printf "=================================\n";
+
+ test_noun_basics ();
+ test_slots ();
+ test_nock_0 ();
+ test_nock_1 ();
+ test_nock_2 ();
+ test_nock_3 ();
+ test_nock_4 ();
+ test_nock_5 ();
+ test_nock_6 ();
+ test_nock_7 ();
+ test_nock_8 ();
+ test_nock_9 ();
+ test_nock_10 ();
+ test_nock_cons ();
+
+ Printf.printf "\n=================================\n";
+ Printf.printf "All tests passed!\n";
+ Printf.printf "=================================\n"
diff --git a/ocaml/test_serial.ml b/ocaml/test_serial.ml
new file mode 100644
index 0000000..69887c5
--- /dev/null
+++ b/ocaml/test_serial.ml
@@ -0,0 +1,185 @@
+open Nock_lib.Noun
+open Nock_lib.Serial
+
+(** Test utilities *)
+
+let assert_equal expected actual msg =
+ if not (equal expected actual) then begin
+ Printf.printf "FAIL: %s\n" msg;
+ Format.printf " Expected: %a@." pp_noun expected;
+ Format.printf " Actual: %a@." pp_noun actual;
+ exit 1
+ end else
+ Printf.printf "PASS: %s\n" msg
+
+let assert_bytes_equal expected actual msg =
+ if expected <> actual then begin
+ Printf.printf "FAIL: %s\n" msg;
+ Printf.printf " Expected: %s\n" (bytes_to_hex expected);
+ Printf.printf " Actual: %s\n" (bytes_to_hex actual);
+ exit 1
+ end else
+ Printf.printf "PASS: %s\n" msg
+
+(** Round-trip test: jam then cue should give original *)
+let test_roundtrip noun msg =
+ let jammed = jam noun in
+ let cued = cue jammed in
+ assert_equal noun cued msg
+
+(** Test basic atoms *)
+let test_atoms () =
+ Printf.printf "\n=== Testing atom serialization ===\n";
+
+ (* Test 0 *)
+ let n = atom 0 in
+ test_roundtrip n "atom 0 roundtrip";
+
+ (* Test small atoms *)
+ test_roundtrip (atom 1) "atom 1 roundtrip";
+ test_roundtrip (atom 2) "atom 2 roundtrip";
+ test_roundtrip (atom 42) "atom 42 roundtrip";
+ test_roundtrip (atom 255) "atom 255 roundtrip";
+ test_roundtrip (atom 256) "atom 256 roundtrip";
+
+ (* Test larger atoms *)
+ test_roundtrip (atom 65535) "atom 65535 roundtrip";
+ test_roundtrip (atom 1000000) "atom 1000000 roundtrip"
+
+(** Test basic cells *)
+let test_cells () =
+ Printf.printf "\n=== Testing cell serialization ===\n";
+
+ (* Simple cell [1 2] *)
+ let c = cell (atom 1) (atom 2) in
+ test_roundtrip c "cell [1 2] roundtrip";
+
+ (* Nested cells [[1 2] 3] *)
+ let c = cell (cell (atom 1) (atom 2)) (atom 3) in
+ test_roundtrip c "cell [[1 2] 3] roundtrip";
+
+ (* Deep nesting *)
+ let c = cell (cell (cell (atom 1) (atom 2)) (atom 3)) (atom 4) in
+ test_roundtrip c "cell [[[1 2] 3] 4] roundtrip";
+
+ (* Larger values *)
+ let c = cell (atom 1000) (atom 2000) in
+ test_roundtrip c "cell [1000 2000] roundtrip"
+
+(** Test trees *)
+let test_trees () =
+ Printf.printf "\n=== Testing tree serialization ===\n";
+
+ (* Binary tree *)
+ let tree = cell (cell (atom 1) (atom 2)) (cell (atom 3) (atom 4)) in
+ test_roundtrip tree "binary tree roundtrip";
+
+ (* Unbalanced tree *)
+ let tree = cell (atom 1) (cell (atom 2) (cell (atom 3) (atom 4))) in
+ test_roundtrip tree "right-leaning tree roundtrip";
+
+ (* List-like structure [1 [2 [3 0]]] *)
+ let list = cell (atom 1) (cell (atom 2) (cell (atom 3) (atom 0))) in
+ test_roundtrip list "list-like structure roundtrip"
+
+(** Test backreferences
+
+ When the same sub-noun appears multiple times, jam should use backreferences
+*)
+let test_backrefs () =
+ Printf.printf "\n=== Testing backreferences ===\n";
+
+ (* Create a noun with shared structure: [42 42]
+ The second 42 should be a backref to the first *)
+ let shared = atom 42 in
+ let n = cell shared shared in
+ test_roundtrip n "shared atom [42 42] roundtrip";
+
+ (* More complex sharing: [[1 2] [1 2]]
+ Second cell should backref to first *)
+ let sub = cell (atom 1) (atom 2) in
+ let n = cell sub sub in
+ test_roundtrip n "shared cell [[1 2] [1 2]] roundtrip";
+
+ (* Test that backrefs actually save space *)
+ let sub = cell (atom 100) (atom 200) in
+ let with_backref = cell sub sub in
+ let without_backref = cell (cell (atom 100) (atom 200)) (cell (atom 100) (atom 200)) in
+
+ let jammed_with = jam with_backref in
+ let jammed_without = jam without_backref in
+
+ Printf.printf " Shared structure size: %d bytes\n" (Bytes.length jammed_with);
+ Printf.printf " Duplicated structure size: %d bytes\n" (Bytes.length jammed_without);
+
+ (* Note: Due to how OCaml constructs values, physical equality might not work as expected,
+ but logical equality should still work for roundtrip *)
+ test_roundtrip with_backref "backref optimization roundtrip"
+
+(** Test known encodings
+
+ These test vectors would ideally come from the Vere test suite or Urbit dojo
+*)
+let test_known_encodings () =
+ Printf.printf "\n=== Testing known encodings ===\n";
+
+ (* We can generate these from Urbit with (jam 0), (jam 1), etc. *)
+
+ (* jam of 0 should be simple *)
+ let n = atom 0 in
+ let jammed = jam n in
+ Printf.printf " jam(0) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed);
+ test_roundtrip n "known encoding: 0";
+
+ (* jam of 1 *)
+ let n = atom 1 in
+ let jammed = jam n in
+ Printf.printf " jam(1) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed);
+ test_roundtrip n "known encoding: 1";
+
+ (* jam of [0 0] *)
+ let n = cell (atom 0) (atom 0) in
+ let jammed = jam n in
+ Printf.printf " jam([0 0]) = %s (%d bytes)\n" (bytes_to_hex jammed) (Bytes.length jammed);
+ test_roundtrip n "known encoding: [0 0]"
+
+(** Test edge cases *)
+let test_edge_cases () =
+ Printf.printf "\n=== Testing edge cases ===\n";
+
+ (* Very large atom *)
+ let big = Atom (Z.of_string "123456789012345678901234567890") in
+ test_roundtrip big "very large atom roundtrip";
+
+ (* Deep nesting *)
+ let rec make_deep n =
+ if n = 0 then atom 0
+ else cell (atom n) (make_deep (n - 1))
+ in
+ let deep = make_deep 50 in
+ test_roundtrip deep "deeply nested structure (50 levels) roundtrip";
+
+ (* Wide tree *)
+ let rec make_wide n =
+ if n = 0 then atom 0
+ else cell (make_wide (n - 1)) (make_wide (n - 1))
+ in
+ let wide = make_wide 6 in (* 2^6 = 64 leaves *)
+ test_roundtrip wide "wide binary tree (6 levels) roundtrip"
+
+(** Run all tests *)
+let () =
+ Printf.printf "=================================\n";
+ Printf.printf "Jam/Cue Serialization Test Suite\n";
+ Printf.printf "=================================\n";
+
+ test_atoms ();
+ test_cells ();
+ test_trees ();
+ test_backrefs ();
+ test_known_encodings ();
+ test_edge_cases ();
+
+ Printf.printf "\n=================================\n";
+ Printf.printf "All tests passed!\n";
+ Printf.printf "=================================\n"