diff options
author | polwex <polwex@sortug.com> | 2025-10-05 22:57:55 +0700 |
---|---|---|
committer | polwex <polwex@sortug.com> | 2025-10-05 22:57:55 +0700 |
commit | c4b71435d9afdb67450f320f54fb7aa99dcae85e (patch) | |
tree | a08c4c2f7965a95fcfe6dda09629d3f103d25a0b | |
parent | fcedfddf00b3f994e4f4e40332ac7fc192c63244 (diff) |
fixed jamcue
-rw-r--r-- | ocaml/CLAUDE.md | 278 | ||||
-rw-r--r-- | ocaml/dune-project | 11 | ||||
-rw-r--r-- | ocaml/lib/bitstream.ml (renamed from ocaml/bitstream.ml) | 27 | ||||
-rw-r--r-- | ocaml/lib/dune | 4 | ||||
-rw-r--r-- | ocaml/lib/nock.ml (renamed from ocaml/nock.ml) | 0 | ||||
-rw-r--r-- | ocaml/lib/noun.ml (renamed from ocaml/noun.ml) | 0 | ||||
-rw-r--r-- | ocaml/lib/serial.ml (renamed from ocaml/serial.ml) | 54 | ||||
-rw-r--r-- | ocaml/test/bench_nock.ml (renamed from ocaml/bench_nock.ml) | 0 | ||||
-rw-r--r-- | ocaml/test/dune (renamed from ocaml/dune) | 20 | ||||
-rw-r--r-- | ocaml/test/test_hex.ml | 26 | ||||
-rw-r--r-- | ocaml/test/test_jam_debug.ml | 20 | ||||
-rw-r--r-- | ocaml/test/test_nock.ml (renamed from ocaml/test_nock.ml) | 0 | ||||
-rw-r--r-- | ocaml/test/test_serial.ml (renamed from ocaml/test_serial.ml) | 2 | ||||
-rw-r--r-- | ocaml/tmp/get_jam_vectors.ml | 10 | ||||
-rw-r--r-- | ocaml/tmp/quick_test.ml | 6 | ||||
-rw-r--r-- | ocaml/tmp/test_hex.ml | 9 | ||||
-rw-r--r-- | ocaml/tmp/test_jam.ml | 17 | ||||
-rw-r--r-- | ocaml/tmp/test_prec.ml | 5 | ||||
-rw-r--r-- | ocaml/tmp/test_ref.ml | 7 | ||||
-rw-r--r-- | ocaml/urb.py | 405 |
20 files changed, 850 insertions, 51 deletions
diff --git a/ocaml/CLAUDE.md b/ocaml/CLAUDE.md new file mode 100644 index 0000000..2577dee --- /dev/null +++ b/ocaml/CLAUDE.md @@ -0,0 +1,278 @@ +# Overe - OCaml Port of Vere (Urbit Runtime) + +## Project Goal + +Port the Vere (Urbit C runtime) to OCaml piece by piece, starting with core components and comparing performance against the original C implementation. + +## Completed Work + +### 1. Nock Interpreter (`lib/nock.ml`) + +**Status**: ✅ Complete and tested + +- Implemented complete Nock interpreter with all 12 opcodes +- Direct port of `_n_nock_on` from C implementation in `pkg/noun/nock.c` +- Uses Zarith (GMP-backed) for arbitrary-precision integers +- All test cases pass (`test/test_nock.ml`) + +**Key Functions**: +- `nock_on`: Main interpreter loop +- `slot`: Tree addressing using bit notation +- All 12 opcodes: 0 (slot), 1 (constant), 2 (nock), 3 (cell test), 4 (increment), 5 (equality), 6 (if-then-else), 7 (compose), 8 (push), 9 (call), 10 (hint), 11 (wish/deprecated) + +### 2. Performance Benchmarking + +**Status**: ✅ Complete with detailed results + +Created comprehensive benchmark suite comparing C vs OCaml: +- `bench_nock.ml`: OCaml benchmarks +- `bench_simple.c`: Standalone C implementation for comparison +- `compare.sh`: Comparison script +- `BENCHMARKS.md`: Full results documentation + +**Key Findings**: +- C is 2-5x faster on average +- OCaml is competitive or faster on allocation-heavy operations +- OCaml shows more consistent performance (less variance) +- For decrement: C 1.39x faster +- For tree navigation: C 4.87x faster +- For nock(decrement): OCaml 1.12x faster +- For deep recursion: C 2.41x faster + +### 3. Noun Type System (`lib/noun.ml`) + +**Status**: ✅ Complete + +```ocaml +type noun = + | Atom of Z.t + | Cell of noun * noun +``` + +Helper functions: +- `atom`, `cell`: Constructors +- `is_atom`, `is_cell`: Type predicates +- `head`, `tail`: Cell accessors +- `slot`: Tree addressing (axis addressing) +- `atom_to_int`, `noun_to_string`: Conversions + +### 4. Bitstream Utilities (`lib/bitstream.ml`) + +**Status**: ✅ Complete + +Implements bit-level reading/writing for jam/cue serialization: + +**Writer**: +```ocaml +type writer = { + buf: bytes ref; + mutable bit_pos: int; +} +``` +- `writer_create()`: Initialize writer +- `write_bit`: Write single bit +- `write_bits`: Write multiple bits from a Z.t value +- `writer_to_bytes`: Extract final bytes +- `writer_ensure`: Dynamic buffer resizing + +**Reader**: +```ocaml +type reader = { + bytes: bytes; + mutable bit_pos: int; +} +``` +- `reader_create`: Initialize from bytes +- `read_bit`: Read single bit +- `read_bits`: Read multiple bits into Z.t +- `reader_pos`: Get current bit position + +**Critical Fix**: OCaml operator precedence issue +- Problem: `!(w.buf)` parsed as `(!w).buf` +- Solution: Use intermediate variable pattern: + ```ocaml + let buf_ref : bytes ref = w.buf in + let buf : bytes = !buf_ref in + ``` + +### 5. Jam/Cue Serialization (`lib/serial.ml`) + +**Status**: 🔴 IN PROGRESS - Has bugs + +Implementation of noun serialization with backreference compression. + +**Jam Encoding Format**: +- Atoms: tag bit `0`, then mat-encoded value +- Cells: tag bits `01`, then recursively encode head and tail +- Backrefs: tag bits `11`, then mat-encoded position + +**Mat Encoding** (variable-length integers): +- For 0: single `0` bit +- For n > 0: + - a = bit-width of n (`Z.numbits n`) + - b = bit-width of a + - Write b `1`-bits, then one `0`-bit + - Write a in b-1 bits + - Write n in a bits + +**Current Implementation**: +```ocaml +let mat_encode w n = + if Z.equal n Z.zero then + write_bit w false + else begin + let a = Z.numbits n in + let b = Z.numbits (Z.of_int a) in + for _i = 1 to b do write_bit w true done; + write_bit w false; + write_bits w (Z.of_int a) (b - 1); (* Write a, not a-1 *) + write_bits w n a + end +``` + +**Known Bugs**: +1. ✅ Fixed: mat_encode for 0 wrote `true` instead of `false` +2. ✅ Fixed: Was writing `a-1` in `b-1` bits, changed to `a` based on C code analysis +3. 🔴 **CURRENT BUG**: Roundtrip test passes for 0,1,2,3 but fails at 4 + - Test shows: `FAIL: 4 -> 0` + - Need to compare against actual Urbit jam output to find discrepancy + +**Test Status** (`test/test_serial.ml`): +- Atom roundtrip: FAILS at value 4 +- Basic jam/cue: Not yet fully validated + +### 6. Python Urbit Interface (`urb.py`) + +**Status**: ✅ Updated to Python 3 + +Script to communicate with running Urbit ship via lens port. Can be used to get correct jam/cue test vectors. + +**Changes Made**: +- Shebang: `#!/usr/bin/env python3` +- String decoding: `.encode().decode('unicode_escape')` +- File write: mode `'wb'` for binary +- Logging: `warn` → `warning` +- Idiom: `'X' not in dict` + +**Usage** (needs running Urbit ship): +```bash +python3 urb.py -d "(jam 42)" # Get jam encoding of 42 +python3 urb.py -d "(cue 42)" # Decode jam encoding +``` + +## Project Structure + +``` +ocaml/ +├── dune-project # Project configuration (package: overe) +├── lib/ +│ ├── dune # Library stanza +│ ├── noun.ml # Noun type system +│ ├── nock.ml # Nock interpreter +│ ├── bitstream.ml # Bit-level I/O +│ └── serial.ml # Jam/cue serialization +├── test/ +│ ├── dune # Test executables +│ ├── test_nock.ml # Nock tests (all passing) +│ └── test_serial.ml # Jam/cue tests (failing at 4) +├── tmp/ # Temporary test files +├── urb.py # Python script to query Urbit ship +├── BENCHMARKS.md # Performance comparison results +└── CLAUDE.md # This file +``` + +## Next Steps + +### Immediate Priority: Fix Jam/Cue Bug + +1. **Get test vectors from Urbit**: + ```bash + python3 urb.py -d "(jam 0)" + python3 urb.py -d "(jam 1)" + python3 urb.py -d "(jam 2)" + python3 urb.py -d "(jam 3)" + python3 urb.py -d "(jam 4)" # This is where OCaml fails + ``` + +2. **Compare OCaml output**: + - Create test program to print hex output of jam for 0-10 + - Compare against Urbit's output byte-by-byte + - Identify exact discrepancy in mat encoding + +3. **Fix mat_decode**: + - Ensure it matches the corrected mat_encode formula + - Currently may be reading `a-1` when it should read `a` + +### Future Work + +1. **Complete Jam/Cue**: + - Fix current bugs + - Add comprehensive tests + - Benchmark performance vs C + +2. **Memory Management**: + - Implement noun hash-consing/deduplication + - Add reference counting or GC integration + - Study u3 memory system from C implementation + +3. **Jets**: + - Port jet dashboard system + - Implement critical jets for performance + - Add jet registration and lookup + +4. **Persistence**: + - Implement snapshot system + - Port event log handling + - Add checkpoint/restore functionality + +## Build Instructions + +```bash +# Build library and tests +dune build + +# Run Nock tests (all passing) +dune exec test/test_nock.exe + +# Run jam/cue tests (currently failing at 4) +dune exec test/test_serial.exe + +# Query running Urbit (requires ship running on port 12323 or with .http.ports) +python urb.py -d "(jam 42)" +``` + +## Technical Notes + +### OCaml Challenges Encountered + +1. **Operator precedence with refs**: `!(r.field)` doesn't work, need intermediate variable +2. **Zarith numbits**: Returns actual bit count, not bit index (e.g., 4 needs 3 bits, `numbits` returns 3) +3. **Mutable fields**: Need to use `mutable` keyword and assignment operator `:=` +4. **Bytes are mutable**: Use `Bytes.sub` to copy, not share + +### Mat Encoding Details from C Code + +From `pkg/noun/serial.c` line 126: +```c +// Write a_w (the width) in b_w-1 bits +u3r_chop(0, 0, b_w - 1, 0, &a_w, &bits); +``` + +This writes `a` (the width), NOT `a-1`. The OCaml implementation has been updated to match this. + +### References + +- Vere C implementation: `/home/y/code/urbit/vere/pkg/` +- Nock specification: https://urbit.org/docs/nock/reference/ +- Jam/cue format: See official Urbit docs (provided in conversation) +- u3 system architecture: See docs on king/serf, jets, persistence + +## Performance Philosophy + +Goal is not to beat C in raw speed, but to: +- Provide maintainable, type-safe implementation +- Enable experimentation with runtime features +- Achieve competitive performance on real workloads +- Leverage OCaml's strengths (allocation, GC, pattern matching) + +Current results show this is achievable: OCaml is within 2-5x of C, and actually faster for some allocation-heavy operations. diff --git a/ocaml/dune-project b/ocaml/dune-project index 4a478ee..ec486dc 100644 --- a/ocaml/dune-project +++ b/ocaml/dune-project @@ -1,2 +1,9 @@ -(lang dune 3.0) -(name vere_nock) +(lang dune 3.20) + +(name overe) + +(package + (name overe) + (allow_empty) + (synopsis "Urbit on OCaml") + (description "With OCaml 5.3+ and Eio Multicore!")) diff --git a/ocaml/bitstream.ml b/ocaml/lib/bitstream.ml index 73eda36..cfe094c 100644 --- a/ocaml/bitstream.ml +++ b/ocaml/lib/bitstream.ml @@ -20,14 +20,16 @@ let writer_create () = { } (** Grow the writer buffer if needed *) -let writer_ensure w bits_needed = - let bytes_needed = (w.bit_pos + bits_needed + 7) / 8 in - if bytes_needed > Bytes.length !(w.buf) then begin - let old_buf = !(w.buf) in - let new_size = max (bytes_needed * 2) (Bytes.length old_buf * 2) in - let new_buf = Bytes.create new_size in +let writer_ensure (w : writer) (bits_needed : int) : unit = + let bytes_needed : int = (w.bit_pos + bits_needed + 7) / 8 in + let buf_ref : bytes ref = w.buf in + let current_buf : bytes = !buf_ref in + if bytes_needed > (Bytes.length current_buf) then begin + let old_buf : bytes = current_buf in + let new_size : int = max (bytes_needed * 2) (Bytes.length old_buf * 2) in + let new_buf : bytes = Bytes.create new_size in Bytes.blit old_buf 0 new_buf 0 (Bytes.length old_buf); - w.buf := new_buf + buf_ref := new_buf end (** Write a single bit *) @@ -36,8 +38,9 @@ let write_bit w bit = let byte_pos = w.bit_pos / 8 in let bit_off = w.bit_pos mod 8 in if bit then begin - let old_byte = Bytes.get_uint8 !(w.buf) byte_pos in - Bytes.set_uint8 !(w.buf) byte_pos (old_byte lor (1 lsl bit_off)) + let buf = !(w.buf) in + let old_byte = Bytes.get_uint8 buf byte_pos in + Bytes.set_uint8 buf byte_pos (old_byte lor (1 lsl bit_off)) end; w.bit_pos <- w.bit_pos + 1 @@ -50,9 +53,11 @@ let write_bits w value nbits = done (** Get the final bytes from a writer *) -let writer_to_bytes w = +let writer_to_bytes (w : writer) : bytes = let byte_len = (w.bit_pos + 7) / 8 in - Bytes.sub !(w.buf) 0 byte_len + let buf_ref : bytes ref = w.buf in + let buf : bytes = !buf_ref in + Bytes.sub buf 0 byte_len (** Create a bitstream reader *) let reader_create buf = diff --git a/ocaml/lib/dune b/ocaml/lib/dune new file mode 100644 index 0000000..008de33 --- /dev/null +++ b/ocaml/lib/dune @@ -0,0 +1,4 @@ +(library + (name nock_lib) + (modules noun nock bitstream serial) + (libraries zarith)) diff --git a/ocaml/nock.ml b/ocaml/lib/nock.ml index 34065b8..34065b8 100644 --- a/ocaml/nock.ml +++ b/ocaml/lib/nock.ml diff --git a/ocaml/noun.ml b/ocaml/lib/noun.ml index c59ec80..c59ec80 100644 --- a/ocaml/noun.ml +++ b/ocaml/lib/noun.ml diff --git a/ocaml/serial.ml b/ocaml/lib/serial.ml index 039cd2f..9ededf1 100644 --- a/ocaml/serial.ml +++ b/ocaml/lib/serial.ml @@ -25,57 +25,53 @@ open Bitstream - For n > 0: - a = number of bits in n (met 0 n) - b = number of bits needed to represent a - - Write b 1-bits, then a 0-bit - - Write a-1 in b-1 bits + - Write b 0-bits, then one 1-bit + - Write a in b-1 bits - Write n in a bits *) -let rec mat_encode w n = +let mat_encode w n = if Z.equal n Z.zero then write_bit w true else begin let a = Z.numbits n in (* bit-width of n *) let b = Z.numbits (Z.of_int a) in (* bit-width of a *) - (* Write b 1-bits followed by a 0-bit *) + (* Write b 0-bits followed by one 1-bit *) for _i = 1 to b do - write_bit w true + write_bit w false done; - write_bit w false; + write_bit w true; - (* Write a-1 in b-1 bits *) - write_bits w (Z.of_int (a - 1)) (b - 1); + (* Write a in b-1 bits *) + write_bits w (Z.of_int a) (b - 1); (* Write n in a bits *) write_bits w n a end (** Mat-decode from bitstream, returns (value, bits_read) *) -let rec mat_decode r = +let mat_decode r = let start_pos = reader_pos r in - if not (read_bit r) then - (Z.zero, reader_pos r - start_pos) - else begin - (* Count leading 1 bits *) - let b = ref 1 in - while read_bit r do - b := !b + 1 - done; + (* Count leading 0 bits until we hit a 1 bit *) + let b = ref 0 in + while not (read_bit r) do + b := !b + 1 + done; - let b = !b in + let b = !b in - if b = 1 then - (* Special case: just "10" means 1 *) - (Z.one, reader_pos r - start_pos) - else begin - (* Read a-1 in b-1 bits *) - let a_minus_1 = read_bits r (b - 1) in - let a = Z.to_int (Z.add a_minus_1 Z.one) in + if b = 0 then + (* Just a single 1 bit means 0 *) + (Z.zero, reader_pos r - start_pos) + else begin + (* Read the length bits and compute a = 2^(b-1) + bits_read *) + let bits_val = read_bits r (b - 1) in + let a = Z.to_int (Z.add (Z.shift_left Z.one (b - 1)) bits_val) in - (* Read n in a bits *) - let n = read_bits r a in - (n, reader_pos r - start_pos) - end + (* Read n in a bits *) + let n = read_bits r a in + (n, reader_pos r - start_pos) end (** Jam: serialize a noun to bytes diff --git a/ocaml/bench_nock.ml b/ocaml/test/bench_nock.ml index a71b3da..a71b3da 100644 --- a/ocaml/bench_nock.ml +++ b/ocaml/test/bench_nock.ml diff --git a/ocaml/dune b/ocaml/test/dune index 3943b7b..b0ad51d 100644 --- a/ocaml/dune +++ b/ocaml/test/dune @@ -1,19 +1,23 @@ -(library - (name nock_lib) - (modules noun nock bitstream serial) - (libraries zarith)) - -(executable +(test (name test_nock) (modules test_nock) (libraries nock_lib zarith)) -(executable +(test (name test_serial) (modules test_serial) (libraries nock_lib zarith)) -(executable +(test + (name test_jam_debug) + (modules test_jam_debug) + (libraries nock_lib zarith)) + +(test (name bench_nock) (modules bench_nock) (libraries nock_lib zarith unix)) + +(executable + (name test_hex) + (libraries nock_lib)) diff --git a/ocaml/test/test_hex.ml b/ocaml/test/test_hex.ml new file mode 100644 index 0000000..a228682 --- /dev/null +++ b/ocaml/test/test_hex.ml @@ -0,0 +1,26 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + for i = 0 to 10 do + let n = atom i in + let jammed = jam n in + Printf.printf "jam(%d) = %s (%d bytes)\n" i (bytes_to_hex jammed) (Bytes.length jammed) + done + +let () = + Printf.printf "\nRound-trip tests:\n"; + for i = 0 to 50 do + let n = atom i in + let jammed = jam n in + let cued = cue jammed in + match cued with + | Atom a when Z.equal a (Z.of_int i) -> + Printf.printf "OK: %d\n" i + | Atom a -> + Printf.printf "FAIL: %d -> %s\n" i (Z.to_string a); + exit 1 + | Cell _ -> + Printf.printf "FAIL: %d -> cell\n" i; + exit 1 + done diff --git a/ocaml/test/test_jam_debug.ml b/ocaml/test/test_jam_debug.ml new file mode 100644 index 0000000..cad3ee9 --- /dev/null +++ b/ocaml/test/test_jam_debug.ml @@ -0,0 +1,20 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + Printf.printf "Testing jam encoding:\n"; + + (* Test 0 *) + let n0 = atom 0 in + let j0 = jam n0 in + Printf.printf "jam(0) = %s\n" (bytes_to_hex j0); + + (* Test 1 *) + let n1 = atom 1 in + let j1 = jam n1 in + Printf.printf "jam(1) = %s\n" (bytes_to_hex j1); + + (* Test 2 *) + let n2 = atom 2 in + let j2 = jam n2 in + Printf.printf "jam(2) = %s\n" (bytes_to_hex j2); diff --git a/ocaml/test_nock.ml b/ocaml/test/test_nock.ml index 73f2ce2..73f2ce2 100644 --- a/ocaml/test_nock.ml +++ b/ocaml/test/test_nock.ml diff --git a/ocaml/test_serial.ml b/ocaml/test/test_serial.ml index 69887c5..fca30f8 100644 --- a/ocaml/test_serial.ml +++ b/ocaml/test/test_serial.ml @@ -12,7 +12,7 @@ let assert_equal expected actual msg = end else Printf.printf "PASS: %s\n" msg -let assert_bytes_equal expected actual msg = +let _assert_bytes_equal expected actual msg = if expected <> actual then begin Printf.printf "FAIL: %s\n" msg; Printf.printf " Expected: %s\n" (bytes_to_hex expected); diff --git a/ocaml/tmp/get_jam_vectors.ml b/ocaml/tmp/get_jam_vectors.ml new file mode 100644 index 0000000..1266f21 --- /dev/null +++ b/ocaml/tmp/get_jam_vectors.ml @@ -0,0 +1,10 @@ +(* Quick test to see what our jam produces for small numbers *) +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + for i = 0 to 10 do + let n = atom i in + let jammed = jam n in + Printf.printf "%d: %s\n" i (bytes_to_hex jammed) + done diff --git a/ocaml/tmp/quick_test.ml b/ocaml/tmp/quick_test.ml new file mode 100644 index 0000000..9ce168a --- /dev/null +++ b/ocaml/tmp/quick_test.ml @@ -0,0 +1,6 @@ +let () = + Printf.printf "Z.numbits 0 = %d\n" (Z.numbits Z.zero); + Printf.printf "Z.numbits 1 = %d\n" (Z.numbits Z.one); + Printf.printf "Z.numbits 2 = %d\n" (Z.numbits (Z.of_int 2)); + Printf.printf "Z.numbits 42 = %d\n" (Z.numbits (Z.of_int 42)); + Printf.printf "Z.numbits 6 = %d\n" (Z.numbits (Z.of_int 6)); diff --git a/ocaml/tmp/test_hex.ml b/ocaml/tmp/test_hex.ml new file mode 100644 index 0000000..6504640 --- /dev/null +++ b/ocaml/tmp/test_hex.ml @@ -0,0 +1,9 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + for i = 0 to 10 do + let n = atom i in + let jammed = jam n in + Printf.printf "jam(%d) = %s (%d bytes)\n" i (bytes_to_hex jammed) (Bytes.length jammed) + done diff --git a/ocaml/tmp/test_jam.ml b/ocaml/tmp/test_jam.ml new file mode 100644 index 0000000..e616e3e --- /dev/null +++ b/ocaml/tmp/test_jam.ml @@ -0,0 +1,17 @@ +open Nock_lib.Noun +open Nock_lib.Serial + +let () = + let test n_int = + let n = atom n_int in + let jammed = jam n in + Printf.printf "jam(%d) = %s\n" n_int (bytes_to_hex jammed); + let cued = cue jammed in + match cued with + | Atom a -> Printf.printf "cue -> %s\n" (Z.to_string a) + | Cell _ -> Printf.printf "cue -> cell?!\n" + in + test 0; + test 1; + test 2; + test 42; diff --git a/ocaml/tmp/test_prec.ml b/ocaml/tmp/test_prec.ml new file mode 100644 index 0000000..2a9134c --- /dev/null +++ b/ocaml/tmp/test_prec.ml @@ -0,0 +1,5 @@ +type t = { x : int ref } +let r = { x = ref 42 } +let test () = + let y = !(r.x) in + y diff --git a/ocaml/tmp/test_ref.ml b/ocaml/tmp/test_ref.ml new file mode 100644 index 0000000..8bff6e7 --- /dev/null +++ b/ocaml/tmp/test_ref.ml @@ -0,0 +1,7 @@ +type writer = { + buf: bytes ref; + mutable bit_pos: int; +} + +let test w = + Bytes.length !(w.buf) diff --git a/ocaml/urb.py b/ocaml/urb.py new file mode 100644 index 0000000..904233e --- /dev/null +++ b/ocaml/urb.py @@ -0,0 +1,405 @@ + +#!/usr/bin/env python3 + +# TODO: +# - -h text +# - output to clay + +import os +import sys +import logging +import json +import requests +import argparse +import base64 + +logging.basicConfig( + level=logging.WARNING, + format='%(levelname)s %(funcName)s %(lineno)s - %(message)s', + stream=sys.stderr, +) + +logging.debug(['sys.argv', sys.argv]) + +def preprocess_args(old_args): + """Split out [] + + We use [] to delimit tuples. The following syntaxes are all equivalent: + + -- --open --open a b --close --open c d --close --close + -- [ [ a b ] [ c d ] ] + -- [ [a b] [c d] ] + -- [[a b] [c d]] + -- etc + + We don't allow [[a b][c d]]. The rule is that we accept zero or more [ at + the beginning of a token and zero or more ] at the end of a token. + + In this function, we convert all legal syntaxes to as if they were entered + as in the first example above. This allows them to be parsed by a + relatively sane argparse system. + """ + + if old_args == []: + return [] + if old_args[0][0] == '[': + if len(old_args[0]) > 1: + r = preprocess_args([old_args[0][1:]] + old_args[1:]) + return ['--open'] + r + else: + return ['--open'] + preprocess_args(old_args[1:]) + if old_args[0][-1] == ']': + if len(old_args[0]) > 1: + return preprocess_args([old_args[0][:-1]]) + \ + ['--close'] + preprocess_args(old_args[1:]) + else: + return ['--close'] + preprocess_args(old_args[1:]) + return [old_args[0]] + preprocess_args(old_args[1:]) + +args = preprocess_args(sys.argv[1:]) + +logging.debug(['preprocessed', args]) + + +class sourceAction(argparse.Action): + """Handle source flag. + + This is all the 'primitive' source flags -- no nesting, no tuple stuff, + just one flag with one argument. + + Besides the normal argparse.Action arguments, we require the following named + argument: + + -- which='foo'. Since all source flags use res.source, this specifies the + key of the entry for this flag. + """ + + def __init__(self, option_strings, dest, **kwargs): + self.which = kwargs['which'] + del kwargs['which'] + super(sourceAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + logging.debug('%r %r' % (new_value, option_string)) + logging.debug('source %s' % res.source) + logging.debug('level %s' % res.level) + + if res.source is not None: + def help(source, level): + logging.debug('source %s' % source) + logging.debug('level %s' % level) + if not isinstance(source, list): + raise ValueError('Already specified one source') + elif level == 0: + msg = 'Already specified a source %r %s' % (source, level) + raise ValueError(msg) + elif level == 1: + return source + [self.construct_value(new_value)] + else: + return source[:-1] + [help(source[-1], level - 1)] + res.source = help(res.source, res.level) + else: + res.source = \ + self.construct_value(new_value) + + logging.debug(res.source) + + def construct_value(self, new_value): + if new_value == '-': + return self.construct_value(''.join(sys.stdin.readlines())) + elif new_value[0:2] == '@@': + with open(new_value[2:]) as f: + content = f.readlines() + return self.construct_value(''.join(content)) + else: + return {self.which: new_value} + +class transformerAction(argparse.Action): + """Handle transformer flag. + + This is all the tranformer flags. Each flag takes one argument and + transforms the previous source. + + Besides the normal argparse.Action arguments, we require the following named + arguments: + + -- which='foo'. Since all source flags use res.source, this specifies the + key of the entry for this flag. + + -- nesting='foo'. The key for the argument is 'foo'. + """ + + def __init__(self, option_strings, dest, **kwargs): + self.which = kwargs['which'] + self.nesting = kwargs['nesting'] + del kwargs['which'] + del kwargs['nesting'] + super(transformerAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + logging.debug('%r %r' % (new_value, option_string)) + logging.debug('source %s' % res.source) + logging.debug('level %s' % res.level) + + if res.source is None: + raise ValueError('Need source before transformer') + else: + def help(source, level): + logging.debug('source %s' % source) + logging.debug('level %s' % level) + if level == 0 or level is None: + res = {self.nesting: new_value, "next": source} + return {self.which: res} + elif not isinstance(source, list): + raise ValueError('Already specified one source') + else: + return source[:-1] + [help(source[-1], level - 1)] + res.source = help(res.source, res.level) + + logging.debug(res.source) + +class openAction(argparse.Action): + """Handle open tuple. + + Opens a source tuple. Can only exist in the same places as any other + source. + """ + + def __init__(self, option_strings, dest, **kwargs): + super(openAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + if res.level is None: + res.level = 0 + + logging.debug('source %s' % res.source) + logging.debug('level %s' % res.level) + + if res.source is None: + res.source = [] + res.level = 1 + return + + def help(source, level): + if not isinstance(source, list): + raise ValueError('Starting tuple after source is finished') + if level == 1: + return (source + [[]], level + 1) + elif level > 1: + rsource, rlevel = help(source[-1], level - 1) + return (source[:-1] + [rsource], rlevel + 1) + else: + msg = 'opening strange level %r %s' % (source, level) + raise ValueError(msg) + + res.source, res.level = help(res.source, res.level) + +class closeAction(argparse.Action): + """Handle close tuple. + + Closes a source tuple. Can only exist when a tuple is already open. + """ + + def __init__(self, option_strings, dest, **kwargs): + super(closeAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + if res.level is None: + raise ValueError('Ending tuple before starting one') + + logging.debug('level %s' % res.level) + + if res.source is None: + raise ValueError('Ending tuple with empty source') + + def help(source, level): + if not isinstance(source, list): + raise ValueError('Ending tuple that isn\'t a tuple') + if level == 1: + return level - 1 + elif level > 1: + return help(source[-1], level - 1) + 1 + else: + msg = 'closing strange level %r %s' % (source, level) + raise ValueError(msg) + + res.level = help(res.source, res.level) + + logging.debug('level %s' % res.level) + +class sinkAction(argparse.Action): + """Handle sink flag. + + We expect only one sinkAction to ever be executed. We recommend using + mutually_exclusive_group's. + + Besides the normal action flags, we require the following named argument: + + -- which='foo'. Since all sink flags use res.sink, this specifies the key + of the entry for this flag. + """ + + def __init__(self, option_strings, dest, **kwargs): + self.which = kwargs['which'] + del kwargs['which'] + super(sinkAction, self).__init__(option_strings, dest, **kwargs) + + def __call__(self, parser, res, new_value, option_string): + res.sink = self.construct_value(new_value) + + logging.debug(res.sink) + + def construct_value(self, new_value): + if self.which == 'output-file': + return {self.which: new_value[::-1].replace('.','/',1)[::-1]} + elif self.which == 'output-pill': + return {self.which: new_value[::-1].replace('.','/',1)[::-1]} + else: + return {self.which: new_value} + +class FullPaths(argparse.Action): + """Expand user- and relative-paths""" + def __call__(self, parser, namespace, values, option_string=None): + if values != None: + path = os.path.abspath(os.path.expanduser(values)) + setattr(namespace, self.dest, path) + +def is_dir(dirname): + """Checks if a path is an actual directory""" + if not os.path.isdir(dirname): + msg = "{0} is not a directory".format(dirname) + raise argparse.ArgumentTypeError(msg) + else: + return dirname + +def get_args(): + """Get CLI arguments and options""" + parser = argparse.ArgumentParser(description="""do something""") + + parser.add_argument('alignments', help="The folder of alignments", + action=FullPaths, type=is_dir) + +parser = argparse.ArgumentParser(description='headless urbit') +parser.add_argument('pier', nargs='?', + help='target urbit directory', + action=FullPaths, type=is_dir) +parser.add_argument('-d', '--dojo', which='dojo', + metavar='command-line', + help='run dojo command', + action=sourceAction, dest='source') +parser.add_argument('-D', '--data', which='data', + metavar='text', + help='literal text data', + action=sourceAction) +parser.add_argument('-c', '--clay', which='clay', + metavar='clay-path', + help='load data from clay', + action=sourceAction) +parser.add_argument('-u', '--url', which='url', + metavar='url', + help='pull data from url', + action=sourceAction) +parser.add_argument('-a', '--api', which='api', + metavar='command', + help='get data from api', + action=sourceAction) +parser.add_argument('-g', '--get-api', which='get-api', + metavar='api:endpoint', + help='get data from api endpoint', + action=sourceAction) +parser.add_argument('-l', '--listen-api', which='listen-api', + metavar='api:event', + help='listen to event from api', + action=sourceAction) +parser.add_argument('-m', '--mark', which='as', + metavar='mark', + help='transform a source to another mark', + nesting='mark', + action=transformerAction) +parser.add_argument('-H', '--hoon', which='hoon', + metavar='code', + help='transform a source by hoon code', + nesting='code', + action=transformerAction) +parser.add_argument('--open', + nargs=0, + help='start tuple', + action=openAction, dest='level') +parser.add_argument('--close', + nargs=0, + help='stop tuple', + action=closeAction) + +sinks = parser.add_mutually_exclusive_group() +sinks.add_argument('-s', '--stdout', const={'stdout': None}, + default={'stdout': None}, + action='store_const', dest='sink') +sinks.add_argument('-f', '--output-file', which='output-file', + metavar='path', + action=sinkAction) +sinks.add_argument('-P', '--output-pill', which='output-pill', + metavar='path', + action=sinkAction) +sinks.add_argument('-C', '--output-clay', which='output-clay', + metavar='clay-path', + action=sinkAction) +sinks.add_argument('-U', '--output-url', which='url', + metavar='url', + action=sinkAction) +sinks.add_argument('-t', '--to-api', which='to-api', + metavar='api-command', + action=sinkAction) +sinks.add_argument('-n', '--send-api', which='send-api', + metavar='api:endpoint', + action=sinkAction) +sinks.add_argument('-x', '--command', which='command', + metavar='command', + action=sinkAction) +sinks.add_argument('-p', '--app', which='app', + metavar='app', + action=sinkAction) + + +args = parser.parse_args(args) + + +if args.source is None: + args.source = {"data": ''.join(sys.stdin)} + +payload = {"source": args.source, "sink": args.sink} +logging.debug(['payload', json.dumps(payload)]) + +PORT = "" + +if args.pier is None: + PORT = os.environ.get('LENS_PORT', 12323) + # if 'LENS_PORT' not in os.environ: + # logging.warning("No pier or port specified, looking on port " + str(PORT)) +else: + with open(os.path.join(args.pier, ".http.ports")) as ports: + for line in ports: + if -1 != line.find("loopback"): + PORT = line.split()[0] + logging.info("Found port %s" % PORT) + break + + if not PORT: + logging.error("Error reading port from .http.ports file") + sys.exit(1) + +url = "http://localhost:%s" % PORT + +r = requests.post(url, data=json.dumps(payload)) + +if r.text[0] == '"': + # Python 3: decode unicode escape sequences + print(r.text[1:-1].encode().decode('unicode_escape')) +elif r.text[0] == '{': + # print(r.text) + json_data = json.loads(r.text) + logging.debug(json_data) + with open(json_data['file'][:0:-1].replace('/','.',1)[::-1], 'wb') as f: + f.write(base64.b64decode(json_data['data'])) +else: + logging.warning("unrecognized response") + print(r.text) |