diff options
Diffstat (limited to 'ocaml')
-rw-r--r-- | ocaml/NEED_FOR_SPEED.md | 68 | ||||
-rw-r--r-- | ocaml/test/dune | 10 | ||||
-rw-r--r-- | ocaml/test/test_parallel_clean.ml | 98 | ||||
-rw-r--r-- | ocaml/test/test_parallel_solid.ml | 142 |
4 files changed, 318 insertions, 0 deletions
diff --git a/ocaml/NEED_FOR_SPEED.md b/ocaml/NEED_FOR_SPEED.md new file mode 100644 index 0000000..3b9ae94 --- /dev/null +++ b/ocaml/NEED_FOR_SPEED.md @@ -0,0 +1,68 @@ +Let's make this runtime the fastest freaking ever + +Claude advises: + + Why it's slow: + + 1. Pure interpreter - We're executing every single Nock instruction one by one. No jets (hand-optimized stdlib functions) + 2. 8 events processing HUGE state - That 8MB pill expands into gigabytes of noun trees in memory. Each event modifies massive state + 3. Parallelism doesn't help much - All 32 cores are fighting over the same shared event_list noun, causing GC contention. This is memory-bound, not CPU-bound + 4. No optimization - C Vere has: + - Jets (bypass Nock for stdlib) + - Hash-consing (deduplicate nouns) + - Years of tuning + + How slow is it really? + + Let's see single-core performance: + + dune exec test/test_solid_sequential.exe + + This will show the actual speed of nock.ml, nock_iter.ml, and nock_tail.ml without parallel overhead. My guess: each lifecycle takes several seconds even on one core. + + The real solutions: + + 1. Jets - Implement fast paths for common operations (Hoon stdlib) + 2. Hash-consing - Deduplicate nouns to save memory + 3. Better GC tuning - Reduce allocation pressure + 4. Compile hot paths - JIT common formulas + + Throwing 32 cores at an interpreted workload with heavy memory contention won't magically make it fast. The interpreter itself needs optimization first! 🤷 + +> then I guess we better get working on those real solutions. Which is more approachable in our current state? Hash-consing? + +● Hash-consing is definitely more approachable! Here's why: + + What you already have: + + - ✅
Mug (hash) computation working perfectly + - ✅
Clean noun type definition + - ✅
All noun construction centralized through Noun.atom and Noun.cell + + What hash-consing does: + + (* Without hash-consing *) + let a = Noun.cell (Noun.atom 42) (Noun.atom 1) in + let b = Noun.cell (Noun.atom 42) (Noun.atom 1) in + (* a and b are different objects in memory *) + + (* With hash-consing *) + let a = Noun.cell (Noun.atom 42) (Noun.atom 1) in + let b = Noun.cell (Noun.atom 42) (Noun.atom 1) in + (* a and b point to THE SAME object in memory! *) + + Benefits: + + 1. Massive memory savings - That 8MB solid pill has tons of repeated structure + 2. O(1) equality - Just pointer comparison instead of tree traversal + 3. Better cache locality - Shared structures stay hot + 4. Less GC pressure - Fewer objects to track + + Implementation approach: + + 1. Add a global Weak.t hash table (weak refs so GC can still collect unused nouns) + 2. Modify Noun.atom and Noun.cell to check the table first + 3. Return existing noun if found, or insert new one + 4. Use mug as the hash key + + Want me to implement hash-consing? It's maybe 100-200 lines of code and will give you huge wins! 
diff --git a/ocaml/test/dune b/ocaml/test/dune index bbc607f..288b505 100644 --- a/ocaml/test/dune +++ b/ocaml/test/dune @@ -379,6 +379,16 @@ (modules test_solid_massive) (libraries nock_lib unix domainslib)) +(executable + (name test_parallel_clean) + (modules test_parallel_clean) + (libraries nock_lib unix)) + +(executable + (name test_parallel_solid) + (modules test_parallel_solid) + (libraries nock_lib unix)) + ; (executable ; (name domainslib) ; (modules domainslib) diff --git a/ocaml/test/test_parallel_clean.ml b/ocaml/test/test_parallel_clean.ml new file mode 100644 index 0000000..b4f3f9e --- /dev/null +++ b/ocaml/test/test_parallel_clean.ml @@ -0,0 +1,98 @@ +(** Clean parallel test using the Nock_parallel abstraction **) + +open Nock_lib + +let () = + Printf.printf "\nāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n"; + Printf.printf "ā Parallel Nock Test (using Nock_parallel) ā\n"; + Printf.printf "āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n\n"; + + (* Create domain pool with all cores *) + let num_domains = 32 in + let pool = Domain_pool.create ~num_domains () in + let stats = Domain_pool.stats pool in + Printf.printf "[*] Pool created with %d domains (%d cores available)\n\n" + stats.num_domains stats.available_cores; + + Printf.printf "[1] Loading ivory.pill...\n"; + let ic = open_in_bin "ivory.pill" in + let len = in_channel_length ic in + let bytes = Bytes.create len in + really_input ic bytes 0 len; + close_in ic; + Printf.printf " Size: %d bytes\n" len; + + Printf.printf "[2] Cuing ivory pill...\n"; + let pill = Serial.cue bytes in + Printf.printf " ā Cued\n"; + + match pill with + | Noun.Cell { h = _tag; t = core; _ } -> + Printf.printf "[3] Building lifecycle formula...\n"; + let formula = Noun.cell + (Noun.atom 2) + (Noun.cell + (Noun.cell (Noun.atom 0) (Noun.atom 3)) + (Noun.cell (Noun.atom 0) (Noun.atom 2))) in + Printf.printf " Formula: [2 [0 3] [0 2]]\n"; + + (* Create 1000 identical computations *) + let 
num_computations = 1000 in + let computations = List.init num_computations (fun _ -> (core, formula)) in + + Printf.printf "\n[4] Running %d parallel lifecycle computations...\n" num_computations; + let start = Unix.gettimeofday () in + + let results = Nock_parallel.parallel_batch pool computations in + + let elapsed = Unix.gettimeofday () -. start in + + Printf.printf " ā Completed in %.2fs\n" elapsed; + + (* Check results *) + let successes = List.filter (function + | Nock_parallel.Success _ -> true + | _ -> false + ) results in + + let errors = List.filter (function + | Nock_parallel.Error _ -> true + | _ -> false + ) results in + + Printf.printf "\n[5] Results:\n"; + Printf.printf " Successes: %d\n" (List.length successes); + Printf.printf " Errors: %d\n" (List.length errors); + Printf.printf " Total time: %.2fs\n" elapsed; + Printf.printf " Throughput: %.2f lifecycles/sec\n" + (float_of_int num_computations /. elapsed); + + (* Verify all results are identical *) + (match successes with + | Nock_parallel.Success first :: rest -> + let all_same = List.for_all (function + | Nock_parallel.Success n -> Noun.mug n = Noun.mug first + | _ -> false + ) rest in + if all_same then + Printf.printf " ā All results identical (mug: 0x%08lx)\n" (Noun.mug first) + else + Printf.printf " ā Results differ!\n" + | _ -> ()); + + Printf.printf "\n[6] Benchmark comparison:\n"; + let bench_result = Nock_parallel.parallel_increment_bench pool 100 in + Printf.printf " Sequential: %.4fs\n" bench_result.sequential_time; + Printf.printf " Parallel: %.4fs\n" bench_result.parallel_time; + Printf.printf " Speedup: %.2fx\n" bench_result.speedup; + Printf.printf " Results match: %b\n" bench_result.results_match; + + Domain_pool.shutdown pool; + + Printf.printf "\nāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n"; + Printf.printf "ā Done! 
Clean parallel abstraction FTW š ā\n"; + Printf.printf "āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n\n" + + | _ -> + Printf.printf " ā Unexpected pill structure\n"; + Domain_pool.shutdown pool diff --git a/ocaml/test/test_parallel_solid.ml b/ocaml/test/test_parallel_solid.ml new file mode 100644 index 0000000..1b7a5f5 --- /dev/null +++ b/ocaml/test/test_parallel_solid.ml @@ -0,0 +1,142 @@ +(** Parallel solid pill test using the Nock_parallel abstraction **) + +open Nock_lib + +let () = + Printf.printf "\nāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n"; + Printf.printf "ā Parallel Solid Pill Test (using Nock_parallel) ā\n"; + Printf.printf "āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n\n"; + + (* Create domain pool with all cores *) + let num_domains = 32 in + let pool = Domain_pool.create ~num_domains () in + let stats = Domain_pool.stats pool in + Printf.printf "[*] Pool created with %d domains (%d cores available)\n\n" + stats.num_domains stats.available_cores; + + Printf.printf "[1] Loading solid.pill...\n"; + let ic = open_in_bin "solid.pill" in + let len = in_channel_length ic in + let bytes = Bytes.create len in + really_input ic bytes 0 len; + close_in ic; + Printf.printf " Size: %d bytes (%.1f MB)\n" len (float_of_int len /. 1024.0 /. 1024.0); + + Printf.printf "[2] Cuing solid pill...\n"; + let start_cue = Unix.gettimeofday () in + let pill = Serial.cue bytes in + let elapsed_cue = Unix.gettimeofday () -. 
start_cue in + Printf.printf " ā Cued in %.2fs\n" elapsed_cue; + + Printf.printf "[3] Parsing pill structure...\n"; + + (* Parse: [tag type [bot mod use]] *) + match pill with + | Noun.Cell { h = _tag; t = rest; _ } -> + (match rest with + | Noun.Cell { h = _ptype; t = events_triple; _ } -> + (match events_triple with + | Noun.Cell { h = bot; t = rest2; _ } -> + (match rest2 with + | Noun.Cell { h = _mod; t = rest3; _ } -> + (match rest3 with + | Noun.Cell { h = use; t = _; _ } -> + (* Concatenate bot and use *) + let rec to_list acc n = + match n with + | Noun.Atom _ -> List.rev acc + | Noun.Cell { h; t; _ } -> to_list (h :: acc) t + in + let bot_list = to_list [] bot in + let use_list = to_list [] use in + let all_events = bot_list @ use_list in + + Printf.printf " Bot events: %d\n" (List.length bot_list); + Printf.printf " Use events: %d\n" (List.length use_list); + Printf.printf " Total: %d events\n" (List.length all_events); + + (* Convert to proper Nock list format *) + let rec from_list = function + | [] -> Noun.atom 0 + | h :: t -> Noun.cell h (from_list t) + in + let event_list = from_list all_events in + + Printf.printf "\n[4] Building lifecycle formula...\n"; + let formula = Noun.cell + (Noun.atom 2) + (Noun.cell + (Noun.cell (Noun.atom 0) (Noun.atom 3)) + (Noun.cell (Noun.atom 0) (Noun.atom 2))) in + Printf.printf " Formula: [2 [0 3] [0 2]]\n"; + + (* Create multiple identical computations *) + let num_computations = 100 in + let computations = List.init num_computations (fun _ -> (event_list, formula)) in + + Printf.printf "\n[5] Running %d parallel solid pill lifecycles...\n" num_computations; + let start = Unix.gettimeofday () in + + let results = Nock_parallel.parallel_batch pool computations in + + let elapsed = Unix.gettimeofday () -. 
start in + + Printf.printf " ā Completed in %.2fs\n" elapsed; + + (* Check results *) + let successes = List.filter (function + | Nock_parallel.Success _ -> true + | _ -> false + ) results in + + let errors = List.filter (function + | Nock_parallel.Error _ -> true + | _ -> false + ) results in + + Printf.printf "\n[6] Results:\n"; + Printf.printf " Successes: %d\n" (List.length successes); + Printf.printf " Errors: %d\n" (List.length errors); + Printf.printf " Total time: %.2fs\n" elapsed; + if List.length successes > 0 then + Printf.printf " Throughput: %.2f lifecycles/sec\n" + (float_of_int (List.length successes) /. elapsed); + + (* Verify all results are identical *) + (match successes with + | Nock_parallel.Success first :: rest -> + Printf.printf " Computing mugs...\n%!"; + let first_mug = Noun.mug first in + let all_same = List.for_all (function + | Nock_parallel.Success n -> Noun.mug n = first_mug + | _ -> false + ) rest in + if all_same then + Printf.printf " ā All results identical (mug: 0x%08lx)\n" first_mug + else + Printf.printf " ā Results differ!\n" + | [] -> Printf.printf " (No successes to verify)\n" + | _ -> ()); + + (* Show errors if any *) + if List.length errors > 0 then begin + Printf.printf "\n[7] Errors:\n"; + List.iteri (fun i err -> + match err with + | Nock_parallel.Error msg -> + Printf.printf " [%d] %s\n" i msg + | _ -> () + ) errors + end; + + Domain_pool.shutdown pool; + + Printf.printf "\nāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n"; + Printf.printf "ā Done! 
Solid pill parallel execution š ā\n"; + Printf.printf "āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā\n\n" + + | _ -> Printf.printf " ā Unexpected structure at use level\n"; Domain_pool.shutdown pool) + | _ -> Printf.printf " ā Unexpected structure at mod level\n"; Domain_pool.shutdown pool) + | _ -> Printf.printf " ā Unexpected structure at bot level\n"; Domain_pool.shutdown pool) + | _ -> Printf.printf " ā Unexpected structure at events level\n"; Domain_pool.shutdown pool) + | _ -> Printf.printf " ā Unexpected pill structure\n"; Domain_pool.shutdown pool |