summaryrefslogtreecommitdiff
path: root/vere/pkg/noun/jets/i
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-10-05 21:56:51 +0700
committerpolwex <polwex@sortug.com>2025-10-05 21:56:51 +0700
commitfcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/pkg/noun/jets/i
claude is gud
Diffstat (limited to 'vere/pkg/noun/jets/i')
-rw-r--r--vere/pkg/noun/jets/i/lagoon.c3315
1 files changed, 3315 insertions, 0 deletions
diff --git a/vere/pkg/noun/jets/i/lagoon.c b/vere/pkg/noun/jets/i/lagoon.c
new file mode 100644
index 0000000..db7088b
--- /dev/null
+++ b/vere/pkg/noun/jets/i/lagoon.c
@@ -0,0 +1,3315 @@
+/// @file
+
+#include "jets/q.h"
+#include "jets/w.h"
+
+#include "c3/motes.h"
+
+#include "noun.h"
+#include "softfloat.h"
+#include "softblas.h"
+
+#include <math.h> // for pow()
+#include <stdio.h>
+
+#define f16_ceil(a) f16_roundToInt( a, softfloat_round_max, false )
+#define f32_ceil(a) f32_roundToInt( a, softfloat_round_max, false )
+#define f64_ceil(a) f64_roundToInt( a, softfloat_round_max, false )
+#define f128M_ceil(a, b) f128M_roundToInt( a, softfloat_round_max, false, b )
+
+ union half {
+ float16_t h;
+ c3_w c;
+ };
+
+ union sing {
+ float32_t s;
+ c3_w c;
+ };
+
+ union doub {
+ float64_t d;
+ c3_d c;
+ };
+
+ union quad {
+ float128_t q;
+ c3_d c[2];
+ };
+
+ // $?(%n %u %d %z %a)
+ static inline void
+ _set_rounding(c3_w a)
+ {
+ // We could use SoftBLAS set_rounding() to set the SoftFloat
+ // mode as well, but it's more explicit to do it here since
+ // we may use SoftFloat in any given Lagoon jet and we want
+ // you, dear developer, to see it set here.
+ switch ( a )
+ {
+ default:
+ u3m_bail(c3__fail);
+ break;
+ // %n - near
+ case c3__n:
+ softfloat_roundingMode = softfloat_round_near_even;
+ softblas_roundingMode = 'n';
+ break;
+ // %z - zero
+ case c3__z:
+ softfloat_roundingMode = softfloat_round_minMag;
+ softblas_roundingMode = 'z';
+ break;
+ // %u - up
+ case c3__u:
+ softfloat_roundingMode = softfloat_round_max;
+ softblas_roundingMode = 'u';
+ break;
+ // %d - down
+ case c3__d:
+ softfloat_roundingMode = softfloat_round_min;
+ softblas_roundingMode = 'd';
+ break;
+ // %a - away
+ case c3__a:
+ softfloat_roundingMode = softfloat_round_near_maxMag;
+ softblas_roundingMode = 'a';
+ break;
+ }
+ }
+
+/* length of shape = x * y * z * w * ...
+*/
+ static inline c3_d _get_length(u3_noun shape)
+ {
+ c3_d len = 1;
+ while (u3_nul != shape) {
+ len = len * u3x_atom(u3h(shape));
+ shape = u3t(shape);
+ }
+ return len;
+ }
+
+/* get dims from shape as array [x y z w ...]
+*/
+ static inline c3_d* _get_dims(u3_noun shape)
+ {
+ u3_atom len = u3qb_lent(shape);
+ c3_d len_d = u3r_chub(0, len);
+ c3_d* dims = (c3_d*)u3a_malloc(len_d*sizeof(c3_d));
+ for (c3_d i = 0; i < len_d; i++) {
+ dims[i] = u3r_chub(0, u3x_atom(u3h(shape)));
+ shape = u3t(shape);
+ }
+ u3z(len);
+ return dims;
+ }
+
+/* check consistency of array shape and bloq size
+ |= =ray
+ ^- ?
+ .= (roll shape.meta.ray ^mul)
+ (dec (met bloq.meta.ray data.ray))
+*/
+ static inline c3_o _check(u3_noun ray)
+ {
+ // Calculate expected size.
+ u3_atom shp = u3h(u3h(ray)); // (reported) shape of ray, +4
+ u3_atom blq = u3h(u3t(u3h(ray))); // block size of ray, +10
+ u3_atom sin = _get_length(shp); // calculated length of ray
+
+ // Calculate actual size.
+ u3_atom len = u3r_met(blq, u3t(ray)); // length of ray
+ u3_atom dex = u3qa_dec(len); // decrement length b/c of pinned 1
+
+ return __(sin == dex);
+ }
+
+/* add - axpy = 1*x+y
+*/
+ u3_noun
+ u3qi_la_add_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq
+ )
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
+ break;
+
+ case 5:
+ saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
+ break;
+
+ case 6:
+ daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
+ break;
+
+ case 7:
+ qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1);
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* sub - axpy = -1*y+x
+*/
+ u3_noun
+ u3qi_la_sub_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq
+ )
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
+ break;
+
+ case 5:
+ saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
+ break;
+
+ case 6:
+ daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
+ break;
+
+ case 7:
+ qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1);
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+
+/* mul - x.*y
+ elementwise multiplication
+*/
+ u3_noun
+ u3qi_la_mul_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float16_t*)y_bytes)[i] = f16_mul(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]);
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float32_t*)y_bytes)[i] = f32_mul(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]);
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float64_t*)y_bytes)[i] = f64_mul(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]);
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ f128M_mul(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i]));
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* div - x/y
+ elementwise division
+*/
+ u3_noun
+ u3qi_la_div_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float16_t*)y_bytes)[i] = f16_div(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]);
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float32_t*)y_bytes)[i] = f32_div(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]);
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float64_t*)y_bytes)[i] = f64_div(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]);
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i]));
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* mod - x % y = x - r*floor(x/r)
+ remainder after division
+*/
+ u3_noun
+ u3qi_la_mod_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ float16_t y_val16 = ((float16_t*)y_bytes)[i];
+ // Perform division x/n
+ float16_t div_result16 = f16_div(x_val16, y_val16);
+ // Compute floor of the division result
+ c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false);
+ float16_t floor_float16 = i64_to_f16(floor_result16);
+ // Multiply n by floor(x/n)
+ float16_t mult_result16 = f16_mul(y_val16, floor_float16);
+ // Compute remainder: x - n * floor(x/n)
+ ((float16_t*)y_bytes)[i] = f16_sub(x_val16, mult_result16);
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ float32_t y_val32 = ((float32_t*)y_bytes)[i];
+ // Perform division x/n
+ float32_t div_result32 = f32_div(x_val32, y_val32);
+ // Compute floor of the division result
+ c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false);
+ float32_t floor_float32 = i64_to_f32(floor_result32);
+ // Multiply n by floor(x/n)
+ float32_t mult_result32 = f32_mul(y_val32, floor_float32);
+ // Compute remainder: x - n * floor(x/n)
+ ((float32_t*)y_bytes)[i] = f32_sub(x_val32, mult_result32);
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ float64_t y_val64 = ((float64_t*)y_bytes)[i];
+ // Perform division x/n
+ float64_t div_result64 = f64_div(x_val64, y_val64);
+ // Compute floor of the division result
+ c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false);
+ float64_t floor_float64 = i64_to_f64(floor_result64);
+ // Multiply n by floor(x/n)
+ float64_t mult_result64 = f64_mul(y_val64, floor_float64);
+ // Compute remainder: x - n * floor(x/n)
+ ((float64_t*)y_bytes)[i] = f64_sub(x_val64, mult_result64);
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ float128_t y_val128 = ((float128_t*)y_bytes)[i];
+ // Perform division x/n
+ float128_t div_result128;
+ f128M_div((float128_t*)&x_val128, (float128_t*)&y_val128, (float128_t*)&div_result128);
+ // Compute floor of the division result
+ c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false);
+ float128_t floor_float128;
+ i64_to_f128M(floor_result128, &floor_float128);
+ // Multiply n by floor(x/n)
+ float128_t mult_result128;
+ f128M_mul(((float128_t*)&y_val128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128));
+ // Compute remainder: x - n * floor(x/n)
+ f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)y_bytes)[i]));
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* cumsum - x[0] + x[1] + ... x[n]
+*/
+ u3_noun
+ u3qi_la_cumsum_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by for range)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+ u3_noun r_data;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t sum16[2];
+ sum16[0] = (float16_t){SB_REAL16_ZERO};
+ for (c3_d i = len_x; i > 0; i--) {
+ sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i-1]);
+ }
+ sum16[1].v = 0x1;
+ r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)sum16);
+ break;}
+
+ case 5: {
+ float32_t sum32[2];
+ sum32[0] = (float32_t){SB_REAL32_ZERO};
+ for (c3_d i = len_x; i > 0; i--) {
+ sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i-1]);
+ }
+ sum32[1].v = 0x1;
+ r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)sum32);
+ break;}
+
+ case 6: {
+ float64_t sum64[2];
+ sum64[0] = (float64_t){SB_REAL64_ZERO};
+ for (c3_d i = len_x; i > 0; i--) {
+ sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i-1]);
+ }
+ sum64[1].v = 0x1;
+ r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)sum64);
+ break;}
+
+ case 7: {
+ float128_t sum128[2];
+ sum128[0] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+ for (c3_d i = len_x; i > 0; i--) {
+ f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i-1]), &(sum128[0]));
+ }
+ sum128[1] = (float128_t){0x1, 0x0};
+ r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128);
+ break;}
+ }
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* argmin - argmin(x)
+*/
+ u3_noun
+ u3qi_la_argmin_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1, which doesn't matter here)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ c3_w min_idx = 0;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t min_val16 = ((float16_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f16_lt(((float16_t*)x_bytes)[i], min_val16)) {
+ min_val16 = ((float16_t*)x_bytes)[i];
+ min_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+
+ case 5: {
+ float32_t min_val32 = ((float32_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f32_lt(((float32_t*)x_bytes)[i], min_val32)) {
+ min_val32 = ((float32_t*)x_bytes)[i];
+ min_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+
+ case 6: {
+ float64_t min_val64 = ((float64_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f64_lt(((float64_t*)x_bytes)[i], min_val64)) {
+ min_val64 = ((float64_t*)x_bytes)[i];
+ min_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+
+ case 7: {
+ float128_t min_val128 = ((float128_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f128M_lt(&(((float128_t*)x_bytes)[i]), &min_val128)) {
+ min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]);
+ min_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+ }
+
+ u3_noun r_data = u3i_chub(min_idx);
+
+ return r_data;
+ }
+
+/* argmax - argmax(x)
+*/
+ u3_noun
+ u3qi_la_argmax_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1, which doesn't matter here)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ c3_w max_idx = 0;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t max_val16 = ((float16_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f16_gt(((float16_t*)x_bytes)[i], max_val16)) {
+ max_val16 = ((float16_t*)x_bytes)[i];
+ max_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+
+ case 5: {
+ float32_t max_val32 = ((float32_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f32_gt(((float32_t*)x_bytes)[i], max_val32)) {
+ max_val32 = ((float32_t*)x_bytes)[i];
+ max_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+
+ case 6: {
+ float64_t max_val64 = ((float64_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f64_gt(((float64_t*)x_bytes)[i], max_val64)) {
+ max_val64 = ((float64_t*)x_bytes)[i];
+ max_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+
+ case 7: {
+ float128_t max_val128 = ((float128_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ if(f128M_gt(&(((float128_t*)x_bytes)[i]), &max_val128)) {
+ max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]);
+ max_idx = (len_x - i - 1);
+ }
+ }
+ break;}
+ }
+
+ u3_noun r_data = u3i_chub(max_idx);
+
+ return r_data;
+ }
+
+/* ravel - x -> ~[x[0], x[1], ... x[n]]
+ entire nd-array busted out as a linear list
+*/
+ u3_noun
+ u3qi_la_ravel_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3_nul;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ r_data = u3nc(u3i_word(x_val16.v), r_data);
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ r_data = u3nc(u3i_word(x_val32.v), r_data);
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ r_data = u3nc(u3i_chub(x_val64.v), r_data);
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ r_data = u3nc(u3i_chubs(2, (c3_d*)&(x_val128.v)), r_data);
+ }
+ break;
+ }
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* min - min(x,y)
+*/
+ u3_noun
+ u3qi_la_min_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by for range)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+ u3_noun r_data;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t min_val16 = ((float16_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ min_val16 = f16_min(min_val16, ((float16_t*)x_bytes)[i]);
+ }
+ float16_t r16[2];
+ r16[0] = min_val16;
+ r16[1].v = 0x1;
+ r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16);
+ break;}
+
+ case 5: {
+ float32_t min_val32 = ((float32_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ min_val32 = f32_min(min_val32, ((float32_t*)x_bytes)[i]);
+ }
+ float32_t r32[2];
+ r32[0] = min_val32;
+ r32[1].v = 0x1;
+ r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32);
+ break;}
+
+ case 6: {
+ float64_t min_val64 = ((float64_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ min_val64 = f64_min(min_val64, ((float64_t*)x_bytes)[i]);
+ }
+ float64_t r64[2];
+ r64[0] = min_val64;
+ r64[1].v = 0x1;
+ r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64);
+ break;}
+
+ case 7: {
+ float128_t min_val128 = ((float128_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]);
+ }
+ float128_t r128[2];
+ r128[0] = min_val128;
+ r128[1] = (float128_t){0x1, 0x0};
+ r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128);
+ break;}
+ }
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* max - max(x,y)
+*/
+ u3_noun
+ u3qi_la_max_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by for range)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+ u3_noun r_data;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t max_val16 = ((float16_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ max_val16 = f16_max(max_val16, ((float16_t*)x_bytes)[i]);
+ }
+ float16_t r16[2];
+ r16[0] = max_val16;
+ r16[1].v = 0x1;
+ r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16);
+ break;}
+
+ case 5: {
+ float32_t max_val32 = ((float32_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ max_val32 = f32_max(max_val32, ((float32_t*)x_bytes)[i]);
+ }
+ float32_t r32[2];
+ r32[0] = max_val32;
+ r32[1].v = 0x1;
+ r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32);
+ break;}
+
+ case 6: {
+ float64_t max_val64 = ((float64_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ max_val64 = f64_max(max_val64, ((float64_t*)x_bytes)[i]);
+ }
+ float64_t r64[2];
+ r64[0] = max_val64;
+ r64[1].v = 0x1;
+ r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64);
+ break;}
+
+ case 7: {
+ float128_t max_val128 = ((float128_t*)x_bytes)[0];
+ for (c3_d i = 0; i < len_x; i++) {
+ max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]);
+ }
+ float128_t r128[2];
+ r128[0] = max_val128;
+ r128[1] = (float128_t){0x1, 0x0};
+ r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128);
+ break;}
+ }
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* abs - |x|
+*/
+ u3_noun
+ u3qi_la_abs_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by for range)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float16_t*)x_bytes)[i] = f16_abs(((float16_t*)x_bytes)[i]);
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float32_t*)x_bytes)[i] = f32_abs(((float32_t*)x_bytes)[i]);
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float64_t*)x_bytes)[i] = f64_abs(((float64_t*)x_bytes)[i]);
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float128_t*)x_bytes)[i] = f128_abs(((float128_t*)x_bytes)[i]);
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* gth - x > y
+*/
+ u3_noun
+ u3qi_la_gth_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ float16_t y_val16 = ((float16_t*)y_bytes)[i];
+ ((float16_t*)y_bytes)[i] = f16_gt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ float32_t y_val32 = ((float32_t*)y_bytes)[i];
+ ((float32_t*)y_bytes)[i] = f32_gt(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ float64_t y_val64 = ((float64_t*)y_bytes)[i];
+ ((float64_t*)y_bytes)[i] = f64_gt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ float128_t y_val128 = ((float128_t*)y_bytes)[i];
+ ((float128_t*)y_bytes)[i] = f128M_gt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* gte - x > y
+*/
+ u3_noun
+ u3qi_la_gte_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ float16_t y_val16 = ((float16_t*)y_bytes)[i];
+ ((float16_t*)y_bytes)[i] = f16_ge(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ float32_t y_val32 = ((float32_t*)y_bytes)[i];
+ ((float32_t*)y_bytes)[i] = f32_ge(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ float64_t y_val64 = ((float64_t*)y_bytes)[i];
+ ((float64_t*)y_bytes)[i] = f64_ge(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ float128_t y_val128 = ((float128_t*)y_bytes)[i];
+ ((float128_t*)y_bytes)[i] = f128M_ge(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* lth - x > y
+*/
+ u3_noun
+ u3qi_la_lth_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ float16_t y_val16 = ((float16_t*)y_bytes)[i];
+ ((float16_t*)y_bytes)[i] = f16_lt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ float32_t y_val32 = ((float32_t*)y_bytes)[i];
+ ((float32_t*)y_bytes)[i] = f32_lt(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ float64_t y_val64 = ((float64_t*)y_bytes)[i];
+ ((float64_t*)y_bytes)[i] = f64_lt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ float128_t y_val128 = ((float128_t*)y_bytes)[i];
+ ((float128_t*)y_bytes)[i] = f128M_lt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* lte - x > y
+*/
+ u3_noun
+ u3qi_la_lte_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ float16_t y_val16 = ((float16_t*)y_bytes)[i];
+ ((float16_t*)y_bytes)[i] = f16_le(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO};
+ }
+ break;
+
+ case 5:
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ float32_t y_val32 = ((float32_t*)y_bytes)[i];
+ ((float32_t*)y_bytes)[i] = f32_le(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO};
+ }
+ break;
+
+ case 6:
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ float64_t y_val64 = ((float64_t*)y_bytes)[i];
+ ((float64_t*)y_bytes)[i] = f64_le(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO};
+ }
+ break;
+
+ case 7:
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ float128_t y_val128 = ((float128_t*)y_bytes)[i];
+ ((float128_t*)y_bytes)[i] = f128M_le(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO};
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* adds - axpy = 1*x+[n]
+*/
+ u3_noun
+ u3qi_la_adds_i754(u3_noun x_data,
+ u3_noun n,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+
+ float16_t n16;
+ float32_t n32;
+ float64_t n64;
+ float128_t n128;
+
+ // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float16_t*)y_bytes)[i] = n16;
+ }
+ haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
+ break;
+
+ case 5:
+ u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float32_t*)y_bytes)[i] = n32;
+ }
+ saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
+ break;
+
+ case 6:
+ u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float64_t*)y_bytes)[i] = n64;
+ }
+ daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
+ break;
+
+ case 7:
+ u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]};
+ }
+ qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1);
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ y_bytes[syz_x] = 0x1; // pin head
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* subs - axpy = -1*[n]+x
+*/
+ u3_noun
+ u3qi_la_subs_i754(u3_noun x_data,
+ u3_noun n,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/o leading 0x1)
+ c3_y* y_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+
+ float16_t n16;
+ float32_t n32;
+ float64_t n64;
+ float128_t n128;
+
+ // Switch on the block size. We assume that n fits in the target block size; Hoon typecheck should prevent.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float16_t*)y_bytes)[i] = n16;
+ }
+ haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)y_bytes, 1, (float16_t*)x_bytes, 1);
+ break;
+
+ case 5:
+ u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float32_t*)y_bytes)[i] = n32;
+ }
+ saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)y_bytes, 1, (float32_t*)x_bytes, 1);
+ break;
+
+ case 6:
+ u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float64_t*)y_bytes)[i] = n64;
+ }
+ daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)y_bytes, 1, (float64_t*)x_bytes, 1);
+ break;
+
+ case 7:
+ u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+ // set y to [n]
+ for (c3_d i = 0; i < len_x; i++) {
+ ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]};
+ }
+ qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)y_bytes, 1, (float128_t*)x_bytes, 1);
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ x_bytes[syz_x] = 0x1; // pin head
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* muls - ?scal n * x
+ elementwise multiplication
+*/
+ u3_noun
+ u3qi_la_muls_i754(u3_noun x_data,
+ u3_noun n,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+ x_bytes[syz_x] = 0x1; // pin head
+
+ float16_t n16;
+ float32_t n32;
+ float64_t n64;
+ float128_t n128;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+ hscal(len_x, n16, (float16_t*)x_bytes, 1);
+ break;
+
+ case 5:
+ u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+ sscal(len_x, n32, (float32_t*)x_bytes, 1);
+ break;
+
+ case 6:
+ u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+ dscal(len_x, n64, (float64_t*)x_bytes, 1);
+ break;
+
+ case 7:
+ u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+ qscal(len_x, n128, (float128_t*)x_bytes, 1);
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* divs - ?scal 1/n * x
+ elementwise division
+*/
+ u3_noun
+ u3qi_la_divs_i754(u3_noun x_data,
+ u3_noun n,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+ x_bytes[syz_x] = 0x1; // pin head
+
+ float16_t in16;
+ float32_t in32;
+ float64_t in64;
+ float128_t in128;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ // XX note that in16 is doing double duty here
+ u3r_bytes(0, 2, (c3_y*)&(in16.v), n);
+ in16 = f16_div((float16_t){SB_REAL16_ONE}, in16);
+ hscal(len_x, in16, (float16_t*)x_bytes, 1);
+ break;
+
+ case 5:
+ // XX note that in32 is doing double duty here
+ u3r_bytes(0, 4, (c3_y*)&(in32.v), n);
+ in32 = f32_div((float32_t){SB_REAL32_ONE}, in32);
+ sscal(len_x, in32, (float32_t*)x_bytes, 1);
+ break;
+
+ case 6:
+ // XX note that in64 is doing double duty here
+ u3r_bytes(0, 8, (c3_y*)&(in64.v), n);
+ in64 = f64_div((float64_t){SB_REAL64_ONE}, in64);
+ dscal(len_x, in64, (float64_t*)x_bytes, 1);
+ break;
+
+ case 7:
+ // XX note that in128 is doing double duty here
+ u3r_bytes(0, 16, (c3_y*)&(in128.v[0]), n);
+ f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &in128, &in128);
+ qscal(len_x, in128, (float128_t*)x_bytes, 1);
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* mods - x % [n] = x - r*floor(x/r)
+ remainder after scalar division
+*/
+ u3_noun
+ u3qi_la_mods_i754(u3_noun x_data,
+ u3_noun n,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ // we reuse it for results for parsimony
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+
+ float16_t n16, in16;
+ float32_t n32, in32;
+ float64_t n64, in64;
+ float128_t n128, in128;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
+ in16 = f16_div((float16_t){SB_REAL16_ONE}, n16);
+
+ for (c3_d i = 0; i < len_x; i++) {
+ float16_t x_val16 = ((float16_t*)x_bytes)[i];
+ // Perform division x/n
+ float16_t div_result16 = f16_mul(in16, x_val16);
+ // Compute floor of the division result
+ c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false);
+ float16_t floor_float16 = i64_to_f16(floor_result16);
+ // Multiply n by floor(x/n)
+ float16_t mult_result16 = f16_mul(n16, floor_float16);
+ // Compute remainder: x - n * floor(x/n)
+ ((float16_t*)x_bytes)[i] = f16_sub(x_val16, mult_result16);
+ }
+ break;
+
+ case 5:
+ u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
+ in32 = f32_div((float32_t){SB_REAL32_ONE}, n32);
+
+ for (c3_d i = 0; i < len_x; i++) {
+ float32_t x_val32 = ((float32_t*)x_bytes)[i];
+ // Perform division x/n
+ float32_t div_result32 = f32_mul(in32, x_val32);
+ // Compute floor of the division result
+ c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false);
+ float32_t floor_float32 = i64_to_f32(floor_result32);
+ // Multiply n by floor(x/n)
+ float32_t mult_result32 = f32_mul(n32, floor_float32);
+ // Compute remainder: x - n * floor(x/n)
+ ((float32_t*)x_bytes)[i] = f32_sub(x_val32, mult_result32);
+ }
+ break;
+
+ case 6:
+ u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
+ in64 = f64_div((float64_t){SB_REAL64_ONE}, n64);
+
+ for (c3_d i = 0; i < len_x; i++) {
+ float64_t x_val64 = ((float64_t*)x_bytes)[i];
+ // Perform division x/n
+ float64_t div_result64 = f64_mul(in64, x_val64);
+ // Compute floor of the division result
+ c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false);
+ float64_t floor_float64 = i64_to_f64(floor_result64);
+ // Multiply n by floor(x/n)
+ float64_t mult_result64 = f64_mul(n64, floor_float64);
+ // Compute remainder: x - n * floor(x/n)
+ ((float64_t*)x_bytes)[i] = f64_sub(x_val64, mult_result64);
+ }
+ break;
+
+ case 7:
+ u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
+ f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ZERO}), &n128, &in128);
+
+ for (c3_d i = 0; i < len_x; i++) {
+ float128_t x_val128 = ((float128_t*)x_bytes)[i];
+ // Perform division x/n
+ float128_t div_result128;
+ f128M_mul((float128_t*)&in128, (float128_t*)&x_val128, (float128_t*)&div_result128);
+ // Compute floor of the division result
+ c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false);
+ float128_t floor_float128;
+ i64_to_f128M(floor_result128, &floor_float128);
+ // Multiply n by floor(x/n)
+ float128_t mult_result128;
+ f128M_mul(((float128_t*)&n128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128));
+ // Compute remainder: x - n * floor(x/n)
+ f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)x_bytes)[i]));
+ }
+ break;
+ }
+
+ // r_data is the result noun of [data]
+ u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+
+ return r_data;
+ }
+
+/* dot - ?dot = x · y
+*/
+ u3_noun
+ u3qi_la_dot_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(shape);
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3);
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy)
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, y_bytes, y_data);
+
+ u3_noun r_data;
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t r16[2];
+ r16[0] = hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
+ r16[1].v = 0x1;
+ r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16);
+ break;}
+
+ case 5: {
+ float32_t r32[2];
+ r32[0] = sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
+ r32[1].v = 0x1;
+ r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32);
+ break;}
+
+ case 6: {
+ float64_t r64[2];
+ r64[0] = ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
+ r64[1].v = 0x1;
+ r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64);
+ break;}
+
+ case 7: {
+ float128_t r128[2];
+ r128[0] = qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1);
+ r128[1] = (float128_t){0x1, 0x0};
+ r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128);
+ break;}
+ }
+
+ // Clean up and return.
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+
+ return r_data;
+ }
+
+/* diag - diag(x)
+*/
+ u3_noun
+ u3qi_la_diag(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+ // Assert length of dims is 2.
+ if (u3qb_lent(shape) != 2) {
+ return u3m_bail(c3__exit);
+ }
+ // Unpack shape into an array of dimensions.
+ c3_d *dims = _get_dims(shape);
+ if (dims[0] != dims[1]) {
+ return u3m_bail(c3__exit);
+ }
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ c3_d len_x = _get_length(shape);
+ c3_d syz_x = len_x * pow(2, bloq - 3);
+ c3_d wyd = pow(2, bloq - 3);
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+ c3_d syz_y = wyd * dims[1];
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_y+1)*sizeof(c3_y));
+
+ u3_noun r_data;
+
+ // Grab the index at i*n_x+j in bytes; put it at j.
+ for (c3_d i = 0; i < dims[1]; i++) {
+ // Scan across whole field width.
+ for (c3_y k = 0; k < wyd; k++) {
+ y_bytes[i*wyd+k] = x_bytes[(i*dims[0]+i)*wyd+k];
+ }
+ }
+ y_bytes[syz_y] = 0x1; // pin head
+
+ // Unpack the result back into a noun.
+ r_data = u3i_bytes((syz_y+1)*sizeof(c3_y), y_bytes);
+
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+ u3a_free(dims);
+
+ return r_data;
+ }
+
+/* transpose - x'
+*/
+ u3_noun
+ u3qi_la_transpose(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ // Assert length of dims is 2.
+ if (u3qb_lent(shape) != 2) {
+ return u3m_bail(c3__exit);
+ }
+ // Unpack shape into an array of dimensions.
+ c3_d *dims = _get_dims(shape);
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ c3_d len_x = _get_length(shape);
+ c3_d syz_x = len_x * pow(2, bloq - 3);
+ c3_d wyd = pow(2, bloq - 3);
+ c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+ u3r_bytes(0, syz_x+1, x_bytes, x_data);
+ c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
+
+ u3_noun r_data;
+
+ // Grab the index at i*n_x+j in bytes; put it at j.
+ for (c3_d i = 0; i < dims[1]; i++) {
+ for (c3_d j = 0; j < dims[0]; j++) {
+ // Scan across whole field width.
+ for (c3_y k = 0; k < wyd; k++) {
+ y_bytes[(j*dims[1]+i)*wyd+k] = x_bytes[(i*dims[0]+j)*wyd+k];
+ }
+ }
+ }
+ y_bytes[syz_x] = 0x1; // pin head
+
+ // Unpack the result back into a noun.
+ r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);
+
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+ u3a_free(dims);
+
+ return r_data;
+ }
+
+/* linspace - [a a+(b-a)/n ... b]
+*/
+ u3_noun
+ u3qi_la_linspace_i754(u3_noun a,
+ u3_noun b,
+ u3_noun n,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ u3_noun r_data;
+
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t a16, b16;
+ u3r_bytes(0, 2, (c3_y*)&(a16.v), a);
+ u3r_bytes(0, 2, (c3_y*)&(b16.v), b);
+ float16_t span16 = f16_sub(b16, a16);
+ float16_t interval16 = f16_div(span16, i32_to_f16(n-1));
+ c3_y* x_bytes16 = (c3_y*)u3a_malloc((n*2+1)*sizeof(c3_y));
+ for (c3_d i = 1; i < n-1; i++) {
+ ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16));
+ }
+ // Assign in reverse order so that n=1 case is correctly left-hand bound.
+ ((float16_t*)x_bytes16)[n-1] = b16;
+ ((float16_t*)x_bytes16)[0] = a16;
+ x_bytes16[n*2] = 0x1; // pin head
+ r_data = u3i_bytes((n*2+1)*sizeof(c3_y), x_bytes16);
+ u3a_free(x_bytes16);
+ break;}
+
+ case 5: {
+ float32_t a32, b32;
+ u3r_bytes(0, 4, (c3_y*)&(a32.v), a);
+ u3r_bytes(0, 4, (c3_y*)&(b32.v), b);
+ float32_t span32 = f32_sub(b32, a32);
+ float32_t interval32 = f32_div(span32, i32_to_f32(n-1));
+ c3_y* x_bytes32 = (c3_y*)u3a_malloc((n*4+1)*sizeof(c3_y));
+ for (c3_d i = 1; i < n-1; i++) {
+ ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32));
+ }
+ ((float32_t*)x_bytes32)[n-1] = b32;
+ ((float32_t*)x_bytes32)[0] = a32;
+ x_bytes32[n*4] = 0x1; // pin head
+ r_data = u3i_bytes((n*4+1)*sizeof(c3_y), x_bytes32);
+ u3a_free(x_bytes32);
+ break;}
+
+ case 6: {
+ float64_t a64, b64;
+ u3r_bytes(0, 8, (c3_y*)&(a64.v), a);
+ u3r_bytes(0, 8, (c3_y*)&(b64.v), b);
+ float64_t span64 = f64_sub(b64, a64);
+ float64_t interval64 = f64_div(span64, i32_to_f64(n-1));
+ c3_y* x_bytes64 = (c3_y*)u3a_malloc((n*8+1)*sizeof(c3_y));
+ for (c3_d i = 1; i < n-1; i++) {
+ ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64));
+ }
+ ((float64_t*)x_bytes64)[n-1] = b64;
+ ((float64_t*)x_bytes64)[0] = a64;
+ x_bytes64[n*8] = 0x1; // pin head
+ r_data = u3i_bytes((n*8+1)*sizeof(c3_y), x_bytes64);
+ u3a_free(x_bytes64);
+ break;}
+
+ case 7: {
+ float128_t a128, b128;
+ u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a);
+ u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b);
+ float128_t span128;
+ f128M_sub(&b128, &a128, &span128);
+ float128_t interval128;
+ float128_t n128;
+ i32_to_f128M(n-1, &n128);
+ f128M_div(&span128, &n128, &interval128);
+ c3_y* x_bytes128 = (c3_y*)u3a_malloc((n*16+1)*sizeof(c3_y));
+ float128_t i128;
+ for (c3_d i = 1; i < n-1; i++) {
+ i32_to_f128M(i, &i128);
+ f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]);
+ f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]);
+ }
+ ((float128_t*)x_bytes128)[n-1] = b128;
+ ((float128_t*)x_bytes128)[0] = a128;
+ x_bytes128[n*16] = 0x1; // pin head
+ r_data = u3i_bytes((n*16+1)*sizeof(c3_y), x_bytes128);
+ u3a_free(x_bytes128);
+ break;}
+ }
+
+ return r_data;
+ }
+
+/* range - [a a+d ... b]
+*/
+ u3_noun
+ u3qi_la_range_i754(u3_noun a,
+ u3_noun b,
+ u3_noun d,
+ u3_noun bloq)
+ {
+ // Fence on valid bloq size.
+ if (bloq < 4 || bloq > 7) {
+ return u3_none;
+ }
+
+ u3_noun r_data;
+
+ switch (u3x_atom(bloq)) {
+ case 4: {
+ float16_t a16, b16, interval16;
+ u3r_bytes(0, 2, (c3_y*)&(a16.v), a);
+ u3r_bytes(0, 2, (c3_y*)&(b16.v), b);
+ u3r_bytes(0, 2, (c3_y*)&(interval16.v), d);
+ c3_d n16 = f16_to_i64(f16_ceil(f16_div(f16_sub(b16, a16), interval16)), softfloat_round_minMag, false);
+ c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n16+1)*2)*sizeof(c3_y));
+ ((float16_t*)x_bytes16)[0] = a16;
+ for (c3_d i = 1; i < n16; i++) {
+ ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16));
+ }
+ ((float16_t*)x_bytes16)[n16].v = 0x1; // pin head
+ r_data = u3i_bytes(((n16+1)*2)*sizeof(c3_y), x_bytes16);
+ u3a_free(x_bytes16);
+ break;}
+
+ case 5: {
+ float32_t a32, b32, interval32;
+ u3r_bytes(0, 4, (c3_y*)&(a32.v), a);
+ u3r_bytes(0, 4, (c3_y*)&(b32.v), b);
+ u3r_bytes(0, 4, (c3_y*)&(interval32.v), d);
+ c3_d n32 = f32_to_i64(f32_ceil(f32_div(f32_sub(b32, a32), interval32)), softfloat_round_minMag, false);
+ c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n32+1)*4)*sizeof(c3_y));
+ ((float32_t*)x_bytes32)[0] = a32;
+ for (c3_d i = 1; i < n32; i++) {
+ ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32));
+ }
+ ((float32_t*)x_bytes32)[n32].v = 0x1; // pin head
+ r_data = u3i_bytes(((n32+1)*4)*sizeof(c3_y), x_bytes32);
+ u3a_free(x_bytes32);
+ break;}
+
+ case 6: {
+ float64_t a64, b64, interval64;
+ u3r_bytes(0, 8, (c3_y*)&(a64.v), a);
+ u3r_bytes(0, 8, (c3_y*)&(b64.v), b);
+ u3r_bytes(0, 8, (c3_y*)&(interval64.v), d);
+ c3_d n64 = f64_to_i64(f64_ceil(f64_div(f64_sub(b64, a64), interval64)), softfloat_round_minMag, false);
+ c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n64+1)*8)*sizeof(c3_y));
+ ((float64_t*)x_bytes64)[0] = a64;
+ for (c3_d i = 1; i < n64; i++) {
+ ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64));
+ }
+ ((float64_t*)x_bytes64)[n64].v = 0x1; // pin head
+ r_data = u3i_bytes(((n64+1)*8)*sizeof(c3_y), x_bytes64);
+ u3a_free(x_bytes64);
+ break;}
+
+ case 7: {
+ float128_t a128, b128, interval128;
+ u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a);
+ u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b);
+ u3r_bytes(0, 16, (c3_y*)&(interval128.v[0]), d);
+ float128_t tmp;
+ f128M_sub(&b128, &a128, &tmp);
+ f128M_div(&tmp, &interval128, &tmp);
+ f128M_ceil(&tmp, &tmp);
+ c3_d n128 = f128M_to_i64(&tmp, softfloat_round_minMag, false);
+ c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16)*sizeof(c3_y));
+ float128_t i128;
+ ((float128_t*)x_bytes128)[0] = a128;
+ for (c3_d i = 1; i < n128; i++) {
+ i32_to_f128M(i, &i128);
+ f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]);
+ f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]);
+ }
+ ((float128_t*)x_bytes128)[n128].v[0] = 0x1; // pin head
+ ((float128_t*)x_bytes128)[n128].v[1] = 0x0; // pin head
+ r_data = u3i_bytes(((n128+1)*16)*sizeof(c3_y), x_bytes128);
+ u3a_free(x_bytes128);
+ break;}
+ }
+
+ return r_data;
+ }
+
+/* trace - tr(x)
+*/
+ u3_noun
+ u3qi_la_trace_i754(u3_noun x_data,
+ u3_noun shape,
+ u3_noun bloq)
+ {
+ u3_noun d_data = u3qi_la_diag(x_data, shape, bloq);
+ c3_d len_x0 = _get_dims(shape)[0];
+ u3_noun r_data = u3qi_la_dot_i754(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq));
+ return r_data;
+ }
+
+/* mmul
+*/
+ u3_noun
+ u3qi_la_mmul_i754(u3_noun x_data,
+ u3_noun y_data,
+ u3_noun x_shape,
+ u3_noun y_shape,
+ u3_noun bloq)
+ {
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ c3_d M = u3x_atom(u3h(x_shape));
+ c3_d Na= u3x_atom(u3h(u3t(x_shape)));
+ c3_d Nb= u3x_atom(u3h(y_shape));
+ c3_d P = u3x_atom(u3h(u3t(y_shape)));
+
+ if ((u3_nul != u3t(u3t(x_shape))) ||
+ (u3_nul != u3t(u3t(y_shape))) ||
+ (Na != Nb)) {
+ return u3m_bail(c3__exit);
+ }
+ c3_d N = Na;
+
+ // Unpack the data as a byte array. We assume total length < 2**64.
+ // len_x is length in base units
+ c3_d len_x = _get_length(x_shape); // M*N
+
+ // syz_x is length in bytes
+ c3_d syz_x = len_x * pow(2, bloq-3); // M*N
+
+ // x_bytes is the data array (w/o leading 0x1)
+ c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
+ u3r_bytes(0, syz_x, x_bytes, x_data);
+
+ // len_x is length in base units
+ c3_d len_y = _get_length(y_shape); // N*P
+
+ // syz_x is length in bytes
+ c3_d syz_y = len_y * pow(2, bloq-3); // N*P
+
+ // y_bytes is the data array (w/o leading 0x1)
+ c3_y* y_bytes = (c3_y*)u3a_malloc(syz_y*sizeof(c3_y));
+ u3r_bytes(0, syz_y, y_bytes, y_data);
+
+ // len_r is length in base units
+ c3_d len_r = M*P; // M*P
+
+ // syz_r is length in bytes
+ c3_d syz_r = len_r * pow(2, bloq-3); // M*P
+
+ // r_bytes is the result array
+ c3_y* r_bytes = (c3_y*)u3a_malloc((syz_r+1)*sizeof(c3_y));
+ r_bytes[syz_r] = 0x1; // pin head
+ // initialize with 0x0s
+ for (c3_d i = 0; i < syz_r; i++) {
+ r_bytes[i] = 0x0;
+ }
+
+ // Switch on the block size.
+ switch (u3x_atom(bloq)) {
+ case 4:
+ hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, P, (float16_t){SB_REAL16_ZERO}, (float16_t*)r_bytes, P);
+ break;
+
+ case 5:
+ sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, P, (float32_t){SB_REAL32_ZERO}, (float32_t*)r_bytes, P);
+ break;
+
+ case 6:
+ dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, P, (float64_t){SB_REAL64_ZERO}, (float64_t*)r_bytes, P);
+ break;
+
+ case 7:
+ qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, P, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)r_bytes, P);
+ break;
+ }
+
+ // Unpack the result back into a noun.
+ u3_noun r_data = u3i_bytes(syz_r+1, r_bytes);
+ u3_noun M_ = u3i_chub(M);
+ u3_noun P_ = u3i_chub(P);
+
+ u3a_free(x_bytes);
+ u3a_free(y_bytes);
+ u3a_free(r_bytes);
+
+ return u3nc(u3nq(u3nt(M_, P_, u3_nul), u3k(bloq), c3__i754, u3_nul), r_data);
+ }
+
+ u3_noun
+ u3wi_la_add(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == u3ud(rnd)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_sub(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == u3ud(rnd)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_sub_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_mul(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == u3ud(rnd)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_div(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == u3ud(rnd)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_mod(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == u3ud(rnd)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_mod_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_cumsum(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(u3nc(x_meta, x_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_cumsum_i754(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3nc(0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_argmin(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(u3nc(x_meta, x_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_argmin_i754(x_data, x_shape, x_bloq);
+ // bare atom (@ index)
+ return r_data;}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_ravel(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(u3nc(x_meta, x_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_ravel_i754(x_data, x_shape, x_bloq);
+ // (list @)
+ return r_data;}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_argmax(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(u3nc(x_meta, x_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_argmax_i754(x_data, x_shape, x_bloq);
+ // bare atom (@ index)
+ return r_data;}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_min(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(u3nc(x_meta, x_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_min_i754(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_max(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(u3nc(x_meta, x_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_max_i754(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_abs(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_abs_i754(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_gth(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3k(x_meta), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_gte(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3k(x_meta), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_lth(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3k(x_meta), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_lte(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3k(x_meta), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_adds(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data, n;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_3, &n,
+ 0) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(n) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_adds_i754(x_data, n, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_subs(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data, n;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_3, &n,
+ 0) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(n) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_subs_i754(x_data, n, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_muls(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data, n;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_3, &n,
+ 0) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(n) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_muls_i754(x_data, n, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_divs(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data, n;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_3, &n,
+ 0) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(n) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_divs_i754(x_data, n, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_mods(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data, n;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_3, &n,
+ 0) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(n) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_mods_i754(x_data, n, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_dot(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3r_sing(x_meta, y_meta) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ c3_d len_x0 = _get_dims(x_shape)[0];
+ return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_transpose(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(cor)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_linspace(u3_noun cor)
+ {
+ u3_noun x_meta, a, b, n, rnd;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_12, &a,
+ u3x_sam_13, &b,
+ u3x_sam_7, &n,
+ 0))
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == u3ud(n) ||
+ (n < 1) // crash on zero size
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_linspace_i754(a, b, n, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ x_shape = u3nc(u3x_atom(n), u3_nul);
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_range(u3_noun cor)
+ {
+ u3_noun x_meta, a, b, d, rnd;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_12, &a,
+ u3x_sam_13, &b,
+ u3x_sam_7, &d,
+ 0))
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_range_i754(a, b, d, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ c3_d a_, b_, d_;
+ c3_ds n_;
+ switch (x_bloq) {
+ case 4:
+ u3r_bytes(0, 2, (c3_y*)&a_, a);
+ u3r_bytes(0, 2, (c3_y*)&b_, b);
+ u3r_bytes(0, 2, (c3_y*)&d_, d);
+ n_ = f16_to_i64(f16_ceil(f16_div(f16_sub((float16_t){b_}, (float16_t){a_}), (float16_t){d_})), softfloat_round_minMag, false) - 1;
+ break;
+ case 5:
+ u3r_bytes(0, 4, (c3_y*)&a_, a);
+ u3r_bytes(0, 4, (c3_y*)&b_, b);
+ u3r_bytes(0, 4, (c3_y*)&d_, d);
+ n_ = f32_to_i64(f32_ceil(f32_div(f32_sub((float32_t){b_}, (float32_t){a_}), (float32_t){d_})), softfloat_round_minMag, false) - 1;
+ break;
+ case 6:
+ u3r_bytes(0, 8, (c3_y*)&a_, a);
+ u3r_bytes(0, 8, (c3_y*)&b_, b);
+ u3r_bytes(0, 8, (c3_y*)&d_, d);
+ n_ = f64_to_i64(f64_ceil(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_})), softfloat_round_minMag, false) - 1;
+ break;
+ case 7: {
+ c3_d a__[2], b__[2], d__[2];
+ u3r_bytes(0, 16, (c3_y*)&a__, a);
+ u3r_bytes(0, 16, (c3_y*)&b__, b);
+ u3r_bytes(0, 16, (c3_y*)&d__, d);
+ float128_t tmp;
+ f128M_sub((float128_t*)&b__, (float128_t*)&a__, &tmp);
+ f128M_div(&tmp, (float128_t*)&d__, &tmp);
+ f128M_ceil(&tmp, &tmp);
+ n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false) - 1;
+ break;}
+ }
+ u3_noun n = u3i_chub(n_+1);
+ x_shape = u3nc(u3k(n), u3_nul);
+ return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_diag(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ x_tail = u3t(u3t(u3t(x_meta))); // 15
+ if ( c3n == u3ud(x_bloq) ||
+ c3n == u3ud(x_kind) ||
+ c3n == _check(cor)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun r_data = u3qi_la_diag(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ c3_d len_x0 = _get_dims(x_shape)[0];
+ return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_trace(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_2, &x_meta,
+ u3x_sam_3, &x_data,
+ 0) ||
+ c3n == u3ud(x_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind, x_tail;
+ if ( c3n == u3r_mean(x_meta,
+ 2, &x_shape,
+ 6, &x_bloq,
+ 14, &x_kind,
+ 15, &x_tail,
+ 0)
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754: {
+ u3_noun r_data = u3qi_la_trace_i754(x_data, x_shape, x_bloq);
+ if (r_data == u3_none) { return u3_none; }
+ return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);}
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }
+
+ u3_noun
+ u3wi_la_mmul(u3_noun cor)
+ {
+ // Each argument is a ray, [=meta data=@ux]
+ u3_noun x_meta, x_data,
+ y_meta, y_data;
+
+ if ( c3n == u3r_mean(cor,
+ u3x_sam_4, &x_meta,
+ u3x_sam_5, &x_data,
+ u3x_sam_6, &y_meta,
+ u3x_sam_7, &y_data,
+ 0) ||
+ c3n == u3ud(x_data) ||
+ c3n == u3ud(y_data) )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ u3_noun x_shape, x_bloq, x_kind,
+ y_shape,
+ rnd;
+ x_shape = u3h(x_meta); // 2
+ x_bloq = u3h(u3t(x_meta)); // 6
+ x_kind = u3h(u3t(u3t(x_meta))); // 14
+ y_shape = u3h(y_meta); // 2
+ rnd = u3h(u3t(u3t(u3t(cor)))); // 30
+ if ( c3n == _check(u3nc(x_meta, x_data)) ||
+ c3n == _check(u3nc(y_meta, y_data))
+ )
+ {
+ return u3m_bail(c3__exit);
+ } else {
+ switch (x_kind) {
+ case c3__i754:
+ _set_rounding(rnd);
+ u3_noun r_data = u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq);
+ // result is already [meta data]
+ return r_data;
+
+ default:
+ return u3_none;
+ }
+ }
+ }
+ }