/// @file #include "jets/q.h" #include "jets/w.h" #include "c3/motes.h" #include "noun.h" #include "softfloat.h" #include "softblas.h" #include // for pow() #include #define f16_ceil(a) f16_roundToInt( a, softfloat_round_max, false ) #define f32_ceil(a) f32_roundToInt( a, softfloat_round_max, false ) #define f64_ceil(a) f64_roundToInt( a, softfloat_round_max, false ) #define f128M_ceil(a, b) f128M_roundToInt( a, softfloat_round_max, false, b ) union half { float16_t h; c3_w c; }; union sing { float32_t s; c3_w c; }; union doub { float64_t d; c3_d c; }; union quad { float128_t q; c3_d c[2]; }; // $?(%n %u %d %z %a) static inline void _set_rounding(c3_w a) { // We could use SoftBLAS set_rounding() to set the SoftFloat // mode as well, but it's more explicit to do it here since // we may use SoftFloat in any given Lagoon jet and we want // you, dear developer, to see it set here. switch ( a ) { default: u3m_bail(c3__fail); break; // %n - near case c3__n: softfloat_roundingMode = softfloat_round_near_even; softblas_roundingMode = 'n'; break; // %z - zero case c3__z: softfloat_roundingMode = softfloat_round_minMag; softblas_roundingMode = 'z'; break; // %u - up case c3__u: softfloat_roundingMode = softfloat_round_max; softblas_roundingMode = 'u'; break; // %d - down case c3__d: softfloat_roundingMode = softfloat_round_min; softblas_roundingMode = 'd'; break; // %a - away case c3__a: softfloat_roundingMode = softfloat_round_near_maxMag; softblas_roundingMode = 'a'; break; } } /* length of shape = x * y * z * w * ... */ static inline c3_d _get_length(u3_noun shape) { c3_d len = 1; while (u3_nul != shape) { len = len * u3x_atom(u3h(shape)); shape = u3t(shape); } return len; } /* get dims from shape as array [x y z w ...] 
*/ static inline c3_d* _get_dims(u3_noun shape) { u3_atom len = u3qb_lent(shape); c3_d len_d = u3r_chub(0, len); c3_d* dims = (c3_d*)u3a_malloc(len_d*sizeof(c3_d)); for (c3_d i = 0; i < len_d; i++) { dims[i] = u3r_chub(0, u3x_atom(u3h(shape))); shape = u3t(shape); } u3z(len); return dims; } /* check consistency of array shape and bloq size |= =ray ^- ? .= (roll shape.meta.ray ^mul) (dec (met bloq.meta.ray data.ray)) */ static inline c3_o _check(u3_noun ray) { // Calculate expected size. u3_atom shp = u3h(u3h(ray)); // (reported) shape of ray, +4 u3_atom blq = u3h(u3t(u3h(ray))); // block size of ray, +10 u3_atom sin = _get_length(shp); // calculated length of ray // Calculate actual size. u3_atom len = u3r_met(blq, u3t(ray)); // length of ray u3_atom dex = u3qa_dec(len); // decrement length b/c of pinned 1 return __(sin == dex); } /* add - axpy = 1*x+y */ u3_noun u3qi_la_add_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq ) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); break; case 5: saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); break; case 6: daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); break; case 7: qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* sub - axpy = -1*y+x */ u3_noun u3qi_la_sub_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq ) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { case 4: haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1); break; case 5: saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1); break; case 6: daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1); break; case 7: qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1); break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. 
u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* mul - x.*y elementwise multiplication */ u3_noun u3qi_la_mul_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = f16_mul(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); } break; case 5: for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = f32_mul(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); } break; case 6: for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = f64_mul(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); } break; case 7: for (c3_d i = 0; i < len_x; i++) { f128M_mul(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* div - x/y elementwise division */ u3_noun u3qi_la_div_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. 
// len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { ((float16_t*)y_bytes)[i] = f16_div(((float16_t*)x_bytes)[i], ((float16_t*)y_bytes)[i]); } break; case 5: for (c3_d i = 0; i < len_x; i++) { ((float32_t*)y_bytes)[i] = f32_div(((float32_t*)x_bytes)[i], ((float32_t*)y_bytes)[i]); } break; case 6: for (c3_d i = 0; i < len_x; i++) { ((float64_t*)y_bytes)[i] = f64_div(((float64_t*)x_bytes)[i], ((float64_t*)y_bytes)[i]); } break; case 7: for (c3_d i = 0; i < len_x; i++) { f128M_div(&(((float128_t*)y_bytes)[i]), &(((float128_t*)x_bytes)[i]), &(((float128_t*)y_bytes)[i])); } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* mod - x % y = x - r*floor(x/r) remainder after division */ u3_noun u3qi_la_mod_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; float16_t y_val16 = ((float16_t*)y_bytes)[i]; // Perform division x/n float16_t div_result16 = f16_div(x_val16, y_val16); // Compute floor of the division result c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); float16_t floor_float16 = i64_to_f16(floor_result16); // Multiply n by floor(x/n) float16_t mult_result16 = f16_mul(y_val16, floor_float16); // Compute remainder: x - n * floor(x/n) ((float16_t*)y_bytes)[i] = f16_sub(x_val16, mult_result16); } break; case 5: for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; float32_t y_val32 = ((float32_t*)y_bytes)[i]; // Perform division x/n float32_t div_result32 = f32_div(x_val32, y_val32); // Compute floor of the division result c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); float32_t floor_float32 = i64_to_f32(floor_result32); // Multiply n by floor(x/n) float32_t mult_result32 = f32_mul(y_val32, floor_float32); // Compute remainder: x - n * floor(x/n) ((float32_t*)y_bytes)[i] = f32_sub(x_val32, mult_result32); } break; case 6: for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; float64_t y_val64 = ((float64_t*)y_bytes)[i]; // Perform division x/n float64_t div_result64 = f64_div(x_val64, y_val64); // Compute floor of the division result c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); float64_t floor_float64 = i64_to_f64(floor_result64); // Multiply n by floor(x/n) float64_t mult_result64 = f64_mul(y_val64, floor_float64); // Compute remainder: x - n * floor(x/n) ((float64_t*)y_bytes)[i] = f64_sub(x_val64, mult_result64); } break; case 7: for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; float128_t y_val128 = ((float128_t*)y_bytes)[i]; // Perform division x/n float128_t div_result128; f128M_div((float128_t*)&x_val128, 
(float128_t*)&y_val128, (float128_t*)&div_result128); // Compute floor of the division result c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); float128_t floor_float128; i64_to_f128M(floor_result128, &floor_float128); // Multiply n by floor(x/n) float128_t mult_result128; f128M_mul(((float128_t*)&y_val128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); // Compute remainder: x - n * floor(x/n) f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)y_bytes)[i])); } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* cumsum - x[0] + x[1] + ... x[n] */ u3_noun u3qi_la_cumsum_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // y_bytes is the data array (w/ leading 0x1, skipped by for range) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, x_bytes, x_data); u3_noun r_data; // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: { float16_t sum16[2]; sum16[0] = (float16_t){SB_REAL16_ZERO}; for (c3_d i = len_x; i > 0; i--) { sum16[0] = f16_add(sum16[0], ((float16_t*)x_bytes)[i-1]); } sum16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)sum16); break;} case 5: { float32_t sum32[2]; sum32[0] = (float32_t){SB_REAL32_ZERO}; for (c3_d i = len_x; i > 0; i--) { sum32[0] = f32_add(sum32[0], ((float32_t*)x_bytes)[i-1]); } sum32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)sum32); break;} case 6: { float64_t sum64[2]; sum64[0] = (float64_t){SB_REAL64_ZERO}; for (c3_d i = len_x; i > 0; i--) { sum64[0] = f64_add(sum64[0], ((float64_t*)x_bytes)[i-1]); } sum64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)sum64); break;} case 7: { float128_t sum128[2]; sum128[0] = (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; for (c3_d i = len_x; i > 0; i--) { f128M_add(&(sum128[0]), &(((float128_t*)x_bytes)[i-1]), &(sum128[0])); } sum128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)sum128); break;} } // Clean up and return. u3a_free(x_bytes); return r_data; } /* argmin - argmin(x) */ u3_noun u3qi_la_argmin_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1, which doesn't matter here) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); c3_w min_idx = 0; // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: { float16_t min_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f16_lt(((float16_t*)x_bytes)[i], min_val16)) { min_val16 = ((float16_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } } break;} case 5: { float32_t min_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f32_lt(((float32_t*)x_bytes)[i], min_val32)) { min_val32 = ((float32_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } } break;} case 6: { float64_t min_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f64_lt(((float64_t*)x_bytes)[i], min_val64)) { min_val64 = ((float64_t*)x_bytes)[i]; min_idx = (len_x - i - 1); } } break;} case 7: { float128_t min_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f128M_lt(&(((float128_t*)x_bytes)[i]), &min_val128)) { min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); min_idx = (len_x - i - 1); } } break;} } u3_noun r_data = u3i_chub(min_idx); return r_data; } /* argmax - argmax(x) */ u3_noun u3qi_la_argmax_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1, which doesn't matter here) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); c3_w max_idx = 0; // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: { float16_t max_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f16_gt(((float16_t*)x_bytes)[i], max_val16)) { max_val16 = ((float16_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } } break;} case 5: { float32_t max_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f32_gt(((float32_t*)x_bytes)[i], max_val32)) { max_val32 = ((float32_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } } break;} case 6: { float64_t max_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f64_gt(((float64_t*)x_bytes)[i], max_val64)) { max_val64 = ((float64_t*)x_bytes)[i]; max_idx = (len_x - i - 1); } } break;} case 7: { float128_t max_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { if(f128M_gt(&(((float128_t*)x_bytes)[i]), &max_val128)) { max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); max_idx = (len_x - i - 1); } } break;} } u3_noun r_data = u3i_chub(max_idx); return r_data; } /* ravel - x -> ~[x[0], x[1], ... x[n]] entire nd-array busted out as a linear list */ u3_noun u3qi_la_ravel_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // r_data is the result noun of [data] u3_noun r_data = u3_nul; // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; r_data = u3nc(u3i_word(x_val16.v), r_data); } break; case 5: for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; r_data = u3nc(u3i_word(x_val32.v), r_data); } break; case 6: for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; r_data = u3nc(u3i_chub(x_val64.v), r_data); } break; case 7: for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; r_data = u3nc(u3i_chubs(2, (c3_d*)&(x_val128.v)), r_data); } break; } // Clean up and return. u3a_free(x_bytes); return r_data; } /* min - min(x,y) */ u3_noun u3qi_la_min_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by for range) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, x_bytes, x_data); u3_noun r_data; // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: { float16_t min_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val16 = f16_min(min_val16, ((float16_t*)x_bytes)[i]); } float16_t r16[2]; r16[0] = min_val16; r16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); break;} case 5: { float32_t min_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val32 = f32_min(min_val32, ((float32_t*)x_bytes)[i]); } float32_t r32[2]; r32[0] = min_val32; r32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); break;} case 6: { float64_t min_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val64 = f64_min(min_val64, ((float64_t*)x_bytes)[i]); } float64_t r64[2]; r64[0] = min_val64; r64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); break;} case 7: { float128_t min_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { min_val128 = *f128M_min(&min_val128, &((float128_t*)x_bytes)[i]); } float128_t r128[2]; r128[0] = min_val128; r128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); break;} } // Clean up and return. u3a_free(x_bytes); return r_data; } /* max - max(x,y) */ u3_noun u3qi_la_max_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by for range) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, x_bytes, x_data); u3_noun r_data; // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: { float16_t max_val16 = ((float16_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val16 = f16_max(max_val16, ((float16_t*)x_bytes)[i]); } float16_t r16[2]; r16[0] = max_val16; r16[1].v = 0x1; r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16); break;} case 5: { float32_t max_val32 = ((float32_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val32 = f32_max(max_val32, ((float32_t*)x_bytes)[i]); } float32_t r32[2]; r32[0] = max_val32; r32[1].v = 0x1; r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32); break;} case 6: { float64_t max_val64 = ((float64_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val64 = f64_max(max_val64, ((float64_t*)x_bytes)[i]); } float64_t r64[2]; r64[0] = max_val64; r64[1].v = 0x1; r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64); break;} case 7: { float128_t max_val128 = ((float128_t*)x_bytes)[0]; for (c3_d i = 0; i < len_x; i++) { max_val128 = *f128M_max(&max_val128, &((float128_t*)x_bytes)[i]); } float128_t r128[2]; r128[0] = max_val128; r128[1] = (float128_t){0x1, 0x0}; r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128); break;} } // Clean up and return. u3a_free(x_bytes); return r_data; } /* abs - |x| */ u3_noun u3qi_la_abs_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by for range) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, x_bytes, x_data); // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { ((float16_t*)x_bytes)[i] = f16_abs(((float16_t*)x_bytes)[i]); } break; case 5: for (c3_d i = 0; i < len_x; i++) { ((float32_t*)x_bytes)[i] = f32_abs(((float32_t*)x_bytes)[i]); } break; case 6: for (c3_d i = 0; i < len_x; i++) { ((float64_t*)x_bytes)[i] = f64_abs(((float64_t*)x_bytes)[i]); } break; case 7: for (c3_d i = 0; i < len_x; i++) { ((float128_t*)x_bytes)[i] = f128_abs(((float128_t*)x_bytes)[i]); } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. u3a_free(x_bytes); return r_data; } /* gth - x > y */ u3_noun u3qi_la_gth_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; float16_t y_val16 = ((float16_t*)y_bytes)[i]; ((float16_t*)y_bytes)[i] = f16_gt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; } break; case 5: for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; float32_t y_val32 = ((float32_t*)y_bytes)[i]; ((float32_t*)y_bytes)[i] = f32_gt(x_val32, y_val32) ? 
(float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; } break; case 6: for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; float64_t y_val64 = ((float64_t*)y_bytes)[i]; ((float64_t*)y_bytes)[i] = f64_gt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; } break; case 7: for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; float128_t y_val128 = ((float128_t*)y_bytes)[i]; ((float128_t*)y_bytes)[i] = f128M_gt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* gte - x > y */ u3_noun u3qi_la_gte_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; float16_t y_val16 = ((float16_t*)y_bytes)[i]; ((float16_t*)y_bytes)[i] = f16_ge(x_val16, y_val16) ? 
(float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; } break; case 5: for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; float32_t y_val32 = ((float32_t*)y_bytes)[i]; ((float32_t*)y_bytes)[i] = f32_ge(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; } break; case 6: for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; float64_t y_val64 = ((float64_t*)y_bytes)[i]; ((float64_t*)y_bytes)[i] = f64_ge(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; } break; case 7: for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; float128_t y_val128 = ((float128_t*)y_bytes)[i]; ((float128_t*)y_bytes)[i] = f128M_ge(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* lth - x > y */ u3_noun u3qi_la_lth_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. // len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. 
switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; float16_t y_val16 = ((float16_t*)y_bytes)[i]; ((float16_t*)y_bytes)[i] = f16_lt(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; } break; case 5: for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; float32_t y_val32 = ((float32_t*)y_bytes)[i]; ((float32_t*)y_bytes)[i] = f32_lt(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; } break; case 6: for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; float64_t y_val64 = ((float64_t*)y_bytes)[i]; ((float64_t*)y_bytes)[i] = f64_lt(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; } break; case 7: for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; float128_t y_val128 = ((float128_t*)y_bytes)[i]; ((float128_t*)y_bytes)[i] = f128M_lt(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* lte - x > y */ u3_noun u3qi_la_lte_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. 
// len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/o leading 0x1) c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); // y_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, y_bytes, y_data); // Switch on the block size. switch (u3x_atom(bloq)) { case 4: for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; float16_t y_val16 = ((float16_t*)y_bytes)[i]; ((float16_t*)y_bytes)[i] = f16_le(x_val16, y_val16) ? (float16_t){SB_REAL16_ONE} : (float16_t){SB_REAL16_ZERO}; } break; case 5: for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; float32_t y_val32 = ((float32_t*)y_bytes)[i]; ((float32_t*)y_bytes)[i] = f32_le(x_val32, y_val32) ? (float32_t){SB_REAL32_ONE} : (float32_t){SB_REAL32_ZERO}; } break; case 6: for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; float64_t y_val64 = ((float64_t*)y_bytes)[i]; ((float64_t*)y_bytes)[i] = f64_le(x_val64, y_val64) ? (float64_t){SB_REAL64_ONE} : (float64_t){SB_REAL64_ZERO}; } break; case 7: for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; float128_t y_val128 = ((float128_t*)y_bytes)[i]; ((float128_t*)y_bytes)[i] = f128M_le(((float128_t*)&x_val128), ((float128_t*)&y_val128)) ? (float128_t){SB_REAL128L_ONE, SB_REAL128U_ONE} : (float128_t){SB_REAL128L_ZERO, SB_REAL128U_ZERO}; } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes); // Clean up and return. u3a_free(x_bytes); u3a_free(y_bytes); return r_data; } /* adds - axpy = 1*x+[n] */ u3_noun u3qi_la_adds_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. 
if (bloq < 4 || bloq > 7) {
    return u3_none;
  }

  // Unpack the data as a byte array.  We assume total length < 2**64.
  // len_x is length in base units
  c3_d len_x = _get_length(shape);

  // syz_x is length in bytes
  c3_d syz_x = len_x * pow(2, bloq-3);

  // x_bytes is the data array (w/o leading 0x1)
  c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
  u3r_bytes(0, syz_x, x_bytes, x_data);

  // y_bytes is filled with copies of n, accumulates the ?axpy result,
  // and has one extra byte for the 0x1 pin written after the switch
  c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));

  float16_t n16;
  float32_t n32;
  float64_t n64;
  float128_t n128;

  // Switch on the block size.  We assume that n fits in the target block size; Hoon typecheck should prevent.
  switch (u3x_atom(bloq)) {
    case 4:
      u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float16_t*)y_bytes)[i] = n16;
      }
      haxpy(len_x, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
      break;

    case 5:
      u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float32_t*)y_bytes)[i] = n32;
      }
      saxpy(len_x, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
      break;

    case 6:
      u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float64_t*)y_bytes)[i] = n64;
      }
      daxpy(len_x, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
      break;

    case 7:
      u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]};
      }
      qaxpy(len_x, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1);
      break;
  }

  // r_data is the result noun of [data]
  y_bytes[syz_x] = 0x1;  // pin head
  u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);

  // Clean up and return.
  u3a_free(x_bytes);
  u3a_free(y_bytes);

  return r_data;
}

/* subs - axpy = -1*[n]+x
**
**   Subtracts the scalar n from every element of x.  The roles of the
**   two buffers are swapped relative to adds: the [n] vector is the
**   ?axpy input and x_bytes is the accumulator and output buffer.
*/
u3_noun u3qi_la_subs_i754(u3_noun x_data,
                          u3_noun n,
                          u3_noun shape,
                          u3_noun bloq)
{
  // Fence on valid bloq size.
  if (bloq < 4 || bloq > 7) {
    return u3_none;
  }

  // Unpack the data as a byte array.  We assume total length < 2**64.
  // len_x is length in base units
  c3_d len_x = _get_length(shape);

  // syz_x is length in bytes
  c3_d syz_x = len_x * pow(2, bloq-3);

  // x_bytes is the data array plus one byte for the 0x1 pin, which is
  // written after the switch
  c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
  u3r_bytes(0, syz_x, x_bytes, x_data);

  // y_bytes is the data array (w/o leading 0x1)
  c3_y* y_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));

  float16_t n16;
  float32_t n32;
  float64_t n64;
  float128_t n128;

  // Switch on the block size.  We assume that n fits in the target block size; Hoon typecheck should prevent.
  switch (u3x_atom(bloq)) {
    case 4:
      u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float16_t*)y_bytes)[i] = n16;
      }
      haxpy(len_x, (float16_t){SB_REAL16_NEGONE}, (float16_t*)y_bytes, 1, (float16_t*)x_bytes, 1);
      break;

    case 5:
      u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float32_t*)y_bytes)[i] = n32;
      }
      saxpy(len_x, (float32_t){SB_REAL32_NEGONE}, (float32_t*)y_bytes, 1, (float32_t*)x_bytes, 1);
      break;

    case 6:
      u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float64_t*)y_bytes)[i] = n64;
      }
      daxpy(len_x, (float64_t){SB_REAL64_NEGONE}, (float64_t*)y_bytes, 1, (float64_t*)x_bytes, 1);
      break;

    case 7:
      u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
      // set y to [n]
      for (c3_d i = 0; i < len_x; i++) {
        ((float128_t*)y_bytes)[i] = (float128_t){n128.v[0], n128.v[1]};
      }
      qaxpy(len_x, (float128_t){SB_REAL128L_NEGONE,SB_REAL128U_NEGONE}, (float128_t*)y_bytes, 1, (float128_t*)x_bytes, 1);
      break;
  }

  // r_data is the result noun of [data]
  x_bytes[syz_x] = 0x1;  // pin head
  u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);

  // Clean up and return.
  u3a_free(x_bytes);
  u3a_free(y_bytes);

  return r_data;
}

/* muls - ?scal n * x elementwise multiplication
**
**   Scales x in place by the scalar n and returns the pinned buffer.
*/
u3_noun u3qi_la_muls_i754(u3_noun x_data,
                          u3_noun n,
                          u3_noun shape,
                          u3_noun bloq)
{
  // Fence on valid bloq size.
  if (bloq < 4 || bloq > 7) {
    return u3_none;
  }

  // Unpack the data as a byte array.  We assume total length < 2**64.
  // len_x is length in base units
  c3_d len_x = _get_length(shape);

  // syz_x is length in bytes
  c3_d syz_x = len_x * pow(2, bloq-3);

  // x_bytes is the data array plus one byte for the 0x1 pin; only the
  // first len_x elements are passed to ?scal
  c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
  u3r_bytes(0, syz_x, x_bytes, x_data);
  x_bytes[syz_x] = 0x1;  // pin head

  float16_t n16;
  float32_t n32;
  float64_t n64;
  float128_t n128;

  // Switch on the block size.
  switch (u3x_atom(bloq)) {
    case 4:
      u3r_bytes(0, 2, (c3_y*)&(n16.v), n);
      hscal(len_x, n16, (float16_t*)x_bytes, 1);
      break;

    case 5:
      u3r_bytes(0, 4, (c3_y*)&(n32.v), n);
      sscal(len_x, n32, (float32_t*)x_bytes, 1);
      break;

    case 6:
      u3r_bytes(0, 8, (c3_y*)&(n64.v), n);
      dscal(len_x, n64, (float64_t*)x_bytes, 1);
      break;

    case 7:
      u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n);
      qscal(len_x, n128, (float128_t*)x_bytes, 1);
      break;
  }

  // r_data is the result noun of [data]
  u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes);

  // Clean up and return.
  u3a_free(x_bytes);

  return r_data;
}

/* divs - ?scal 1/n * x elementwise division
**
**   Divides every element of x by the scalar n by scaling with the
**   reciprocal of n.
*/
u3_noun u3qi_la_divs_i754(u3_noun x_data,
                          u3_noun n,
                          u3_noun shape,
                          u3_noun bloq)
{
  // Fence on valid bloq size.
  if (bloq < 4 || bloq > 7) {
    return u3_none;
  }

  // Unpack the data as a byte array.  We assume total length < 2**64.
// len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x, x_bytes, x_data); x_bytes[syz_x] = 0x1; // pin head float16_t in16; float32_t in32; float64_t in64; float128_t in128; // Switch on the block size. switch (u3x_atom(bloq)) { case 4: // XX note that in16 is doing double duty here u3r_bytes(0, 2, (c3_y*)&(in16.v), n); in16 = f16_div((float16_t){SB_REAL16_ONE}, in16); hscal(len_x, in16, (float16_t*)x_bytes, 1); break; case 5: // XX note that in32 is doing double duty here u3r_bytes(0, 4, (c3_y*)&(in32.v), n); in32 = f32_div((float32_t){SB_REAL32_ONE}, in32); sscal(len_x, in32, (float32_t*)x_bytes, 1); break; case 6: // XX note that in64 is doing double duty here u3r_bytes(0, 8, (c3_y*)&(in64.v), n); in64 = f64_div((float64_t){SB_REAL64_ONE}, in64); dscal(len_x, in64, (float64_t*)x_bytes, 1); break; case 7: // XX note that in128 is doing double duty here u3r_bytes(0, 16, (c3_y*)&(in128.v[0]), n); f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}), &in128, &in128); qscal(len_x, in128, (float128_t*)x_bytes, 1); break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. u3a_free(x_bytes); return r_data; } /* mods - x % [n] = x - r*floor(x/r) remainder after scalar division */ u3_noun u3qi_la_mods_i754(u3_noun x_data, u3_noun n, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. 
// len_x is length in base units c3_d len_x = _get_length(shape); // syz_x is length in bytes c3_d syz_x = len_x * pow(2, bloq-3); // x_bytes is the data array (w/ leading 0x1, skipped by ?axpy) // we reuse it for results for parsimony c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y)); u3r_bytes(0, syz_x+1, x_bytes, x_data); float16_t n16, in16; float32_t n32, in32; float64_t n64, in64; float128_t n128, in128; // Switch on the block size. switch (u3x_atom(bloq)) { case 4: u3r_bytes(0, 2, (c3_y*)&(n16.v), n); in16 = f16_div((float16_t){SB_REAL16_ONE}, n16); for (c3_d i = 0; i < len_x; i++) { float16_t x_val16 = ((float16_t*)x_bytes)[i]; // Perform division x/n float16_t div_result16 = f16_mul(in16, x_val16); // Compute floor of the division result c3_ds floor_result16 = f16_to_i64(div_result16, softfloat_round_minMag, false); float16_t floor_float16 = i64_to_f16(floor_result16); // Multiply n by floor(x/n) float16_t mult_result16 = f16_mul(n16, floor_float16); // Compute remainder: x - n * floor(x/n) ((float16_t*)x_bytes)[i] = f16_sub(x_val16, mult_result16); } break; case 5: u3r_bytes(0, 4, (c3_y*)&(n32.v), n); in32 = f32_div((float32_t){SB_REAL32_ONE}, n32); for (c3_d i = 0; i < len_x; i++) { float32_t x_val32 = ((float32_t*)x_bytes)[i]; // Perform division x/n float32_t div_result32 = f32_mul(in32, x_val32); // Compute floor of the division result c3_ds floor_result32 = f32_to_i64(div_result32, softfloat_round_minMag, false); float32_t floor_float32 = i64_to_f32(floor_result32); // Multiply n by floor(x/n) float32_t mult_result32 = f32_mul(n32, floor_float32); // Compute remainder: x - n * floor(x/n) ((float32_t*)x_bytes)[i] = f32_sub(x_val32, mult_result32); } break; case 6: u3r_bytes(0, 8, (c3_y*)&(n64.v), n); in64 = f64_div((float64_t){SB_REAL64_ONE}, n64); for (c3_d i = 0; i < len_x; i++) { float64_t x_val64 = ((float64_t*)x_bytes)[i]; // Perform division x/n float64_t div_result64 = f64_mul(in64, x_val64); // Compute floor of the division result 
c3_ds floor_result64 = f64_to_i64(div_result64, softfloat_round_minMag, false); float64_t floor_float64 = i64_to_f64(floor_result64); // Multiply n by floor(x/n) float64_t mult_result64 = f64_mul(n64, floor_float64); // Compute remainder: x - n * floor(x/n) ((float64_t*)x_bytes)[i] = f64_sub(x_val64, mult_result64); } break; case 7: u3r_bytes(0, 16, (c3_y*)&(n128.v[0]), n); f128M_div(&((float128_t){SB_REAL128L_ONE,SB_REAL128U_ZERO}), &n128, &in128); for (c3_d i = 0; i < len_x; i++) { float128_t x_val128 = ((float128_t*)x_bytes)[i]; // Perform division x/n float128_t div_result128; f128M_mul((float128_t*)&in128, (float128_t*)&x_val128, (float128_t*)&div_result128); // Compute floor of the division result c3_ds floor_result128 = f128M_to_i64(&div_result128, softfloat_round_minMag, false); float128_t floor_float128; i64_to_f128M(floor_result128, &floor_float128); // Multiply n by floor(x/n) float128_t mult_result128; f128M_mul(((float128_t*)&n128), ((float128_t*)&floor_float128), ((float128_t*)&mult_result128)); // Compute remainder: x - n * floor(x/n) f128M_sub(((float128_t*)&x_val128), ((float128_t*)&mult_result128), &(((float128_t*)x_bytes)[i])); } break; } // r_data is the result noun of [data] u3_noun r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), x_bytes); // Clean up and return. u3a_free(x_bytes); return r_data; } /* dot - ?dot = x ยท y */ u3_noun u3qi_la_dot_i754(u3_noun x_data, u3_noun y_data, u3_noun shape, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } // Unpack the data as a byte array. We assume total length < 2**64. 
// len_x is length in base units
  c3_d len_x = _get_length(shape);

  // syz_x is length in bytes
  c3_d syz_x = len_x * pow(2, bloq-3);

  // x_bytes is the data array (w/o leading 0x1)
  c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
  u3r_bytes(0, syz_x, x_bytes, x_data);

  // y_bytes is the data array (w/ leading 0x1, skipped by ?dot since
  // only len_x elements are read)
  c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
  u3r_bytes(0, syz_x+1, y_bytes, y_data);

  u3_noun r_data;

  // Switch on the block size.  Each case builds a two-element scratch
  // array: slot 0 is the dot product, slot 1 supplies the 0x1 pin byte
  // directly above it, and only (width+1) bytes are turned into a noun.
  switch (u3x_atom(bloq)) {
    case 4: {
      float16_t r16[2];
      r16[0] = hdot(len_x, (float16_t*)x_bytes, 1, (float16_t*)y_bytes, 1);
      r16[1].v = 0x1;
      r_data = u3i_bytes((2+1)*sizeof(c3_y), (c3_y*)r16);
      break;}

    case 5: {
      float32_t r32[2];
      r32[0] = sdot(len_x, (float32_t*)x_bytes, 1, (float32_t*)y_bytes, 1);
      r32[1].v = 0x1;
      r_data = u3i_bytes((4+1)*sizeof(c3_y), (c3_y*)r32);
      break;}

    case 6: {
      float64_t r64[2];
      r64[0] = ddot(len_x, (float64_t*)x_bytes, 1, (float64_t*)y_bytes, 1);
      r64[1].v = 0x1;
      r_data = u3i_bytes((8+1)*sizeof(c3_y), (c3_y*)r64);
      break;}

    case 7: {
      float128_t r128[2];
      r128[0] = qdot(len_x, (float128_t*)x_bytes, 1, (float128_t*)y_bytes, 1);
      r128[1] = (float128_t){0x1, 0x0};
      r_data = u3i_bytes((16+1)*sizeof(c3_y), (c3_y*)r128);
      break;}
  }

  // Clean up and return.
  u3a_free(x_bytes);
  u3a_free(y_bytes);

  return r_data;
}

/* diag - diag(x)
**
**   Extracts the main diagonal of a square two-dimensional ray.  Bails
**   with %exit when shape does not have exactly two entries or when the
**   two dimensions differ; returns u3_none when bloq is outside 4..7.
*/
u3_noun u3qi_la_diag(u3_noun x_data,
                     u3_noun shape,
                     u3_noun bloq)
{
  // Fence on valid bloq size.
  if (bloq < 4 || bloq > 7) {
    return u3_none;
  }

  // Assert length of dims is 2.
  if (u3qb_lent(shape) != 2) {
    return u3m_bail(c3__exit);
  }

  // Unpack shape into an array of dimensions.
  c3_d *dims = _get_dims(shape);
  if (dims[0] != dims[1]) {
    return u3m_bail(c3__exit);
  }

  // Unpack the data as a byte array.  We assume total length < 2**64.
  c3_d len_x = _get_length(shape);
  c3_d syz_x = len_x * pow(2, bloq - 3);
  // wyd is the width of one element in bytes
  c3_d wyd = pow(2, bloq - 3);
  c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
  u3r_bytes(0, syz_x+1, x_bytes, x_data);

  // the result holds dims[1] elements plus the pin byte
  c3_d syz_y = wyd * dims[1];
  c3_y* y_bytes = (c3_y*)u3a_malloc((syz_y+1)*sizeof(c3_y));

  u3_noun r_data;

  // Copy element (i, i), at linear index i*dims[0]+i, to output slot i.
  for (c3_d i = 0; i < dims[1]; i++) {
    // Scan across whole field width.
    for (c3_y k = 0; k < wyd; k++) {
      y_bytes[i*wyd+k] = x_bytes[(i*dims[0]+i)*wyd+k];
    }
  }
  y_bytes[syz_y] = 0x1;  // pin head

  // Unpack the result back into a noun.
  r_data = u3i_bytes((syz_y+1)*sizeof(c3_y), y_bytes);

  u3a_free(x_bytes);
  u3a_free(y_bytes);
  u3a_free(dims);

  return r_data;
}

/* transpose - x'
**
**   Permutes the elements of a two-dimensional ray.  Bails with %exit
**   when shape does not have exactly two entries.  Unlike its siblings
**   this function has no fence on bloq.
**
**   NOTE(review): the source index uses dims[0] as the stride while its
**   outer counter i runs over dims[1]; for non-square shapes confirm
**   this matches the intended storage layout.
*/
u3_noun u3qi_la_transpose(u3_noun x_data,
                          u3_noun shape,
                          u3_noun bloq)
{
  // Assert length of dims is 2.
  if (u3qb_lent(shape) != 2) {
    return u3m_bail(c3__exit);
  }

  // Unpack shape into an array of dimensions.
  c3_d *dims = _get_dims(shape);

  // Unpack the data as a byte array.  We assume total length < 2**64.
  c3_d len_x = _get_length(shape);
  c3_d syz_x = len_x * pow(2, bloq - 3);
  // wyd is the width of one element in bytes
  c3_d wyd = pow(2, bloq - 3);
  c3_y* x_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));
  u3r_bytes(0, syz_x+1, x_bytes, x_data);
  c3_y* y_bytes = (c3_y*)u3a_malloc((syz_x+1)*sizeof(c3_y));

  u3_noun r_data;

  // Copy the element at linear index i*dims[0]+j to index j*dims[1]+i.
  for (c3_d i = 0; i < dims[1]; i++) {
    for (c3_d j = 0; j < dims[0]; j++) {
      // Scan across whole field width.
      for (c3_y k = 0; k < wyd; k++) {
        y_bytes[(j*dims[1]+i)*wyd+k] = x_bytes[(i*dims[0]+j)*wyd+k];
      }
    }
  }
  y_bytes[syz_x] = 0x1;  // pin head

  // Unpack the result back into a noun.
  r_data = u3i_bytes((syz_x+1)*sizeof(c3_y), y_bytes);

  u3a_free(x_bytes);
  u3a_free(y_bytes);
  u3a_free(dims);

  return r_data;
}

/* linspace - [a a+(b-a)/(n-1) ... b]
**
**   Produces n values running from a to b inclusive.
*/
u3_noun u3qi_la_linspace_i754(u3_noun a,
                              u3_noun b,
                              u3_noun n,
                              u3_noun bloq)
{
  // Fence on valid bloq size.
if (bloq < 4 || bloq > 7) { return u3_none; } u3_noun r_data; switch (u3x_atom(bloq)) { case 4: { float16_t a16, b16; u3r_bytes(0, 2, (c3_y*)&(a16.v), a); u3r_bytes(0, 2, (c3_y*)&(b16.v), b); float16_t span16 = f16_sub(b16, a16); float16_t interval16 = f16_div(span16, i32_to_f16(n-1)); c3_y* x_bytes16 = (c3_y*)u3a_malloc((n*2+1)*sizeof(c3_y)); for (c3_d i = 1; i < n-1; i++) { ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); } // Assign in reverse order so that n=1 case is correctly left-hand bound. ((float16_t*)x_bytes16)[n-1] = b16; ((float16_t*)x_bytes16)[0] = a16; x_bytes16[n*2] = 0x1; // pin head r_data = u3i_bytes((n*2+1)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); break;} case 5: { float32_t a32, b32; u3r_bytes(0, 4, (c3_y*)&(a32.v), a); u3r_bytes(0, 4, (c3_y*)&(b32.v), b); float32_t span32 = f32_sub(b32, a32); float32_t interval32 = f32_div(span32, i32_to_f32(n-1)); c3_y* x_bytes32 = (c3_y*)u3a_malloc((n*4+1)*sizeof(c3_y)); for (c3_d i = 1; i < n-1; i++) { ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); } ((float32_t*)x_bytes32)[n-1] = b32; ((float32_t*)x_bytes32)[0] = a32; x_bytes32[n*4] = 0x1; // pin head r_data = u3i_bytes((n*4+1)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); break;} case 6: { float64_t a64, b64; u3r_bytes(0, 8, (c3_y*)&(a64.v), a); u3r_bytes(0, 8, (c3_y*)&(b64.v), b); float64_t span64 = f64_sub(b64, a64); float64_t interval64 = f64_div(span64, i32_to_f64(n-1)); c3_y* x_bytes64 = (c3_y*)u3a_malloc((n*8+1)*sizeof(c3_y)); for (c3_d i = 1; i < n-1; i++) { ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); } ((float64_t*)x_bytes64)[n-1] = b64; ((float64_t*)x_bytes64)[0] = a64; x_bytes64[n*8] = 0x1; // pin head r_data = u3i_bytes((n*8+1)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); break;} case 7: { float128_t a128, b128; u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); float128_t span128; f128M_sub(&b128, &a128, 
&span128); float128_t interval128; float128_t n128; i32_to_f128M(n-1, &n128); f128M_div(&span128, &n128, &interval128); c3_y* x_bytes128 = (c3_y*)u3a_malloc((n*16+1)*sizeof(c3_y)); float128_t i128; for (c3_d i = 1; i < n-1; i++) { i32_to_f128M(i, &i128); f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); } ((float128_t*)x_bytes128)[n-1] = b128; ((float128_t*)x_bytes128)[0] = a128; x_bytes128[n*16] = 0x1; // pin head r_data = u3i_bytes((n*16+1)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); break;} } return r_data; } /* range - [a a+d ... b] */ u3_noun u3qi_la_range_i754(u3_noun a, u3_noun b, u3_noun d, u3_noun bloq) { // Fence on valid bloq size. if (bloq < 4 || bloq > 7) { return u3_none; } u3_noun r_data; switch (u3x_atom(bloq)) { case 4: { float16_t a16, b16, interval16; u3r_bytes(0, 2, (c3_y*)&(a16.v), a); u3r_bytes(0, 2, (c3_y*)&(b16.v), b); u3r_bytes(0, 2, (c3_y*)&(interval16.v), d); c3_d n16 = f16_to_i64(f16_ceil(f16_div(f16_sub(b16, a16), interval16)), softfloat_round_minMag, false); c3_y* x_bytes16 = (c3_y*)u3a_malloc(((n16+1)*2)*sizeof(c3_y)); ((float16_t*)x_bytes16)[0] = a16; for (c3_d i = 1; i < n16; i++) { ((float16_t*)x_bytes16)[i] = f16_add(a16, f16_mul(i32_to_f16(i), interval16)); } ((float16_t*)x_bytes16)[n16].v = 0x1; // pin head r_data = u3i_bytes(((n16+1)*2)*sizeof(c3_y), x_bytes16); u3a_free(x_bytes16); break;} case 5: { float32_t a32, b32, interval32; u3r_bytes(0, 4, (c3_y*)&(a32.v), a); u3r_bytes(0, 4, (c3_y*)&(b32.v), b); u3r_bytes(0, 4, (c3_y*)&(interval32.v), d); c3_d n32 = f32_to_i64(f32_ceil(f32_div(f32_sub(b32, a32), interval32)), softfloat_round_minMag, false); c3_y* x_bytes32 = (c3_y*)u3a_malloc(((n32+1)*4)*sizeof(c3_y)); ((float32_t*)x_bytes32)[0] = a32; for (c3_d i = 1; i < n32; i++) { ((float32_t*)x_bytes32)[i] = f32_add(a32, f32_mul(i32_to_f32(i), interval32)); } ((float32_t*)x_bytes32)[n32].v = 0x1; // pin head r_data = 
u3i_bytes(((n32+1)*4)*sizeof(c3_y), x_bytes32); u3a_free(x_bytes32); break;} case 6: { float64_t a64, b64, interval64; u3r_bytes(0, 8, (c3_y*)&(a64.v), a); u3r_bytes(0, 8, (c3_y*)&(b64.v), b); u3r_bytes(0, 8, (c3_y*)&(interval64.v), d); c3_d n64 = f64_to_i64(f64_ceil(f64_div(f64_sub(b64, a64), interval64)), softfloat_round_minMag, false); c3_y* x_bytes64 = (c3_y*)u3a_malloc(((n64+1)*8)*sizeof(c3_y)); ((float64_t*)x_bytes64)[0] = a64; for (c3_d i = 1; i < n64; i++) { ((float64_t*)x_bytes64)[i] = f64_add(a64, f64_mul(i32_to_f64(i), interval64)); } ((float64_t*)x_bytes64)[n64].v = 0x1; // pin head r_data = u3i_bytes(((n64+1)*8)*sizeof(c3_y), x_bytes64); u3a_free(x_bytes64); break;} case 7: { float128_t a128, b128, interval128; u3r_bytes(0, 16, (c3_y*)&(a128.v[0]), a); u3r_bytes(0, 16, (c3_y*)&(b128.v[0]), b); u3r_bytes(0, 16, (c3_y*)&(interval128.v[0]), d); float128_t tmp; f128M_sub(&b128, &a128, &tmp); f128M_div(&tmp, &interval128, &tmp); f128M_ceil(&tmp, &tmp); c3_d n128 = f128M_to_i64(&tmp, softfloat_round_minMag, false); c3_y* x_bytes128 = (c3_y*)u3a_malloc(((n128+1)*16)*sizeof(c3_y)); float128_t i128; ((float128_t*)x_bytes128)[0] = a128; for (c3_d i = 1; i < n128; i++) { i32_to_f128M(i, &i128); f128M_mul(&i128, &interval128, &((float128_t*)x_bytes128)[i]); f128M_add(&a128, &((float128_t*)x_bytes128)[i], &((float128_t*)x_bytes128)[i]); } ((float128_t*)x_bytes128)[n128].v[0] = 0x1; // pin head ((float128_t*)x_bytes128)[n128].v[1] = 0x0; // pin head r_data = u3i_bytes(((n128+1)*16)*sizeof(c3_y), x_bytes128); u3a_free(x_bytes128); break;} } return r_data; } /* trace - tr(x) */ u3_noun u3qi_la_trace_i754(u3_noun x_data, u3_noun shape, u3_noun bloq) { u3_noun d_data = u3qi_la_diag(x_data, shape, bloq); c3_d len_x0 = _get_dims(shape)[0]; u3_noun r_data = u3qi_la_dot_i754(d_data, d_data, u3nt(len_x0, 0x1, u3_nul), u3k(bloq)); return r_data; } /* mmul */ u3_noun u3qi_la_mmul_i754(u3_noun x_data, u3_noun y_data, u3_noun x_shape, u3_noun y_shape, u3_noun bloq) { // Unpack 
// the data as a byte array.  We assume total length < 2**64.
  c3_d M = u3x_atom(u3h(x_shape));
  c3_d Na= u3x_atom(u3h(u3t(x_shape)));
  c3_d Nb= u3x_atom(u3h(y_shape));
  c3_d P = u3x_atom(u3h(u3t(y_shape)));

  // Both shapes must be exactly two-dimensional and the inner
  // dimensions must agree.
  if ((u3_nul != u3t(u3t(x_shape))) ||
      (u3_nul != u3t(u3t(y_shape))) ||
      (Na != Nb)) {
    return u3m_bail(c3__exit);
  }
  c3_d N = Na;

  // Unpack the data as a byte array.  We assume total length < 2**64.
  // len_x is length in base units
  c3_d len_x = _get_length(x_shape);  // M*N

  // syz_x is length in bytes
  c3_d syz_x = len_x * pow(2, bloq-3);  // M*N

  // x_bytes is the data array (w/o leading 0x1)
  c3_y* x_bytes = (c3_y*)u3a_malloc(syz_x*sizeof(c3_y));
  u3r_bytes(0, syz_x, x_bytes, x_data);

  // len_y is length in base units
  c3_d len_y = _get_length(y_shape);  // N*P

  // syz_y is length in bytes
  c3_d syz_y = len_y * pow(2, bloq-3);  // N*P

  // y_bytes is the data array (w/o leading 0x1)
  c3_y* y_bytes = (c3_y*)u3a_malloc(syz_y*sizeof(c3_y));
  u3r_bytes(0, syz_y, y_bytes, y_data);

  // len_r is length in base units
  c3_d len_r = M*P;  // M*P

  // syz_r is length in bytes
  c3_d syz_r = len_r * pow(2, bloq-3);  // M*P

  // r_bytes is the result array
  c3_y* r_bytes = (c3_y*)u3a_malloc((syz_r+1)*sizeof(c3_y));
  r_bytes[syz_r] = 0x1;  // pin head
  // initialize with 0x0s
  for (c3_d i = 0; i < syz_r; i++) {
    r_bytes[i] = 0x0;
  }

  // Switch on the block size.
  // NOTE(review): there is no bloq fence in this function; a bloq
  // outside 4..7 falls through the switch and returns the zeroed buffer.
  switch (u3x_atom(bloq)) {
    case 4:
      hgemm('N', 'N', M, N, P, (float16_t){SB_REAL16_ONE}, (float16_t*)x_bytes, N, (float16_t*)y_bytes, P, (float16_t){SB_REAL16_ZERO}, (float16_t*)r_bytes, P);
      break;

    case 5:
      sgemm('N', 'N', M, N, P, (float32_t){SB_REAL32_ONE}, (float32_t*)x_bytes, N, (float32_t*)y_bytes, P, (float32_t){SB_REAL32_ZERO}, (float32_t*)r_bytes, P);
      break;

    case 6:
      dgemm('N', 'N', M, N, P, (float64_t){SB_REAL64_ONE}, (float64_t*)x_bytes, N, (float64_t*)y_bytes, P, (float64_t){SB_REAL64_ZERO}, (float64_t*)r_bytes, P);
      break;

    case 7:
      qgemm('N', 'N', M, N, P, (float128_t){SB_REAL128L_ONE,SB_REAL128U_ONE}, (float128_t*)x_bytes, N, (float128_t*)y_bytes, P, (float128_t){SB_REAL128L_ZERO,SB_REAL128U_ZERO}, (float128_t*)r_bytes, P);
      break;
  }

  // Unpack the result back into a noun.
  u3_noun r_data = u3i_bytes(syz_r+1, r_bytes);

  u3_noun M_ = u3i_chub(M);
  u3_noun P_ = u3i_chub(P);

  u3a_free(x_bytes);
  u3a_free(y_bytes);
  u3a_free(r_bytes);

  // The result is a full ray: meta [[M P ~] bloq %i754 ~] and the data.
  return u3nc(u3nq(u3nt(M_, P_, u3_nul), u3k(bloq), c3__i754, u3_nul), r_data);
}

/* u3wi_la_add: jet entry point for elementwise addition of two rays.
**
**   Pulls [x_meta x_data] and [y_meta y_data] out of the sample, bails
**   with %exit unless both metas are equal and both data are atoms,
**   sets the rounding mode from axis 30 of the core, and wraps the
**   result data in x's meta.  Returns u3_none for kinds other than
**   %i754 or when the inner jet declines.
*/
u3_noun u3wi_la_add(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data,
          y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6, &y_meta,
                       u3x_sam_7, &y_data,
                       0) ||
       c3n == u3r_sing(x_meta, y_meta) ||
       c3n == u3ud(x_data) ||
       c3n == u3ud(y_data) )
  {
    return u3m_bail(c3__exit);
  } else {
    u3_noun x_shape, x_bloq, x_kind, x_tail,
            rnd;
    x_shape = u3h(x_meta);            //  2
    x_bloq = u3h(u3t(x_meta));        //  6
    x_kind = u3h(u3t(u3t(x_meta)));   // 14
    x_tail = u3t(u3t(u3t(x_meta)));   // 15
    rnd = u3h(u3t(u3t(u3t(cor))));    // 30
    if ( c3n == u3ud(x_bloq) ||
         c3n == u3ud(x_kind) ||
         c3n == u3ud(rnd)
       )
    {
      return u3m_bail(c3__exit);
    } else {
      switch (x_kind) {
        case c3__i754:
          _set_rounding(rnd);
          u3_noun r_data = u3qi_la_add_i754(x_data, y_data, x_shape, x_bloq);
          if (r_data == u3_none) { return u3_none; }
          return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);

        default:
          return u3_none;
      }
    }
  }
}

u3_noun
u3wi_la_sub(u3_noun cor)
{
  /* u3wi_la_sub: jet entry point for elementwise subtraction.
  **
  **   Both sample rays must have equal meta and atom data, otherwise
  **   the jet bails with %exit.  Returns u3_none for kinds other than
  **   %i754 or when the inner jet declines.
  */
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data, y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6, &y_meta,
                       u3x_sam_7, &y_data,
                       0) ||
       c3n == u3r_sing(x_meta, y_meta) ||
       c3n == u3ud(x_data) ||
       c3n == u3ud(y_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15
  u3_noun rnd     = u3h(u3t(u3t(u3t(cor))));   // 30

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) ||
       c3n == u3ud(rnd) )
  {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      _set_rounding(rnd);
      u3_noun r_data = u3qi_la_sub_i754(x_data, y_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_mul: jet entry point for elementwise multiplication.
**   Same argument checks and result wrapping as u3wi_la_sub.
*/
u3_noun u3wi_la_mul(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data, y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6, &y_meta,
                       u3x_sam_7, &y_data,
                       0) ||
       c3n == u3r_sing(x_meta, y_meta) ||
       c3n == u3ud(x_data) ||
       c3n == u3ud(y_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15
  u3_noun rnd     = u3h(u3t(u3t(u3t(cor))));   // 30

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) ||
       c3n == u3ud(rnd) )
  {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      _set_rounding(rnd);
      u3_noun r_data = u3qi_la_mul_i754(x_data, y_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_div: jet entry point for elementwise division.
**   Same argument checks and result wrapping as u3wi_la_sub.
*/
u3_noun u3wi_la_div(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data, y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6, &y_meta,
                       u3x_sam_7, &y_data,
                       0) ||
       c3n == u3r_sing(x_meta, y_meta) ||
       c3n == u3ud(x_data) ||
       c3n == u3ud(y_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15
  u3_noun rnd     = u3h(u3t(u3t(u3t(cor))));   // 30

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) ||
       c3n == u3ud(rnd) )
  {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      _set_rounding(rnd);
      u3_noun r_data = u3qi_la_div_i754(x_data, y_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_mod: jet entry point for elementwise modulus.
**   Same argument checks and result wrapping as u3wi_la_sub.
*/
u3_noun u3wi_la_mod(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data, y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6, &y_meta,
                       u3x_sam_7, &y_data,
                       0) ||
       c3n == u3r_sing(x_meta, y_meta) ||
       c3n == u3ud(x_data) ||
       c3n == u3ud(y_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15
  u3_noun rnd     = u3h(u3t(u3t(u3t(cor))));   // 30

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) ||
       c3n == u3ud(rnd) )
  {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      _set_rounding(rnd);
      u3_noun r_data = u3qi_la_mod_i754(x_data, y_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_cumsum: jet entry point for the cumulative sum of one ray.
**
**   Bails with %exit when the data is not an atom, when bloq or kind
**   are not atoms, or when _check rejects the ray.  The result is
**   wrapped in a meta whose shape is [1 ~].
*/
u3_noun u3wi_la_cumsum(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15
  u3_noun rnd     = u3h(u3t(u3t(u3t(cor))));   // 30

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  // x_meta and x_data are borrowed from the core, so take our own
  // references for the temporary ray cell and release it once checked
  // (the cell used to be leaked).
  u3_noun ray = u3nc(u3k(x_meta), u3k(x_data));
  c3_o    fit = _check(ray);
  u3z(ray);

  if ( c3n == fit ) {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      _set_rounding(rnd);
      u3_noun r_data = u3qi_la_cumsum_i754(x_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3nc(0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_argmin: jet entry point; passes through whatever
**   u3qi_la_argmin_i754 produces (a bare atom index, per the inline
**   note) without wrapping it in a meta.
*/
u3_noun u3wi_la_argmin(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  // Temporary ray cell built from our own references; freed after the
  // check (it used to be leaked).
  u3_noun ray = u3nc(u3k(x_meta), u3k(x_data));
  c3_o    fit = _check(ray);
  u3z(ray);

  if ( c3n == fit ) {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_argmin_i754(x_data, x_shape, x_bloq);
      // bare atom (@ index)
      return r_data;
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_ravel: jet entry point; passes through whatever
**   u3qi_la_ravel_i754 produces (a (list @), per the inline note).
*/
u3_noun u3wi_la_ravel(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  // Temporary ray cell built from our own references; freed after the
  // check (it used to be leaked).
  u3_noun ray = u3nc(u3k(x_meta), u3k(x_data));
  c3_o    fit = _check(ray);
  u3z(ray);

  if ( c3n == fit ) {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_ravel_i754(x_data, x_shape, x_bloq);
      // (list @)
      return r_data;
    }
    default:
      return u3_none;
  }
}
/* u3wi_la_argmax: jet entry point; passes through whatever
**   u3qi_la_argmax_i754 produces (a bare atom index, per the inline
**   note) without wrapping it in a meta.
*/
u3_noun u3wi_la_argmax(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  // x_meta and x_data are borrowed from the core, so take our own
  // references for the temporary ray cell and release it once checked
  // (the cell used to be leaked).
  u3_noun ray = u3nc(u3k(x_meta), u3k(x_data));
  c3_o    fit = _check(ray);
  u3z(ray);

  if ( c3n == fit ) {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_argmax_i754(x_data, x_shape, x_bloq);
      // bare atom (@ index)
      return r_data;
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_min: jet entry point for the minimum of a ray.
**   The result data is wrapped in a meta whose shape is [1 1 ~].
*/
u3_noun u3wi_la_min(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  // Temporary ray cell built from our own references; freed after the
  // check (it used to be leaked).
  u3_noun ray = u3nc(u3k(x_meta), u3k(x_data));
  c3_o    fit = _check(ray);
  u3z(ray);

  if ( c3n == fit ) {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_min_i754(x_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_max: jet entry point for the maximum of a ray.
**   The result data is wrapped in a meta whose shape is [1 1 ~].
*/
u3_noun u3wi_la_max(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  // Temporary ray cell built from our own references; freed after the
  // check (it used to be leaked).
  u3_noun ray = u3nc(u3k(x_meta), u3k(x_data));
  c3_o    fit = _check(ray);
  u3z(ray);

  if ( c3n == fit ) {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_max_i754(x_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_abs: jet entry point for the elementwise absolute value.
**   The result keeps the shape, bloq, kind and tail of the input meta.
*/
u3_noun u3wi_la_abs(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_2, &x_meta,
                       u3x_sam_3, &x_data,
                       0) ||
       c3n == u3ud(x_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14
  u3_noun x_tail  = u3t(u3t(u3t(x_meta)));     // 15

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_abs_i754(x_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);
    }
    default:
      return u3_none;
  }
}

/* u3wi_la_gth: jet entry point for the elementwise comparison computed
**   by u3qi_la_gth_i754.  Both rays must have equal meta; the result
**   reuses x's meta unchanged.
*/
u3_noun u3wi_la_gth(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data, y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6, &y_meta,
                       u3x_sam_7, &y_data,
                       0) ||
       c3n == u3r_sing(x_meta, y_meta) ||
       c3n == u3ud(x_data) ||
       c3n == u3ud(y_data) )
  {
    return u3m_bail(c3__exit);
  }

  u3_noun x_shape = u3h(x_meta);               //  2
  u3_noun x_bloq  = u3h(u3t(x_meta));          //  6
  u3_noun x_kind  = u3h(u3t(u3t(x_meta)));     // 14

  if ( c3n == u3ud(x_bloq) ||
       c3n == u3ud(x_kind) )
  {
    return u3m_bail(c3__exit);
  }

  switch (x_kind) {
    case c3__i754: {
      u3_noun r_data = u3qi_la_gth_i754(x_data, y_data, x_shape, x_bloq);
      if (r_data == u3_none) { return u3_none; }
      return u3nc(u3k(x_meta), r_data);
    }
    default:
      return u3_none;
  }
}

u3_noun u3wi_la_gte(u3_noun cor)
{
  // Each argument is a ray, [=meta data=@ux]
  u3_noun x_meta, x_data, y_meta, y_data;

  if ( c3n == u3r_mean(cor,
                       u3x_sam_4, &x_meta,
                       u3x_sam_5, &x_data,
                       u3x_sam_6,
&y_meta, u3x_sam_7, &y_data, 0) || c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { u3_noun r_data = u3qi_la_gte_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3k(x_meta), r_data);} default: return u3_none; } } } } u3_noun u3wi_la_lth(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { u3_noun r_data = u3qi_la_lth_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3k(x_meta), r_data);} default: return u3_none; } } } } u3_noun u3wi_la_lte(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { u3_noun r_data = 
u3qi_la_lte_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3k(x_meta), r_data);} default: return u3_none; } } } } u3_noun u3wi_la_adds(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || c3n == u3ud(x_data) || c3n == u3ud(n) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_adds_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } u3_noun u3wi_la_subs(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || c3n == u3ud(x_data) || c3n == u3ud(n) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_subs_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } u3_noun u3wi_la_muls(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || c3n == u3ud(x_data) || c3n == u3ud(n) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail, 
rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_muls_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } u3_noun u3wi_la_divs(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || c3n == u3ud(x_data) || c3n == u3ud(n) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_divs_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } u3_noun u3wi_la_mods(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, n; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_3, &n, 0) || c3n == u3ud(x_data) || c3n == u3ud(n) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_mods_i754(x_data, n, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } u3_noun 
u3wi_la_dot(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3r_sing(x_meta, y_meta) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_dot_i754(x_data, y_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } } u3_noun u3wi_la_transpose(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; if ( c3n == u3r_mean(cor, u3x_sam_2, &x_meta, u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(cor) ) { return u3m_bail(c3__exit); } else { u3_noun r_data = u3qi_la_transpose(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(u3k(u3h(x_shape)), u3k(u3h(u3t(x_shape))), u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); } } } u3_noun u3wi_la_linspace(u3_noun cor) { u3_noun x_meta, a, b, n, rnd; if ( c3n == u3r_mean(cor, u3x_sam_2, &x_meta, u3x_sam_12, &a, u3x_sam_13, &b, u3x_sam_7, &n, 0)) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = 
u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == u3ud(n) || (n < 1) // crash on zero size ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_linspace_i754(a, b, n, x_bloq); if (r_data == u3_none) { return u3_none; } x_shape = u3nc(u3x_atom(n), u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } } u3_noun u3wi_la_range(u3_noun cor) { u3_noun x_meta, a, b, d, rnd; if ( c3n == u3r_mean(cor, u3x_sam_2, &x_meta, u3x_sam_12, &a, u3x_sam_13, &b, u3x_sam_7, &d, 0)) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_range_i754(a, b, d, x_bloq); if (r_data == u3_none) { return u3_none; } c3_d a_, b_, d_; c3_ds n_; switch (x_bloq) { case 4: u3r_bytes(0, 2, (c3_y*)&a_, a); u3r_bytes(0, 2, (c3_y*)&b_, b); u3r_bytes(0, 2, (c3_y*)&d_, d); n_ = f16_to_i64(f16_ceil(f16_div(f16_sub((float16_t){b_}, (float16_t){a_}), (float16_t){d_})), softfloat_round_minMag, false) - 1; break; case 5: u3r_bytes(0, 4, (c3_y*)&a_, a); u3r_bytes(0, 4, (c3_y*)&b_, b); u3r_bytes(0, 4, (c3_y*)&d_, d); n_ = f32_to_i64(f32_ceil(f32_div(f32_sub((float32_t){b_}, (float32_t){a_}), (float32_t){d_})), softfloat_round_minMag, false) - 1; break; case 6: u3r_bytes(0, 8, (c3_y*)&a_, a); u3r_bytes(0, 8, (c3_y*)&b_, b); u3r_bytes(0, 8, (c3_y*)&d_, d); n_ = f64_to_i64(f64_ceil(f64_div(f64_sub((float64_t){b_}, (float64_t){a_}), (float64_t){d_})), 
softfloat_round_minMag, false) - 1; break; case 7: { c3_d a__[2], b__[2], d__[2]; u3r_bytes(0, 16, (c3_y*)&a__, a); u3r_bytes(0, 16, (c3_y*)&b__, b); u3r_bytes(0, 16, (c3_y*)&d__, d); float128_t tmp; f128M_sub((float128_t*)&b__, (float128_t*)&a__, &tmp); f128M_div(&tmp, (float128_t*)&d__, &tmp); f128M_ceil(&tmp, &tmp); n_ = f128M_to_i64(&tmp, softfloat_round_minMag, false) - 1; break;} } u3_noun n = u3i_chub(n_+1); x_shape = u3nc(u3k(n), u3_nul); return u3nc(u3nq(u3k(x_shape), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); default: return u3_none; } } } } u3_noun u3wi_la_diag(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; if ( c3n == u3r_mean(cor, u3x_sam_2, &x_meta, u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 x_tail = u3t(u3t(u3t(x_meta))); // 15 if ( c3n == u3ud(x_bloq) || c3n == u3ud(x_kind) || c3n == _check(cor) ) { return u3m_bail(c3__exit); } else { u3_noun r_data = u3qi_la_diag(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } c3_d len_x0 = _get_dims(x_shape)[0]; return u3nc(u3nq(u3nt(len_x0, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data); } } } u3_noun u3wi_la_trace(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data; if ( c3n == u3r_mean(cor, u3x_sam_2, &x_meta, u3x_sam_3, &x_data, 0) || c3n == u3ud(x_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, x_tail; if ( c3n == u3r_mean(x_meta, 2, &x_shape, 6, &x_bloq, 14, &x_kind, 15, &x_tail, 0) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: { u3_noun r_data = u3qi_la_trace_i754(x_data, x_shape, x_bloq); if (r_data == u3_none) { return u3_none; } return u3nc(u3nq(u3nt(0x1, 0x1, u3_nul), u3k(x_bloq), u3k(x_kind), u3k(x_tail)), r_data);} default: return u3_none; } } } } u3_noun 
u3wi_la_mmul(u3_noun cor) { // Each argument is a ray, [=meta data=@ux] u3_noun x_meta, x_data, y_meta, y_data; if ( c3n == u3r_mean(cor, u3x_sam_4, &x_meta, u3x_sam_5, &x_data, u3x_sam_6, &y_meta, u3x_sam_7, &y_data, 0) || c3n == u3ud(x_data) || c3n == u3ud(y_data) ) { return u3m_bail(c3__exit); } else { u3_noun x_shape, x_bloq, x_kind, y_shape, rnd; x_shape = u3h(x_meta); // 2 x_bloq = u3h(u3t(x_meta)); // 6 x_kind = u3h(u3t(u3t(x_meta))); // 14 y_shape = u3h(y_meta); // 2 rnd = u3h(u3t(u3t(u3t(cor)))); // 30 if ( c3n == _check(u3nc(x_meta, x_data)) || c3n == _check(u3nc(y_meta, y_data)) ) { return u3m_bail(c3__exit); } else { switch (x_kind) { case c3__i754: _set_rounding(rnd); u3_noun r_data = u3qi_la_mmul_i754(x_data, y_data, x_shape, y_shape, x_bloq); // result is already [meta data] return r_data; default: return u3_none; } } } }