From fcedfddf00b3f994e4f4e40332ac7fc192c63244 Mon Sep 17 00:00:00 2001
From: polwex
Date: Sun, 5 Oct 2025 21:56:51 +0700
Subject: claude is gud
---
 vere/ext/gmp/gen/x86_64-macos/mpn/rsblsh_n.s | 269 +++++++++++++++++++++++++++
 1 file changed, 269 insertions(+)
 create mode 100644 vere/ext/gmp/gen/x86_64-macos/mpn/rsblsh_n.s
(limited to 'vere/ext/gmp/gen/x86_64-macos/mpn/rsblsh_n.s')
diff --git a/vere/ext/gmp/gen/x86_64-macos/mpn/rsblsh_n.s b/vere/ext/gmp/gen/x86_64-macos/mpn/rsblsh_n.s
new file mode 100644
index 0000000..329c600
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-macos/mpn/rsblsh_n.s
@@ -0,0 +1,269 @@
+# ___gmpn_rsblsh_n -- left-shift one limb array and subtract another from it.
+#
+# For limb index i = 0 .. n-1 (64-bit limbs) the code below stores
+#     rdi[i] = ((rdx[i] << r8) + (rdx[i-1] >> r9)) - rsi[i] - borrow
+# where r9 = 0 - r8, rdx[-1] is taken as 0, and the borrow is chained from
+# limb to limb through CF.  The value left in rax at ret is
+#     (rdx[n-1] >> r9) - final borrow.
+#
+# Syntax: AT&T (source, destination); target per the file path: x86_64
+# macOS.
+#
+# Register roles, as used by the instructions in this file:
+#   rdi      destination pointer; only ever stored through
+#   rsi      pointer to the limbs being subtracted (memory operand of sbb)
+#   rdx      pointer to the limbs being shifted
+#   rcx      limb count n on entry; n & 7 picks the entry stub and n >> 3
+#            becomes the countdown that jrcxz tests
+#   r8       left-shift count for shlx
+#   r9       0 - r8; shrx masks its count to 6 bits, so this acts as 64 - r8
+#   r10/r11  ping-pong pair: one holds the limb being shifted left, the
+#            other the spill-over bits of the previous limb
+#   rax      jump target during dispatch, then the per-limb result
+# Written: rax, rcx, rdx, rsi, rdi, r9, r10, r11, flags.  No stack use.
+#
+# Layout: an 8-way unrolled loop (Ltop .. Le1) entered in the middle.
+# Stub Lk (k = n & 7) biases the three pointers so that the fixed
+# displacements at entry point Lek address limb 0, seeds r10/r11, and
+# jumps to Lek.
+#
+# Borrow discipline: between one sbb and the next only mov, lea, shlx,
+# shrx, dec, jrcxz and jmp execute.  None of them writes CF, so the borrow
+# survives across limbs and across loop iterations.
+#
+# shlx/shrx are emitted as raw VEX bytes; the decoded instruction is given
+# beside each .byte line.
+#
+# NOTE(review): presumably the C-level contract is GMP's
+#   mpn_rsblsh_n(rp, up, vp, n, cnt) with System V argument registers
+#   rdi, rsi, rdx, rcx, r8 -- confirm against the caller/prototype.
+# NOTE(review): the lea-based combine equals a bitwise OR only for r8 in
+#   1..63 -- assumes callers never pass 0; TODO confirm.
+# NOTE(review): with n = 0 dispatch lands on L0 and "dec %rcx" wraps, so
+#   n >= 1 appears to be required -- TODO confirm.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        .text
+        .align  5, 0x90                         # 32-byte boundary if .align takes a power of two (as on Mach-O); 0x90 = nop fill
+        .globl  ___gmpn_rsblsh_n
+
+
+___gmpn_rsblsh_n:
+
+
+
+# --- Prologue: load limb 0, split n, build the right-shift count, dispatch ---
+        mov     (%rdx), %r10                    # r10 = first limb to shift; loaded before n is examined
+
+        mov     %ecx, %eax                      # eax = low 32 bits of n, kept for the "& 7" below
+        shr     $3, %rcx                        # rcx = n >> 3
+        xor     %r9d, %r9d
+        sub     %r8, %r9                        # r9 = 0 - r8 (right-shift count, see header)
+        and     $7, %eax                        # rax = n & 7 = jump-table index
+
+        lea     Ltab(%rip), %r11                # RIP-relative table base; every stub re-seeds r11 afterwards
+
+        movslq  (%r11,%rax,4), %rax             # sign-extended 32-bit offset (Lk - Ltab)
+        add     %r11, %rax                      # absolute stub address; the flags this leaves are reset by the stub's xor
+        jmp     *%rax
+
+# --- Entry stubs for n & 7 = 0, 7, 6, 5: bias pointers forward, seed r10/r11 ---
+L0:     lea     32(%rsi), %rsi                  # Le0 addresses its limb at -32(%rsi) / -32(%rdi)
+        lea     32(%rdx), %rdx
+        lea     32(%rdi), %rdi
+        xor     %r11d, %r11d                    # nothing spills into limb 0; xor also clears CF (no borrow in)
+        jmp     Le0
+
+L7:     mov     %r10, %r11                      # Le7 shifts r11, so limb 0 has to live there
+        lea     24(%rsi), %rsi
+        lea     24(%rdx), %rdx
+        lea     24(%rdi), %rdi
+        xor     %r10d, %r10d                    # zero spill-over term and clear CF
+        jmp     Le7
+
+L6:     lea     16(%rsi), %rsi
+        lea     16(%rdx), %rdx
+        lea     16(%rdi), %rdi
+        xor     %r11d, %r11d                    # zero spill-over term and clear CF
+        jmp     Le6
+
+L5:     mov     %r10, %r11                      # Le5 shifts r11
+        lea     8(%rsi), %rsi
+        lea     8(%rdx), %rdx
+        lea     8(%rdi), %rdi
+        xor     %r10d, %r10d                    # zero spill-over term and clear CF
+        jmp     Le5
+
+# --- Tail: reached only from jrcxz at Ltop, so rcx == 0 here ---
+Lend:   sbb     24(%rsi), %rax                  # finish the last limb: subtract rsi limb and borrow
+        mov     %rax, -40(%rdi)                 # store last result limb
+        .byte   0xc4,194,179,0xf7,195           # shrx %r9, %r11, %rax : bits shifted out of the last limb
+        sbb     %rcx, %rax                      # rcx is 0, so this subtracts only the final borrow
+
+        ret                                     # result is in rax
+
+# --- Main loop: 8 limbs per trip; Le0..Le1 are the mid-loop entry points ---
+        .align  5, 0x90
+Ltop:   jrcxz   Lend                            # exit test; reads rcx only, flags untouched
+        mov     -32(%rdx), %r10                 # fetch next limb to shift
+        sbb     24(%rsi), %rax                  # complete the limb started at Le1
+        lea     64(%rsi), %rsi                  # advance rsi by 8 limbs (lea keeps CF intact)
+        .byte   0xc4,66,179,0xf7,219            # shrx %r9, %r11, %r11 : r11 = spill-over of the limb just used
+        mov     %rax, -40(%rdi)
+Le0:    dec     %rcx                            # one fewer 8-limb trip; dec leaves CF alone
+        .byte   0xc4,194,185,0xf7,194           # shlx %r8, %r10, %rax : current limb << r8
+        lea     (%r11,%rax), %rax               # add previous limb's spill-over without touching flags
+        mov     -24(%rdx), %r11                 # fetch next limb
+        sbb     -32(%rsi), %rax                 # subtract rsi limb and incoming borrow
+        .byte   0xc4,66,179,0xf7,210            # shrx %r9, %r10, %r10 : spill-over for the next limb
+        mov     %rax, -32(%rdi)                 # store result limb
+Le7:    .byte   0xc4,194,185,0xf7,195           # shlx %r8, %r11, %rax
+        lea     (%r10,%rax), %rax
+        mov     -16(%rdx), %r10
+        sbb     -24(%rsi), %rax
+        .byte   0xc4,66,179,0xf7,219            # shrx %r9, %r11, %r11
+        mov     %rax, -24(%rdi)
+Le6:    .byte   0xc4,194,185,0xf7,194           # shlx %r8, %r10, %rax
+        lea     (%r11,%rax), %rax
+        mov     -8(%rdx), %r11
+        sbb     -16(%rsi), %rax
+        .byte   0xc4,66,179,0xf7,210            # shrx %r9, %r10, %r10
+        mov     %rax, -16(%rdi)
+Le5:    .byte   0xc4,194,185,0xf7,195           # shlx %r8, %r11, %rax
+        lea     (%r10,%rax), %rax
+        mov     (%rdx), %r10
+        sbb     -8(%rsi), %rax
+        .byte   0xc4,66,179,0xf7,219            # shrx %r9, %r11, %r11
+        mov     %rax, -8(%rdi)
+Le4:    .byte   0xc4,194,185,0xf7,194           # shlx %r8, %r10, %rax
+        lea     (%r11,%rax), %rax
+        mov     8(%rdx), %r11
+        sbb     (%rsi), %rax
+        .byte   0xc4,66,179,0xf7,210            # shrx %r9, %r10, %r10
+        mov     %rax, (%rdi)
+Le3:    .byte   0xc4,194,185,0xf7,195           # shlx %r8, %r11, %rax
+        lea     (%r10,%rax), %rax
+        mov     16(%rdx), %r10
+        sbb     8(%rsi), %rax
+        .byte   0xc4,66,179,0xf7,219            # shrx %r9, %r11, %r11
+        mov     %rax, 8(%rdi)
+Le2:    .byte   0xc4,194,185,0xf7,194           # shlx %r8, %r10, %rax
+        lea     (%r11,%rax), %rax
+        mov     24(%rdx), %r11
+        sbb     16(%rsi), %rax
+        lea     64(%rdx), %rdx                  # advance rdx by 8 limbs here, ahead of rsi
+        .byte   0xc4,66,179,0xf7,210            # shrx %r9, %r10, %r10
+        mov     %rax, 16(%rdi)
+        lea     64(%rdi), %rdi                  # advance rdi by 8 limbs here, ahead of rsi
+Le1:    .byte   0xc4,194,185,0xf7,195           # shlx %r8, %r11, %rax
+        lea     (%r10,%rax), %rax               # limb is completed by the sbb at Ltop or Lend
+        jmp     Ltop                            # unconditional back-edge; the exit test is at Ltop
+
+L4:     xor     %r11d, %r11d                    # n & 7 = 4: Le4 uses zero displacements, so no pointer bias; clears CF
+        jmp     Le4
+
+L3:     mov     %r10, %r11                      # Le3 shifts r11
+        lea     -8(%rsi), %rsi
+        lea     -8(%rdx), %rdx
+        lea     -8(%rdi), %rdi
+        xor     %r10d, %r10d                    # zero spill-over term and clear CF
+        jmp     Le3
+
+L2:     lea     -16(%rsi), %rsi
+        lea     -16(%rdx), %rdx
+        lea     -16(%rdi), %rdi
+        xor     %r11d, %r11d                    # zero spill-over term and clear CF
+        jmp     Le2
+
+L1:     mov     %r10, %r11                      # Le1 shifts r11
+        lea     -24(%rsi), %rsi                 # rsi has not yet had its +64 when Le1 runs
+        lea     40(%rdx), %rdx                  # -24 + 64: the body bumps rdx before Le1 (at Le2)
+        lea     40(%rdi), %rdi                  # -24 + 64: same for rdi
+        xor     %r10d, %r10d                    # zero spill-over term and clear CF
+        jmp     Le1
+
+# --- Jump table: entry k is the 32-bit offset of stub Lk from Ltab, k = n & 7 ---
+        .text
+        .align  3, 0x90                         # 8-byte boundary under the same power-of-two reading
+Ltab:   .set    L0_tmp, L0-Ltab                 # stubs precede the table, hence the sign-extending load
+        .long   L0_tmp
+
+        .set    L1_tmp, L1-Ltab
+        .long   L1_tmp
+
+        .set    L2_tmp, L2-Ltab
+        .long   L2_tmp
+
+        .set    L3_tmp, L3-Ltab
+        .long   L3_tmp
+
+        .set    L4_tmp, L4-Ltab
+        .long   L4_tmp
+
+        .set    L5_tmp, L5-Ltab
+        .long   L5_tmp
+
+        .set    L6_tmp, L6-Ltab
+        .long   L6_tmp
+
+        .set    L7_tmp, L7-Ltab
+        .long   L7_tmp
+
+
-- cgit v1.2.3