diff options
author | polwex <polwex@sortug.com> | 2025-10-05 21:56:51 +0700 |
---|---|---|
committer | polwex <polwex@sortug.com> | 2025-10-05 21:56:51 +0700 |
commit | fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch) | |
tree | 51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-macos/mpn/sublsh2_n.s |
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-macos/mpn/sublsh2_n.s')
-rw-r--r-- | vere/ext/gmp/gen/x86_64-macos/mpn/sublsh2_n.s | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/vere/ext/gmp/gen/x86_64-macos/mpn/sublsh2_n.s b/vere/ext/gmp/gen/x86_64-macos/mpn/sublsh2_n.s
new file mode 100644
index 0000000..8e62cfe
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-macos/mpn/sublsh2_n.s
@@ -0,0 +1,190 @@
+#-----------------------------------------------------------------------
+# ___gmpn_sublsh2_n -- limb-wise  r = u - (v << 2)  on 64-bit limbs.
+#
+# Syntax: AT&T (source, destination operand order).
+#
+# Register use on entry, as read by the code below:
+#   rdi  base of the result limbs (written)
+#   rsi  base of the minuend limbs u (read)
+#   rdx  base of the limbs v that are shifted left by two bits (read)
+#   rcx  limb count n; n = 0 is not handled: v[0] is loaded
+#        unconditionally and a zero remainder enters the four-limb body
+#
+# NOTE(review): this matches GMP's mpn_sublsh2_n(rp, up, vp, n) under the
+# System V AMD64 calling convention -- confirm against the GMP headers.
+#
+# Result:
+#   eax  borrow out of the limb subtraction plus the two bits shifted out
+#        of the top limb of v (v[n-1] >> 62)
+#
+# Saved and restored: rbx, r12.  Also written: r8-r11, rsi, rdx, rdi, rcx,
+# flags.
+#
+# Method:
+#   * The three pointers are advanced to 24 bytes past the end of their
+#     arrays and rcx is negated, so -24(ptr,rcx,8) addresses the current
+#     limb and the loop ends when rcx stops being negative.
+#   * shrd $62, hi, lo  replaces lo with (lo >> 62) | (hi << 2): one limb
+#     of v << 2, completed with the top two bits of the limb below it.
+#   * The n mod 4 leading limbs are handled by Lb1/Lb2/Lb3 (Lb0 when the
+#     remainder is zero); Ltop then processes four limbs per iteration.
+#   * The borrow is parked in eax as 0 or -1 (sbb %eax, %eax) while rcx is
+#     updated, and put back into CF by  add %eax, %eax.
+#-----------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        .text
+        .align 3, 0x90                  # 0x90 (NOP) is the padding byte
+        .globl ___gmpn_sublsh2_n
+
+
+___gmpn_sublsh2_n:
+
+
+        push %rbx                       # rbx and r12 are used as scratch; restored at Lend
+        push %r12
+
+        mov %ecx, %eax                  # copy of n (low 32 bits) for the n mod 4 dispatch
+        lea 24(%rsi,%rcx,8), %rsi       # rsi = u + 8*n + 24
+        lea 24(%rdx,%rcx,8), %rdx       # rdx = v + 8*n + 24
+        lea 24(%rdi,%rcx,8), %rdi       # rdi = r + 8*n + 24
+        neg %rcx                        # rcx = -n, counts up towards zero
+
+        xor %r11d, %r11d                # nothing below v[0]: zero bits enter at the bottom
+
+        mov -24(%rdx,%rcx,8), %r8       # r8 = v[0]
+        shrd $62, %r8, %r11             # r11 = v[0] << 2
+
+        and $3, %eax                    # eax = n mod 4; when 0, add %eax, %eax at Lb0 gives CF = 0
+        je Lb0                          # remainder 0
+        cmp $2, %eax
+        jc Lb1                          # remainder 1
+        je Lb2                          # remainder 2 (flags still from the cmp)
+
+Lb3:    mov -16(%rdx,%rcx,8), %r9       # remainder 3: handle three limbs first
+        shrd $62, %r9, %r8              # r8 = (v[1] << 2) | (v[0] >> 62)
+        mov -8(%rdx,%rcx,8), %r10
+        shrd $62, %r10, %r9             # r9 = (v[2] << 2) | (v[1] >> 62)
+        mov -24(%rsi,%rcx,8), %r12
+        sub %r11, %r12                  # lowest limb: plain sub, no incoming borrow
+        mov %r12, -24(%rdi,%rcx,8)
+        mov -16(%rsi,%rcx,8), %r12      # mov leaves flags alone, so the borrow chains on
+        sbb %r8, %r12
+        mov %r12, -16(%rdi,%rcx,8)
+        mov -8(%rsi,%rcx,8), %r12
+        sbb %r9, %r12
+        mov %r12, -8(%rdi,%rcx,8)
+        mov %r10, %r11                  # keep raw v[2]; its top two bits feed the next limb
+        sbb %eax, %eax                  # eax = -borrow (0 or -1); the add below overwrites CF
+        add $3, %rcx
+        js Ltop                         # rcx still negative: limbs remain
+        jmp Lend
+
+Lb1:    mov -24(%rsi,%rcx,8), %r12      # remainder 1: handle one limb first
+        sub %r11, %r12                  # u[0] - (v[0] << 2), no incoming borrow
+        mov %r12, -24(%rdi,%rcx,8)
+        mov %r8, %r11                   # keep raw v[0]; its top two bits feed the next limb
+        sbb %eax, %eax                  # eax = -borrow; inc leaves CF alone but the value is saved anyway
+        inc %rcx
+        js Ltop                         # rcx still negative: limbs remain
+        jmp Lend
+
+Lb2:    mov -16(%rdx,%rcx,8), %r9       # remainder 2: handle two limbs first
+        shrd $62, %r9, %r8              # r8 = (v[1] << 2) | (v[0] >> 62)
+        mov -24(%rsi,%rcx,8), %r12
+        sub %r11, %r12                  # lowest limb: plain sub, no incoming borrow
+        mov %r12, -24(%rdi,%rcx,8)
+        mov -16(%rsi,%rcx,8), %r12
+        sbb %r8, %r12
+        mov %r12, -16(%rdi,%rcx,8)
+        mov %r9, %r11                   # keep raw v[1]; its top two bits feed the next limb
+        sbb %eax, %eax                  # eax = -borrow (0 or -1); the add below overwrites CF
+        add $2, %rcx
+        js Ltop                         # rcx still negative: limbs remain
+        jmp Lend
+
+        .align 4, 0x90
+Ltop:   mov -24(%rdx,%rcx,8), %r8       # four limbs per pass; r11 holds the raw limb below this one
+        shrd $62, %r8, %r11             # r11 = shifted limb 0 of this group
+Lb0:    mov -16(%rdx,%rcx,8), %r9       # entry for remainder 0: r8 and r11 were set up before the dispatch
+        shrd $62, %r9, %r8              # r8 = shifted limb 1
+        mov -8(%rdx,%rcx,8), %r10
+        shrd $62, %r10, %r9             # r9 = shifted limb 2
+        mov (%rdx,%rcx,8), %rbx
+        shrd $62, %rbx, %r10            # r10 = shifted limb 3
+
+        add %eax, %eax                  # CF = saved borrow (eax is 0 or -1 here)
+
+        mov -24(%rsi,%rcx,8), %r12
+        sbb %r11, %r12
+        mov %r12, -24(%rdi,%rcx,8)
+
+        mov -16(%rsi,%rcx,8), %r12
+        sbb %r8, %r12
+        mov %r12, -16(%rdi,%rcx,8)
+
+        mov -8(%rsi,%rcx,8), %r12
+        sbb %r9, %r12
+        mov %r12, -8(%rdi,%rcx,8)
+
+        mov (%rsi,%rcx,8), %r12
+        sbb %r10, %r12
+        mov %r12, (%rdi,%rcx,8)
+
+        mov %rbx, %r11                  # raw top limb of this group feeds the next shrd, or Lend
+        sbb %eax, %eax                  # eax = -borrow; the add below overwrites CF
+
+        add $4, %rcx
+        js Ltop                         # rcx still negative: another group of four
+
+Lend:   shr $62, %r11                   # r11 = the two bits shifted out of the last limb of v
+        pop %r12
+        pop %rbx
+        sub %r11d, %eax                 # eax = -borrow - r11
+        neg %eax                        # eax = borrow + r11
+
+        ret
+
+ |