summaryrefslogtreecommitdiff
path: root/vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-10-05 21:56:51 +0700
committerpolwex <polwex@sortug.com>2025-10-05 21:56:51 +0700
commitfcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s')
-rw-r--r--vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s219
1 files changed, 219 insertions, 0 deletions
diff --git a/vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s b/vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s
new file mode 100644
index 0000000..9b3b59d
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-windows/mpn/addmul_2.s
@@ -0,0 +1,219 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addmul_2
+
+ .def __gmpn_addmul_2
+ .scl 2
+ .type 32
+ .endef
+__gmpn_addmul_2:
+
+ push %rdi
+ push %rsi
+ mov %rcx, %rdi
+ mov %rdx, %rsi
+ mov %r8, %rdx
+ mov %r9, %rcx
+
+ mov %rdx, %r11
+ push %rbx
+ push %rbp
+
+ mov 0(%rcx), %r8
+ mov 8(%rcx), %r9
+
+ mov %edx, %ebx
+ mov (%rsi), %rax
+ lea -8(%rsi,%rdx,8), %rsi
+ lea -8(%rdi,%rdx,8), %rdi
+ mul %r8
+ neg %r11
+ and $3, %ebx
+ jz Lb0
+ cmp $2, %ebx
+ jc Lb1
+ jz Lb2
+
+Lb3: mov %rax, %rcx
+ mov %rdx, %rbp
+ xor %r10d, %r10d
+ mov 8(%rsi,%r11,8), %rax
+ dec %r11
+ jmp Llo3
+
+Lb2: mov %rax, %rbp
+ mov 8(%rsi,%r11,8), %rax
+ mov %rdx, %r10
+ xor %ebx, %ebx
+ add $-2, %r11
+ jmp Llo2
+
+Lb1: mov %rax, %r10
+ mov 8(%rsi,%r11,8), %rax
+ mov %rdx, %rbx
+ xor %ecx, %ecx
+ inc %r11
+ jmp Llo1
+
+Lb0: mov $0, %r10d
+ mov %rax, %rbx
+ mov 8(%rsi,%r11,8), %rax
+ mov %rdx, %rcx
+ xor %ebp, %ebp
+ jmp Llo0
+
+ .align 32, 0x90
+Ltop: mov $0, %ecx
+ mul %r8
+ add %rax, %r10
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rbx
+ adc $0, %ecx
+Llo1: mul %r9
+ add %r10, (%rdi,%r11,8)
+ mov $0, %r10d
+ adc %rax, %rbx
+ mov $0, %ebp
+ mov 8(%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+ mul %r8
+ add %rax, %rbx
+ mov 8(%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+Llo0: mul %r9
+ add %rbx, 8(%rdi,%r11,8)
+ adc %rax, %rcx
+ adc %rdx, %rbp
+ mov 16(%rsi,%r11,8), %rax
+ mul %r8
+ add %rax, %rcx
+ adc %rdx, %rbp
+ adc $0, %r10d
+ mov 16(%rsi,%r11,8), %rax
+Llo3: mul %r9
+ add %rcx, 16(%rdi,%r11,8)
+ adc %rax, %rbp
+ adc %rdx, %r10
+ xor %ebx, %ebx
+ mov 24(%rsi,%r11,8), %rax
+ mul %r8
+ add %rax, %rbp
+ mov 24(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ adc $0, %ebx
+Llo2: mul %r9
+ add %rbp, 24(%rdi,%r11,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+ mov 32(%rsi,%r11,8), %rax
+ add $4, %r11
+ js Ltop
+
+Lend: xor %ecx, %ecx
+ mul %r8
+ add %rax, %r10
+ mov (%rsi), %rax
+ adc %rdx, %rbx
+ adc %ecx, %ecx
+ mul %r9
+ add %r10, (%rdi)
+ adc %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, 8(%rdi)
+ mov %rcx, %rax
+
+ pop %rbp
+ pop %rbx
+ pop %rsi
+ pop %rdi
+ ret
+