author    polwex <polwex@sortug.com>    2025-10-05 21:56:51 +0700
committer polwex <polwex@sortug.com>    2025-10-05 21:56:51 +0700
commit    fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree      51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-windows/mpn/mod_1_4.s
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-windows/mpn/mod_1_4.s')
-rw-r--r--  vere/ext/gmp/gen/x86_64-windows/mpn/mod_1_4.s  300
1 file changed, 300 insertions, 0 deletions
diff --git a/vere/ext/gmp/gen/x86_64-windows/mpn/mod_1_4.s b/vere/ext/gmp/gen/x86_64-windows/mpn/mod_1_4.s
new file mode 100644
index 0000000..1a8b397
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-windows/mpn/mod_1_4.s
@@ -0,0 +1,300 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	.text
+	.align	16, 0x90
+	.globl	__gmpn_mod_1s_4p
+
+	.def	__gmpn_mod_1s_4p
+	.scl	2
+	.type	32
+	.endef
+__gmpn_mod_1s_4p:
+
+	push	%rdi
+	push	%rsi
+	mov	%rcx, %rdi
+	mov	%rdx, %rsi
+	mov	%r8, %rdx
+	mov	%r9, %rcx
+
+	push	%r15
+	push	%r14
+	push	%r13
+	push	%r12
+	push	%rbp
+	push	%rbx
+
+	mov	%rdx, %r15
+	mov	%rcx, %r14
+	mov	16(%rcx), %r11
+	mov	24(%rcx), %rbx
+	mov	32(%rcx), %rbp
+	mov	40(%rcx), %r13
+	mov	48(%rcx), %r12
+	xor	%r8d, %r8d
+	mov	%esi, %edx
+	and	$3, %edx
+	je	Lb0
+	cmp	$2, %edx
+	jc	Lb1
+	je	Lb2
+
+Lb3:	lea	-24(%rdi,%rsi,8), %rdi
+	mov	8(%rdi), %rax
+	mul	%r11
+	mov	(%rdi), %r9
+	add	%rax, %r9
+	adc	%rdx, %r8
+	mov	16(%rdi), %rax
+	mul	%rbx
+	jmp	Lm0
+
+	.align	8, 0x90
+Lb0:	lea	-32(%rdi,%rsi,8), %rdi
+	mov	8(%rdi), %rax
+	mul	%r11
+	mov	(%rdi), %r9
+	add	%rax, %r9
+	adc	%rdx, %r8
+	mov	16(%rdi), %rax
+	mul	%rbx
+	add	%rax, %r9
+	adc	%rdx, %r8
+	mov	24(%rdi), %rax
+	mul	%rbp
+	jmp	Lm0
+
+	.align	8, 0x90
+Lb1:	lea	-8(%rdi,%rsi,8), %rdi
+	mov	(%rdi), %r9
+	jmp	Lm1
+
+	.align	8, 0x90
+Lb2:	lea	-16(%rdi,%rsi,8), %rdi
+	mov	8(%rdi), %r8
+	mov	(%rdi), %r9
+	jmp	Lm1
+
+	.align	16, 0x90
+Ltop:	mov	-24(%rdi), %rax
+	mov	-32(%rdi), %r10
+	mul	%r11
+	add	%rax, %r10
+	mov	-16(%rdi), %rax
+	mov	$0, %ecx
+	adc	%rdx, %rcx
+	mul	%rbx
+	add	%rax, %r10
+	mov	-8(%rdi), %rax
+	adc	%rdx, %rcx
+	sub	$32, %rdi
+	mul	%rbp
+	add	%rax, %r10
+	mov	%r13, %rax
+	adc	%rdx, %rcx
+	mul	%r9
+	add	%rax, %r10
+	mov	%r12, %rax
+	adc	%rdx, %rcx
+	mul	%r8
+	mov	%r10, %r9
+	mov	%rcx, %r8
+Lm0:	add	%rax, %r9
+	adc	%rdx, %r8
+Lm1:	sub	$4, %rsi
+	ja	Ltop
+
+Lend:	mov	8(%r14), %esi
+	mov	%r8, %rax
+	mul	%r11
+	mov	%rax, %r8
+	add	%r9, %r8
+	adc	$0, %rdx
+	xor	%ecx, %ecx
+	sub	%esi, %ecx
+	mov	%r8, %rdi
+	shr	%cl, %rdi
+	mov	%esi, %ecx
+	sal	%cl, %rdx
+	or	%rdx, %rdi
+	mov	%rdi, %rax
+	mulq	(%r14)
+	mov	%r15, %rbx
+	mov	%rax, %r9
+	sal	%cl, %r8
+	inc	%rdi
+	add	%r8, %r9
+	adc	%rdi, %rdx
+	imul	%rbx, %rdx
+	sub	%rdx, %r8
+	lea	(%r8,%rbx), %rax
+	cmp	%r8, %r9
+	cmovc	%rax, %r8
+	mov	%r8, %rax
+	sub	%rbx, %rax
+	cmovc	%r8, %rax
+	shr	%cl, %rax
+	pop	%rbx
+	pop	%rbp
+	pop	%r12
+	pop	%r13
+	pop	%r14
+	pop	%r15
+	pop	%rsi
+	pop	%rdi
+	ret
+
+
+	.align	16, 0x90
+	.globl	__gmpn_mod_1s_4p_cps
+
+	.def	__gmpn_mod_1s_4p_cps
+	.scl	2
+	.type	32
+	.endef
+__gmpn_mod_1s_4p_cps:
+
+	push	%rdi
+	push	%rsi
+	mov	%rcx, %rdi
+	mov	%rdx, %rsi
+
+	push	%rbp
+	bsr	%rsi, %rcx
+	push	%rbx
+	mov	%rdi, %rbx
+	push	%r12
+	xor	$63, %ecx
+	mov	%rsi, %r12
+	mov	%ecx, %ebp
+	sal	%cl, %r12
+
+	mov	%r12, %rcx
+	sub	$32, %rsp
+
+	call	__gmpn_invert_limb
+	add	$32, %rsp
+	mov	%r12, %r8
+	mov	%rax, %r11
+	mov	%rax, (%rbx)
+	mov	%rbp, 8(%rbx)
+	neg	%r8
+	mov	%ebp, %ecx
+	mov	$1, %esi
+
+	shld	%cl, %rax, %rsi
+
+	imul	%r8, %rsi
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	%cl, %rsi
+	mov	%rsi, 16(%rbx)
+
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	%cl, %rsi
+	mov	%rsi, 24(%rbx)
+
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	%cl, %rsi
+	mov	%rsi, 32(%rbx)
+
+	not	%rdx
+	imul	%r12, %rdx
+	lea	(%rdx,%r12), %rsi
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %rsi
+	mov	%r11, %rax
+	mul	%rsi
+
+	add	%rsi, %rdx
+	shr	%cl, %rsi
+	mov	%rsi, 40(%rbx)
+
+	not	%rdx
+	imul	%r12, %rdx
+	add	%rdx, %r12
+	cmp	%rdx, %rax
+	cmovnc	%rdx, %r12
+
+	shr	%cl, %r12
+	mov	%r12, 48(%rbx)
+
+	pop	%r12
+	pop	%rbx
+	pop	%rbp
+	pop	%rsi
+	pop	%rdi
+	ret
+
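Note on the new file: this is the machine-generated x86-64 Windows build of GMP's mpn/mod_1_4 module, vendored under vere/ext/gmp/gen/. __gmpn_mod_1s_4p reduces an n-limb operand modulo a single limb, folding four limbs per pass of its Ltop loop against a table of precomputed powers of the limb base B = 2^64; __gmpn_mod_1s_4p_cps fills that table at 16(%rbx)..48(%rbx), along with a reciprocal obtained from __gmpn_invert_limb, so the hot loop never divides. The push %rdi / mov %rcx, %rdi prologue shims the Microsoft x64 argument registers (rcx, rdx, r8, r9) onto the System V layout the routine body expects. As a rough illustration of the arithmetic only — a minimal C sketch, not GMP's code, assuming a compiler with unsigned __int128 (GCC/Clang) and using plain % where the assembly uses multiply-by-reciprocal on a pre-shifted divisor (it also keeps a two-limb accumulator, which is why the real table holds powers up to B^5):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef unsigned __int128 u128;

    /* Reduce the little-endian 64-bit-limb number a[0..n-1] mod d,
       folding four limbs per iteration -- the same schedule as the
       Ltop loop above.  B = 2^64 is the limb base. */
    static uint64_t mod_1_4_sketch(const uint64_t *a, size_t n, uint64_t d)
    {
        /* Precomputed powers of B mod d: the role of the cps table
           built by the _cps function. */
        uint64_t b1 = (uint64_t)(((u128)1 << 64) % d);  /* B   mod d */
        uint64_t b2 = (uint64_t)((u128)b1 * b1 % d);    /* B^2 mod d */
        uint64_t b3 = (uint64_t)((u128)b2 * b1 % d);    /* B^3 mod d */
        uint64_t b4 = (uint64_t)((u128)b3 * b1 % d);    /* B^4 mod d */

        uint64_t r = 0;
        size_t i = n;
        while (i % 4)                    /* peel the n mod 4 high limbs */
            r = (uint64_t)(((u128)r * b1 + a[--i]) % d);
        while (i) {                      /* fold four limbs at a time */
            i -= 4;
            u128 t = (u128)r * b4 % d;   /* old remainder enters via B^4 */
            t += (u128)a[i + 3] * b3 % d;
            t += (u128)a[i + 2] * b2 % d;
            t += (u128)a[i + 1] * b1 % d;
            t += a[i];                   /* t < 4d + B: no 128-bit overflow */
            r = (uint64_t)(t % d);
        }
        return r;
    }

    int main(void)
    {
        /* hypothetical test values, checked against a one-limb Horner loop */
        uint64_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
        uint64_t d = 0x1fffffffffffffffULL;
        uint64_t r = 0;
        for (size_t i = 8; i-- > 0; )
            r = (uint64_t)((((u128)r << 64) % d + a[i]) % d);
        assert(r == mod_1_4_sketch(a, 8, d));
        printf("remainder: %llu\n", (unsigned long long)r);
        return 0;
    }

The point of the 4-way split is instruction-level parallelism: the four products in each iteration are independent and can overlap in the multiplier pipeline, whereas the one-limb Horner recurrence in main is a strict dependency chain.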