summaryrefslogtreecommitdiff
path: root/vere/ext/gmp/gen/x86_64-macos/mpn/com.s
diff options
context:
space:
mode:
author polwex <polwex@sortug.com> 2025-10-05 21:56:51 +0700
committer polwex <polwex@sortug.com> 2025-10-05 21:56:51 +0700
commit fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree 51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-macos/mpn/com.s
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-macos/mpn/com.s')
-rw-r--r-- vere/ext/gmp/gen/x86_64-macos/mpn/com.s 335
1 files changed, 335 insertions, 0 deletions
diff --git a/vere/ext/gmp/gen/x86_64-macos/mpn/com.s b/vere/ext/gmp/gen/x86_64-macos/mpn/com.s
new file mode 100644
index 0000000..bfac7e2
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-macos/mpn/com.s
@@ -0,0 +1,335 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# ___gmpn_com -- write the one's complement of a vector of 64-bit words.
+#
+# Register roles, as established by the loads and stores below (this is
+# the SysV AMD64 order for three integer arguments: dst, src, count):
+#   %rdi   destination pointer, advanced as output is written
+#   %rsi   source pointer, advanced as input is consumed
+#   %rdx   count of 8-byte words left to process
+#   %xmm5  all-ones constant; pxor with it inverts 16 bytes at once
+# Scratch: %rax, %r8-%r11, %xmm0-%xmm3 and the flags.  No stack access.
+#
+# Both pointers are assumed to be 8-byte aligned: only bit 3 is tested
+# before the alignment-requiring movaps accesses are used.
+#
+# Paths: Latop/Lam - source and destination both 16-byte aligned
+#        Luent...  - source 8 bytes off; realigned with palignr
+#        Lbc...    - scalar loop, short or closely overlapping operands
+
+ .text
+ .align 6, 0x90          # Mach-O .align takes an exponent: 64-byte boundary, 0x90 (nop) fill
+ .globl ___gmpn_com
+
+
+___gmpn_com:
+
+
+# Short inputs (at most 7 words, unsigned compare) use the scalar code.
+ cmp $7, %rdx
+ jbe Lbc
+
+ pcmpeqb %xmm5, %xmm5    # every byte equals itself, so xmm5 = all ones
+
+ test $8, %dil           # bit 3 of dst clear => dst already on a 16-byte boundary
+ jz Lrp_aligned
+# Complement one word the scalar way so that dst becomes 16-byte aligned.
+ mov (%rsi), %r8
+ lea 8(%rsi), %rsi
+ not %r8
+ mov %r8, (%rdi)
+ lea 8(%rdi), %rdi
+ dec %rdx
+
+Lrp_aligned:
+ test $8, %sil           # bit 3 of src set => src is 8 bytes past a 16-byte boundary
+ jnz Luent
+
+ jmp Lam                 # enter the aligned loop at its count check
+# Aligned loop: 64 bytes (8 words) per iteration.
+ .align 4, 0x90
+Latop:movaps 0(%rsi), %xmm0
+ movaps 16(%rsi), %xmm1
+ movaps 32(%rsi), %xmm2
+ movaps 48(%rsi), %xmm3
+ lea 64(%rsi), %rsi
+ pxor %xmm5, %xmm0
+ pxor %xmm5, %xmm1
+ pxor %xmm5, %xmm2
+ pxor %xmm5, %xmm3
+ movaps %xmm0, (%rdi)
+ movaps %xmm1, 16(%rdi)
+ movaps %xmm2, 32(%rdi)
+ movaps %xmm3, 48(%rdi)
+ lea 64(%rdi), %rdi
+Lam: sub $8, %rdx        # a borrow (CF) means fewer than 8 words were left
+ jnc Latop
+# Subtracting 8 leaves bits 0-2 of the count intact; they select the tail.
+ test $4, %dl            # four more words?
+ jz 1f
+ movaps (%rsi), %xmm0
+ movaps 16(%rsi), %xmm1
+ lea 32(%rsi), %rsi
+ pxor %xmm5, %xmm0
+ pxor %xmm5, %xmm1
+ movaps %xmm0, (%rdi)
+ movaps %xmm1, 16(%rdi)
+ lea 32(%rdi), %rdi
+
+1: test $2, %dl          # two more words?
+ jz 1f
+ movaps (%rsi), %xmm0
+ lea 16(%rsi), %rsi
+ pxor %xmm5, %xmm0
+ movaps %xmm0, (%rdi)
+ lea 16(%rdi), %rdi
+
+1: test $1, %dl          # one last word?
+ jz 1f
+ mov (%rsi), %r8
+ not %r8
+ mov %r8, (%rdi)
+
+1:
+ ret
+
+Luent:
+# Source is 8 bytes past a 16-byte boundary while dst is aligned.  Aligned
+# 16-byte loads at src-8, src+8, ... are merged pairwise with palignr.
+# Overlap guard: rax = src - dst - 40; an unsigned value <= 80 means
+# src is 40..120 bytes (5..15 words) above dst, so use the scalar loop.
+ lea -40(%rsi), %rax
+ sub %rdi, %rax
+ cmp $80, %rax
+ jbe Lbc                 # presumably because the block code stores high words before loading the low ones
+
+ sub $16, %rdx           # borrow => fewer than 16 words, only the tail code runs
+ jc Luend
+
+ movaps 120(%rsi), %xmm3 # preload the aligned 16 bytes that contain word 15 of the first block
+
+ sub $16, %rdx           # CF from this sub is consumed by the jnc at the loop bottom
+ jmp Lum
+# Unaligned-source loop: 128 bytes (16 words) per iteration, high to low.
+ .align 4, 0x90
+Lutop:movaps 120(%rsi), %xmm3
+ pxor %xmm5, %xmm0       # finish the lowest 16 bytes of the previous block
+ movaps %xmm0, -128(%rdi)
+ sub $16, %rdx           # sets CF for the jnc below; movaps/pxor/palignr/lea leave flags alone
+Lum: movaps 104(%rsi), %xmm2
+ .byte 0x66,0x0f,0x3a,0x0f,218,8   # palignr $8, %xmm2, %xmm3: xmm3 = hi64(xmm2) | lo64(xmm3) << 64
+ movaps 88(%rsi), %xmm1
+ pxor %xmm5, %xmm3
+ movaps %xmm3, 112(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,209,8   # palignr $8, %xmm1, %xmm2
+ movaps 72(%rsi), %xmm0
+ pxor %xmm5, %xmm2
+ movaps %xmm2, 96(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,200,8   # palignr $8, %xmm0, %xmm1
+ movaps 56(%rsi), %xmm3
+ pxor %xmm5, %xmm1
+ movaps %xmm1, 80(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,195,8   # palignr $8, %xmm3, %xmm0
+ movaps 40(%rsi), %xmm2
+ pxor %xmm5, %xmm0
+ movaps %xmm0, 64(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,218,8   # palignr $8, %xmm2, %xmm3
+ movaps 24(%rsi), %xmm1
+ pxor %xmm5, %xmm3
+ movaps %xmm3, 48(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,209,8   # palignr $8, %xmm1, %xmm2
+ movaps 8(%rsi), %xmm0
+ pxor %xmm5, %xmm2
+ movaps %xmm2, 32(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,200,8   # palignr $8, %xmm0, %xmm1
+ movaps -8(%rsi), %xmm3  # aligned load starting 8 bytes below src; only its upper half is used
+ pxor %xmm5, %xmm1
+ movaps %xmm1, 16(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,195,8   # palignr $8, %xmm3, %xmm0 (stored by the next iteration or after the loop)
+ lea 128(%rsi), %rsi
+ lea 128(%rdi), %rdi
+ jnc Lutop               # loop while the last sub $16 did not borrow
+
+ pxor %xmm5, %xmm0       # store the lowest 16 bytes of the final block
+ movaps %xmm0, -128(%rdi)
+
+Luend:test $8, %dl       # bits 0-3 of the count survive the sub $16; eight more words?
+ jz 1f
+ movaps 56(%rsi), %xmm3
+ movaps 40(%rsi), %xmm2
+ .byte 0x66,0x0f,0x3a,0x0f,218,8   # palignr $8, %xmm2, %xmm3
+ movaps 24(%rsi), %xmm1
+ pxor %xmm5, %xmm3
+ movaps %xmm3, 48(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,209,8   # palignr $8, %xmm1, %xmm2
+ movaps 8(%rsi), %xmm0
+ pxor %xmm5, %xmm2
+ movaps %xmm2, 32(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,200,8   # palignr $8, %xmm0, %xmm1
+ movaps -8(%rsi), %xmm3
+ pxor %xmm5, %xmm1
+ movaps %xmm1, 16(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,195,8   # palignr $8, %xmm3, %xmm0
+ lea 64(%rsi), %rsi
+ pxor %xmm5, %xmm0
+ movaps %xmm0, (%rdi)
+ lea 64(%rdi), %rdi
+
+1: test $4, %dl          # four more words?
+ jz 1f
+ movaps 24(%rsi), %xmm1
+ movaps 8(%rsi), %xmm0
+ .byte 0x66,0x0f,0x3a,0x0f,200,8   # palignr $8, %xmm0, %xmm1
+ movaps -8(%rsi), %xmm3
+ pxor %xmm5, %xmm1
+ movaps %xmm1, 16(%rdi)
+ .byte 0x66,0x0f,0x3a,0x0f,195,8   # palignr $8, %xmm3, %xmm0
+ lea 32(%rsi), %rsi
+ pxor %xmm5, %xmm0
+ movaps %xmm0, (%rdi)
+ lea 32(%rdi), %rdi
+
+1: test $2, %dl          # two more words?
+ jz 1f
+ movaps 8(%rsi), %xmm0
+ movaps -8(%rsi), %xmm3
+ .byte 0x66,0x0f,0x3a,0x0f,195,8   # palignr $8, %xmm3, %xmm0
+ lea 16(%rsi), %rsi
+ pxor %xmm5, %xmm0
+ movaps %xmm0, (%rdi)
+ lea 16(%rdi), %rdi
+
+1: test $1, %dl          # one last word?
+ jz 1f
+ mov (%rsi), %r8
+ not %r8
+ mov %r8, (%rdi)
+
+1:
+ ret
+
+# Scalar code, 4 words per iteration.  dst is biased by -8 so the loop can
+# bump it first and store at -24..0, and the tail can store at 8 and 16.
+# Only %edx (the low 32 bits of the count) is used from here on.
+Lbc: lea -8(%rdi), %rdi
+ sub $4, %edx            # NOTE(review): the jump from Luent does not bound the count; values >= 2^32 would be truncated - confirm callers cannot pass them
+ jc Lend
+
+ .align 4, 0x90
+Ltop: mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ lea 32(%rdi), %rdi
+ mov 16(%rsi), %r10
+ mov 24(%rsi), %r11
+ lea 32(%rsi), %rsi
+ not %r8
+ not %r9
+ not %r10
+ not %r11
+ mov %r8, -24(%rdi)
+ mov %r9, -16(%rdi)
+ sub $4, %edx            # CF for the jnc below; the two movs in between do not touch flags
+ mov %r10, -8(%rdi)
+ mov %r11, (%rdi)
+ jnc Ltop
+
+Lend: test $1, %dl       # bits 0-1 of the count are unchanged by the sub $4; odd word?
+ jz 1f
+ mov (%rsi), %r8
+ not %r8
+ mov %r8, 8(%rdi)
+ lea 8(%rdi), %rdi
+ lea 8(%rsi), %rsi
+1: test $2, %dl          # final pair of words?
+ jz 1f
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ not %r8
+ not %r9
+ mov %r8, 8(%rdi)
+ mov %r9, 16(%rdi)
+1:
+ ret
+
+