author     polwex <polwex@sortug.com>  2025-10-05 21:56:51 +0700
committer  polwex <polwex@sortug.com>  2025-10-05 21:56:51 +0700
commit     fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree       51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-macos/mpn/rshift.s
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-macos/mpn/rshift.s')
-rw-r--r--  vere/ext/gmp/gen/x86_64-macos/mpn/rshift.s  230
1 file changed, 230 insertions, 0 deletions
diff --git a/vere/ext/gmp/gen/x86_64-macos/mpn/rshift.s b/vere/ext/gmp/gen/x86_64-macos/mpn/rshift.s
new file mode 100644
index 0000000..7528e27
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-macos/mpn/rshift.s
@@ -0,0 +1,230 @@
+
+ .text
+ .align 6, 0x90
+ .globl ___gmpn_rshift
+
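+# ___gmpn_rshift(rp, up, n, cnt): shift the n-limb number {up, n} right
+# by cnt bits, store the result at {rp, n}, and return the bits shifted
+# out of the low limb.  Arguments arrive in %rdi, %rsi, %rdx, %ecx per
+# the SysV AMD64 ABI; 1 <= cnt <= 63.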
+
+___gmpn_rshift:
+
+
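+# xmm4 = cnt, xmm5 = 64-cnt: each result limb is
+# (up[i] >> cnt) | (up[i+1] << (64-cnt)).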
+ movd %ecx, %xmm4
+ mov $64, %eax
+ sub %ecx, %eax
+ movd %eax, %xmm5
+
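+# Return value: with %cl = -cnt, shl masks the count to 6 bits and so
+# shifts by (64-cnt) mod 64, giving %rax = up[0] << (64-cnt), i.e. the
+# out-shifted low bits.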
+ neg %ecx
+ mov (%rsi), %rax
+ shl %cl, %rax
+
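+# Small operands (n <= 3) take the basecase at Lbc.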
+ cmp $3, %rdx
+ jle Lbc
+
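+# If rp is not 16-byte aligned, handle one limb here so the movdqa
+# stores in the unrolled loop hit aligned addresses.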
+ test $8, %dil
+ jz Lrp_aligned
+
+
+ movq (%rsi), %xmm0
+ movq 8(%rsi), %xmm1
+ psrlq %xmm4, %xmm0
+ psllq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movq %xmm0, (%rdi)
+ lea 8(%rsi), %rsi
+ lea 8(%rdi), %rdi
+ dec %rdx
+
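+# Point rsi/rdi past the ends of the arrays and run the index %rdx from
+# -n up toward zero; (n+1) & 6 picks the entry point into the 8-limb
+# unrolled loop.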
+Lrp_aligned:
+ lea 1(%rdx), %r8d
+ lea (%rsi,%rdx,8), %rsi
+ lea (%rdi,%rdx,8), %rdi
+ neg %rdx
+
+ and $6, %r8d
+ jz Lbu0
+ cmp $4, %r8d
+ jz Lbu4
+ jc Lbu2
+Lbu6: add $4, %rdx
+ jmp Li56
+Lbu0: add $6, %rdx
+ jmp Li70
+Lbu4: add $2, %rdx
+ jmp Li34
+Lbu2: add $8, %rdx
+ jge Lend
+
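+# Main loop: 8 limbs per iteration.  Loads use movdqu since up may be
+# unaligned; stores use movdqa since rp is 16-byte aligned by now.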
+ .align 4, 0x90
+Ltop: movdqu -64(%rsi,%rdx,8), %xmm1
+ movdqu -56(%rsi,%rdx,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -64(%rdi,%rdx,8)
+Li70:
+ movdqu -48(%rsi,%rdx,8), %xmm1
+ movdqu -40(%rsi,%rdx,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -48(%rdi,%rdx,8)
+Li56:
+ movdqu -32(%rsi,%rdx,8), %xmm1
+ movdqu -24(%rsi,%rdx,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -32(%rdi,%rdx,8)
+Li34:
+ movdqu -16(%rsi,%rdx,8), %xmm1
+ movdqu -8(%rsi,%rdx,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -16(%rdi,%rdx,8)
+ add $8, %rdx
+ jl Ltop
+
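+# Wind-down: one or two limbs remain; the parity of the remaining count
+# picks the path.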
+Lend: test $1, %dl
+ jnz Le1
+
+ movdqu -16(%rsi), %xmm1
+ movq -8(%rsi), %xmm0
+ psrlq %xmm4, %xmm1
+ psllq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movdqa %xmm0, -16(%rdi)
+
+ ret
+
+Le1: movq -8(%rsi), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, -8(%rdi)
+
+ ret
+
+
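+# Basecase for n <= 3, one limb at a time.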
+ .align 4, 0x90
+Lbc: dec %edx
+ jnz 1f
+ movq (%rsi), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, (%rdi)
+
+ ret
+
+1: movq (%rsi), %xmm1
+ movq 8(%rsi), %xmm0
+ psrlq %xmm4, %xmm1
+ psllq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movq %xmm0, (%rdi)
+ dec %edx
+ jnz 1f
+ movq 8(%rsi), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, 8(%rdi)
+
+ ret
+
+1: movq 8(%rsi), %xmm1
+ movq 16(%rsi), %xmm0
+ psrlq %xmm4, %xmm1
+ psllq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movq %xmm0, 8(%rdi)
+ movq 16(%rsi), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, 16(%rdi)
+
+ ret
+
+
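For reference, a minimal C sketch of the limb-level operation this routine
implements (illustrative only, not part of the commit: rshift_ref is a
made-up name; the real entry point is GMP's mpn_rshift):

#include <stdint.h>
#include <stddef.h>

/* Shift the n-limb number {up, n} right by cnt bits (1 <= cnt <= 63),
   write the result to {rp, n}, and return the bits shifted out of the
   low limb. */
uint64_t rshift_ref(uint64_t *rp, const uint64_t *up, size_t n, unsigned cnt)
{
    uint64_t out = up[0] << (64 - cnt);          /* out-shifted low bits */
    for (size_t i = 0; i + 1 < n; i++)
        rp[i] = (up[i] >> cnt) | (up[i + 1] << (64 - cnt));
    rp[n - 1] = up[n - 1] >> cnt;                /* zero-fill the top */
    return out;
}

The assembly computes the same per-limb combine with psrlq/psllq/por on
xmm registers, two limbs at a time in the unrolled loop.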