summaryrefslogtreecommitdiff
path: root/vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s
diff options
context:
space:
mode:
author polwex <polwex@sortug.com> 2025-10-05 21:56:51 +0700
committer polwex <polwex@sortug.com> 2025-10-05 21:56:51 +0700
commit fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree 51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s')
-rw-r--r-- vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s 211
1 files changed, 211 insertions, 0 deletions
diff --git a/vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s b/vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s
new file mode 100644
index 0000000..c7695bb
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s
@@ -0,0 +1,211 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 5, 0x90 # pad with 0x90 (NOP); presumably 2^5 = 32-byte alignment, as Mach-O assemblers read .align
+ .globl ___gmpn_popcount
+# ___gmpn_popcount: total number of set bits in an array of 64-bit words.
+# In: %rdi = address of first word, %rsi = word count (tested as signed). Out: %rax = bit total.
+___gmpn_popcount:
+# Clobbers %rcx, %rsi, %rdi, %r8-%r11 and flags; touches no stack and calls nothing.
+# The first word is always read, and a count of 0 dispatches to L0 (which reads 8 words),
+# so callers presumably must pass count >= 1 -- TODO confirm against GMP's mpn contract.
+ mov %esi, %r8d # only the low bits of the count are needed for dispatch
+ and $7, %r8d # %r8 = count mod 8 = jump-table index
+# popcnt is emitted as raw bytes throughout (presumably for assemblers lacking the mnemonic).
+ .byte 0xf3,0x48,0x0f,0xb8,0x07 # popcnt (%rdi), %rax -- word 0 seeds accumulator A
+ xor %ecx, %ecx # accumulator B (%rcx) = 0
+# Position-independent dispatch: Ltab holds 32-bit offsets measured from Ltab itself.
+ lea Ltab(%rip), %r9
+
+ movslq (%r9,%r8,4), %r8 # sign-extend table entry [count mod 8]
+ add %r9, %r8 # offset + &Ltab = absolute address of L0..L7
+ jmp *%r8
+# Each Lk handles count mod 8 == k: it preloads a few words after word 0, adjusts %rdi so the
+# loop's fixed displacements line up, then joins the loop at an Le* entry or returns directly.
+L3: .byte 0xf3,0x4c,0x0f,0xb8,0x57,0x08 # popcnt 8(%rdi), %r10
+ .byte 0xf3,0x4c,0x0f,0xb8,0x5f,0x10 # popcnt 16(%rdi), %r11
+ add $24, %rdi # step past the 3 words consumed
+ sub $8, %rsi
+ jg Le34 # words remain: pending %r10/%r11 get folded in at Le34
+ add %r10, %rax # count - 8 <= 0, i.e. count was 3: finish here
+ add %r11, %rax
+Ls1:
+ ret # %rax = total (accumulator B unused on this exit)
+# count mod 8 == 1
+L1: sub $8, %rsi
+ jle Ls1 # single word: %rax already holds the answer
+ .byte 0xf3,0x4c,0x0f,0xb8,0x47,0x08 # popcnt 8(%rdi), %r8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4f,0x10 # popcnt 16(%rdi), %r9
+ add $8, %rdi # bias so Le12's 16(%rdi) is word index 3 (0-based)
+ jmp Le12
+# count mod 8 == 7
+L7: .byte 0xf3,0x4c,0x0f,0xb8,0x57,0x08 # popcnt 8(%rdi), %r10
+ .byte 0xf3,0x4c,0x0f,0xb8,0x5f,0x10 # popcnt 16(%rdi), %r11
+ add $-8, %rdi # bias so Le07's 32(%rdi) is word index 3 (0-based)
+ jmp Le07
+# count mod 8 == 0; no pointer bias needed, Le07's 32(%rdi) is already word index 4
+L0: .byte 0xf3,0x48,0x0f,0xb8,0x4f,0x08 # popcnt 8(%rdi), %rcx -- loads straight into accumulator B
+ .byte 0xf3,0x4c,0x0f,0xb8,0x57,0x10 # popcnt 16(%rdi), %r10
+ .byte 0xf3,0x4c,0x0f,0xb8,0x5f,0x18 # popcnt 24(%rdi), %r11
+ jmp Le07
+# count mod 8 == 4
+L4: .byte 0xf3,0x48,0x0f,0xb8,0x4f,0x08 # popcnt 8(%rdi), %rcx -- loads straight into accumulator B
+ .byte 0xf3,0x4c,0x0f,0xb8,0x57,0x10 # popcnt 16(%rdi), %r10
+ .byte 0xf3,0x4c,0x0f,0xb8,0x5f,0x18 # popcnt 24(%rdi), %r11
+ add $32, %rdi # step past the 4 words consumed
+ sub $8, %rsi
+ jle Lx4 # count was 4: fold pending %r10/%r11 and return
+# Main loop, 8 words per pass. Loads alternate between %r8/%r9 and %r10/%r11; each pair is added
+ .align 4, 0x90 # (into %rcx and %rax) one stage after it was loaded. Presumably 16-byte aligned.
+Ltop:
+Le34: .byte 0xf3,0x4c,0x0f,0xb8,0x07 # popcnt (%rdi), %r8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4f,0x08 # popcnt 8(%rdi), %r9
+ add %r10, %rcx # fold the pair loaded in the previous stage
+ add %r11, %rax
+Le12: .byte 0xf3,0x4c,0x0f,0xb8,0x57,0x10 # popcnt 16(%rdi), %r10
+ .byte 0xf3,0x4c,0x0f,0xb8,0x5f,0x18 # popcnt 24(%rdi), %r11
+ add %r8, %rcx
+ add %r9, %rax
+Le07: .byte 0xf3,0x4c,0x0f,0xb8,0x47,0x20 # popcnt 32(%rdi), %r8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4f,0x28 # popcnt 40(%rdi), %r9
+ add %r10, %rcx
+ add %r11, %rax
+Le56: .byte 0xf3,0x4c,0x0f,0xb8,0x57,0x30 # popcnt 48(%rdi), %r10
+ .byte 0xf3,0x4c,0x0f,0xb8,0x5f,0x38 # popcnt 56(%rdi), %r11
+ add $64, %rdi # advance to the next 8-word group
+ add %r8, %rcx
+ add %r9, %rax
+ sub $8, %rsi
+ jg Ltop # loop while words remain
+# Exit: fold the last pending pair, then merge the two accumulators.
+Lx4: add %r10, %rcx
+ add %r11, %rax
+Lx2: add %rcx, %rax # %rax = A + B = total
+
+
+ ret
+# count mod 8 == 2
+L2: .byte 0xf3,0x48,0x0f,0xb8,0x4f,0x08 # popcnt 8(%rdi), %rcx -- loads straight into accumulator B
+ sub $8, %rsi
+ jle Lx2 # count was 2: merge accumulators and return
+ .byte 0xf3,0x4c,0x0f,0xb8,0x47,0x10 # popcnt 16(%rdi), %r8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4f,0x18 # popcnt 24(%rdi), %r9
+ add $16, %rdi # bias so Le12's 16(%rdi) is word index 4 (0-based)
+ jmp Le12
+# count mod 8 == 5; no early-out needed, the loop's own sub/jg ends it when count is 5
+L5: .byte 0xf3,0x4c,0x0f,0xb8,0x47,0x08 # popcnt 8(%rdi), %r8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4f,0x10 # popcnt 16(%rdi), %r9
+ add $-24, %rdi # bias so Le56's 48(%rdi) is word index 3 (0-based)
+ jmp Le56
+# count mod 8 == 6
+L6: .byte 0xf3,0x48,0x0f,0xb8,0x4f,0x08 # popcnt 8(%rdi), %rcx -- loads straight into accumulator B
+ .byte 0xf3,0x4c,0x0f,0xb8,0x47,0x10 # popcnt 16(%rdi), %r8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4f,0x18 # popcnt 24(%rdi), %r9
+ add $-16, %rdi # bias so Le56's 48(%rdi) is word index 4 (0-based)
+ jmp Le56
+
+ .text # the dispatch table is emitted in the text section, after the code
+ .align 3, 0x90 # presumably 2^3 = 8-byte alignment, padded with 0x90
+Ltab: .set L0_tmp, L0-Ltab # entry k: signed 32-bit distance from Ltab to label Lk
+ .long L0_tmp # index 0: count mod 8 == 0
+# ___gmpn_popcount loads an entry with movslq and adds Ltab's address before its indirect jmp.
+ .set L1_tmp, L1-Ltab
+ .long L1_tmp # index 1
+# Storing label differences rather than absolute addresses keeps the table position-independent.
+ .set L2_tmp, L2-Ltab
+ .long L2_tmp # index 2
+# NOTE(review): the .set temporaries look like an assembler workaround for label-difference data -- confirm.
+ .set L3_tmp, L3-Ltab
+ .long L3_tmp # index 3
+
+ .set L4_tmp, L4-Ltab
+ .long L4_tmp # index 4
+
+ .set L5_tmp, L5-Ltab
+ .long L5_tmp # index 5
+
+ .set L6_tmp, L6-Ltab
+ .long L6_tmp # index 6
+
+ .set L7_tmp, L7-Ltab
+ .long L7_tmp # index 7
+