diff options
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s')
-rw-r--r-- | vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s | 211 |
1 files changed, 211 insertions, 0 deletions
# vere/ext/gmp/gen/x86_64-macos/mpn/popcount.s
#
# ___gmpn_popcount -- add up the number of set bits in a run of 64-bit words.
#
# Syntax: AT&T (source, destination).  Every popcnt is emitted as a raw
# .byte sequence; the decoded instruction is written beside each one.
#
# In:    %rdi = address of the first 64-bit word
#        %rsi = number of words
#               NOTE(review): presumably must be >= 1 -- word 0 is read
#               unconditionally and a count of 0 is dispatched exactly like
#               a count of 8; confirm against the callers.
# Out:   %rax = total count of set bits
# Clobb: %rcx, %rsi, %rdi, %r8, %r9, %r10, %r11, flags
# Stack: not touched
#
# Shape of the code:
#   * The entry point loads popcnt(word 0) into %rax, zeroes %rcx and
#     dispatches on (count mod 8) through Ljtab, a table of signed 32-bit
#     offsets measured from Ljtab itself.
#   * Each Lmod<k> stub preloads the popcounts of the next few words, biases
#     %rdi, and joins the unrolled loop at the stage that makes the remaining
#     word count a multiple of 8.
#   * The loop handles 8 words per pass with two running sums (%rcx, %rax).
#     Register pairs %r8/%r9 and %r10/%r11 alternate: a pair loaded in one
#     stage is added in the following stage.
#   * Ltail4 adds the last pending %r10/%r11 pair; Ltail2 folds %rcx into
#     %rax.

        .text
        .p2align 5, 0x90                        # 32-byte boundary, nop fill
        .globl  ___gmpn_popcount

___gmpn_popcount:
        mov     %esi, %r8d
        and     $7, %r8d                        # r8 = count mod 8

        .byte   0xf3,0x48,0x0f,0xb8,0x07        # popcnt (%rdi), %rax
        xor     %ecx, %ecx                      # second running sum starts at 0

        lea     Ljtab(%rip), %r9
        movslq  (%r9,%r8,4), %r8                # signed offset of the stub from Ljtab
        add     %r9, %r8                        # -> absolute stub address
        jmp     *%r8

# ---- count mod 8 == 3 ------------------------------------------------------
Lmod3:
        .byte   0xf3,0x4c,0x0f,0xb8,0x57,0x08   # popcnt 8(%rdi), %r10
        .byte   0xf3,0x4c,0x0f,0xb8,0x5f,0x10   # popcnt 16(%rdi), %r11
        add     $24, %rdi
        sub     $8, %rsi
        jg      Lin34                           # more than 3 words: go loop
        add     %r10, %rax                      # exactly 3 words: finish here
        add     %r11, %rax
Lret1:
        ret

# ---- count mod 8 == 1 ------------------------------------------------------
Lmod1:
        sub     $8, %rsi
        jle     Lret1                           # exactly 1 word: %rax is the answer
        .byte   0xf3,0x4c,0x0f,0xb8,0x47,0x08   # popcnt 8(%rdi), %r8
        .byte   0xf3,0x4c,0x0f,0xb8,0x4f,0x10   # popcnt 16(%rdi), %r9
        add     $8, %rdi
        jmp     Lin12

# ---- count mod 8 == 7 ------------------------------------------------------
Lmod7:
        .byte   0xf3,0x4c,0x0f,0xb8,0x57,0x08   # popcnt 8(%rdi), %r10
        .byte   0xf3,0x4c,0x0f,0xb8,0x5f,0x10   # popcnt 16(%rdi), %r11
        add     $-8, %rdi                       # bias so Lin07's offsets hit words 3..6
        jmp     Lin07

# ---- count mod 8 == 0 ------------------------------------------------------
Lmod0:
        .byte   0xf3,0x48,0x0f,0xb8,0x4f,0x08   # popcnt 8(%rdi), %rcx
        .byte   0xf3,0x4c,0x0f,0xb8,0x57,0x10   # popcnt 16(%rdi), %r10
        .byte   0xf3,0x4c,0x0f,0xb8,0x5f,0x18   # popcnt 24(%rdi), %r11
        jmp     Lin07

# ---- count mod 8 == 4 (falls through into the loop) ------------------------
Lmod4:
        .byte   0xf3,0x48,0x0f,0xb8,0x4f,0x08   # popcnt 8(%rdi), %rcx
        .byte   0xf3,0x4c,0x0f,0xb8,0x57,0x10   # popcnt 16(%rdi), %r10
        .byte   0xf3,0x4c,0x0f,0xb8,0x5f,0x18   # popcnt 24(%rdi), %r11
        add     $32, %rdi
        sub     $8, %rsi
        jle     Ltail4                          # exactly 4 words: skip the loop

# ---- main loop: 8 words per pass -------------------------------------------
        .p2align 4, 0x90                        # 16-byte boundary, nop fill
Lloop:
Lin34:
        .byte   0xf3,0x4c,0x0f,0xb8,0x07        # popcnt (%rdi), %r8
        .byte   0xf3,0x4c,0x0f,0xb8,0x4f,0x08   # popcnt 8(%rdi), %r9
        add     %r10, %rcx                      # consume the pending r10/r11 pair
        add     %r11, %rax
Lin12:
        .byte   0xf3,0x4c,0x0f,0xb8,0x57,0x10   # popcnt 16(%rdi), %r10
        .byte   0xf3,0x4c,0x0f,0xb8,0x5f,0x18   # popcnt 24(%rdi), %r11
        add     %r8, %rcx                       # consume the pending r8/r9 pair
        add     %r9, %rax
Lin07:
        .byte   0xf3,0x4c,0x0f,0xb8,0x47,0x20   # popcnt 32(%rdi), %r8
        .byte   0xf3,0x4c,0x0f,0xb8,0x4f,0x28   # popcnt 40(%rdi), %r9
        add     %r10, %rcx
        add     %r11, %rax
Lin56:
        .byte   0xf3,0x4c,0x0f,0xb8,0x57,0x30   # popcnt 48(%rdi), %r10
        .byte   0xf3,0x4c,0x0f,0xb8,0x5f,0x38   # popcnt 56(%rdi), %r11
        add     $64, %rdi                       # advance by 8 words
        add     %r8, %rcx
        add     %r9, %rax
        sub     $8, %rsi
        jg      Lloop

# ---- tail: drain the last pending pair and merge the two sums --------------
Ltail4:
        add     %r10, %rcx
        add     %r11, %rax
Ltail2:
        add     %rcx, %rax
        ret

# ---- count mod 8 == 2 ------------------------------------------------------
Lmod2:
        .byte   0xf3,0x48,0x0f,0xb8,0x4f,0x08   # popcnt 8(%rdi), %rcx
        sub     $8, %rsi
        jle     Ltail2                          # exactly 2 words: just merge sums
        .byte   0xf3,0x4c,0x0f,0xb8,0x47,0x10   # popcnt 16(%rdi), %r8
        .byte   0xf3,0x4c,0x0f,0xb8,0x4f,0x18   # popcnt 24(%rdi), %r9
        add     $16, %rdi
        jmp     Lin12

# ---- count mod 8 == 5 ------------------------------------------------------
Lmod5:
        .byte   0xf3,0x4c,0x0f,0xb8,0x47,0x08   # popcnt 8(%rdi), %r8
        .byte   0xf3,0x4c,0x0f,0xb8,0x4f,0x10   # popcnt 16(%rdi), %r9
        add     $-24, %rdi                      # bias so Lin56's offsets hit words 3..4
        jmp     Lin56

# ---- count mod 8 == 6 ------------------------------------------------------
Lmod6:
        .byte   0xf3,0x48,0x0f,0xb8,0x4f,0x08   # popcnt 8(%rdi), %rcx
        .byte   0xf3,0x4c,0x0f,0xb8,0x47,0x10   # popcnt 16(%rdi), %r8
        .byte   0xf3,0x4c,0x0f,0xb8,0x4f,0x18   # popcnt 24(%rdi), %r9
        add     $-16, %rdi                      # bias so Lin56's offsets hit words 4..5
        jmp     Lin56

# ---- dispatch table ---------------------------------------------------------
# Entry k is the 32-bit distance from Ljtab to Lmod<k>.  Each distance is
# first bound to a symbol with .set and then emitted with .long.
        .text
        .p2align 3, 0x90                        # 8-byte boundary, nop fill
Ljtab:
        .set    Loff0, Lmod0-Ljtab
        .long   Loff0

        .set    Loff1, Lmod1-Ljtab
        .long   Loff1

        .set    Loff2, Lmod2-Ljtab
        .long   Loff2

        .set    Loff3, Lmod3-Ljtab
        .long   Loff3

        .set    Loff4, Lmod4-Ljtab
        .long   Loff4

        .set    Loff5, Lmod5-Ljtab
        .long   Loff5

        .set    Loff6, Lmod6-Ljtab
        .long   Loff6

        .set    Loff7, Lmod7-Ljtab
        .long   Loff7