diff options
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s')
-rw-r--r-- | vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s | 204 |
1 file changed, 204 insertions, 0 deletions
# vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s
#
# mp_limb_t __gmpn_addlsh2_n (mp_limb_t *rp, const mp_limb_t *up,
#                             const mp_limb_t *vp, mp_size_t n)
#
# Computes {rp,n} = {up,n} + ({vp,n} << 2) and returns the carry out
# (the two bits shifted out of the top limb plus the final add carry).
# Machine-generated GMP code (expanded from mpn/x86_64/aorrlshC_n.asm
# with LSH=2, RSH=62); AT&T syntax, System V AMD64 ABI.
#
# In:    %rdi = rp, %rsi = up, %rdx = vp, %rcx = n (n > 0 assumed —
#        NOTE(review): no n==0 guard visible; confirm callers guarantee it)
# Out:   %rax = carry (0..4)
# Saves: %r12-%r15 (callee-saved, pushed/popped below)

	.text
	.align	16, 0x90
	.globl	__gmpn_addlsh2_n
	.type	__gmpn_addlsh2_n,@function

__gmpn_addlsh2_n:

	# Preserve the callee-saved registers used as the limb pipeline.
	push	%r12
	push	%r13
	push	%r14
	push	%r15

	# Pre-shift the first vp limb: r12 = vp[0] << 2 (via lea scale-4),
	# r8 = top 2 bits of vp[0] (carry into the next limb's shift).
	mov	(%rdx), %r8
	lea	(,%r8,4), %r12
	shr	$62, %r8

	# Point rp/up/vp just past their ends and run rcx from -n up to 0,
	# so the loop needs only one induction variable and a js test.
	mov	%ecx, %eax
	lea	(%rdi,%rcx,8), %rdi
	lea	(%rsi,%rcx,8), %rsi
	lea	(%rdx,%rcx,8), %rdx
	neg	%rcx
	# Dispatch on n mod 4 to peel 0-3 limbs before the unrolled loop.
	and	$3, %al
	je	.Lb00
	cmp	$2, %al
	jc	.Lb01
	je	.Lb10

	# --- n mod 4 == 3: process three limbs, then enter the loop. ---
	# Each limb: lea folds in the 2 bits shifted out of the limb below;
	# shr $62 extracts the 2 bits shifted out of this limb.
.Lb11:	mov	8(%rdx,%rcx,8), %r10
	lea	(%r8,%r10,4), %r14
	shr	$62, %r10
	mov	16(%rdx,%rcx,8), %r11
	lea	(%r10,%r11,4), %r15
	shr	$62, %r11
	add	(%rsi,%rcx,8), %r12
	adc	8(%rsi,%rcx,8), %r14
	adc	16(%rsi,%rcx,8), %r15
	sbb	%eax, %eax		# eax = -(add carry); regenerated in .Ltop
	mov	%r12, (%rdi,%rcx,8)
	mov	%r14, 8(%rdi,%rcx,8)
	mov	%r15, 16(%rdi,%rcx,8)
	add	$3, %rcx
	js	.Ltop
	jmp	.Lend

	# --- n mod 4 == 1: one limb; r11 carries the shifted-out bits. ---
.Lb01:	mov	%r8, %r11
	add	(%rsi,%rcx,8), %r12
	sbb	%eax, %eax
	mov	%r12, (%rdi,%rcx,8)
	add	$1, %rcx
	js	.Ltop
	jmp	.Lend

	# --- n mod 4 == 2: two limbs. ---
.Lb10:	mov	8(%rdx,%rcx,8), %r11
	lea	(%r8,%r11,4), %r15
	shr	$62, %r11
	add	(%rsi,%rcx,8), %r12
	adc	8(%rsi,%rcx,8), %r15
	sbb	%eax, %eax
	mov	%r12, (%rdi,%rcx,8)
	mov	%r15, 8(%rdi,%rcx,8)
	add	$2, %rcx
	js	.Ltop
	jmp	.Lend

	# --- n mod 4 == 0: prime r9/r10 and jump into the loop middle ---
	# (vp[0] was already shifted into r12/r8 above; eax is still n,
	# whose low bits are 0 here, so `add %eax,%eax` at .Le00's chain
	# entry contributes no carry on this first pass.)
.Lb00:	mov	8(%rdx,%rcx,8), %r9
	mov	16(%rdx,%rcx,8), %r10
	jmp	.Le00

	.align	16, 0x90
	# Main loop: 4 limbs per iteration.
	# Invariants at .Ltop: r11 = bits shifted out of the previous limb,
	# eax = 0 or -1 = -(carry from the previous adc chain).
.Ltop:	mov	16(%rdx,%rcx,8), %r10
	mov	(%rdx,%rcx,8), %r8
	mov	8(%rdx,%rcx,8), %r9
	lea	(%r11,%r8,4), %r12	# limb0: (vp[i]<<2) | carry-bits
	shr	$62, %r8
.Le00:	lea	(%r8,%r9,4), %r13	# limb1
	shr	$62, %r9
	mov	24(%rdx,%rcx,8), %r11
	lea	(%r9,%r10,4), %r14	# limb2
	shr	$62, %r10
	lea	(%r10,%r11,4), %r15	# limb3
	shr	$62, %r11		# r11 = shift-out for next iteration
	add	%eax, %eax		# restore CF from eax (-1 sets carry)
	adc	(%rsi,%rcx,8), %r12	# add up limbs into the shifted limbs
	adc	8(%rsi,%rcx,8), %r13
	adc	16(%rsi,%rcx,8), %r14
	mov	%r12, (%rdi,%rcx,8)	# stores interleaved; mov preserves CF
	mov	%r13, 8(%rdi,%rcx,8)
	mov	%r14, 16(%rdi,%rcx,8)
	adc	24(%rsi,%rcx,8), %r15
	sbb	%eax, %eax		# save CF: eax = -(carry)
	mov	%r15, 24(%rdi,%rcx,8)
	add	$4, %rcx
	js	.Ltop
.Lend:

	# Return value: eax = -carry, r11d = final shifted-out bits,
	# so -(eax - r11d) = carry + r11d = total carry out.
	sub	%r11d, %eax
	neg	%eax

	pop	%r15
	pop	%r14
	pop	%r13
	pop	%r12

	ret
	.size	__gmpn_addlsh2_n,.-__gmpn_addlsh2_n