author    polwex <polwex@sortug.com>  2025-10-05 21:56:51 +0700
committer polwex <polwex@sortug.com>  2025-10-05 21:56:51 +0700
commit    fcedfddf00b3f994e4f4e40332ac7fc192c63244 (patch)
tree      51d38e62c7bdfcc5f9a5e9435fe820c93cfc9a3d /vere/ext/gmp/gen/x86_64-linux/mpn
claude is gud
Diffstat (limited to 'vere/ext/gmp/gen/x86_64-linux/mpn')
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/add_err1_n.s        | 237
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/add_err2_n.s        | 184
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/add_err3_n.s        | 168
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/add_n.s             | 194
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/addaddmul_1msb0.s   | 185
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/addlsh1_n.s         | 179
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s         | 204
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/addlsh_n.s          | 228
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/addmul_1.s          | 196
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/addmul_2.s          | 209
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/and_n.s             | 149
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/andn_n.s            | 154
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_dbm1c.s        | 121
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_q_1.s          | 198
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/cnd_add_n.s         | 190
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/cnd_sub_n.s         | 190
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/com.s               | 110
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/copyd.s             | 108
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/copyi.s             | 107
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_1n_pi1.s     | 261
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2n_pi1.s     | 171
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2u_pi1.s     | 211
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/dive_1.s            | 175
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/divrem_1.s          | 335
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/divrem_2.s          | 208
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/fib_table.c         | 107
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/gcd_11.s            | 256
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/gcd_22.s            | 434
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/hamdist.s           | 167
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb.s       | 123
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb_table.s | 313
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/ior_n.s             | 149
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/iorn_n.s            | 154
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/jacobitab.h         | 13
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/lshift.s            | 186
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/lshiftc.s           | 197
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_1.s           | 241
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_2.s           | 252
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_4.s           | 283
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mod_34lsub1.s       | 228
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mode1o.s            | 189
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mp_bases.c          | 268
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mul_1.s             | 205
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mul_2.s             | 218
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mul_basecase.s      | 483
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mullo_basecase.s    | 439
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/mulmid_basecase.s   | 573
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/nand_n.s            | 155
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/nior_n.s            | 155
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/perfsqr.h           | 46
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/popcount.s          | 160
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/redc_1.s            | 603
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh1_n.s         | 179
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh2_n.s         | 204
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh_n.s          | 228
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/rsh1add_n.s         | 203
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/rsh1sub_n.s         | 203
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/rshift.s            | 191
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sec_tabselect.s     | 190
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sqr_basecase.s      | 818
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sqr_diag_addlsh1.s  | 130
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sub_err1_n.s        | 237
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sub_err2_n.s        | 184
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sub_err3_n.s        | 168
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sub_n.s             | 194
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/sublsh1_n.s         | 175
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/submul_1.s          | 196
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/xnor_n.s            | 154
-rw-r--r--  vere/ext/gmp/gen/x86_64-linux/mpn/xor_n.s             | 149
69 files changed, 15072 insertions, 0 deletions
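
Note: the `__gmpn_*` symbols defined in the assembly below back GMP's public low-level `mpn_*` interface. As a minimal sketch (assuming the usual GMP headers and linking against the build these generated files belong to), calling the `mpn_add_n` entry point that corresponds to the `__gmpn_add_n` routine added in this commit might look like:

```c
/* Sketch only: standard GMP mpn usage, not code from this repository.
   mpn_add_n adds two n-limb operands limb by limb and returns the carry-out. */
#include <stdio.h>
#include <gmp.h>

int main(void)
{
    mp_limb_t a[2] = { ~(mp_limb_t)0, 1 };   /* all-ones low limb to force a carry */
    mp_limb_t b[2] = { 1, 2 };
    mp_limb_t r[2];

    mp_limb_t carry = mpn_add_n(r, a, b, 2); /* r = a + b over 2 limbs */
    printf("carry out = %lu\n", (unsigned long) carry);
    return 0;
}
```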
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/add_err1_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/add_err1_n.s
new file mode 100644
index 0000000..2cbba6a
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/add_err1_n.s
@@ -0,0 +1,237 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_add_err1_n
+ .type __gmpn_add_err1_n,@function
+
+__gmpn_add_err1_n:
+
+ mov 8(%rsp), %rax
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ lea (%rsi,%r9,8), %rsi
+ lea (%rdx,%r9,8), %rdx
+ lea (%rdi,%r9,8), %rdi
+
+ mov %r9d, %r10d
+ and $3, %r10d
+ jz .L0mod4
+ cmp $2, %r10d
+ jc .L1mod4
+ jz .L2mod4
+.L3mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ xor %r10d, %r10d
+ xor %r11d, %r11d
+ lea -24(%r8,%r9,8), %r8
+ neg %r9
+
+ shr $1, %al
+ mov (%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %r15
+ adc (%rdx,%r9,8), %r14
+ mov %r14, (%rdi,%r9,8)
+ cmovc 16(%r8), %rbx
+ adc 8(%rdx,%r9,8), %r15
+ mov %r15, 8(%rdi,%r9,8)
+ cmovc 8(%r8), %r10
+ mov 16(%rsi,%r9,8), %r14
+ adc 16(%rdx,%r9,8), %r14
+ mov %r14, 16(%rdi,%r9,8)
+ cmovc (%r8), %r11
+ setc %al
+ add %r10, %rbx
+ adc $0, %rbp
+ add %r11, %rbx
+ adc $0, %rbp
+
+ add $3, %r9
+ jnz .Lloop
+ jmp .Lend
+
+ .align 16, 0x90
+.L0mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ lea (%r8,%r9,8), %r8
+ neg %r9
+ jmp .Lloop
+
+ .align 16, 0x90
+.L1mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ lea -8(%r8,%r9,8), %r8
+ neg %r9
+
+ shr $1, %al
+ mov (%rsi,%r9,8), %r14
+ adc (%rdx,%r9,8), %r14
+ mov %r14, (%rdi,%r9,8)
+ cmovc (%r8), %rbx
+ setc %al
+
+ add $1, %r9
+ jnz .Lloop
+ jmp .Lend
+
+ .align 16, 0x90
+.L2mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ xor %r10d, %r10d
+ lea -16(%r8,%r9,8), %r8
+ neg %r9
+
+ shr $1, %al
+ mov (%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %r15
+ adc (%rdx,%r9,8), %r14
+ mov %r14, (%rdi,%r9,8)
+ cmovc 8(%r8), %rbx
+ adc 8(%rdx,%r9,8), %r15
+ mov %r15, 8(%rdi,%r9,8)
+ cmovc (%r8), %r10
+ setc %al
+ add %r10, %rbx
+ adc $0, %rbp
+
+ add $2, %r9
+ jnz .Lloop
+ jmp .Lend
+
+ .align 32, 0x90
+.Lloop:
+ shr $1, %al
+ mov -8(%r8), %r10
+ mov $0, %r13d
+ mov (%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %r15
+ adc (%rdx,%r9,8), %r14
+ cmovnc %r13, %r10
+ adc 8(%rdx,%r9,8), %r15
+ mov -16(%r8), %r11
+ mov %r14, (%rdi,%r9,8)
+ mov 16(%rsi,%r9,8), %r14
+ mov %r15, 8(%rdi,%r9,8)
+ cmovnc %r13, %r11
+ mov -24(%r8), %r12
+ adc 16(%rdx,%r9,8), %r14
+ cmovnc %r13, %r12
+ mov 24(%rsi,%r9,8), %r15
+ adc 24(%rdx,%r9,8), %r15
+ cmovc -32(%r8), %r13
+ setc %al
+ add %r10, %rbx
+ adc $0, %rbp
+ add %r11, %rbx
+ adc $0, %rbp
+ add %r12, %rbx
+ adc $0, %rbp
+ mov %r14, 16(%rdi,%r9,8)
+ add %r13, %rbx
+ lea -32(%r8), %r8
+ adc $0, %rbp
+ mov %r15, 24(%rdi,%r9,8)
+ add $4, %r9
+ jnz .Lloop
+
+.Lend:
+ mov %rbx, (%rcx)
+ mov %rbp, 8(%rcx)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+ .size __gmpn_add_err1_n,.-__gmpn_add_err1_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/add_err2_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/add_err2_n.s
new file mode 100644
index 0000000..1008479
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/add_err2_n.s
@@ -0,0 +1,184 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_add_err2_n
+ .type __gmpn_add_err2_n,@function
+
+__gmpn_add_err2_n:
+
+ mov 16(%rsp), %rax
+ mov 8(%rsp), %r10
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ xor %ebp, %ebp
+ xor %r11d, %r11d
+ xor %r12d, %r12d
+ xor %r13d, %r13d
+
+ sub %r8, %r9
+
+ lea (%rdi,%r10,8), %rdi
+ lea (%rsi,%r10,8), %rsi
+ lea (%rdx,%r10,8), %rdx
+
+ test $1, %r10
+ jnz .Lodd
+
+ lea -8(%r8,%r10,8), %r8
+ neg %r10
+ jmp .Ltop
+
+ .align 16, 0x90
+.Lodd:
+ lea -16(%r8,%r10,8), %r8
+ neg %r10
+ shr $1, %rax
+ mov (%rsi,%r10,8), %rbx
+ adc (%rdx,%r10,8), %rbx
+ cmovc 8(%r8), %rbp
+ cmovc 8(%r8,%r9), %r12
+ mov %rbx, (%rdi,%r10,8)
+ sbb %rax, %rax
+ inc %r10
+ jz .Lend
+
+ .align 16, 0x90
+.Ltop:
+ mov (%rsi,%r10,8), %rbx
+ shr $1, %rax
+ adc (%rdx,%r10,8), %rbx
+ mov %rbx, (%rdi,%r10,8)
+ sbb %r14, %r14
+
+ mov 8(%rsi,%r10,8), %rbx
+ adc 8(%rdx,%r10,8), %rbx
+ mov %rbx, 8(%rdi,%r10,8)
+ sbb %rax, %rax
+
+ mov (%r8), %rbx
+ and %r14, %rbx
+ add %rbx, %rbp
+ adc $0, %r11
+
+ and (%r8,%r9), %r14
+ add %r14, %r12
+ adc $0, %r13
+
+ mov -8(%r8), %rbx
+ and %rax, %rbx
+ add %rbx, %rbp
+ adc $0, %r11
+
+ mov -8(%r8,%r9), %rbx
+ and %rax, %rbx
+ add %rbx, %r12
+ adc $0, %r13
+
+ add $2, %r10
+ lea -16(%r8), %r8
+ jnz .Ltop
+.Lend:
+
+ mov %rbp, (%rcx)
+ mov %r11, 8(%rcx)
+ mov %r12, 16(%rcx)
+ mov %r13, 24(%rcx)
+
+ and $1, %eax
+
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+ .size __gmpn_add_err2_n,.-__gmpn_add_err2_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/add_err3_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/add_err3_n.s
new file mode 100644
index 0000000..cf99415
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/add_err3_n.s
@@ -0,0 +1,168 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_add_err3_n
+ .type __gmpn_add_err3_n,@function
+
+__gmpn_add_err3_n:
+
+ mov 24(%rsp), %rax
+ mov 16(%rsp), %r10
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ push %rcx
+ mov 64(%rsp), %rcx
+
+ xor %ebp, %ebp
+ xor %r11d, %r11d
+ xor %r12d, %r12d
+ xor %r13d, %r13d
+ xor %r14d, %r14d
+ xor %r15d, %r15d
+
+ sub %r8, %r9
+ sub %r8, %rcx
+
+ lea -8(%r8,%r10,8), %r8
+ lea (%rdi,%r10,8), %rdi
+ lea (%rsi,%r10,8), %rsi
+ lea (%rdx,%r10,8), %rdx
+ neg %r10
+
+ .align 16, 0x90
+.Ltop:
+ shr $1, %rax
+ mov (%rsi,%r10,8), %rax
+ adc (%rdx,%r10,8), %rax
+ mov %rax, (%rdi,%r10,8)
+ sbb %rax, %rax
+
+ mov (%r8), %rbx
+ and %rax, %rbx
+ add %rbx, %rbp
+ adc $0, %r11
+
+ mov (%r8,%r9), %rbx
+ and %rax, %rbx
+ add %rbx, %r12
+ adc $0, %r13
+
+ mov (%r8,%rcx), %rbx
+ and %rax, %rbx
+ add %rbx, %r14
+ adc $0, %r15
+
+ lea -8(%r8), %r8
+ inc %r10
+ jnz .Ltop
+
+.Lend:
+ and $1, %eax
+ pop %rcx
+
+ mov %rbp, (%rcx)
+ mov %r11, 8(%rcx)
+ mov %r12, 16(%rcx)
+ mov %r13, 24(%rcx)
+ mov %r14, 32(%rcx)
+ mov %r15, 40(%rcx)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+ .size __gmpn_add_err3_n,.-__gmpn_add_err3_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/add_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/add_n.s
new file mode 100644
index 0000000..14cc32b
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/add_n.s
@@ -0,0 +1,194 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_add_nc
+ .type __gmpn_add_nc,@function
+
+__gmpn_add_nc:
+
+
+
+ mov %ecx, %eax
+ shr $2, %rcx
+ and $3, %eax
+ bt $0, %r8
+ jrcxz .Llt4
+
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ dec %rcx
+ jmp .Lmid
+
+ .size __gmpn_add_nc,.-__gmpn_add_nc
+ .align 16, 0x90
+ .globl __gmpn_add_n
+ .type __gmpn_add_n,@function
+
+__gmpn_add_n:
+
+
+ mov %ecx, %eax
+ shr $2, %rcx
+ and $3, %eax
+ jrcxz .Llt4
+
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ dec %rcx
+ jmp .Lmid
+
+.Llt4: dec %eax
+ mov (%rsi), %r8
+ jnz .L2
+ adc (%rdx), %r8
+ mov %r8, (%rdi)
+ adc %eax, %eax
+
+ ret
+
+.L2: dec %eax
+ mov 8(%rsi), %r9
+ jnz .L3
+ adc (%rdx), %r8
+ adc 8(%rdx), %r9
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ adc %eax, %eax
+
+ ret
+
+.L3: mov 16(%rsi), %r10
+ adc (%rdx), %r8
+ adc 8(%rdx), %r9
+ adc 16(%rdx), %r10
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ setc %al
+
+ ret
+
+ .align 16, 0x90
+.Ltop: adc (%rdx), %r8
+ adc 8(%rdx), %r9
+ adc 16(%rdx), %r10
+ adc 24(%rdx), %r11
+ mov %r8, (%rdi)
+ lea 32(%rsi), %rsi
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ dec %rcx
+ mov %r11, 24(%rdi)
+ lea 32(%rdx), %rdx
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ lea 32(%rdi), %rdi
+.Lmid: mov 16(%rsi), %r10
+ mov 24(%rsi), %r11
+ jnz .Ltop
+
+.Lend: lea 32(%rsi), %rsi
+ adc (%rdx), %r8
+ adc 8(%rdx), %r9
+ adc 16(%rdx), %r10
+ adc 24(%rdx), %r11
+ lea 32(%rdx), %rdx
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ mov %r11, 24(%rdi)
+ lea 32(%rdi), %rdi
+
+ inc %eax
+ dec %eax
+ jnz .Llt4
+ adc %eax, %eax
+
+ ret
+ .size __gmpn_add_n,.-__gmpn_add_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/addaddmul_1msb0.s b/vere/ext/gmp/gen/x86_64-linux/mpn/addaddmul_1msb0.s
new file mode 100644
index 0000000..c821f7b
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/addaddmul_1msb0.s
@@ -0,0 +1,185 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addaddmul_1msb0
+ .type __gmpn_addaddmul_1msb0,@function
+
+__gmpn_addaddmul_1msb0:
+
+ push %r12
+ push %rbp
+
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,8), %rbp
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+
+ mov (%rsi,%rcx,8), %rax
+ mul %r8
+ mov %rax, %r12
+ mov (%rbp,%rcx,8), %rax
+ mov %rdx, %r10
+ add $3, %rcx
+ jns .Lend
+
+ .align 16, 0x90
+.Ltop: mul %r9
+ add %rax, %r12
+ mov -16(%rsi,%rcx,8), %rax
+ adc %rdx, %r10
+ mov %r12, -24(%rdi,%rcx,8)
+ mul %r8
+ add %rax, %r10
+ mov -16(%rbp,%rcx,8), %rax
+ mov $0, %r11d
+ adc %rdx, %r11
+ mul %r9
+ add %rax, %r10
+ mov -8(%rsi,%rcx,8), %rax
+ adc %rdx, %r11
+ mov %r10, -16(%rdi,%rcx,8)
+ mul %r8
+ add %rax, %r11
+ mov -8(%rbp,%rcx,8), %rax
+ mov $0, %r12d
+ adc %rdx, %r12
+ mul %r9
+ add %rax, %r11
+ adc %rdx, %r12
+ mov (%rsi,%rcx,8), %rax
+ mul %r8
+ add %rax, %r12
+ mov %r11, -8(%rdi,%rcx,8)
+ mov (%rbp,%rcx,8), %rax
+ mov $0, %r10d
+ adc %rdx, %r10
+ add $3, %rcx
+ js .Ltop
+
+.Lend: cmp $1, %ecx
+ ja 2f
+ jz 1f
+
+ mul %r9
+ add %rax, %r12
+ mov -16(%rsi), %rax
+ adc %rdx, %r10
+ mov %r12, -24(%rdi)
+ mul %r8
+ add %rax, %r10
+ mov -16(%rbp), %rax
+ mov $0, %r11d
+ adc %rdx, %r11
+ mul %r9
+ add %rax, %r10
+ mov -8(%rsi), %rax
+ adc %rdx, %r11
+ mov %r10, -16(%rdi)
+ mul %r8
+ add %rax, %r11
+ mov -8(%rbp), %rax
+ mov $0, %r12d
+ adc %rdx, %r12
+ mul %r9
+ add %rax, %r11
+ adc %rdx, %r12
+ mov %r11, -8(%rdi)
+ mov %r12, %rax
+ pop %rbp
+ pop %r12
+ ret
+
+1: mul %r9
+ add %rax, %r12
+ mov -8(%rsi), %rax
+ adc %rdx, %r10
+ mov %r12, -16(%rdi)
+ mul %r8
+ add %rax, %r10
+ mov -8(%rbp), %rax
+ mov $0, %r11d
+ adc %rdx, %r11
+ mul %r9
+ add %rax, %r10
+ adc %rdx, %r11
+ mov %r10, -8(%rdi)
+ mov %r11, %rax
+ pop %rbp
+ pop %r12
+ ret
+
+2: mul %r9
+ add %rax, %r12
+ mov %r12, -8(%rdi)
+ adc %rdx, %r10
+ mov %r10, %rax
+ pop %rbp
+ pop %r12
+ ret
+ .size __gmpn_addaddmul_1msb0,.-__gmpn_addaddmul_1msb0
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh1_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh1_n.s
new file mode 100644
index 0000000..e3d3aae
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh1_n.s
@@ -0,0 +1,179 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addlsh1_n
+ .type __gmpn_addlsh1_n,@function
+
+__gmpn_addlsh1_n:
+
+
+ push %rbp
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,8), %rdx
+ neg %rcx
+ xor %ebp, %ebp
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: add %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ mov 16(%rdx,%rcx,8), %r10
+ adc %r10, %r10
+ sbb %eax, %eax
+ add (%rsi,%rcx,8), %r8
+ adc 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+ adc 16(%rsi,%rcx,8), %r10
+ mov %r10, 16(%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ add $3, %rcx
+ jmp .Lent
+
+.Lb10: add %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ sbb %eax, %eax
+ add (%rsi,%rcx,8), %r8
+ adc 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ add $2, %rcx
+ jmp .Lent
+
+.Lb01: add %r8, %r8
+ sbb %eax, %eax
+ add (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ inc %rcx
+.Lent: jns .Lend
+
+ .align 16, 0x90
+.Ltop: add %eax, %eax
+
+ mov (%rdx,%rcx,8), %r8
+.Lb00: adc %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ mov 16(%rdx,%rcx,8), %r10
+ adc %r10, %r10
+ mov 24(%rdx,%rcx,8), %r11
+ adc %r11, %r11
+
+ sbb %eax, %eax
+ add %ebp, %ebp
+
+ adc (%rsi,%rcx,8), %r8
+ nop
+ adc 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+ adc 16(%rsi,%rcx,8), %r10
+ adc 24(%rsi,%rcx,8), %r11
+ mov %r10, 16(%rdi,%rcx,8)
+ mov %r11, 24(%rdi,%rcx,8)
+
+ sbb %ebp, %ebp
+ add $4, %rcx
+ js .Ltop
+
+.Lend:
+
+ add %ebp, %eax
+ neg %eax
+
+
+ pop %rbp
+
+ ret
+ .size __gmpn_addlsh1_n,.-__gmpn_addlsh1_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s
new file mode 100644
index 0000000..00e2090
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh2_n.s
@@ -0,0 +1,204 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addlsh2_n
+ .type __gmpn_addlsh2_n,@function
+
+__gmpn_addlsh2_n:
+
+
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov (%rdx), %r8
+ lea (,%r8,4), %r12
+ shr $62, %r8
+
+ mov %ecx, %eax
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,8), %rdx
+ neg %rcx
+ and $3, %al
+ je .Lb00
+ cmp $2, %al
+ jc .Lb01
+ je .Lb10
+
+.Lb11: mov 8(%rdx,%rcx,8), %r10
+ lea (%r8,%r10,4), %r14
+ shr $62, %r10
+ mov 16(%rdx,%rcx,8), %r11
+ lea (%r10,%r11,4), %r15
+ shr $62, %r11
+ add (%rsi,%rcx,8), %r12
+ adc 8(%rsi,%rcx,8), %r14
+ adc 16(%rsi,%rcx,8), %r15
+ sbb %eax, %eax
+ mov %r12, (%rdi,%rcx,8)
+ mov %r14, 8(%rdi,%rcx,8)
+ mov %r15, 16(%rdi,%rcx,8)
+ add $3, %rcx
+ js .Ltop
+ jmp .Lend
+
+.Lb01: mov %r8, %r11
+ add (%rsi,%rcx,8), %r12
+ sbb %eax, %eax
+ mov %r12, (%rdi,%rcx,8)
+ add $1, %rcx
+ js .Ltop
+ jmp .Lend
+
+.Lb10: mov 8(%rdx,%rcx,8), %r11
+ lea (%r8,%r11,4), %r15
+ shr $62, %r11
+ add (%rsi,%rcx,8), %r12
+ adc 8(%rsi,%rcx,8), %r15
+ sbb %eax, %eax
+ mov %r12, (%rdi,%rcx,8)
+ mov %r15, 8(%rdi,%rcx,8)
+ add $2, %rcx
+ js .Ltop
+ jmp .Lend
+
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ mov 16(%rdx,%rcx,8), %r10
+ jmp .Le00
+
+ .align 16, 0x90
+.Ltop: mov 16(%rdx,%rcx,8), %r10
+ mov (%rdx,%rcx,8), %r8
+ mov 8(%rdx,%rcx,8), %r9
+ lea (%r11,%r8,4), %r12
+ shr $62, %r8
+.Le00: lea (%r8,%r9,4), %r13
+ shr $62, %r9
+ mov 24(%rdx,%rcx,8), %r11
+ lea (%r9,%r10,4), %r14
+ shr $62, %r10
+ lea (%r10,%r11,4), %r15
+ shr $62, %r11
+ add %eax, %eax
+ adc (%rsi,%rcx,8), %r12
+ adc 8(%rsi,%rcx,8), %r13
+ adc 16(%rsi,%rcx,8), %r14
+ adc 24(%rsi,%rcx,8), %r15
+ mov %r12, (%rdi,%rcx,8)
+ mov %r13, 8(%rdi,%rcx,8)
+ mov %r14, 16(%rdi,%rcx,8)
+ sbb %eax, %eax
+ mov %r15, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ js .Ltop
+.Lend:
+
+
+ sub %r11d, %eax
+ neg %eax
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+
+ ret
+ .size __gmpn_addlsh2_n,.-__gmpn_addlsh2_n
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh_n.s
new file mode 100644
index 0000000..2d261d5
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/addlsh_n.s
@@ -0,0 +1,228 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addlsh_n
+ .type __gmpn_addlsh_n,@function
+
+__gmpn_addlsh_n:
+
+
+
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov (%rdx), %rax
+
+ mov $0, %ebp
+ sub %rcx, %rbp
+
+ lea -16(%rsi,%rcx,8), %rsi
+ lea -16(%rdi,%rcx,8), %rdi
+ lea 16(%rdx,%rcx,8), %r12
+
+ mov %rcx, %r9
+
+ mov %r8, %rcx
+ mov $1, %r8d
+ shl %cl, %r8
+
+ mul %r8
+
+ and $3, %r9d
+ jz .Lb0
+ cmp $2, %r9d
+ jc .Lb1
+ jz .Lb2
+
+.Lb3: mov %rax, %r11
+ add 16(%rsi,%rbp,8), %r11
+ mov -8(%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov (%r12,%rbp,8), %rax
+ mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ add $3, %rbp
+ jnz .Llo3
+ jmp .Lcj3
+
+.Lb2: mov %rax, %rbx
+ mov -8(%r12,%rbp,8), %rax
+ mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ add $2, %rbp
+ jz .Lcj2
+ mov %rdx, %r10
+ mov -16(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r10
+ xor %ecx, %ecx
+ jmp .Llo2
+
+.Lb1: mov %rax, %r9
+ mov %rdx, %r10
+ add $1, %rbp
+ jnz .Lgt1
+ add 8(%rsi,%rbp,8), %r9
+ jmp .Lcj1
+.Lgt1: mov -16(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r10
+ mov %rdx, %r11
+ mov -8(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r11
+ add 8(%rsi,%rbp,8), %r9
+ adc 16(%rsi,%rbp,8), %r10
+ adc 24(%rsi,%rbp,8), %r11
+ mov (%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ jmp .Llo1
+
+.Lb0: mov %rax, %r10
+ mov %rdx, %r11
+ mov -8(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r11
+ add 16(%rsi,%rbp,8), %r10
+ adc 24(%rsi,%rbp,8), %r11
+ mov (%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov 8(%r12,%rbp,8), %rax
+ add $4, %rbp
+ jz .Lend
+
+ .align 8, 0x90
+.Ltop: mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ mov %r10, -16(%rdi,%rbp,8)
+.Llo3: mov %rdx, %r10
+ mov -16(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r10
+ mov %r11, -8(%rdi,%rbp,8)
+.Llo2: mov %rdx, %r11
+ mov -8(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r11
+ add %ecx, %ecx
+ adc (%rsi,%rbp,8), %rbx
+ adc 8(%rsi,%rbp,8), %r9
+ adc 16(%rsi,%rbp,8), %r10
+ adc 24(%rsi,%rbp,8), %r11
+ mov (%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ mov %rbx, (%rdi,%rbp,8)
+.Llo1: mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov %r9, 8(%rdi,%rbp,8)
+.Llo0: mov 8(%r12,%rbp,8), %rax
+ add $4, %rbp
+ jnz .Ltop
+
+.Lend: mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ mov %r10, -16(%rdi,%rbp,8)
+.Lcj3: mov %r11, -8(%rdi,%rbp,8)
+.Lcj2: add %ecx, %ecx
+ adc (%rsi,%rbp,8), %rbx
+ adc 8(%rsi,%rbp,8), %r9
+ mov %rbx, (%rdi,%rbp,8)
+.Lcj1: mov %r9, 8(%rdi,%rbp,8)
+ mov %rdx, %rax
+ adc $0, %rax
+ pop %rbx
+ pop %rbp
+ pop %r12
+
+ ret
+ .size __gmpn_addlsh_n,.-__gmpn_addlsh_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/addmul_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/addmul_1.s
new file mode 100644
index 0000000..8daf1ac
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/addmul_1.s
@@ -0,0 +1,196 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addmul_1
+ .type __gmpn_addmul_1,@function
+
+__gmpn_addmul_1:
+
+
+
+
+
+
+ mov (%rsi), %rax
+ push %rbx
+ mov %rdx, %rbx
+
+ mul %rcx
+ mov %rbx, %r11
+
+ and $3, %ebx
+ jz .Lb0
+ cmp $2, %ebx
+ jz .Lb2
+ jg .Lb3
+
+.Lb1: dec %r11
+ jne .Lgt1
+ add %rax, (%rdi)
+ jmp .Lret
+.Lgt1: lea 8(%rsi,%r11,8), %rsi
+ lea -8(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r10, %r10
+ xor %ebx, %ebx
+ mov %rax, %r9
+ mov (%rsi,%r11,8), %rax
+ mov %rdx, %r8
+ jmp .LL1
+
+.Lb0: lea (%rsi,%r11,8), %rsi
+ lea -16(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r10, %r10
+ mov %rax, %r8
+ mov %rdx, %rbx
+ jmp .LL0
+
+.Lb3: lea -8(%rsi,%r11,8), %rsi
+ lea -24(%rdi,%r11,8), %rdi
+ neg %r11
+ mov %rax, %rbx
+ mov %rdx, %r10
+ jmp .LL3
+
+.Lb2: lea -16(%rsi,%r11,8), %rsi
+ lea -32(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r8, %r8
+ xor %ebx, %ebx
+ mov %rax, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mov %rdx, %r9
+ jmp .LL2
+
+ .align 16, 0x90
+.Ltop: add %r10, (%rdi,%r11,8)
+ adc %rax, %r9
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %r8
+ mov $0, %r10d
+.LL1: mul %rcx
+ add %r9, 8(%rdi,%r11,8)
+ adc %rax, %r8
+ adc %rdx, %rbx
+.LL0: mov 8(%rsi,%r11,8), %rax
+ mul %rcx
+ add %r8, 16(%rdi,%r11,8)
+ adc %rax, %rbx
+ adc %rdx, %r10
+.LL3: mov 16(%rsi,%r11,8), %rax
+ mul %rcx
+ add %rbx, 24(%rdi,%r11,8)
+ mov $0, %r8d
+ mov %r8, %rbx
+ adc %rax, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mov %r8, %r9
+ adc %rdx, %r9
+.LL2: mul %rcx
+ add $4, %r11
+ js .Ltop
+
+ add %r10, (%rdi,%r11,8)
+ adc %rax, %r9
+ adc %r8, %rdx
+ add %r9, 8(%rdi,%r11,8)
+.Lret: adc $0, %rdx
+ mov %rdx, %rax
+
+ pop %rbx
+
+
+ ret
+ .size __gmpn_addmul_1,.-__gmpn_addmul_1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/addmul_2.s b/vere/ext/gmp/gen/x86_64-linux/mpn/addmul_2.s
new file mode 100644
index 0000000..5883dab
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/addmul_2.s
@@ -0,0 +1,209 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_addmul_2
+ .type __gmpn_addmul_2,@function
+
+__gmpn_addmul_2:
+
+
+ mov %rdx, %r11
+ push %rbx
+ push %rbp
+
+ mov 0(%rcx), %r8
+ mov 8(%rcx), %r9
+
+ mov %edx, %ebx
+ mov (%rsi), %rax
+ lea -8(%rsi,%rdx,8), %rsi
+ lea -8(%rdi,%rdx,8), %rdi
+ mul %r8
+ neg %r11
+ and $3, %ebx
+ jz .Lb0
+ cmp $2, %ebx
+ jc .Lb1
+ jz .Lb2
+
+.Lb3: mov %rax, %rcx
+ mov %rdx, %rbp
+ xor %r10d, %r10d
+ mov 8(%rsi,%r11,8), %rax
+ dec %r11
+ jmp .Llo3
+
+.Lb2: mov %rax, %rbp
+ mov 8(%rsi,%r11,8), %rax
+ mov %rdx, %r10
+ xor %ebx, %ebx
+ add $-2, %r11
+ jmp .Llo2
+
+.Lb1: mov %rax, %r10
+ mov 8(%rsi,%r11,8), %rax
+ mov %rdx, %rbx
+ xor %ecx, %ecx
+ inc %r11
+ jmp .Llo1
+
+.Lb0: mov $0, %r10d
+ mov %rax, %rbx
+ mov 8(%rsi,%r11,8), %rax
+ mov %rdx, %rcx
+ xor %ebp, %ebp
+ jmp .Llo0
+
+ .align 32, 0x90
+.Ltop: mov $0, %ecx
+ mul %r8
+ add %rax, %r10
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rbx
+ adc $0, %ecx
+.Llo1: mul %r9
+ add %r10, (%rdi,%r11,8)
+ mov $0, %r10d
+ adc %rax, %rbx
+ mov $0, %ebp
+ mov 8(%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+ mul %r8
+ add %rax, %rbx
+ mov 8(%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+.Llo0: mul %r9
+ add %rbx, 8(%rdi,%r11,8)
+ adc %rax, %rcx
+ adc %rdx, %rbp
+ mov 16(%rsi,%r11,8), %rax
+ mul %r8
+ add %rax, %rcx
+ adc %rdx, %rbp
+ adc $0, %r10d
+ mov 16(%rsi,%r11,8), %rax
+.Llo3: mul %r9
+ add %rcx, 16(%rdi,%r11,8)
+ adc %rax, %rbp
+ adc %rdx, %r10
+ xor %ebx, %ebx
+ mov 24(%rsi,%r11,8), %rax
+ mul %r8
+ add %rax, %rbp
+ mov 24(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ adc $0, %ebx
+.Llo2: mul %r9
+ add %rbp, 24(%rdi,%r11,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+ mov 32(%rsi,%r11,8), %rax
+ add $4, %r11
+ js .Ltop
+
+.Lend: xor %ecx, %ecx
+ mul %r8
+ add %rax, %r10
+ mov (%rsi), %rax
+ adc %rdx, %rbx
+ adc %ecx, %ecx
+ mul %r9
+ add %r10, (%rdi)
+ adc %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, 8(%rdi)
+ mov %rcx, %rax
+
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_addmul_2,.-__gmpn_addmul_2
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/and_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/and_n.s
new file mode 100644
index 0000000..946906e
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/and_n.s
@@ -0,0 +1,149 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_and_n
+ .type __gmpn_and_n,@function
+
+__gmpn_and_n:
+
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: and (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+.Lb01: and (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ and (%rsi,%rcx,8), %r8
+ and 8(%rsi,%rcx,8), %r9
+ nop
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ and 16(%rsi,%rcx,8), %r8
+ and 24(%rsi,%rcx,8), %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_and_n,.-__gmpn_and_n
+
+
+
+
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/andn_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/andn_n.s
new file mode 100644
index 0000000..aee1df4
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/andn_n.s
@@ -0,0 +1,154 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_andn_n
+ .type __gmpn_andn_n,@function
+
+__gmpn_andn_n:
+
+
+ mov (%rdx), %r8
+ not %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: and (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+ .byte 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+.Lb01: and (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+ not %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ not %r9
+ and (%rsi,%rcx,8), %r8
+ and 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+ not %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ not %r9
+ and 16(%rsi,%rcx,8), %r8
+ and 24(%rsi,%rcx,8), %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_andn_n,.-__gmpn_andn_n
+
+
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_dbm1c.s b/vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_dbm1c.s
new file mode 100644
index 0000000..2fda4a0
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_dbm1c.s
@@ -0,0 +1,121 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_bdiv_dbm1c
+ .type __gmpn_bdiv_dbm1c,@function
+
+__gmpn_bdiv_dbm1c:
+
+
+
+ mov (%rsi), %rax
+ mov %rdx, %r9
+ mov %edx, %r11d
+ mul %rcx
+ lea (%rsi,%r9,8), %rsi
+ lea (%rdi,%r9,8), %rdi
+ neg %r9
+ and $3, %r11d
+ jz .Llo0
+ lea -4(%r9,%r11), %r9
+ cmp $2, %r11d
+ jc .Llo1
+ jz .Llo2
+ jmp .Llo3
+
+ .align 16, 0x90
+.Ltop: mov (%rsi,%r9,8), %rax
+ mul %rcx
+.Llo0: sub %rax, %r8
+ mov %r8, (%rdi,%r9,8)
+ sbb %rdx, %r8
+ mov 8(%rsi,%r9,8), %rax
+ mul %rcx
+.Llo3: sub %rax, %r8
+ mov %r8, 8(%rdi,%r9,8)
+ sbb %rdx, %r8
+ mov 16(%rsi,%r9,8), %rax
+ mul %rcx
+.Llo2: sub %rax, %r8
+ mov %r8, 16(%rdi,%r9,8)
+ sbb %rdx, %r8
+ mov 24(%rsi,%r9,8), %rax
+ mul %rcx
+.Llo1: sub %rax, %r8
+ mov %r8, 24(%rdi,%r9,8)
+ sbb %rdx, %r8
+ add $4, %r9
+ jnz .Ltop
+
+ mov %r8, %rax
+
+ ret
+ .size __gmpn_bdiv_dbm1c,.-__gmpn_bdiv_dbm1c
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_q_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_q_1.s
new file mode 100644
index 0000000..4f58778
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/bdiv_q_1.s
@@ -0,0 +1,198 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_bdiv_q_1
+ .type __gmpn_bdiv_q_1,@function
+
+__gmpn_bdiv_q_1:
+
+
+ push %rbx
+
+ mov %rcx, %rax
+ xor %ecx, %ecx
+ mov %rdx, %r10
+
+ bt $0, %eax
+ jnc .Levn
+
+.Lodd: mov %rax, %rbx
+ shr %eax
+ and $127, %eax
+
+ mov __gmp_binvert_limb_table@GOTPCREL(%rip), %rdx
+
+
+
+ movzbl (%rdx,%rax), %eax
+
+ mov %rbx, %r11
+
+ lea (%rax,%rax), %edx
+ imul %eax, %eax
+ imul %ebx, %eax
+ sub %eax, %edx
+
+ lea (%rdx,%rdx), %eax
+ imul %edx, %edx
+ imul %ebx, %edx
+ sub %edx, %eax
+
+ lea (%rax,%rax), %r8
+ imul %rax, %rax
+ imul %rbx, %rax
+ sub %rax, %r8
+
+ jmp .Lpi1
+
+.Levn: bsf %rax, %rcx
+ shr %cl, %rax
+ jmp .Lodd
+ .size __gmpn_bdiv_q_1,.-__gmpn_bdiv_q_1
+
+ .globl __gmpn_pi1_bdiv_q_1
+ .type __gmpn_pi1_bdiv_q_1,@function
+
+__gmpn_pi1_bdiv_q_1:
+
+
+
+
+ push %rbx
+
+ mov %rcx, %r11
+ mov %rdx, %r10
+ mov %r9, %rcx
+
+.Lpi1: mov (%rsi), %rax
+
+ dec %r10
+ jz .Lone
+
+ mov 8(%rsi), %rdx
+ lea (%rsi,%r10,8), %rsi
+ lea (%rdi,%r10,8), %rdi
+ neg %r10
+
+ shrd %cl, %rdx, %rax
+
+ xor %ebx, %ebx
+ jmp .Lent
+
+ .align 8, 0x90
+.Ltop:
+
+
+
+
+
+
+
+ mul %r11
+ mov (%rsi,%r10,8), %rax
+ mov 8(%rsi,%r10,8), %r9
+ shrd %cl, %r9, %rax
+ nop
+ sub %rbx, %rax
+ setc %bl
+ sub %rdx, %rax
+ adc $0, %ebx
+.Lent: imul %r8, %rax
+ mov %rax, (%rdi,%r10,8)
+ inc %r10
+ jnz .Ltop
+
+ mul %r11
+ mov (%rsi), %rax
+ shr %cl, %rax
+ sub %rbx, %rax
+ sub %rdx, %rax
+ imul %r8, %rax
+ mov %rax, (%rdi)
+ pop %rbx
+
+ ret
+
+.Lone: shr %cl, %rax
+ imul %r8, %rax
+ mov %rax, (%rdi)
+ pop %rbx
+
+ ret
+ .size __gmpn_pi1_bdiv_q_1,.-__gmpn_pi1_bdiv_q_1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/cnd_add_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/cnd_add_n.s
new file mode 100644
index 0000000..b046e36
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/cnd_add_n.s
@@ -0,0 +1,190 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_cnd_add_n
+ .type __gmpn_cnd_add_n,@function
+
+__gmpn_cnd_add_n:
+
+
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ neg %rdi
+ sbb %rdi, %rdi
+
+ lea (%rcx,%r8,8), %rcx
+ lea (%rdx,%r8,8), %rdx
+ lea (%rsi,%r8,8), %rsi
+
+ mov %r8d, %eax
+ neg %r8
+ and $3, %eax
+ jz .Ltop
+ cmp $2, %eax
+ jc .Lb1
+ jz .Lb2
+
+.Lb3: mov (%rcx,%r8,8), %r12
+ mov 8(%rcx,%r8,8), %r13
+ mov 16(%rcx,%r8,8), %r14
+ and %rdi, %r12
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r13
+ mov 8(%rdx,%r8,8), %rbx
+ and %rdi, %r14
+ mov 16(%rdx,%r8,8), %rbp
+ add %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ adc %r13, %rbx
+ mov %rbx, 8(%rsi,%r8,8)
+ adc %r14, %rbp
+ mov %rbp, 16(%rsi,%r8,8)
+ sbb %eax, %eax
+ add $3, %r8
+ js .Ltop
+ jmp .Lend
+
+.Lb2: mov (%rcx,%r8,8), %r12
+ mov 8(%rcx,%r8,8), %r13
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r12
+ mov 8(%rdx,%r8,8), %rbx
+ and %rdi, %r13
+ add %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ adc %r13, %rbx
+ mov %rbx, 8(%rsi,%r8,8)
+ sbb %eax, %eax
+ add $2, %r8
+ js .Ltop
+ jmp .Lend
+
+.Lb1: mov (%rcx,%r8,8), %r12
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r12
+ add %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ sbb %eax, %eax
+ add $1, %r8
+ jns .Lend
+
+ .align 16, 0x90
+.Ltop: mov (%rcx,%r8,8), %r12
+ mov 8(%rcx,%r8,8), %r13
+ mov 16(%rcx,%r8,8), %r14
+ mov 24(%rcx,%r8,8), %r11
+ and %rdi, %r12
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r13
+ mov 8(%rdx,%r8,8), %rbx
+ and %rdi, %r14
+ mov 16(%rdx,%r8,8), %rbp
+ and %rdi, %r11
+ mov 24(%rdx,%r8,8), %r9
+ add %eax, %eax
+ adc %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ adc %r13, %rbx
+ mov %rbx, 8(%rsi,%r8,8)
+ adc %r14, %rbp
+ mov %rbp, 16(%rsi,%r8,8)
+ adc %r11, %r9
+ mov %r9, 24(%rsi,%r8,8)
+ sbb %eax, %eax
+ add $4, %r8
+ js .Ltop
+
+.Lend: neg %eax
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_cnd_add_n,.-__gmpn_cnd_add_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/cnd_sub_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/cnd_sub_n.s
new file mode 100644
index 0000000..596dd8f
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/cnd_sub_n.s
@@ -0,0 +1,190 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_cnd_sub_n
+ .type __gmpn_cnd_sub_n,@function
+
+__gmpn_cnd_sub_n:
+
+
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ neg %rdi
+ sbb %rdi, %rdi
+
+ lea (%rcx,%r8,8), %rcx
+ lea (%rdx,%r8,8), %rdx
+ lea (%rsi,%r8,8), %rsi
+
+ mov %r8d, %eax
+ neg %r8
+ and $3, %eax
+ jz .Ltop
+ cmp $2, %eax
+ jc .Lb1
+ jz .Lb2
+
+.Lb3: mov (%rcx,%r8,8), %r12
+ mov 8(%rcx,%r8,8), %r13
+ mov 16(%rcx,%r8,8), %r14
+ and %rdi, %r12
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r13
+ mov 8(%rdx,%r8,8), %rbx
+ and %rdi, %r14
+ mov 16(%rdx,%r8,8), %rbp
+ sub %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ sbb %r13, %rbx
+ mov %rbx, 8(%rsi,%r8,8)
+ sbb %r14, %rbp
+ mov %rbp, 16(%rsi,%r8,8)
+ sbb %eax, %eax
+ add $3, %r8
+ js .Ltop
+ jmp .Lend
+
+.Lb2: mov (%rcx,%r8,8), %r12
+ mov 8(%rcx,%r8,8), %r13
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r12
+ mov 8(%rdx,%r8,8), %rbx
+ and %rdi, %r13
+ sub %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ sbb %r13, %rbx
+ mov %rbx, 8(%rsi,%r8,8)
+ sbb %eax, %eax
+ add $2, %r8
+ js .Ltop
+ jmp .Lend
+
+.Lb1: mov (%rcx,%r8,8), %r12
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r12
+ sub %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ sbb %eax, %eax
+ add $1, %r8
+ jns .Lend
+
+ .align 16, 0x90
+.Ltop: mov (%rcx,%r8,8), %r12
+ mov 8(%rcx,%r8,8), %r13
+ mov 16(%rcx,%r8,8), %r14
+ mov 24(%rcx,%r8,8), %r11
+ and %rdi, %r12
+ mov (%rdx,%r8,8), %r10
+ and %rdi, %r13
+ mov 8(%rdx,%r8,8), %rbx
+ and %rdi, %r14
+ mov 16(%rdx,%r8,8), %rbp
+ and %rdi, %r11
+ mov 24(%rdx,%r8,8), %r9
+ add %eax, %eax
+ sbb %r12, %r10
+ mov %r10, (%rsi,%r8,8)
+ sbb %r13, %rbx
+ mov %rbx, 8(%rsi,%r8,8)
+ sbb %r14, %rbp
+ mov %rbp, 16(%rsi,%r8,8)
+ sbb %r11, %r9
+ mov %r9, 24(%rsi,%r8,8)
+ sbb %eax, %eax
+ add $4, %r8
+ js .Ltop
+
+.Lend: neg %eax
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_cnd_sub_n,.-__gmpn_cnd_sub_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/com.s b/vere/ext/gmp/gen/x86_64-linux/mpn/com.s
new file mode 100644
index 0000000..ff14001
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/com.s
@@ -0,0 +1,110 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_com
+ .type __gmpn_com,@function
+
+__gmpn_com:
+
+
+ movq (%rsi), %r8
+ movl %edx, %eax
+ leaq (%rsi,%rdx,8), %rsi
+ leaq (%rdi,%rdx,8), %rdi
+ negq %rdx
+ andl $3, %eax
+ je .Lb00
+ cmpl $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: notq %r8
+ movq %r8, (%rdi,%rdx,8)
+ decq %rdx
+ jmp .Le11
+.Lb10: addq $-2, %rdx
+ jmp .Le10
+ .byte 0x90,0x90,0x90,0x90,0x90,0x90
+.Lb01: notq %r8
+ movq %r8, (%rdi,%rdx,8)
+ incq %rdx
+ jz .Lret
+
+.Loop: movq (%rsi,%rdx,8), %r8
+.Lb00: movq 8(%rsi,%rdx,8), %r9
+ notq %r8
+ notq %r9
+ movq %r8, (%rdi,%rdx,8)
+ movq %r9, 8(%rdi,%rdx,8)
+.Le11: movq 16(%rsi,%rdx,8), %r8
+.Le10: movq 24(%rsi,%rdx,8), %r9
+ notq %r8
+ notq %r9
+ movq %r8, 16(%rdi,%rdx,8)
+ movq %r9, 24(%rdi,%rdx,8)
+ addq $4, %rdx
+ jnc .Loop
+.Lret:
+ ret
+ .size __gmpn_com,.-__gmpn_com
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/copyd.s b/vere/ext/gmp/gen/x86_64-linux/mpn/copyd.s
new file mode 100644
index 0000000..f375481
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/copyd.s
@@ -0,0 +1,108 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 64, 0x90
+ .globl __gmpn_copyd
+ .type __gmpn_copyd,@function
+
+__gmpn_copyd:
+
+ lea -8(%rsi,%rdx,8), %rsi
+ lea (%rdi,%rdx,8), %rdi
+ sub $4, %rdx
+ jc .Lend
+ nop
+
+.Ltop: mov (%rsi), %rax
+ mov -8(%rsi), %r9
+ lea -32(%rdi), %rdi
+ mov -16(%rsi), %r10
+ mov -24(%rsi), %r11
+ lea -32(%rsi), %rsi
+ mov %rax, 24(%rdi)
+ mov %r9, 16(%rdi)
+ sub $4, %rdx
+ mov %r10, 8(%rdi)
+ mov %r11, (%rdi)
+ jnc .Ltop
+
+.Lend: shr %edx
+ jnc 1f
+ mov (%rsi), %rax
+ mov %rax, -8(%rdi)
+ lea -8(%rdi), %rdi
+ lea -8(%rsi), %rsi
+1: shr %edx
+ jnc 1f
+ mov (%rsi), %rax
+ mov -8(%rsi), %r9
+ mov %rax, -8(%rdi)
+ mov %r9, -16(%rdi)
+1: ret
+ .size __gmpn_copyd,.-__gmpn_copyd
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/copyi.s b/vere/ext/gmp/gen/x86_64-linux/mpn/copyi.s
new file mode 100644
index 0000000..dc746b2
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/copyi.s
@@ -0,0 +1,107 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 64, 0x90
+ .byte 0,0,0,0,0,0
+ .globl __gmpn_copyi
+ .type __gmpn_copyi,@function
+
+__gmpn_copyi:
+
+ lea -8(%rdi), %rdi
+ sub $4, %rdx
+ jc .Lend
+
+.Ltop: mov (%rsi), %rax
+ mov 8(%rsi), %r9
+ lea 32(%rdi), %rdi
+ mov 16(%rsi), %r10
+ mov 24(%rsi), %r11
+ lea 32(%rsi), %rsi
+ mov %rax, -24(%rdi)
+ mov %r9, -16(%rdi)
+ sub $4, %rdx
+ mov %r10, -8(%rdi)
+ mov %r11, (%rdi)
+ jnc .Ltop
+
+.Lend: shr %edx
+ jnc 1f
+ mov (%rsi), %rax
+ mov %rax, 8(%rdi)
+ lea 8(%rdi), %rdi
+ lea 8(%rsi), %rsi
+1: shr %edx
+ jnc 1f
+ mov (%rsi), %rax
+ mov 8(%rsi), %r9
+ mov %rax, 8(%rdi)
+ mov %r9, 16(%rdi)
+1: ret
+ .size __gmpn_copyi,.-__gmpn_copyi
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_1n_pi1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_1n_pi1.s
new file mode 100644
index 0000000..fd8ce8e
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_1n_pi1.s
@@ -0,0 +1,261 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_div_qr_1n_pi1
+ .type __gmpn_div_qr_1n_pi1,@function
+
+__gmpn_div_qr_1n_pi1:
+
+
+
+
+ dec %rdx
+ jnz .Lfirst
+
+
+
+ lea 1(%rcx), %r10
+ mov %rcx, %rax
+ mul %r9
+ mov (%rsi), %r11
+ add %r11, %rax
+ adc %r10, %rdx
+ mov %rdx, %r10
+ imul %r8, %rdx
+ sub %rdx, %r11
+ cmp %r11, %rax
+ lea (%r11, %r8), %rax
+ cmovnc %r11, %rax
+ sbb $0, %r10
+ cmp %r8, %rax
+ jc .Lsingle_div_done
+ sub %r8, %rax
+ add $1, %r10
+.Lsingle_div_done:
+ mov %r10, (%rdi)
+
+ ret
+.Lfirst:
+
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbx
+ push %rbp
+
+ mov %r8, %rbp
+ imul %r9, %rbp
+ neg %rbp
+ mov %rbp, %rbx
+ sub %r8, %rbx
+
+
+ push %r8
+ mov %rdx, %r8
+
+ mov %r9, %rax
+ mul %rcx
+ mov %rax, %r13
+ add %rcx, %rdx
+ mov %rdx, %r10
+
+ mov %rbp, %rax
+ mul %rcx
+ mov -8(%rsi, %r8, 8), %r11
+ mov (%rsi, %r8, 8), %rcx
+ mov %r10, (%rdi, %r8, 8)
+ add %rax, %r11
+ adc %rdx, %rcx
+ sbb %r12, %r12
+ dec %r8
+ mov %rcx, %rax
+ jz .Lfinal
+ mov $0, %r14d
+
+ .align 16, 0x90
+
+
+
+
+.Lloop:
+
+
+ cmovc %r9, %r14
+ mov %r12, %r15
+ neg %r15
+ mul %r9
+ add %rdx, %r14
+ adc $0, %r15
+ add %r13, %r14
+ mov %rax, %r13
+ mov %rbp, %rax
+ lea (%rbx, %r11), %r10
+ adc $0, %r15
+
+
+ mul %rcx
+ and %rbp, %r12
+ add %r12, %r11
+ cmovnc %r11, %r10
+
+
+ adc %rcx, %r14
+ mov -8(%rsi, %r8, 8), %r11
+ adc %r15, 8(%rdi, %r8, 8)
+ jc .Lq_incr
+.Lq_incr_done:
+ add %rax, %r11
+ mov %r10, %rax
+ adc %rdx, %rax
+ mov %r14, (%rdi, %r8, 8)
+ mov $0, %r14d
+ sbb %r12, %r12
+ dec %r8
+ mov %rax, %rcx
+ jnz .Lloop
+
+.Lfinal:
+ pop %r8
+
+ mov %r12, %r14
+ and %r8, %r12
+ sub %r12, %rax
+ neg %r14
+
+ mov %rax, %rcx
+ sub %r8, %rax
+ cmovc %rcx, %rax
+ sbb $-1, %r14
+
+ lea 1(%rax), %r10
+ mul %r9
+ add %r11, %rax
+ adc %r10, %rdx
+ mov %rdx, %r10
+ imul %r8, %rdx
+ sub %rdx, %r11
+ cmp %r11, %rax
+ lea (%r11, %r8), %rax
+ cmovnc %r11, %rax
+ sbb $0, %r10
+ cmp %r8, %rax
+ jc .Ldiv_done
+ sub %r8, %rax
+ add $1, %r10
+.Ldiv_done:
+ add %r10, %r13
+ mov %r13, (%rdi)
+ adc %r14, 8(%rdi)
+ jnc .Ldone
+.Lfinal_q_incr:
+ addq $1, 16(%rdi)
+ lea 8(%rdi), %rdi
+ jc .Lfinal_q_incr
+
+.Ldone:
+ pop %rbp
+ pop %rbx
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ ret
+
+.Lq_incr:
+
+ lea 16(%rdi, %r8, 8), %rcx
+.Lq_incr_loop:
+ addq $1, (%rcx)
+ jnc .Lq_incr_done
+ lea 8(%rcx), %rcx
+ jmp .Lq_incr_loop
+ .size __gmpn_div_qr_1n_pi1,.-__gmpn_div_qr_1n_pi1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2n_pi1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2n_pi1.s
new file mode 100644
index 0000000..67618f7
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2n_pi1.s
@@ -0,0 +1,171 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_div_qr_2n_pi1
+ .type __gmpn_div_qr_2n_pi1,@function
+
+__gmpn_div_qr_2n_pi1:
+
+
+
+
+
+ mov 8(%rsp), %r10
+ mov %rdx, %r11
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbx
+
+ mov -16(%r11, %rcx, 8), %r12
+ mov -8(%r11, %rcx, 8), %rbx
+
+ mov %r12, %r14
+ mov %rbx, %r13
+ sub %r9, %r14
+ sbb %r8, %r13
+ cmovnc %r14, %r12
+ cmovnc %r13, %rbx
+
+ sbb %rax, %rax
+ inc %rax
+ push %rax
+ lea -2(%rcx), %rcx
+ mov %r8, %r15
+ neg %r15
+
+ jmp .Lnext
+
+ .align 16, 0x90
+.Lloop:
+
+
+
+ mov %r10, %rax
+ mul %rbx
+ mov %r12, %r14
+ add %rax, %r14
+ adc %rbx, %rdx
+ mov %rdx, %r13
+ imul %r15, %rdx
+ mov %r9, %rax
+ lea (%rdx, %r12), %rbx
+ mul %r13
+ mov (%r11, %rcx, 8), %r12
+ sub %r9, %r12
+ sbb %r8, %rbx
+ sub %rax, %r12
+ sbb %rdx, %rbx
+ xor %eax, %eax
+ xor %edx, %edx
+ cmp %r14, %rbx
+ cmovnc %r9, %rax
+ cmovnc %r8, %rdx
+ adc $0, %r13
+ nop
+ add %rax, %r12
+ adc %rdx, %rbx
+ cmp %r8, %rbx
+ jae .Lfix
+.Lbck:
+ mov %r13, (%rdi, %rcx, 8)
+.Lnext:
+ sub $1, %rcx
+ jnc .Lloop
+.Lend:
+ mov %rbx, 8(%rsi)
+ mov %r12, (%rsi)
+
+
+ pop %rax
+
+ pop %rbx
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ ret
+
+.Lfix:
+ seta %dl
+ cmp %r9, %r12
+ setae %al
+ orb %dl, %al
+ je .Lbck
+ inc %r13
+ sub %r9, %r12
+ sbb %r8, %rbx
+ jmp .Lbck
+ .size __gmpn_div_qr_2n_pi1,.-__gmpn_div_qr_2n_pi1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2u_pi1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2u_pi1.s
new file mode 100644
index 0000000..a11a847
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/div_qr_2u_pi1.s
@@ -0,0 +1,211 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+
+ .globl __gmpn_div_qr_2u_pi1
+ .type __gmpn_div_qr_2u_pi1,@function
+
+__gmpn_div_qr_2u_pi1:
+
+ mov 0+16(%rsp), %r10
+ mov %rdx, %r11
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbx
+ push %rbp
+ push %rsi
+
+ lea -2(%rcx), %rbp
+ mov %r8, %r15
+ neg %r15
+
+
+ movl 56+8(%rsp), %ecx
+
+
+
+ xor %ebx, %ebx
+ mov 8(%r11, %rbp, 8), %r12
+ shld %cl, %r12, %rbx
+
+
+ mov %r10, %rax
+ mul %rbx
+ mov (%r11, %rbp, 8), %rsi
+ shld %cl, %rsi, %r12
+ mov %r12, %r14
+ add %rax, %r14
+ adc %rbx, %rdx
+ mov %rdx, %r13
+ imul %r15, %rdx
+ mov %r9, %rax
+ lea (%rdx, %r12), %rbx
+ mul %r13
+ mov %rsi, %r12
+ shl %cl, %r12
+ sub %r9, %r12
+ sbb %r8, %rbx
+ sub %rax, %r12
+ sbb %rdx, %rbx
+ xor %eax, %eax
+ xor %edx, %edx
+ cmp %r14, %rbx
+ cmovnc %r9, %rax
+ cmovnc %r8, %rdx
+ adc $0, %r13
+ nop
+ add %rax, %r12
+ adc %rdx, %rbx
+ cmp %r8, %rbx
+ jae .Lfix_qh
+.Lbck_qh:
+ push %r13
+
+ jmp .Lnext
+
+ .align 16, 0x90
+.Lloop:
+
+
+
+ mov %r10, %rax
+ mul %rbx
+ mov (%r11, %rbp, 8), %rsi
+ xor %r13d, %r13d
+ shld %cl, %rsi, %r13
+ or %r13, %r12
+ mov %r12, %r14
+ add %rax, %r14
+ adc %rbx, %rdx
+ mov %rdx, %r13
+ imul %r15, %rdx
+ mov %r9, %rax
+ lea (%rdx, %r12), %rbx
+ mul %r13
+ mov %rsi, %r12
+ shl %cl, %r12
+ sub %r9, %r12
+ sbb %r8, %rbx
+ sub %rax, %r12
+ sbb %rdx, %rbx
+ xor %eax, %eax
+ xor %edx, %edx
+ cmp %r14, %rbx
+ cmovnc %r9, %rax
+ cmovnc %r8, %rdx
+ adc $0, %r13
+ nop
+ add %rax, %r12
+ adc %rdx, %rbx
+ cmp %r8, %rbx
+ jae .Lfix
+.Lbck:
+ mov %r13, (%rdi, %rbp, 8)
+.Lnext:
+ sub $1, %rbp
+ jnc .Lloop
+.Lend:
+
+ pop %rax
+ pop %rsi
+ shrd %cl, %rbx, %r12
+ shr %cl, %rbx
+ mov %rbx, 8(%rsi)
+ mov %r12, (%rsi)
+
+ pop %rbp
+ pop %rbx
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+ ret
+
+.Lfix:
+ seta %dl
+ cmp %r9, %r12
+ setae %al
+ orb %dl, %al
+ je .Lbck
+ inc %r13
+ sub %r9, %r12
+ sbb %r8, %rbx
+ jmp .Lbck
+
+
+.Lfix_qh:
+ seta %dl
+ cmp %r9, %r12
+ setae %al
+ orb %dl, %al
+ je .Lbck_qh
+ inc %r13
+ sub %r9, %r12
+ sbb %r8, %rbx
+ jmp .Lbck_qh
+ .size __gmpn_div_qr_2u_pi1,.-__gmpn_div_qr_2u_pi1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/dive_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/dive_1.s
new file mode 100644
index 0000000..23a35c8
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/dive_1.s
@@ -0,0 +1,175 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_divexact_1
+ .type __gmpn_divexact_1,@function
+
+__gmpn_divexact_1:
+
+
+ push %rbx
+
+ mov %rcx, %rax
+ xor %ecx, %ecx
+ mov %rdx, %r8
+
+ bt $0, %eax
+ jnc .Levn
+
+.Lodd: mov %rax, %rbx
+ shr %eax
+ and $127, %eax
+
+ mov __gmp_binvert_limb_table@GOTPCREL(%rip), %rdx
+
+
+
+ movzbl (%rdx,%rax), %eax
+
+ mov %rbx, %r11
+
+ lea (%rax,%rax), %edx
+ imul %eax, %eax
+ imul %ebx, %eax
+ sub %eax, %edx
+
+ lea (%rdx,%rdx), %eax
+ imul %edx, %edx
+ imul %ebx, %edx
+ sub %edx, %eax
+
+ lea (%rax,%rax), %r10
+ imul %rax, %rax
+ imul %rbx, %rax
+ sub %rax, %r10
+
+ lea (%rsi,%r8,8), %rsi
+ lea -8(%rdi,%r8,8), %rdi
+ neg %r8
+
+ mov (%rsi,%r8,8), %rax
+
+ inc %r8
+ jz .Lone
+
+ mov (%rsi,%r8,8), %rdx
+
+ shrd %cl, %rdx, %rax
+
+ xor %ebx, %ebx
+ jmp .Lent
+
+.Levn: bsf %rax, %rcx
+ shr %cl, %rax
+ jmp .Lodd
+
+ .align 8, 0x90
+.Ltop:
+
+
+
+
+
+
+
+
+
+
+ mul %r11
+ mov -8(%rsi,%r8,8), %rax
+ mov (%rsi,%r8,8), %r9
+ shrd %cl, %r9, %rax
+ nop
+ sub %rbx, %rax
+ setc %bl
+ sub %rdx, %rax
+ adc $0, %rbx
+.Lent: imul %r10, %rax
+ mov %rax, (%rdi,%r8,8)
+ inc %r8
+ jnz .Ltop
+
+ mul %r11
+ mov -8(%rsi), %rax
+ shr %cl, %rax
+ sub %rbx, %rax
+ sub %rdx, %rax
+ imul %r10, %rax
+ mov %rax, (%rdi)
+ pop %rbx
+
+ ret
+
+.Lone: shr %cl, %rax
+ imul %r10, %rax
+ mov %rax, (%rdi)
+ pop %rbx
+
+ ret
+
+ .size __gmpn_divexact_1,.-__gmpn_divexact_1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/divrem_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/divrem_1.s
new file mode 100644
index 0000000..e689bd2
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/divrem_1.s
@@ -0,0 +1,335 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_preinv_divrem_1
+ .type __gmpn_preinv_divrem_1,@function
+
+__gmpn_preinv_divrem_1:
+
+
+
+
+ xor %eax, %eax
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov %rsi, %r12
+ mov %rcx, %rbx
+ add %rsi, %rcx
+ mov %rdx, %rsi
+
+ lea -8(%rdi,%rcx,8), %rdi
+
+ test %r8, %r8
+ js .Lnent
+
+ mov 40(%rsp), %cl
+ shl %cl, %r8
+ jmp .Luent
+ .size __gmpn_preinv_divrem_1,.-__gmpn_preinv_divrem_1
+
+ .align 16, 0x90
+ .globl __gmpn_divrem_1
+ .type __gmpn_divrem_1,@function
+
+__gmpn_divrem_1:
+
+
+
+ xor %eax, %eax
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov %rsi, %r12
+ mov %rcx, %rbx
+ add %rsi, %rcx
+ mov %rdx, %rsi
+ je .Lret
+
+ lea -8(%rdi,%rcx,8), %rdi
+ xor %ebp, %ebp
+
+ test %r8, %r8
+ jns .Lunnormalized
+
+.Lnormalized:
+ test %rbx, %rbx
+ je .L8
+ mov -8(%rsi,%rbx,8), %rbp
+ dec %rbx
+ mov %rbp, %rax
+ sub %r8, %rbp
+ cmovc %rax, %rbp
+ sbb %eax, %eax
+ inc %eax
+ mov %rax, (%rdi)
+ lea -8(%rdi), %rdi
+.L8:
+ push %rdi
+ push %rsi
+ push %r8
+ mov %r8, %rdi
+
+
+
+ call __gmpn_invert_limb@PLT
+
+
+ pop %r8
+ pop %rsi
+ pop %rdi
+
+ mov %rax, %r9
+ mov %rbp, %rax
+ jmp .Lnent
+
+ .align 16, 0x90
+.Lntop:mov (%rsi,%rbx,8), %r10
+ mul %r9
+ add %r10, %rax
+ adc %rbp, %rdx
+ mov %rax, %rbp
+ mov %rdx, %r13
+ imul %r8, %rdx
+ sub %rdx, %r10
+ mov %r8, %rax
+ add %r10, %rax
+ cmp %rbp, %r10
+ cmovc %r10, %rax
+ adc $-1, %r13
+ cmp %r8, %rax
+ jae .Lnfx
+.Lnok: mov %r13, (%rdi)
+ sub $8, %rdi
+.Lnent:lea 1(%rax), %rbp
+ dec %rbx
+ jns .Lntop
+
+ xor %ecx, %ecx
+ jmp .Lfrac
+
+.Lnfx: sub %r8, %rax
+ inc %r13
+ jmp .Lnok
+
+.Lunnormalized:
+ test %rbx, %rbx
+ je .L44
+ mov -8(%rsi,%rbx,8), %rax
+ cmp %r8, %rax
+ jae .L44
+ mov %rbp, (%rdi)
+ mov %rax, %rbp
+ lea -8(%rdi), %rdi
+ je .Lret
+ dec %rbx
+.L44:
+ bsr %r8, %rcx
+ not %ecx
+ shl %cl, %r8
+ shl %cl, %rbp
+
+ push %rcx
+ push %rdi
+ push %rsi
+ push %r8
+ sub $8, %rsp
+ mov %r8, %rdi
+
+
+
+ call __gmpn_invert_limb@PLT
+
+ add $8, %rsp
+
+ pop %r8
+ pop %rsi
+ pop %rdi
+ pop %rcx
+
+ mov %rax, %r9
+ mov %rbp, %rax
+ test %rbx, %rbx
+ je .Lfrac
+
+.Luent:dec %rbx
+ mov (%rsi,%rbx,8), %rbp
+ neg %ecx
+ shr %cl, %rbp
+ neg %ecx
+ or %rbp, %rax
+ jmp .Lent
+
+ .align 16, 0x90
+.Lutop:mov (%rsi,%rbx,8), %r10
+ shl %cl, %rbp
+ neg %ecx
+ shr %cl, %r10
+ neg %ecx
+ or %r10, %rbp
+ mul %r9
+ add %rbp, %rax
+ adc %r11, %rdx
+ mov %rax, %r11
+ mov %rdx, %r13
+ imul %r8, %rdx
+ sub %rdx, %rbp
+ mov %r8, %rax
+ add %rbp, %rax
+ cmp %r11, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r13
+ cmp %r8, %rax
+ jae .Lufx
+.Luok: mov %r13, (%rdi)
+ sub $8, %rdi
+.Lent: mov (%rsi,%rbx,8), %rbp
+ dec %rbx
+ lea 1(%rax), %r11
+ jns .Lutop
+
+.Luend:shl %cl, %rbp
+ mul %r9
+ add %rbp, %rax
+ adc %r11, %rdx
+ mov %rax, %r11
+ mov %rdx, %r13
+ imul %r8, %rdx
+ sub %rdx, %rbp
+ mov %r8, %rax
+ add %rbp, %rax
+ cmp %r11, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r13
+ cmp %r8, %rax
+ jae .Lefx
+.Leok: mov %r13, (%rdi)
+ sub $8, %rdi
+ jmp .Lfrac
+
+.Lufx: sub %r8, %rax
+ inc %r13
+ jmp .Luok
+.Lefx: sub %r8, %rax
+ inc %r13
+ jmp .Leok
+
+.Lfrac:mov %r8, %rbp
+ neg %rbp
+ jmp .Lfent
+
+ .align 16, 0x90
+.Lftop:mul %r9
+ add %r11, %rdx
+ mov %rax, %r11
+ mov %rdx, %r13
+ imul %rbp, %rdx
+ mov %r8, %rax
+ add %rdx, %rax
+ cmp %r11, %rdx
+ cmovc %rdx, %rax
+ adc $-1, %r13
+ mov %r13, (%rdi)
+ sub $8, %rdi
+.Lfent:lea 1(%rax), %r11
+ dec %r12
+ jns .Lftop
+
+ shr %cl, %rax
+.Lret: pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+
+ ret
+ .size __gmpn_divrem_1,.-__gmpn_divrem_1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/divrem_2.s b/vere/ext/gmp/gen/x86_64-linux/mpn/divrem_2.s
new file mode 100644
index 0000000..b1c0d5b
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/divrem_2.s
@@ -0,0 +1,208 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_divrem_2
+ .type __gmpn_divrem_2,@function
+
+__gmpn_divrem_2:
+
+
+
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ lea -24(%rdx,%rcx,8), %r12
+ mov %rsi, %r13
+ push %rbp
+ mov %rdi, %rbp
+ push %rbx
+ mov 8(%r8), %r11
+ mov 16(%r12), %rbx
+ mov (%r8), %r8
+ mov 8(%r12), %r10
+
+ xor %r15d, %r15d
+ cmp %rbx, %r11
+ ja .L2
+ setb %dl
+ cmp %r10, %r8
+ setbe %al
+ orb %al, %dl
+ je .L2
+ inc %r15d
+ sub %r8, %r10
+ sbb %r11, %rbx
+.L2:
+ lea -3(%rcx,%r13), %r14
+ test %r14, %r14
+ js .Lend
+
+ push %r8
+ push %r10
+ push %r11
+ mov %r11, %rdi
+
+
+
+ call __gmpn_invert_limb@PLT
+
+
+ pop %r11
+ pop %r10
+ pop %r8
+
+ mov %r11, %rdx
+ mov %rax, %rdi
+ imul %rax, %rdx
+ mov %rdx, %r9
+ mul %r8
+ xor %ecx, %ecx
+ add %r8, %r9
+ adc $-1, %rcx
+ add %rdx, %r9
+ adc $0, %rcx
+ js 2f
+1: dec %rdi
+ sub %r11, %r9
+ sbb $0, %rcx
+ jns 1b
+2:
+
+ lea (%rbp,%r14,8), %rbp
+ mov %r11, %rsi
+ neg %rsi
+
+
+
+
+ .align 16, 0x90
+.Ltop: mov %rdi, %rax
+ mul %rbx
+ mov %r10, %rcx
+ add %rax, %rcx
+ adc %rbx, %rdx
+ mov %rdx, %r9
+ imul %rsi, %rdx
+ mov %r8, %rax
+ lea (%rdx, %r10), %rbx
+ xor %r10d, %r10d
+ mul %r9
+ cmp %r14, %r13
+ jg .L19
+ mov (%r12), %r10
+ sub $8, %r12
+.L19: sub %r8, %r10
+ sbb %r11, %rbx
+ sub %rax, %r10
+ sbb %rdx, %rbx
+ xor %eax, %eax
+ xor %edx, %edx
+ cmp %rcx, %rbx
+ cmovnc %r8, %rax
+ cmovnc %r11, %rdx
+ adc $0, %r9
+ nop
+ add %rax, %r10
+ adc %rdx, %rbx
+ cmp %r11, %rbx
+ jae .Lfix
+.Lbck: mov %r9, (%rbp)
+ sub $8, %rbp
+ dec %r14
+ jns .Ltop
+
+.Lend: mov %r10, 8(%r12)
+ mov %rbx, 16(%r12)
+ pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+ pop %r14
+ mov %r15, %rax
+ pop %r15
+
+ ret
+
+.Lfix: seta %dl
+ cmp %r8, %r10
+ setae %al
+ orb %dl, %al
+ je .Lbck
+ inc %r9
+ sub %r8, %r10
+ sbb %r11, %rbx
+ jmp .Lbck
+ .size __gmpn_divrem_2,.-__gmpn_divrem_2
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/fib_table.c b/vere/ext/gmp/gen/x86_64-linux/mpn/fib_table.c
new file mode 100644
index 0000000..a830475
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/fib_table.c
@@ -0,0 +1,107 @@
+/* This file generated by gen-fib.c - DO NOT EDIT. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS != 64
+Error, error, this data is for 64 bits
+#endif
+
+const mp_limb_t
+__gmp_fib_table[FIB_TABLE_LIMIT+2] = {
+ CNST_LIMB (0x1), /* -1 */
+ CNST_LIMB (0x0), /* 0 */
+ CNST_LIMB (0x1), /* 1 */
+ CNST_LIMB (0x1), /* 2 */
+ CNST_LIMB (0x2), /* 3 */
+ CNST_LIMB (0x3), /* 4 */
+ CNST_LIMB (0x5), /* 5 */
+ CNST_LIMB (0x8), /* 6 */
+ CNST_LIMB (0xd), /* 7 */
+ CNST_LIMB (0x15), /* 8 */
+ CNST_LIMB (0x22), /* 9 */
+ CNST_LIMB (0x37), /* 10 */
+ CNST_LIMB (0x59), /* 11 */
+ CNST_LIMB (0x90), /* 12 */
+ CNST_LIMB (0xe9), /* 13 */
+ CNST_LIMB (0x179), /* 14 */
+ CNST_LIMB (0x262), /* 15 */
+ CNST_LIMB (0x3db), /* 16 */
+ CNST_LIMB (0x63d), /* 17 */
+ CNST_LIMB (0xa18), /* 18 */
+ CNST_LIMB (0x1055), /* 19 */
+ CNST_LIMB (0x1a6d), /* 20 */
+ CNST_LIMB (0x2ac2), /* 21 */
+ CNST_LIMB (0x452f), /* 22 */
+ CNST_LIMB (0x6ff1), /* 23 */
+ CNST_LIMB (0xb520), /* 24 */
+ CNST_LIMB (0x12511), /* 25 */
+ CNST_LIMB (0x1da31), /* 26 */
+ CNST_LIMB (0x2ff42), /* 27 */
+ CNST_LIMB (0x4d973), /* 28 */
+ CNST_LIMB (0x7d8b5), /* 29 */
+ CNST_LIMB (0xcb228), /* 30 */
+ CNST_LIMB (0x148add), /* 31 */
+ CNST_LIMB (0x213d05), /* 32 */
+ CNST_LIMB (0x35c7e2), /* 33 */
+ CNST_LIMB (0x5704e7), /* 34 */
+ CNST_LIMB (0x8cccc9), /* 35 */
+ CNST_LIMB (0xe3d1b0), /* 36 */
+ CNST_LIMB (0x1709e79), /* 37 */
+ CNST_LIMB (0x2547029), /* 38 */
+ CNST_LIMB (0x3c50ea2), /* 39 */
+ CNST_LIMB (0x6197ecb), /* 40 */
+ CNST_LIMB (0x9de8d6d), /* 41 */
+ CNST_LIMB (0xff80c38), /* 42 */
+ CNST_LIMB (0x19d699a5), /* 43 */
+ CNST_LIMB (0x29cea5dd), /* 44 */
+ CNST_LIMB (0x43a53f82), /* 45 */
+ CNST_LIMB (0x6d73e55f), /* 46 */
+ CNST_LIMB (0xb11924e1), /* 47 */
+ CNST_LIMB (0x11e8d0a40), /* 48 */
+ CNST_LIMB (0x1cfa62f21), /* 49 */
+ CNST_LIMB (0x2ee333961), /* 50 */
+ CNST_LIMB (0x4bdd96882), /* 51 */
+ CNST_LIMB (0x7ac0ca1e3), /* 52 */
+ CNST_LIMB (0xc69e60a65), /* 53 */
+ CNST_LIMB (0x1415f2ac48), /* 54 */
+ CNST_LIMB (0x207fd8b6ad), /* 55 */
+ CNST_LIMB (0x3495cb62f5), /* 56 */
+ CNST_LIMB (0x5515a419a2), /* 57 */
+ CNST_LIMB (0x89ab6f7c97), /* 58 */
+ CNST_LIMB (0xdec1139639), /* 59 */
+ CNST_LIMB (0x1686c8312d0), /* 60 */
+ CNST_LIMB (0x2472d96a909), /* 61 */
+ CNST_LIMB (0x3af9a19bbd9), /* 62 */
+ CNST_LIMB (0x5f6c7b064e2), /* 63 */
+ CNST_LIMB (0x9a661ca20bb), /* 64 */
+ CNST_LIMB (0xf9d297a859d), /* 65 */
+ CNST_LIMB (0x19438b44a658), /* 66 */
+ CNST_LIMB (0x28e0b4bf2bf5), /* 67 */
+ CNST_LIMB (0x42244003d24d), /* 68 */
+ CNST_LIMB (0x6b04f4c2fe42), /* 69 */
+ CNST_LIMB (0xad2934c6d08f), /* 70 */
+ CNST_LIMB (0x1182e2989ced1), /* 71 */
+ CNST_LIMB (0x1c5575e509f60), /* 72 */
+ CNST_LIMB (0x2dd8587da6e31), /* 73 */
+ CNST_LIMB (0x4a2dce62b0d91), /* 74 */
+ CNST_LIMB (0x780626e057bc2), /* 75 */
+ CNST_LIMB (0xc233f54308953), /* 76 */
+ CNST_LIMB (0x13a3a1c2360515), /* 77 */
+ CNST_LIMB (0x1fc6e116668e68), /* 78 */
+ CNST_LIMB (0x336a82d89c937d), /* 79 */
+ CNST_LIMB (0x533163ef0321e5), /* 80 */
+ CNST_LIMB (0x869be6c79fb562), /* 81 */
+ CNST_LIMB (0xd9cd4ab6a2d747), /* 82 */
+ CNST_LIMB (0x16069317e428ca9), /* 83 */
+ CNST_LIMB (0x23a367c34e563f0), /* 84 */
+ CNST_LIMB (0x39a9fadb327f099), /* 85 */
+ CNST_LIMB (0x5d4d629e80d5489), /* 86 */
+ CNST_LIMB (0x96f75d79b354522), /* 87 */
+ CNST_LIMB (0xf444c01834299ab), /* 88 */
+ CNST_LIMB (0x18b3c1d91e77decd), /* 89 */
+ CNST_LIMB (0x27f80ddaa1ba7878), /* 90 */
+ CNST_LIMB (0x40abcfb3c0325745), /* 91 */
+ CNST_LIMB (0x68a3dd8e61eccfbd), /* 92 */
+ CNST_LIMB (0xa94fad42221f2702), /* 93 */
+};
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/gcd_11.s b/vere/ext/gmp/gen/x86_64-linux/mpn/gcd_11.s
new file mode 100644
index 0000000..cf35d25
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/gcd_11.s
@@ -0,0 +1,256 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .section .rodata
+ .align 64, 0x90
+ctz_table:
+
+ .byte 7
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 6
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+
+ .size ctz_table,.-ctz_table
+
+
+
+
+
+
+
+
+ .text
+ .align 64, 0x90
+ .globl __gmpn_gcd_11
+ .type __gmpn_gcd_11,@function
+
+__gmpn_gcd_11:
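+# One-limb binary GCD; both operands are odd by GMP's internal convention.
+# Each round subtracts and strips trailing zeros of the difference using the
+# 7-bit ctz_table above; when the low seven bits are all zero, .Lshift_alot
+# shifts by 7 and retries.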
+
+
+ mov ctz_table@GOTPCREL(%rip), %r8
+
+
+ jmp .Lent
+
+ .align 16, 0x90
+.Ltop: cmovc %rdx, %rdi
+ cmovc %rax, %rsi
+.Lmid: and $127, %edx
+ movzbl (%r8,%rdx), %ecx
+ jz .Lshift_alot
+ shr %cl, %rdi
+.Lent: mov %rdi, %rax
+ mov %rsi, %rdx
+ sub %rdi, %rdx
+ sub %rsi, %rdi
+ jnz .Ltop
+
+.Lend:
+
+
+ ret
+
+.Lshift_alot:
+ shr $7, %rdi
+ mov %rdi, %rdx
+ jmp .Lmid
+ .size __gmpn_gcd_11,.-__gmpn_gcd_11
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/gcd_22.s b/vere/ext/gmp/gen/x86_64-linux/mpn/gcd_22.s
new file mode 100644
index 0000000..60f4c71
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/gcd_22.s
@@ -0,0 +1,434 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .section .rodata
+ .align 64, 0x90
+ctz_table:
+
+ .byte 8
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 6
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 6
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 2
+ .byte 0
+ .byte 1
+ .byte 0
+
+ .size ctz_table,.-ctz_table
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 64, 0x90
+ .globl __gmpn_gcd_22
+ .type __gmpn_gcd_22,@function
+
+__gmpn_gcd_22:
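+# Two-limb binary GCD of (u1:u0, v1:v0), both operands odd.  The loop
+# subtracts, keeps the smaller value via cmov, and strips trailing zeros with
+# the 8-bit ctz_table above (bsf when the low eight bits are all zero);
+# once the high limbs are gone it tail-calls __gmpn_gcd_11.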
+
+
+ mov %rcx, %rax
+
+ mov ctz_table@GOTPCREL(%rip), %r10
+
+
+
+ .align 16, 0x90
+.Ltop: mov %rax, %rcx
+ sub %rsi, %rcx
+ jz .Llowz
+ mov %rdx, %r11
+ sbb %rdi, %r11
+
+ mov %rsi, %r8
+ mov %rdi, %r9
+
+ sub %rax, %rsi
+ sbb %rdx, %rdi
+
+.Lbck: cmovc %rcx, %rsi
+ cmovc %r11, %rdi
+ cmovc %r8, %rax
+ cmovc %r9, %rdx
+
+ and $255, %ecx
+ movzbl (%r10,%rcx), %ecx
+ jz .Lcount_better
+
+.Lshr: shr %cl, %rsi
+ mov %rdi, %r11
+ shr %cl, %rdi
+ neg %rcx
+ shl %cl, %r11
+ or %r11, %rsi
+
+ test %rdx, %rdx
+ jnz .Ltop
+ test %rdi, %rdi
+ jnz .Ltop
+
+.Lgcd_11:
+ mov %rax, %rdi
+
+ jmp __gmpn_gcd_11@PLT
+
+
+.Lcount_better:
+ rep;bsf %rsi, %rcx
+ jmp .Lshr
+
+.Llowz:
+
+
+ mov %rdx, %rcx
+ sub %rdi, %rcx
+ je .Lend
+
+ xor %r11, %r11
+ mov %rsi, %r8
+ mov %rdi, %r9
+ mov %rdi, %rsi
+ xor %rdi, %rdi
+ sub %rdx, %rsi
+ jmp .Lbck
+
+.Lend:
+
+
+ ret
+ .size __gmpn_gcd_22,.-__gmpn_gcd_22
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/hamdist.s b/vere/ext/gmp/gen/x86_64-linux/mpn/hamdist.s
new file mode 100644
index 0000000..1ab3a8c
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/hamdist.s
@@ -0,0 +1,167 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_hamdist
+ .type __gmpn_hamdist,@function
+
+__gmpn_hamdist:
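+# Hamming distance of two n-limb operands: XOR the limbs pairwise, then
+# popcount with the usual SWAR constants (0x5555.., 0x3333.., 0x0f0f.., and a
+# 0x0101.. multiply followed by a shift of 56), two limbs per iteration.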
+
+
+ push %rbx
+ mov $0x5555555555555555, %r10
+ push %rbp
+ mov $0x3333333333333333, %r11
+ push %r12
+ lea (%rdi,%rdx,8), %rdi
+ mov $0x0f0f0f0f0f0f0f0f, %rcx
+ lea (%rsi,%rdx,8), %rsi
+ neg %rdx
+ mov $0x0101010101010101, %r12
+ xor %eax, %eax
+ test $1, %dl
+ jz .Ltop
+
+ mov (%rdi,%rdx,8), %r8
+ xor (%rsi,%rdx,8), %r8
+
+ mov %r8, %r9
+ shr %r8
+ and %r10, %r8
+ sub %r8, %r9
+
+ mov %r9, %r8
+ shr $2, %r9
+ and %r11, %r8
+ and %r11, %r9
+ add %r8, %r9
+
+ dec %rdx
+ jmp .Lmid
+
+ .align 16, 0x90
+.Ltop: mov (%rdi,%rdx,8), %r8
+ mov 8(%rdi,%rdx,8), %rbx
+ xor (%rsi,%rdx,8), %r8
+ xor 8(%rsi,%rdx,8), %rbx
+
+ mov %r8, %r9
+ mov %rbx, %rbp
+ shr %r8
+ shr %rbx
+ and %r10, %r8
+ and %r10, %rbx
+ sub %r8, %r9
+ sub %rbx, %rbp
+
+ mov %r9, %r8
+ mov %rbp, %rbx
+ shr $2, %r9
+ shr $2, %rbp
+ and %r11, %r8
+ and %r11, %r9
+ and %r11, %rbx
+ and %r11, %rbp
+ add %r8, %r9
+ add %rbx, %rbp
+
+ add %rbp, %r9
+.Lmid: mov %r9, %r8
+ shr $4, %r9
+ and %rcx, %r8
+ and %rcx, %r9
+ add %r8, %r9
+
+ imul %r12, %r9
+ shr $56, %r9
+
+ add %r9, %rax
+ add $2, %rdx
+ jnc .Ltop
+
+.Lend:
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_hamdist,.-__gmpn_hamdist
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb.s b/vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb.s
new file mode 100644
index 0000000..d7352e7
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb.s
@@ -0,0 +1,123 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.hidden __gmpn_invert_limb_table
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_invert_limb
+ .type __gmpn_invert_limb,@function
+
+__gmpn_invert_limb:
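+# For a normalized divisor d (top bit set), return floor((B^2 - 1)/d) - B
+# with B = 2^64, refining a table-based initial approximation with
+# Newton-style steps.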
+
+
+ mov %rdi, %rax
+ shr $55, %rax
+
+ lea -512+__gmpn_invert_limb_table(%rip), %r8
+
+ movzwl (%r8,%rax,2), %ecx
+
+
+ mov %rdi, %rsi
+ mov %ecx, %eax
+ imul %ecx, %ecx
+ shr $24, %rsi
+ inc %rsi
+ imul %rsi, %rcx
+ shr $40, %rcx
+ sal $11, %eax
+ dec %eax
+ sub %ecx, %eax
+
+
+ mov $0x1000000000000000, %rcx
+ imul %rax, %rsi
+ sub %rsi, %rcx
+ imul %rax, %rcx
+ sal $13, %rax
+ shr $47, %rcx
+ add %rax, %rcx
+
+
+ mov %rdi, %rsi
+ shr %rsi
+ sbb %rax, %rax
+ sub %rax, %rsi
+ imul %rcx, %rsi
+ and %rcx, %rax
+ shr %rax
+ sub %rsi, %rax
+ mul %rcx
+ sal $31, %rcx
+ shr %rdx
+ add %rdx, %rcx
+
+ mov %rdi, %rax
+ mul %rcx
+ add %rdi, %rax
+ mov %rcx, %rax
+ adc %rdi, %rdx
+ sub %rdx, %rax
+
+
+ ret
+ .size __gmpn_invert_limb,.-__gmpn_invert_limb
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb_table.s b/vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb_table.s
new file mode 100644
index 0000000..a990458
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/invert_limb_table.s
@@ -0,0 +1,313 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.hidden __gmpn_invert_limb_table
+
+
+
+
+ .section .rodata
+ .align 2, 0x90
+ .globl __gmpn_invert_limb_table
+__gmpn_invert_limb_table:
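+# 256 16-bit entries indexed by (d >> 55) - 256 for a normalized divisor d;
+# each holds the initial reciprocal approximation used by __gmpn_invert_limb,
+# roughly floor((2^19 - 3*2^8) / (d >> 55)).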
+ .value 2045
+ .value 2037
+ .value 2029
+ .value 2021
+ .value 2013
+ .value 2005
+ .value 1998
+ .value 1990
+ .value 1983
+ .value 1975
+ .value 1968
+ .value 1960
+ .value 1953
+ .value 1946
+ .value 1938
+ .value 1931
+ .value 1924
+ .value 1917
+ .value 1910
+ .value 1903
+ .value 1896
+ .value 1889
+ .value 1883
+ .value 1876
+ .value 1869
+ .value 1863
+ .value 1856
+ .value 1849
+ .value 1843
+ .value 1836
+ .value 1830
+ .value 1824
+ .value 1817
+ .value 1811
+ .value 1805
+ .value 1799
+ .value 1792
+ .value 1786
+ .value 1780
+ .value 1774
+ .value 1768
+ .value 1762
+ .value 1756
+ .value 1750
+ .value 1745
+ .value 1739
+ .value 1733
+ .value 1727
+ .value 1722
+ .value 1716
+ .value 1710
+ .value 1705
+ .value 1699
+ .value 1694
+ .value 1688
+ .value 1683
+ .value 1677
+ .value 1672
+ .value 1667
+ .value 1661
+ .value 1656
+ .value 1651
+ .value 1646
+ .value 1641
+ .value 1636
+ .value 1630
+ .value 1625
+ .value 1620
+ .value 1615
+ .value 1610
+ .value 1605
+ .value 1600
+ .value 1596
+ .value 1591
+ .value 1586
+ .value 1581
+ .value 1576
+ .value 1572
+ .value 1567
+ .value 1562
+ .value 1558
+ .value 1553
+ .value 1548
+ .value 1544
+ .value 1539
+ .value 1535
+ .value 1530
+ .value 1526
+ .value 1521
+ .value 1517
+ .value 1513
+ .value 1508
+ .value 1504
+ .value 1500
+ .value 1495
+ .value 1491
+ .value 1487
+ .value 1483
+ .value 1478
+ .value 1474
+ .value 1470
+ .value 1466
+ .value 1462
+ .value 1458
+ .value 1454
+ .value 1450
+ .value 1446
+ .value 1442
+ .value 1438
+ .value 1434
+ .value 1430
+ .value 1426
+ .value 1422
+ .value 1418
+ .value 1414
+ .value 1411
+ .value 1407
+ .value 1403
+ .value 1399
+ .value 1396
+ .value 1392
+ .value 1388
+ .value 1384
+ .value 1381
+ .value 1377
+ .value 1374
+ .value 1370
+ .value 1366
+ .value 1363
+ .value 1359
+ .value 1356
+ .value 1352
+ .value 1349
+ .value 1345
+ .value 1342
+ .value 1338
+ .value 1335
+ .value 1332
+ .value 1328
+ .value 1325
+ .value 1322
+ .value 1318
+ .value 1315
+ .value 1312
+ .value 1308
+ .value 1305
+ .value 1302
+ .value 1299
+ .value 1295
+ .value 1292
+ .value 1289
+ .value 1286
+ .value 1283
+ .value 1280
+ .value 1276
+ .value 1273
+ .value 1270
+ .value 1267
+ .value 1264
+ .value 1261
+ .value 1258
+ .value 1255
+ .value 1252
+ .value 1249
+ .value 1246
+ .value 1243
+ .value 1240
+ .value 1237
+ .value 1234
+ .value 1231
+ .value 1228
+ .value 1226
+ .value 1223
+ .value 1220
+ .value 1217
+ .value 1214
+ .value 1211
+ .value 1209
+ .value 1206
+ .value 1203
+ .value 1200
+ .value 1197
+ .value 1195
+ .value 1192
+ .value 1189
+ .value 1187
+ .value 1184
+ .value 1181
+ .value 1179
+ .value 1176
+ .value 1173
+ .value 1171
+ .value 1168
+ .value 1165
+ .value 1163
+ .value 1160
+ .value 1158
+ .value 1155
+ .value 1153
+ .value 1150
+ .value 1148
+ .value 1145
+ .value 1143
+ .value 1140
+ .value 1138
+ .value 1135
+ .value 1133
+ .value 1130
+ .value 1128
+ .value 1125
+ .value 1123
+ .value 1121
+ .value 1118
+ .value 1116
+ .value 1113
+ .value 1111
+ .value 1109
+ .value 1106
+ .value 1104
+ .value 1102
+ .value 1099
+ .value 1097
+ .value 1095
+ .value 1092
+ .value 1090
+ .value 1088
+ .value 1086
+ .value 1083
+ .value 1081
+ .value 1079
+ .value 1077
+ .value 1074
+ .value 1072
+ .value 1070
+ .value 1068
+ .value 1066
+ .value 1064
+ .value 1061
+ .value 1059
+ .value 1057
+ .value 1055
+ .value 1053
+ .value 1051
+ .value 1049
+ .value 1047
+ .value 1044
+ .value 1042
+ .value 1040
+ .value 1038
+ .value 1036
+ .value 1034
+ .value 1032
+ .value 1030
+ .value 1028
+ .value 1026
+ .value 1024
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/ior_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/ior_n.s
new file mode 100644
index 0000000..6509f28
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/ior_n.s
@@ -0,0 +1,149 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_ior_n
+ .type __gmpn_ior_n,@function
+
+__gmpn_ior_n:
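+# rp[i] = up[i] | vp[i] for i = 0..n-1, four limbs per unrolled iteration
+# with a computed entry point for n mod 4.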
+
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: or (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+.Lb01: or (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ or (%rsi,%rcx,8), %r8
+ or 8(%rsi,%rcx,8), %r9
+ nop
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ or 16(%rsi,%rcx,8), %r8
+ or 24(%rsi,%rcx,8), %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_ior_n,.-__gmpn_ior_n
+
+
+
+
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/iorn_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/iorn_n.s
new file mode 100644
index 0000000..b199ca3
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/iorn_n.s
@@ -0,0 +1,154 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_iorn_n
+ .type __gmpn_iorn_n,@function
+
+__gmpn_iorn_n:
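+# rp[i] = up[i] | ~vp[i]; same unrolled structure as ior_n, with the second
+# source operand complemented before the OR.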
+
+
+ mov (%rdx), %r8
+ not %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: or (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+ .byte 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+.Lb01: or (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+ not %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ not %r9
+ or (%rsi,%rcx,8), %r8
+ or 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+ not %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ not %r9
+ or 16(%rsi,%rcx,8), %r8
+ or 24(%rsi,%rcx,8), %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_iorn_n,.-__gmpn_iorn_n
+
+
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/jacobitab.h b/vere/ext/gmp/gen/x86_64-linux/mpn/jacobitab.h
new file mode 100644
index 0000000..4bdbfcc
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/jacobitab.h
@@ -0,0 +1,13 @@
+ 0, 0, 0, 0, 0,12, 8, 4, 1, 1, 1, 1, 1,13, 9, 5,
+ 2, 2, 2, 2, 2, 6,10,14, 3, 3, 3, 3, 3, 7,11,15,
+ 4,16, 6,18, 4, 0,12, 8, 5,17, 7,19, 5, 1,13, 9,
+ 6,18, 4,16, 6,10,14, 2, 7,19, 5,17, 7,11,15, 3,
+ 8,10, 9,11, 8, 4, 0,12, 9,11, 8,10, 9, 5, 1,13,
+10, 9,11, 8,10,14, 2, 6,11, 8,10, 9,11,15, 3, 7,
+12,22,24,20,12, 8, 4, 0,13,23,25,21,13, 9, 5, 1,
+25,21,13,23,14, 2, 6,10,24,20,12,22,15, 3, 7,11,
+16, 6,18, 4,16,16,16,16,17, 7,19, 5,17,17,17,17,
+18, 4,16, 6,18,22,19,23,19, 5,17, 7,19,23,18,22,
+20,12,22,24,20,20,20,20,21,13,23,25,21,21,21,21,
+22,24,20,12,22,19,23,18,23,25,21,13,23,18,22,19,
+24,20,12,22,15, 3, 7,11,25,21,13,23,14, 2, 6,10,
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/lshift.s b/vere/ext/gmp/gen/x86_64-linux/mpn/lshift.s
new file mode 100644
index 0000000..89e9566
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/lshift.s
@@ -0,0 +1,186 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_lshift
+ .type __gmpn_lshift,@function
+
+__gmpn_lshift:
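+# Shift the n-limb operand left by cnt bits (1 <= cnt < 64) into rp and
+# return the bits shifted out of the most significant limb; the copy runs
+# from the top limb downwards, four limbs per unrolled iteration.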
+
+
+ neg %ecx
+ mov -8(%rsi,%rdx,8), %rax
+ shr %cl, %rax
+
+ neg %ecx
+ lea 1(%rdx), %r8d
+ and $3, %r8d
+ je .Lrlx
+
+ dec %r8d
+ jne .L1
+
+ mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ shr %cl, %r8
+ or %r8, %r10
+ mov %r10, -8(%rdi,%rdx,8)
+ dec %rdx
+ jmp .Lrll
+
+.L1: dec %r8d
+ je .L1x
+
+ mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ shr %cl, %r8
+ or %r8, %r10
+ mov %r10, -8(%rdi,%rdx,8)
+ dec %rdx
+ neg %ecx
+.L1x:
+ cmp $1, %rdx
+ je .Last
+ mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ mov -16(%rsi,%rdx,8), %r11
+ shl %cl, %r11
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ mov -24(%rsi,%rdx,8), %r9
+ shr %cl, %r8
+ or %r8, %r10
+ shr %cl, %r9
+ or %r9, %r11
+ mov %r10, -8(%rdi,%rdx,8)
+ mov %r11, -16(%rdi,%rdx,8)
+ sub $2, %rdx
+
+.Lrll: neg %ecx
+.Lrlx: mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ mov -16(%rsi,%rdx,8), %r11
+ shl %cl, %r11
+
+ sub $4, %rdx
+ jb .Lend
+ .align 16, 0x90
+.Ltop:
+
+ neg %ecx
+ mov 16(%rsi,%rdx,8), %r8
+ mov 8(%rsi,%rdx,8), %r9
+ shr %cl, %r8
+ or %r8, %r10
+ shr %cl, %r9
+ or %r9, %r11
+ mov %r10, 24(%rdi,%rdx,8)
+ mov %r11, 16(%rdi,%rdx,8)
+
+ mov 0(%rsi,%rdx,8), %r8
+ mov -8(%rsi,%rdx,8), %r9
+ shr %cl, %r8
+ shr %cl, %r9
+
+
+ neg %ecx
+ mov 8(%rsi,%rdx,8), %r10
+ mov 0(%rsi,%rdx,8), %r11
+ shl %cl, %r10
+ or %r10, %r8
+ shl %cl, %r11
+ or %r11, %r9
+ mov %r8, 8(%rdi,%rdx,8)
+ mov %r9, 0(%rdi,%rdx,8)
+
+ mov -8(%rsi,%rdx,8), %r10
+ mov -16(%rsi,%rdx,8), %r11
+ shl %cl, %r10
+ shl %cl, %r11
+
+ sub $4, %rdx
+ jae .Ltop
+.Lend:
+ neg %ecx
+ mov 8(%rsi), %r8
+ shr %cl, %r8
+ or %r8, %r10
+ mov (%rsi), %r9
+ shr %cl, %r9
+ or %r9, %r11
+ mov %r10, 16(%rdi)
+ mov %r11, 8(%rdi)
+
+ neg %ecx
+.Last: mov (%rsi), %r10
+ shl %cl, %r10
+ mov %r10, (%rdi)
+
+ ret
+ .size __gmpn_lshift,.-__gmpn_lshift
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/lshiftc.s b/vere/ext/gmp/gen/x86_64-linux/mpn/lshiftc.s
new file mode 100644
index 0000000..6809940
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/lshiftc.s
@@ -0,0 +1,197 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_lshiftc
+ .type __gmpn_lshiftc,@function
+
+__gmpn_lshiftc:
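+# Like lshift, but every stored result limb is one's-complemented; the
+# returned out-shifted bits are left uncomplemented.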
+
+
+ neg %ecx
+ mov -8(%rsi,%rdx,8), %rax
+ shr %cl, %rax
+
+ neg %ecx
+ lea 1(%rdx), %r8d
+ and $3, %r8d
+ je .Lrlx
+
+ dec %r8d
+ jne .L1
+
+ mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ shr %cl, %r8
+ or %r8, %r10
+ not %r10
+ mov %r10, -8(%rdi,%rdx,8)
+ dec %rdx
+ jmp .Lrll
+
+.L1: dec %r8d
+ je .L1x
+
+ mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ shr %cl, %r8
+ or %r8, %r10
+ not %r10
+ mov %r10, -8(%rdi,%rdx,8)
+ dec %rdx
+ neg %ecx
+.L1x:
+ cmp $1, %rdx
+ je .Last
+ mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ mov -16(%rsi,%rdx,8), %r11
+ shl %cl, %r11
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ mov -24(%rsi,%rdx,8), %r9
+ shr %cl, %r8
+ or %r8, %r10
+ shr %cl, %r9
+ or %r9, %r11
+ not %r10
+ not %r11
+ mov %r10, -8(%rdi,%rdx,8)
+ mov %r11, -16(%rdi,%rdx,8)
+ sub $2, %rdx
+
+.Lrll: neg %ecx
+.Lrlx: mov -8(%rsi,%rdx,8), %r10
+ shl %cl, %r10
+ mov -16(%rsi,%rdx,8), %r11
+ shl %cl, %r11
+
+ sub $4, %rdx
+ jb .Lend
+ .align 16, 0x90
+.Ltop:
+
+ neg %ecx
+ mov 16(%rsi,%rdx,8), %r8
+ mov 8(%rsi,%rdx,8), %r9
+ shr %cl, %r8
+ or %r8, %r10
+ shr %cl, %r9
+ or %r9, %r11
+ not %r10
+ not %r11
+ mov %r10, 24(%rdi,%rdx,8)
+ mov %r11, 16(%rdi,%rdx,8)
+
+ mov 0(%rsi,%rdx,8), %r8
+ mov -8(%rsi,%rdx,8), %r9
+ shr %cl, %r8
+ shr %cl, %r9
+
+
+ neg %ecx
+ mov 8(%rsi,%rdx,8), %r10
+ mov 0(%rsi,%rdx,8), %r11
+ shl %cl, %r10
+ or %r10, %r8
+ shl %cl, %r11
+ or %r11, %r9
+ not %r8
+ not %r9
+ mov %r8, 8(%rdi,%rdx,8)
+ mov %r9, 0(%rdi,%rdx,8)
+
+ mov -8(%rsi,%rdx,8), %r10
+ mov -16(%rsi,%rdx,8), %r11
+ shl %cl, %r10
+ shl %cl, %r11
+
+ sub $4, %rdx
+ jae .Ltop
+.Lend:
+ neg %ecx
+ mov 8(%rsi), %r8
+ shr %cl, %r8
+ or %r8, %r10
+ mov (%rsi), %r9
+ shr %cl, %r9
+ or %r9, %r11
+ not %r10
+ not %r11
+ mov %r10, 16(%rdi)
+ mov %r11, 8(%rdi)
+
+ neg %ecx
+.Last: mov (%rsi), %r10
+ shl %cl, %r10
+ not %r10
+ mov %r10, (%rdi)
+
+ ret
+ .size __gmpn_lshiftc,.-__gmpn_lshiftc
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_1.s
new file mode 100644
index 0000000..a8e3198
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_1.s
@@ -0,0 +1,241 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mod_1_1p
+ .type __gmpn_mod_1_1p,@function
+
+__gmpn_mod_1_1p:
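+# Remainder of {ap,n} modulo a single limb b, one dividend limb per
+# iteration, using the constants precomputed by __gmpn_mod_1_1p_cps below
+# (a shifted reciprocal of b, the normalization count, and powers of B
+# modulo b, with B = 2^64).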
+
+
+ push %rbp
+ push %rbx
+ mov %rdx, %rbx
+ mov %rcx, %r8
+
+ mov -8(%rdi, %rsi, 8), %rax
+ cmp $3, %rsi
+ jnc .Lfirst
+ mov -16(%rdi, %rsi, 8), %rbp
+ jmp .Lreduce_two
+
+.Lfirst:
+
+ mov 24(%r8), %r11
+ mul %r11
+ mov -24(%rdi, %rsi, 8), %rbp
+ add %rax, %rbp
+ mov -16(%rdi, %rsi, 8), %rax
+ adc %rdx, %rax
+ sbb %rcx, %rcx
+ sub $4, %rsi
+ jc .Lreduce_three
+
+ mov %r11, %r10
+ sub %rbx, %r10
+
+ .align 16, 0x90
+.Ltop: and %r11, %rcx
+ lea (%r10, %rbp), %r9
+ mul %r11
+ add %rbp, %rcx
+ mov (%rdi, %rsi, 8), %rbp
+ cmovc %r9, %rcx
+ add %rax, %rbp
+ mov %rcx, %rax
+ adc %rdx, %rax
+ sbb %rcx, %rcx
+ sub $1, %rsi
+ jnc .Ltop
+
+.Lreduce_three:
+
+ and %rbx, %rcx
+ sub %rcx, %rax
+
+.Lreduce_two:
+ mov 8(%r8), %ecx
+ test %ecx, %ecx
+ jz .Lnormalized
+
+
+ mulq 16(%r8)
+ xor %r9, %r9
+ add %rax, %rbp
+ adc %rdx, %r9
+ mov %r9, %rax
+
+
+
+ shld %cl, %rbp, %rax
+
+ shl %cl, %rbp
+ jmp .Ludiv
+
+.Lnormalized:
+ mov %rax, %r9
+ sub %rbx, %r9
+ cmovnc %r9, %rax
+
+.Ludiv:
+ lea 1(%rax), %r9
+ mulq (%r8)
+ add %rbp, %rax
+ adc %r9, %rdx
+ imul %rbx, %rdx
+ sub %rdx, %rbp
+ cmp %rbp, %rax
+ lea (%rbx, %rbp), %rax
+ cmovnc %rbp, %rax
+ cmp %rbx, %rax
+ jnc .Lfix
+.Lok: shr %cl, %rax
+
+ pop %rbx
+ pop %rbp
+
+ ret
+.Lfix: sub %rbx, %rax
+ jmp .Lok
+ .size __gmpn_mod_1_1p,.-__gmpn_mod_1_1p
+
+ .align 16, 0x90
+ .globl __gmpn_mod_1_1p_cps
+ .type __gmpn_mod_1_1p_cps,@function
+
+__gmpn_mod_1_1p_cps:
+
+
+ push %rbp
+ bsr %rsi, %rcx
+ push %rbx
+ mov %rdi, %rbx
+ push %r12
+ xor $63, %ecx
+ mov %rsi, %r12
+ mov %ecx, %ebp
+ sal %cl, %r12
+ mov %r12, %rdi
+
+
+
+ call __gmpn_invert_limb@PLT
+
+
+ neg %r12
+ mov %r12, %r8
+ mov %rax, (%rbx)
+ mov %rbp, 8(%rbx)
+ imul %rax, %r12
+ mov %r12, 24(%rbx)
+ mov %ebp, %ecx
+ test %ecx, %ecx
+ jz .Lz
+
+ mov $1, %edx
+
+ shld %cl, %rax, %rdx
+
+ imul %rdx, %r8
+ shr %cl, %r8
+ mov %r8, 16(%rbx)
+.Lz:
+ pop %r12
+ pop %rbx
+ pop %rbp
+
+ ret
+ .size __gmpn_mod_1_1p_cps,.-__gmpn_mod_1_1p_cps
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_2.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_2.s
new file mode 100644
index 0000000..1a19107
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_2.s
@@ -0,0 +1,252 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mod_1s_2p
+ .type __gmpn_mod_1s_2p,@function
+
+__gmpn_mod_1s_2p:
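+# Same idea as mod_1_1p but folding two dividend limbs per loop iteration,
+# using precomputed powers B^1..B^3 modulo b from __gmpn_mod_1s_2p_cps.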
+
+
+ push %r14
+ test $1, %sil
+ mov %rdx, %r14
+ push %r13
+ mov %rcx, %r13
+ push %r12
+ push %rbp
+ push %rbx
+ mov 16(%rcx), %r10
+ mov 24(%rcx), %rbx
+ mov 32(%rcx), %rbp
+ je .Lb0
+ dec %rsi
+ je .Lone
+ mov -8(%rdi,%rsi,8), %rax
+ mul %r10
+ mov %rax, %r9
+ mov %rdx, %r8
+ mov (%rdi,%rsi,8), %rax
+ add -16(%rdi,%rsi,8), %r9
+ adc $0, %r8
+ mul %rbx
+ add %rax, %r9
+ adc %rdx, %r8
+ jmp .L11
+
+.Lb0: mov -8(%rdi,%rsi,8), %r8
+ mov -16(%rdi,%rsi,8), %r9
+
+.L11: sub $4, %rsi
+ jb .Led2
+ lea 40(%rdi,%rsi,8), %rdi
+ mov -40(%rdi), %r11
+ mov -32(%rdi), %rax
+ jmp .Lm0
+
+ .align 16, 0x90
+.Ltop: mov -24(%rdi), %r9
+ add %rax, %r11
+ mov -16(%rdi), %rax
+ adc %rdx, %r12
+ mul %r10
+ add %rax, %r9
+ mov %r11, %rax
+ mov %rdx, %r8
+ adc $0, %r8
+ mul %rbx
+ add %rax, %r9
+ mov %r12, %rax
+ adc %rdx, %r8
+ mul %rbp
+ sub $2, %rsi
+ jb .Led1
+ mov -40(%rdi), %r11
+ add %rax, %r9
+ mov -32(%rdi), %rax
+ adc %rdx, %r8
+.Lm0: mul %r10
+ add %rax, %r11
+ mov %r9, %rax
+ mov %rdx, %r12
+ adc $0, %r12
+ mul %rbx
+ add %rax, %r11
+ lea -32(%rdi), %rdi
+ mov %r8, %rax
+ adc %rdx, %r12
+ mul %rbp
+ sub $2, %rsi
+ jae .Ltop
+
+.Led0: mov %r11, %r9
+ mov %r12, %r8
+.Led1: add %rax, %r9
+ adc %rdx, %r8
+.Led2: mov 8(%r13), %edi
+ mov %r8, %rax
+ mov %r9, %r8
+ mul %r10
+ add %rax, %r8
+ adc $0, %rdx
+.L1: xor %ecx, %ecx
+ mov %r8, %r9
+ sub %edi, %ecx
+ shr %cl, %r9
+ mov %edi, %ecx
+ sal %cl, %rdx
+ or %rdx, %r9
+ sal %cl, %r8
+ mov %r9, %rax
+ mulq (%r13)
+ mov %rax, %rsi
+ inc %r9
+ add %r8, %rsi
+ adc %r9, %rdx
+ imul %r14, %rdx
+ sub %rdx, %r8
+ lea (%r8,%r14), %rax
+ cmp %r8, %rsi
+ cmovc %rax, %r8
+ mov %r8, %rax
+ sub %r14, %rax
+ cmovc %r8, %rax
+ mov %edi, %ecx
+ shr %cl, %rax
+ pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+ pop %r14
+
+ ret
+.Lone:
+ mov (%rdi), %r8
+ mov 8(%rcx), %edi
+ xor %rdx, %rdx
+ jmp .L1
+ .size __gmpn_mod_1s_2p,.-__gmpn_mod_1s_2p
+
+ .align 16, 0x90
+ .globl __gmpn_mod_1s_2p_cps
+ .type __gmpn_mod_1s_2p_cps,@function
+
+__gmpn_mod_1s_2p_cps:
+
+
+ push %rbp
+ bsr %rsi, %rcx
+ push %rbx
+ mov %rdi, %rbx
+ push %r12
+ xor $63, %ecx
+ mov %rsi, %r12
+ mov %ecx, %ebp
+ sal %cl, %r12
+ mov %r12, %rdi
+
+
+
+ call __gmpn_invert_limb@PLT
+
+
+ mov %r12, %r8
+ mov %rax, %r11
+ mov %rax, (%rbx)
+ mov %rbp, 8(%rbx)
+ neg %r8
+ mov %ebp, %ecx
+ mov $1, %esi
+
+ shld %cl, %rax, %rsi
+
+ imul %r8, %rsi
+ mul %rsi
+
+ add %rsi, %rdx
+ shr %cl, %rsi
+ mov %rsi, 16(%rbx)
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr %cl, %rsi
+ mov %rsi, 24(%rbx)
+
+ not %rdx
+ imul %r12, %rdx
+ add %rdx, %r12
+ cmp %rdx, %rax
+ cmovnc %rdx, %r12
+
+ shr %cl, %r12
+ mov %r12, 32(%rbx)
+
+ pop %r12
+ pop %rbx
+ pop %rbp
+
+ ret
+ .size __gmpn_mod_1s_2p_cps,.-__gmpn_mod_1s_2p_cps
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_4.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_4.s
new file mode 100644
index 0000000..491753d
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_1_4.s
@@ -0,0 +1,283 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mod_1s_4p
+ .type __gmpn_mod_1s_4p,@function
+
+__gmpn_mod_1s_4p:
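+# Four dividend limbs folded per iteration, using precomputed powers
+# B^1..B^5 modulo b from __gmpn_mod_1s_4p_cps (offsets 16..48 of the
+# parameter block).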
+
+
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov %rdx, %r15
+ mov %rcx, %r14
+ mov 16(%rcx), %r11
+ mov 24(%rcx), %rbx
+ mov 32(%rcx), %rbp
+ mov 40(%rcx), %r13
+ mov 48(%rcx), %r12
+ xor %r8d, %r8d
+ mov %esi, %edx
+ and $3, %edx
+ je .Lb0
+ cmp $2, %edx
+ jc .Lb1
+ je .Lb2
+
+.Lb3: lea -24(%rdi,%rsi,8), %rdi
+ mov 8(%rdi), %rax
+ mul %r11
+ mov (%rdi), %r9
+ add %rax, %r9
+ adc %rdx, %r8
+ mov 16(%rdi), %rax
+ mul %rbx
+ jmp .Lm0
+
+ .align 8, 0x90
+.Lb0: lea -32(%rdi,%rsi,8), %rdi
+ mov 8(%rdi), %rax
+ mul %r11
+ mov (%rdi), %r9
+ add %rax, %r9
+ adc %rdx, %r8
+ mov 16(%rdi), %rax
+ mul %rbx
+ add %rax, %r9
+ adc %rdx, %r8
+ mov 24(%rdi), %rax
+ mul %rbp
+ jmp .Lm0
+
+ .align 8, 0x90
+.Lb1: lea -8(%rdi,%rsi,8), %rdi
+ mov (%rdi), %r9
+ jmp .Lm1
+
+ .align 8, 0x90
+.Lb2: lea -16(%rdi,%rsi,8), %rdi
+ mov 8(%rdi), %r8
+ mov (%rdi), %r9
+ jmp .Lm1
+
+ .align 16, 0x90
+.Ltop: mov -24(%rdi), %rax
+ mov -32(%rdi), %r10
+ mul %r11
+ add %rax, %r10
+ mov -16(%rdi), %rax
+ mov $0, %ecx
+ adc %rdx, %rcx
+ mul %rbx
+ add %rax, %r10
+ mov -8(%rdi), %rax
+ adc %rdx, %rcx
+ sub $32, %rdi
+ mul %rbp
+ add %rax, %r10
+ mov %r13, %rax
+ adc %rdx, %rcx
+ mul %r9
+ add %rax, %r10
+ mov %r12, %rax
+ adc %rdx, %rcx
+ mul %r8
+ mov %r10, %r9
+ mov %rcx, %r8
+.Lm0: add %rax, %r9
+ adc %rdx, %r8
+.Lm1: sub $4, %rsi
+ ja .Ltop
+
+.Lend: mov 8(%r14), %esi
+ mov %r8, %rax
+ mul %r11
+ mov %rax, %r8
+ add %r9, %r8
+ adc $0, %rdx
+ xor %ecx, %ecx
+ sub %esi, %ecx
+ mov %r8, %rdi
+ shr %cl, %rdi
+ mov %esi, %ecx
+ sal %cl, %rdx
+ or %rdx, %rdi
+ mov %rdi, %rax
+ mulq (%r14)
+ mov %r15, %rbx
+ mov %rax, %r9
+ sal %cl, %r8
+ inc %rdi
+ add %r8, %r9
+ adc %rdi, %rdx
+ imul %rbx, %rdx
+ sub %rdx, %r8
+ lea (%r8,%rbx), %rax
+ cmp %r8, %r9
+ cmovc %rax, %r8
+ mov %r8, %rax
+ sub %rbx, %rax
+ cmovc %r8, %rax
+ shr %cl, %rax
+ pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ ret
+ .size __gmpn_mod_1s_4p,.-__gmpn_mod_1s_4p
+
+ .align 16, 0x90
+ .globl __gmpn_mod_1s_4p_cps
+ .type __gmpn_mod_1s_4p_cps,@function
+
+__gmpn_mod_1s_4p_cps:
+
+
+ push %rbp
+ bsr %rsi, %rcx
+ push %rbx
+ mov %rdi, %rbx
+ push %r12
+ xor $63, %ecx
+ mov %rsi, %r12
+ mov %ecx, %ebp
+ sal %cl, %r12
+ mov %r12, %rdi
+
+
+
+ call __gmpn_invert_limb@PLT
+
+
+ mov %r12, %r8
+ mov %rax, %r11
+ mov %rax, (%rbx)
+ mov %rbp, 8(%rbx)
+ neg %r8
+ mov %ebp, %ecx
+ mov $1, %esi
+
+ shld %cl, %rax, %rsi
+
+ imul %r8, %rsi
+ mul %rsi
+
+ add %rsi, %rdx
+ shr %cl, %rsi
+ mov %rsi, 16(%rbx)
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr %cl, %rsi
+ mov %rsi, 24(%rbx)
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr %cl, %rsi
+ mov %rsi, 32(%rbx)
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr %cl, %rsi
+ mov %rsi, 40(%rbx)
+
+ not %rdx
+ imul %r12, %rdx
+ add %rdx, %r12
+ cmp %rdx, %rax
+ cmovnc %rdx, %r12
+
+ shr %cl, %r12
+ mov %r12, 48(%rbx)
+
+ pop %r12
+ pop %rbx
+ pop %rbp
+
+ ret
+ .size __gmpn_mod_1s_4p_cps,.-__gmpn_mod_1s_4p_cps
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mod_34lsub1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_34lsub1.s
new file mode 100644
index 0000000..e2a2ebb
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mod_34lsub1.s
@@ -0,0 +1,228 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_mod_34lsub1
+ .type __gmpn_mod_34lsub1,@function
+
+__gmpn_mod_34lsub1:
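+# Returns a value congruent to {ap,n} modulo 2^48 - 1 (three quarters of a
+# 64-bit limb), accumulating 48-bit fields with the carries folded in; the
+# result is not necessarily fully reduced.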
+
+
+
+ mov $0x0000FFFFFFFFFFFF, %r11
+
+ mov (%rdi), %rax
+
+ cmp $2, %rsi
+ ja .Lgt2
+
+ jb .Lone
+
+ mov 8(%rdi), %rsi
+ mov %rax, %rdx
+ shr $48, %rax
+
+ and %r11, %rdx
+ add %rdx, %rax
+ mov %esi, %edx
+
+ shr $32, %rsi
+ add %rsi, %rax
+
+ shl $16, %rdx
+ add %rdx, %rax
+.Lone:
+ ret
+
+
+
+
+
+.Lgt2: mov 8(%rdi), %rcx
+ mov 16(%rdi), %rdx
+ xor %r9, %r9
+ add $24, %rdi
+ sub $12, %rsi
+ jc .Lend
+ .align 16, 0x90
+.Ltop:
+ add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ adc $0, %r9
+ add 24(%rdi), %rax
+ adc 32(%rdi), %rcx
+ adc 40(%rdi), %rdx
+ adc $0, %r9
+ add 48(%rdi), %rax
+ adc 56(%rdi), %rcx
+ adc 64(%rdi), %rdx
+ adc $0, %r9
+ add $72, %rdi
+ sub $9, %rsi
+ jnc .Ltop
+
+.Lend:
+ lea .Ltab(%rip), %r8
+ movslq 36(%r8,%rsi,4), %r10
+ add %r10, %r8
+ jmp *%r8
+
+ .section .data.rel.ro.local,"a",@progbits
+ .align 8, 0x90
+.Ltab: .long .L0-.Ltab
+ .long .L1-.Ltab
+ .long .L2-.Ltab
+ .long .L3-.Ltab
+ .long .L4-.Ltab
+ .long .L5-.Ltab
+ .long .L6-.Ltab
+ .long .L7-.Ltab
+ .long .L8-.Ltab
+ .text
+
+.L6: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ adc $0, %r9
+ add $24, %rdi
+.L3: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ jmp .Lcj1
+
+.L7: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ adc $0, %r9
+ add $24, %rdi
+.L4: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ adc $0, %r9
+ add $24, %rdi
+.L1: add (%rdi), %rax
+ adc $0, %rcx
+ jmp .Lcj2
+
+.L8: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ adc $0, %r9
+ add $24, %rdi
+.L5: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+ adc 16(%rdi), %rdx
+ adc $0, %r9
+ add $24, %rdi
+.L2: add (%rdi), %rax
+ adc 8(%rdi), %rcx
+
+.Lcj2: adc $0, %rdx
+.Lcj1: adc $0, %r9
+.L0: add %r9, %rax
+ adc $0, %rcx
+ adc $0, %rdx
+ adc $0, %rax
+
+ mov %rax, %rdi
+ shr $48, %rax
+
+ and %r11, %rdi
+ mov %ecx, %r10d
+
+ shr $32, %rcx
+
+ add %rdi, %rax
+ movzwl %dx, %edi
+ shl $16, %r10
+
+ add %rcx, %rax
+ shr $16, %rdx
+
+ add %r10, %rax
+ shl $32, %rdi
+
+ add %rdx, %rax
+ add %rdi, %rax
+
+
+ ret
+ .size __gmpn_mod_34lsub1,.-__gmpn_mod_34lsub1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mode1o.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mode1o.s
new file mode 100644
index 0000000..bff06a3
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mode1o.s
@@ -0,0 +1,189 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_modexact_1_odd
+ .type __gmpn_modexact_1_odd,@function
+
+__gmpn_modexact_1_odd:
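+# Hensel-style remainder of {src,size} by an odd divisor: the result is zero
+# exactly when the divisor divides the operand, which is what divisibility
+# tests need.  The 8-bit inverse from __gmp_binvert_limb_table is lifted to
+# a full 64-bit inverse below.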
+
+
+ mov $0, %ecx
+
+
+ .globl __gmpn_modexact_1c_odd
+ .type __gmpn_modexact_1c_odd,@function
+
+__gmpn_modexact_1c_odd:
+
+
+.Lent:
+
+
+
+
+
+ mov %rdx, %r8
+ shr %edx
+
+ mov __gmp_binvert_limb_table@GOTPCREL(%rip), %r9
+
+
+
+ and $127, %edx
+ mov %rcx, %r10
+
+ movzbl (%r9,%rdx), %edx
+
+ mov (%rdi), %rax
+ lea (%rdi,%rsi,8), %r11
+ mov %r8, %rdi
+
+ lea (%rdx,%rdx), %ecx
+ imul %edx, %edx
+
+ neg %rsi
+
+ imul %edi, %edx
+
+ sub %edx, %ecx
+
+ lea (%rcx,%rcx), %edx
+ imul %ecx, %ecx
+
+ imul %edi, %ecx
+
+ sub %ecx, %edx
+ xor %ecx, %ecx
+
+ lea (%rdx,%rdx), %r9
+ imul %rdx, %rdx
+
+ imul %r8, %rdx
+
+ sub %rdx, %r9
+ mov %r10, %rdx
+
+
+
+ inc %rsi
+ jz .Lone
+
+
+ .align 16, 0x90
+.Ltop:
+
+
+
+
+
+
+
+
+
+ sub %rdx, %rax
+
+ adc $0, %rcx
+ imul %r9, %rax
+
+ mul %r8
+
+ mov (%r11,%rsi,8), %rax
+ sub %rcx, %rax
+ setc %cl
+
+ inc %rsi
+ jnz .Ltop
+
+
+.Lone:
+ sub %rdx, %rax
+
+ adc $0, %rcx
+ imul %r9, %rax
+
+ mul %r8
+
+ lea (%rcx,%rdx), %rax
+
+ ret
+
+ .size __gmpn_modexact_1c_odd,.-__gmpn_modexact_1c_odd
+ .size __gmpn_modexact_1_odd,.-__gmpn_modexact_1_odd
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mp_bases.c b/vere/ext/gmp/gen/x86_64-linux/mpn/mp_bases.c
new file mode 100644
index 0000000..c72c531
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mp_bases.c
@@ -0,0 +1,268 @@
+/* This file generated by gen-bases.c - DO NOT EDIT. */
+
+#include "gmp-impl.h"
+
+#if GMP_NUMB_BITS != 64
+Error, error, this data is for 64 bits
+#endif
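+/* Per-base conversion data for bases 2..256.  Each row holds chars_per_limb
+   (digits of that base fitting in one limb), fixed-point approximations of
+   log(2)/log(b) and log(b)/log(2), big_base (b^chars_per_limb, or log2(b)
+   for power-of-2 bases), and an invert_limb-style reciprocal of big_base.  */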
+
+const struct bases mp_bases[257] =
+{
+ /* 0 */ { 0, 0, 0, 0, 0 },
+ /* 1 */ { 0, 0, 0, 0, 0 },
+ /* 2 */ { 64, CNST_LIMB(0xffffffffffffffff), CNST_LIMB(0x1fffffffffffffff), CNST_LIMB(0x1), CNST_LIMB(0x0) },
+ /* 3 */ { 40, CNST_LIMB(0xa1849cc1a9a9e94e), CNST_LIMB(0x32b803473f7ad0f3), CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },
+ /* 4 */ { 32, CNST_LIMB(0x7fffffffffffffff), CNST_LIMB(0x3fffffffffffffff), CNST_LIMB(0x2), CNST_LIMB(0x0) },
+ /* 5 */ { 27, CNST_LIMB(0x6e40d1a4143dcb94), CNST_LIMB(0x4a4d3c25e68dc57f), CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90) },
+ /* 6 */ { 24, CNST_LIMB(0x6308c91b702a7cf4), CNST_LIMB(0x52b803473f7ad0f3), CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295) },
+ /* 7 */ { 22, CNST_LIMB(0x5b3064eb3aa6d388), CNST_LIMB(0x59d5d9fd5010b366), CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b) },
+ /* 8 */ { 21, CNST_LIMB(0x5555555555555555), CNST_LIMB(0x5fffffffffffffff), CNST_LIMB(0x3), CNST_LIMB(0x0) },
+ /* 9 */ { 20, CNST_LIMB(0x50c24e60d4d4f4a7), CNST_LIMB(0x6570068e7ef5a1e7), CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },
+ /* 10 */ { 19, CNST_LIMB(0x4d104d427de7fbcc), CNST_LIMB(0x6a4d3c25e68dc57f), CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a) },
+ /* 11 */ { 18, CNST_LIMB(0x4a00270775914e88), CNST_LIMB(0x6eb3a9f01975077f), CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b) },
+ /* 12 */ { 17, CNST_LIMB(0x4768ce0d05818e12), CNST_LIMB(0x72b803473f7ad0f3), CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f) },
+ /* 13 */ { 17, CNST_LIMB(0x452e53e365907bda), CNST_LIMB(0x766a008e4788cbcd), CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3) },
+ /* 14 */ { 16, CNST_LIMB(0x433cfffb4b5aae55), CNST_LIMB(0x79d5d9fd5010b366), CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86) },
+ /* 15 */ { 16, CNST_LIMB(0x41867711b4f85355), CNST_LIMB(0x7d053f6d26089673), CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48) },
+ /* 16 */ { 16, CNST_LIMB(0x3fffffffffffffff), CNST_LIMB(0x7fffffffffffffff), CNST_LIMB(0x4), CNST_LIMB(0x0) },
+ /* 17 */ { 15, CNST_LIMB(0x3ea16afd58b10966), CNST_LIMB(0x82cc7edf592262cf), CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323) },
+ /* 18 */ { 15, CNST_LIMB(0x3d64598d154dc4de), CNST_LIMB(0x8570068e7ef5a1e7), CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c) },
+ /* 19 */ { 15, CNST_LIMB(0x3c43c23018bb5563), CNST_LIMB(0x87ef05ae409a0288), CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec) },
+ /* 20 */ { 14, CNST_LIMB(0x3b3b9a42873069c7), CNST_LIMB(0x8a4d3c25e68dc57f), CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e) },
+ /* 21 */ { 14, CNST_LIMB(0x3a4898f06cf41ac9), CNST_LIMB(0x8c8ddd448f8b845a), CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239) },
+ /* 22 */ { 14, CNST_LIMB(0x39680b13582e7c18), CNST_LIMB(0x8eb3a9f01975077f), CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08) },
+ /* 23 */ { 14, CNST_LIMB(0x3897b2b751ae561a), CNST_LIMB(0x90c10500d63aa658), CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38) },
+ /* 24 */ { 13, CNST_LIMB(0x37d5aed131f19c98), CNST_LIMB(0x92b803473f7ad0f3), CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1) },
+ /* 25 */ { 13, CNST_LIMB(0x372068d20a1ee5ca), CNST_LIMB(0x949a784bcd1b8afe), CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5) },
+ /* 26 */ { 13, CNST_LIMB(0x3676867e5d60de29), CNST_LIMB(0x966a008e4788cbcd), CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9) },
+ /* 27 */ { 13, CNST_LIMB(0x35d6deeb388df86f), CNST_LIMB(0x982809d5be7072db), CNST_LIMB(0x383d9170b85ff80b), CNST_LIMB(0x2351ffcaa9c7c4ae) },
+ /* 28 */ { 13, CNST_LIMB(0x354071d61c77fa2e), CNST_LIMB(0x99d5d9fd5010b366), CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636) },
+ /* 29 */ { 13, CNST_LIMB(0x34b260c5671b18ac), CNST_LIMB(0x9b74948f5532da4b), CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d) },
+ /* 30 */ { 13, CNST_LIMB(0x342be986572b45cc), CNST_LIMB(0x9d053f6d26089673), CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6) },
+ /* 31 */ { 12, CNST_LIMB(0x33ac61b998fbbdf2), CNST_LIMB(0x9e88c6b3626a72aa), CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5) },
+ /* 32 */ { 12, CNST_LIMB(0x3333333333333333), CNST_LIMB(0x9fffffffffffffff), CNST_LIMB(0x5), CNST_LIMB(0x0) },
+ /* 33 */ { 12, CNST_LIMB(0x32bfd90114c12861), CNST_LIMB(0xa16bad3758efd873), CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f) },
+ /* 34 */ { 12, CNST_LIMB(0x3251dcf6169e45f2), CNST_LIMB(0xa2cc7edf592262cf), CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec) },
+ /* 35 */ { 12, CNST_LIMB(0x31e8d59f180dc630), CNST_LIMB(0xa4231623369e78e5), CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509) },
+ /* 36 */ { 12, CNST_LIMB(0x3184648db8153e7a), CNST_LIMB(0xa570068e7ef5a1e7), CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295) },
+ /* 37 */ { 12, CNST_LIMB(0x312434e89c35dacd), CNST_LIMB(0xa6b3d78b6d3b24fb), CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f) },
+ /* 38 */ { 12, CNST_LIMB(0x30c7fa349460a541), CNST_LIMB(0xa7ef05ae409a0288), CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544) },
+ /* 39 */ { 12, CNST_LIMB(0x306f6f4c8432bc6d), CNST_LIMB(0xa92203d587039cc1), CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c) },
+ /* 40 */ { 12, CNST_LIMB(0x301a557ffbfdd252), CNST_LIMB(0xaa4d3c25e68dc57f), CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197) },
+ /* 41 */ { 11, CNST_LIMB(0x2fc873d1fda55f3b), CNST_LIMB(0xab7110e6ce866f2b), CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae) },
+ /* 42 */ { 11, CNST_LIMB(0x2f799652a4e6dc49), CNST_LIMB(0xac8ddd448f8b845a), CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7) },
+ /* 43 */ { 11, CNST_LIMB(0x2f2d8d8f64460aad), CNST_LIMB(0xada3f5fb9c415052), CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94) },
+ /* 44 */ { 11, CNST_LIMB(0x2ee42e164e8f53a4), CNST_LIMB(0xaeb3a9f01975077f), CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a) },
+ /* 45 */ { 11, CNST_LIMB(0x2e9d500984041dbd), CNST_LIMB(0xafbd42b465836767), CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46) },
+ /* 46 */ { 11, CNST_LIMB(0x2e58cec05a6a8144), CNST_LIMB(0xb0c10500d63aa658), CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083) },
+ /* 47 */ { 11, CNST_LIMB(0x2e1688743ef9104c), CNST_LIMB(0xb1bf311e95d00de3), CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988) },
+ /* 48 */ { 11, CNST_LIMB(0x2dd65df7a583598f), CNST_LIMB(0xb2b803473f7ad0f3), CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167) },
+ /* 49 */ { 11, CNST_LIMB(0x2d9832759d5369c4), CNST_LIMB(0xb3abb3faa02166cc), CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b) },
+ /* 50 */ { 11, CNST_LIMB(0x2d5beb38dcd1394c), CNST_LIMB(0xb49a784bcd1b8afe), CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962) },
+ /* 51 */ { 11, CNST_LIMB(0x2d216f7943e2ba6a), CNST_LIMB(0xb5848226989d33c3), CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e) },
+ /* 52 */ { 11, CNST_LIMB(0x2ce8a82efbb3ff2c), CNST_LIMB(0xb66a008e4788cbcd), CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d) },
+ /* 53 */ { 11, CNST_LIMB(0x2cb17fea7ad7e332), CNST_LIMB(0xb74b1fd64e0753c6), CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25) },
+ /* 54 */ { 11, CNST_LIMB(0x2c7be2b0cfa1ba50), CNST_LIMB(0xb82809d5be7072db), CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a) },
+ /* 55 */ { 11, CNST_LIMB(0x2c47bddba92d7463), CNST_LIMB(0xb900e6160002ccfe), CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d) },
+ /* 56 */ { 11, CNST_LIMB(0x2c14fffcaa8b131e), CNST_LIMB(0xb9d5d9fd5010b366), CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1) },
+ /* 57 */ { 10, CNST_LIMB(0x2be398c3a38be053), CNST_LIMB(0xbaa708f58014d37c), CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1) },
+ /* 58 */ { 10, CNST_LIMB(0x2bb378e758451068), CNST_LIMB(0xbb74948f5532da4b), CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c) },
+ /* 59 */ { 10, CNST_LIMB(0x2b8492108be5e5f7), CNST_LIMB(0xbc3e9ca2e1a05533), CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492) },
+ /* 60 */ { 10, CNST_LIMB(0x2b56d6c70d55481b), CNST_LIMB(0xbd053f6d26089673), CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5) },
+ /* 61 */ { 10, CNST_LIMB(0x2b2a3a608c72ddd5), CNST_LIMB(0xbdc899ab3ff56c5e), CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416) },
+ /* 62 */ { 10, CNST_LIMB(0x2afeb0f1060c7e41), CNST_LIMB(0xbe88c6b3626a72aa), CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5) },
+ /* 63 */ { 10, CNST_LIMB(0x2ad42f3c9aca595c), CNST_LIMB(0xbf45e08bcf06554e), CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641) },
+ /* 64 */ { 10, CNST_LIMB(0x2aaaaaaaaaaaaaaa), CNST_LIMB(0xbfffffffffffffff), CNST_LIMB(0x6), CNST_LIMB(0x0) },
+ /* 65 */ { 10, CNST_LIMB(0x2a82193a13425883), CNST_LIMB(0xc0b73cb42e16914c), CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1) },
+ /* 66 */ { 10, CNST_LIMB(0x2a5a717672f66450), CNST_LIMB(0xc16bad3758efd873), CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33) },
+ /* 67 */ { 10, CNST_LIMB(0x2a33aa6e56d9c71c), CNST_LIMB(0xc21d6713f453f356), CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0) },
+ /* 68 */ { 10, CNST_LIMB(0x2a0dbbaa3bdfcea4), CNST_LIMB(0xc2cc7edf592262cf), CNST_LIMB(0x1d56299ada100000), CNST_LIMB(0x173decb64d1d4409) },
+ /* 69 */ { 10, CNST_LIMB(0x29e89d244eb4bfaf), CNST_LIMB(0xc379084815b5774c), CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f) },
+ /* 70 */ { 10, CNST_LIMB(0x29c44740d7db51e6), CNST_LIMB(0xc4231623369e78e5), CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62) },
+ /* 71 */ { 10, CNST_LIMB(0x29a0b2c743b14d74), CNST_LIMB(0xc4caba789e2b8687), CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2) },
+ /* 72 */ { 10, CNST_LIMB(0x297dd8dbb7c22a2d), CNST_LIMB(0xc570068e7ef5a1e7), CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2) },
+ /* 73 */ { 10, CNST_LIMB(0x295bb2f9285c8c1b), CNST_LIMB(0xc6130af40bc0ecbf), CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b) },
+ /* 74 */ { 10, CNST_LIMB(0x293a3aebe2be1c92), CNST_LIMB(0xc6b3d78b6d3b24fb), CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5) },
+ /* 75 */ { 10, CNST_LIMB(0x29196acc815ebd9f), CNST_LIMB(0xc7527b930c965bf2), CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e) },
+ /* 76 */ { 10, CNST_LIMB(0x28f93cfb40f5c22a), CNST_LIMB(0xc7ef05ae409a0288), CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd) },
+ /* 77 */ { 10, CNST_LIMB(0x28d9ac1badc64117), CNST_LIMB(0xc88983ed6985bae5), CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec) },
+ /* 78 */ { 10, CNST_LIMB(0x28bab310a196b478), CNST_LIMB(0xc92203d587039cc1), CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505) },
+ /* 79 */ { 10, CNST_LIMB(0x289c4cf88b774469), CNST_LIMB(0xc9b892675266f66c), CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac) },
+ /* 80 */ { 10, CNST_LIMB(0x287e7529fb244e91), CNST_LIMB(0xca4d3c25e68dc57f), CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d) },
+ /* 81 */ { 10, CNST_LIMB(0x286127306a6a7a53), CNST_LIMB(0xcae00d1cfdeb43cf), CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },
+ /* 82 */ { 10, CNST_LIMB(0x28445ec93f792b1e), CNST_LIMB(0xcb7110e6ce866f2b), CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f) },
+ /* 83 */ { 10, CNST_LIMB(0x282817e1038950fa), CNST_LIMB(0xcc0052b18b0e2a19), CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47) },
+ /* 84 */ { 10, CNST_LIMB(0x280c4e90c9ab1f45), CNST_LIMB(0xcc8ddd448f8b845a), CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a) },
+ /* 85 */ { 9, CNST_LIMB(0x27f0ff1bc1ee87cd), CNST_LIMB(0xcd19bb053fb0284e), CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4) },
+ /* 86 */ { 9, CNST_LIMB(0x27d625ecf571c340), CNST_LIMB(0xcda3f5fb9c415052), CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e) },
+ /* 87 */ { 9, CNST_LIMB(0x27bbbf95282fcd45), CNST_LIMB(0xce2c97d694adab3f), CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89) },
+ /* 88 */ { 9, CNST_LIMB(0x27a1c8c8ddaf84da), CNST_LIMB(0xceb3a9f01975077f), CNST_LIMB(0x4645b6958000000), CNST_LIMB(0xd24cde0463108cfa) },
+ /* 89 */ { 9, CNST_LIMB(0x27883e5e7df3f518), CNST_LIMB(0xcf393550f3aa6906), CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383) },
+ /* 90 */ { 9, CNST_LIMB(0x276f1d4c9847e90e), CNST_LIMB(0xcfbd42b465836767), CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10) },
+ /* 91 */ { 9, CNST_LIMB(0x275662a841b30191), CNST_LIMB(0xd03fda8b97997f33), CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43) },
+ /* 92 */ { 9, CNST_LIMB(0x273e0ba38d15a47b), CNST_LIMB(0xd0c10500d63aa658), CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447) },
+ /* 93 */ { 9, CNST_LIMB(0x2726158c1b13cf03), CNST_LIMB(0xd140c9faa1e5439e), CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0) },
+ /* 94 */ { 9, CNST_LIMB(0x270e7dc9c01d8e9b), CNST_LIMB(0xd1bf311e95d00de3), CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4) },
+ /* 95 */ { 9, CNST_LIMB(0x26f741dd3f070d61), CNST_LIMB(0xd23c41d42727c808), CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f) },
+ /* 96 */ { 9, CNST_LIMB(0x26e05f5f16c2159e), CNST_LIMB(0xd2b803473f7ad0f3), CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794) },
+ /* 97 */ { 9, CNST_LIMB(0x26c9d3fe61e80598), CNST_LIMB(0xd3327c6ab49ca6c8), CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc) },
+ /* 98 */ { 9, CNST_LIMB(0x26b39d7fc6ddab08), CNST_LIMB(0xd3abb3faa02166cc), CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3) },
+ /* 99 */ { 9, CNST_LIMB(0x269db9bc7772a5cc), CNST_LIMB(0xd423b07e986aa967), CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518) },
+ /* 100 */ { 9, CNST_LIMB(0x268826a13ef3fde6), CNST_LIMB(0xd49a784bcd1b8afe), CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e) },
+ /* 101 */ { 9, CNST_LIMB(0x2672e22d9dbdbd9f), CNST_LIMB(0xd510118708a8f8dd), CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f) },
+ /* 102 */ { 9, CNST_LIMB(0x265dea72f169cc99), CNST_LIMB(0xd5848226989d33c3), CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1) },
+ /* 103 */ { 9, CNST_LIMB(0x26493d93a8cb2514), CNST_LIMB(0xd5f7cff41e09aeb8), CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328) },
+ /* 104 */ { 9, CNST_LIMB(0x2634d9c282f3ef82), CNST_LIMB(0xd66a008e4788cbcd), CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c) },
+ /* 105 */ { 9, CNST_LIMB(0x2620bd41d8933adc), CNST_LIMB(0xd6db196a761949d9), CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179) },
+ /* 106 */ { 9, CNST_LIMB(0x260ce662ef04088a), CNST_LIMB(0xd74b1fd64e0753c6), CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf) },
+ /* 107 */ { 9, CNST_LIMB(0x25f95385547353fd), CNST_LIMB(0xd7ba18f93502e409), CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32) },
+ /* 108 */ { 9, CNST_LIMB(0x25e60316448db8e1), CNST_LIMB(0xd82809d5be7072db), CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188) },
+ /* 109 */ { 9, CNST_LIMB(0x25d2f390152f74f5), CNST_LIMB(0xd894f74b06ef8b40), CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c) },
+ /* 110 */ { 9, CNST_LIMB(0x25c02379aa9ad043), CNST_LIMB(0xd900e6160002ccfe), CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f) },
+ /* 111 */ { 9, CNST_LIMB(0x25ad9165f2c18907), CNST_LIMB(0xd96bdad2acb5f5ef), CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0) },
+ /* 112 */ { 9, CNST_LIMB(0x259b3bf36735c90c), CNST_LIMB(0xd9d5d9fd5010b366), CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8) },
+ /* 113 */ { 9, CNST_LIMB(0x258921cb955e7693), CNST_LIMB(0xda3ee7f38e181ed0), CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82) },
+ /* 114 */ { 9, CNST_LIMB(0x257741a2ac9170af), CNST_LIMB(0xdaa708f58014d37c), CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a) },
+ /* 115 */ { 9, CNST_LIMB(0x25659a3711bc827d), CNST_LIMB(0xdb0e4126bcc86bd7), CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5) },
+ /* 116 */ { 9, CNST_LIMB(0x25542a50f84b9c39), CNST_LIMB(0xdb74948f5532da4b), CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610) },
+ /* 117 */ { 9, CNST_LIMB(0x2542f0c20000377d), CNST_LIMB(0xdbda071cc67e6db5), CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c) },
+ /* 118 */ { 9, CNST_LIMB(0x2531ec64d772bd64), CNST_LIMB(0xdc3e9ca2e1a05533), CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977) },
+ /* 119 */ { 9, CNST_LIMB(0x25211c1ce2fb5a6e), CNST_LIMB(0xdca258dca9331635), CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e) },
+ /* 120 */ { 9, CNST_LIMB(0x25107ed5e7c3ec3b), CNST_LIMB(0xdd053f6d26089673), CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5) },
+ /* 121 */ { 9, CNST_LIMB(0x25001383bac8a744), CNST_LIMB(0xdd6753e032ea0efe), CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b) },
+ /* 122 */ { 9, CNST_LIMB(0x24efd921f390bce3), CNST_LIMB(0xddc899ab3ff56c5e), CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5) },
+ /* 123 */ { 9, CNST_LIMB(0x24dfceb3a26bb203), CNST_LIMB(0xde29142e0e01401f), CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b) },
+ /* 124 */ { 9, CNST_LIMB(0x24cff3430a0341a7), CNST_LIMB(0xde88c6b3626a72aa), CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10) },
+ /* 125 */ { 9, CNST_LIMB(0x24c045e15c149931), CNST_LIMB(0xdee7b471b3a9507d), CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90) },
+ /* 126 */ { 9, CNST_LIMB(0x24b0c5a679267ae2), CNST_LIMB(0xdf45e08bcf06554e), CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8) },
+ /* 127 */ { 9, CNST_LIMB(0x24a171b0b31461c8), CNST_LIMB(0xdfa34e1177c23362), CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1) },
+ /* 128 */ { 9, CNST_LIMB(0x2492492492492492), CNST_LIMB(0xdfffffffffffffff), CNST_LIMB(0x7), CNST_LIMB(0x0) },
+ /* 129 */ { 9, CNST_LIMB(0x24834b2c9d85cdfe), CNST_LIMB(0xe05bf942dbbc2145), CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d) },
+ /* 130 */ { 9, CNST_LIMB(0x247476f924137501), CNST_LIMB(0xe0b73cb42e16914c), CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0) },
+ /* 131 */ { 9, CNST_LIMB(0x2465cbc00a40cec0), CNST_LIMB(0xe111cd1d5133412e), CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085) },
+ /* 132 */ { 9, CNST_LIMB(0x245748bc980e0427), CNST_LIMB(0xe16bad3758efd873), CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d) },
+ /* 133 */ { 9, CNST_LIMB(0x2448ed2f49eb0633), CNST_LIMB(0xe1c4dfab90aab5ef), CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25) },
+ /* 134 */ { 9, CNST_LIMB(0x243ab85da36e3167), CNST_LIMB(0xe21d6713f453f356), CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d) },
+ /* 135 */ { 9, CNST_LIMB(0x242ca99203ea8c18), CNST_LIMB(0xe27545fba4fe385a), CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914) },
+ /* 136 */ { 9, CNST_LIMB(0x241ec01b7cce4ea0), CNST_LIMB(0xe2cc7edf592262cf), CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849) },
+ /* 137 */ { 9, CNST_LIMB(0x2410fb4da9b3b0fc), CNST_LIMB(0xe323142dc8c66b55), CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9) },
+ /* 138 */ { 9, CNST_LIMB(0x24035a808a0f315e), CNST_LIMB(0xe379084815b5774c), CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0) },
+ /* 139 */ { 8, CNST_LIMB(0x23f5dd105c67ab9d), CNST_LIMB(0xe3ce5d822ff4b643), CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf) },
+ /* 140 */ { 8, CNST_LIMB(0x23e8825d7b05abb1), CNST_LIMB(0xe4231623369e78e5), CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05) },
+ /* 141 */ { 8, CNST_LIMB(0x23db49cc3a0866fe), CNST_LIMB(0xe4773465d54aded7), CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19) },
+ /* 142 */ { 8, CNST_LIMB(0x23ce32c4c6cfb9f5), CNST_LIMB(0xe4caba789e2b8687), CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe) },
+ /* 143 */ { 8, CNST_LIMB(0x23c13cb308ab6ab7), CNST_LIMB(0xe51daa7e60fdd34c), CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7) },
+ /* 144 */ { 8, CNST_LIMB(0x23b4670682c0c709), CNST_LIMB(0xe570068e7ef5a1e7), CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757) },
+ /* 145 */ { 8, CNST_LIMB(0x23a7b13237187c8b), CNST_LIMB(0xe5c1d0b53bc09fca), CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae) },
+ /* 146 */ { 8, CNST_LIMB(0x239b1aac8ac74728), CNST_LIMB(0xe6130af40bc0ecbf), CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40) },
+ /* 147 */ { 8, CNST_LIMB(0x238ea2ef2b24c379), CNST_LIMB(0xe663b741df9c37c0), CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f) },
+ /* 148 */ { 8, CNST_LIMB(0x23824976f4045a26), CNST_LIMB(0xe6b3d78b6d3b24fb), CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d) },
+ /* 149 */ { 8, CNST_LIMB(0x23760dc3d6e4d729), CNST_LIMB(0xe7036db376537b90), CNST_LIMB(0x35f14200a827c61), CNST_LIMB(0x2fbb374806ec05f1) },
+ /* 150 */ { 8, CNST_LIMB(0x2369ef58c30bd43e), CNST_LIMB(0xe7527b930c965bf2), CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe) },
+ /* 151 */ { 8, CNST_LIMB(0x235dedbb8e82aa1c), CNST_LIMB(0xe7a102f9d39a9331), CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e) },
+ /* 152 */ { 8, CNST_LIMB(0x23520874dfeb1ffd), CNST_LIMB(0xe7ef05ae409a0288), CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc) },
+ /* 153 */ { 8, CNST_LIMB(0x23463f1019228dd7), CNST_LIMB(0xe83c856dd81804b7), CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910) },
+ /* 154 */ { 8, CNST_LIMB(0x233a911b42aa9b3c), CNST_LIMB(0xe88983ed6985bae5), CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614) },
+ /* 155 */ { 8, CNST_LIMB(0x232efe26f7cf33f9), CNST_LIMB(0xe8d602d948f83829), CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2) },
+ /* 156 */ { 8, CNST_LIMB(0x232385c65381b485), CNST_LIMB(0xe92203d587039cc1), CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d) },
+ /* 157 */ { 8, CNST_LIMB(0x2318278edde1b39b), CNST_LIMB(0xe96d887e26cd57b7), CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782) },
+ /* 158 */ { 8, CNST_LIMB(0x230ce3187a6c2be9), CNST_LIMB(0xe9b892675266f66c), CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b) },
+ /* 159 */ { 8, CNST_LIMB(0x2301b7fd56ca21bb), CNST_LIMB(0xea03231d8d8224ba), CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd) },
+ /* 160 */ { 8, CNST_LIMB(0x22f6a5d9da38341c), CNST_LIMB(0xea4d3c25e68dc57f), CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9) },
+ /* 161 */ { 8, CNST_LIMB(0x22ebac4c9580d89f), CNST_LIMB(0xea96defe264b59be), CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753) },
+ /* 162 */ { 8, CNST_LIMB(0x22e0caf633834beb), CNST_LIMB(0xeae00d1cfdeb43cf), CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7) },
+ /* 163 */ { 8, CNST_LIMB(0x22d601796a418886), CNST_LIMB(0xeb28c7f233bdd372), CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb) },
+ /* 164 */ { 8, CNST_LIMB(0x22cb4f7aec6fd8b4), CNST_LIMB(0xeb7110e6ce866f2b), CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac) },
+ /* 165 */ { 8, CNST_LIMB(0x22c0b4a15b80d83e), CNST_LIMB(0xebb8e95d3f7d9df2), CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e) },
+ /* 166 */ { 8, CNST_LIMB(0x22b630953a28f77a), CNST_LIMB(0xec0052b18b0e2a19), CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004) },
+ /* 167 */ { 8, CNST_LIMB(0x22abc300df54ca7c), CNST_LIMB(0xec474e39705912d2), CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8) },
+ /* 168 */ { 8, CNST_LIMB(0x22a16b90698da5d2), CNST_LIMB(0xec8ddd448f8b845a), CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5) },
+ /* 169 */ { 8, CNST_LIMB(0x229729f1b2c83ded), CNST_LIMB(0xecd4011c8f11979a), CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe) },
+ /* 170 */ { 8, CNST_LIMB(0x228cfdd444992f78), CNST_LIMB(0xed19bb053fb0284e), CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532) },
+ /* 171 */ { 8, CNST_LIMB(0x2282e6e94ccb8588), CNST_LIMB(0xed5f0c3cbf8fa470), CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02) },
+ /* 172 */ { 8, CNST_LIMB(0x2278e4e392557ecf), CNST_LIMB(0xeda3f5fb9c415052), CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317) },
+ /* 173 */ { 8, CNST_LIMB(0x226ef7776aa7fd29), CNST_LIMB(0xede87974f3c81855), CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c) },
+ /* 174 */ { 8, CNST_LIMB(0x22651e5aaf5532d0), CNST_LIMB(0xee2c97d694adab3f), CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e) },
+ /* 175 */ { 8, CNST_LIMB(0x225b5944b40b4694), CNST_LIMB(0xee7052491d2c3e64), CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286) },
+ /* 176 */ { 8, CNST_LIMB(0x2251a7ee3cdfcca5), CNST_LIMB(0xeeb3a9f01975077f), CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb) },
+ /* 177 */ { 8, CNST_LIMB(0x22480a1174e913d9), CNST_LIMB(0xeef69fea211b2627), CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b) },
+ /* 178 */ { 8, CNST_LIMB(0x223e7f69e522683c), CNST_LIMB(0xef393550f3aa6906), CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1) },
+ /* 179 */ { 8, CNST_LIMB(0x223507b46b988abe), CNST_LIMB(0xef7b6b399471103e), CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef) },
+ /* 180 */ { 8, CNST_LIMB(0x222ba2af32dbbb9e), CNST_LIMB(0xefbd42b465836767), CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3) },
+ /* 181 */ { 8, CNST_LIMB(0x22225019a9b4d16c), CNST_LIMB(0xeffebccd41ffcd5c), CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb) },
+ /* 182 */ { 8, CNST_LIMB(0x22190fb47b1af172), CNST_LIMB(0xf03fda8b97997f33), CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8) },
+ /* 183 */ { 8, CNST_LIMB(0x220fe14186679801), CNST_LIMB(0xf0809cf27f703d52), CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005) },
+ /* 184 */ { 8, CNST_LIMB(0x2206c483d7c6b786), CNST_LIMB(0xf0c10500d63aa658), CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926) },
+ /* 185 */ { 8, CNST_LIMB(0x21fdb93fa0e0ccc5), CNST_LIMB(0xf10113b153c8ea7b), CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f) },
+ /* 186 */ { 8, CNST_LIMB(0x21f4bf3a31bcdcaa), CNST_LIMB(0xf140c9faa1e5439e), CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6) },
+ /* 187 */ { 8, CNST_LIMB(0x21ebd639f1d86584), CNST_LIMB(0xf18028cf72976a4e), CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497) },
+ /* 188 */ { 8, CNST_LIMB(0x21e2fe06597361a6), CNST_LIMB(0xf1bf311e95d00de3), CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67) },
+ /* 189 */ { 8, CNST_LIMB(0x21da3667eb0e8ccb), CNST_LIMB(0xf1fde3d30e812642), CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b) },
+ /* 190 */ { 8, CNST_LIMB(0x21d17f282d1a300e), CNST_LIMB(0xf23c41d42727c808), CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56) },
+ /* 191 */ { 8, CNST_LIMB(0x21c8d811a3d3c9e1), CNST_LIMB(0xf27a4c0585cbf805), CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0) },
+ /* 192 */ { 8, CNST_LIMB(0x21c040efcb50f858), CNST_LIMB(0xf2b803473f7ad0f3), CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf) },
+ /* 193 */ { 8, CNST_LIMB(0x21b7b98f11b61c1a), CNST_LIMB(0xf2f56875eb3f2614), CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a) },
+ /* 194 */ { 8, CNST_LIMB(0x21af41bcd19739ba), CNST_LIMB(0xf3327c6ab49ca6c8), CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962) },
+ /* 195 */ { 8, CNST_LIMB(0x21a6d9474c81adf0), CNST_LIMB(0xf36f3ffb6d916240), CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e) },
+ /* 196 */ { 8, CNST_LIMB(0x219e7ffda5ad572a), CNST_LIMB(0xf3abb3faa02166cc), CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86) },
+ /* 197 */ { 8, CNST_LIMB(0x219635afdcd3e46d), CNST_LIMB(0xf3e7d9379f70166a), CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca) },
+ /* 198 */ { 8, CNST_LIMB(0x218dfa2ec92d0643), CNST_LIMB(0xf423b07e986aa967), CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2) },
+ /* 199 */ { 8, CNST_LIMB(0x2185cd4c148e4ae2), CNST_LIMB(0xf45f3a98a20738a4), CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9) },
+ /* 200 */ { 8, CNST_LIMB(0x217daeda36ad7a5c), CNST_LIMB(0xf49a784bcd1b8afe), CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6) },
+ /* 201 */ { 8, CNST_LIMB(0x21759eac708452fe), CNST_LIMB(0xf4d56a5b33cec44a), CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22) },
+ /* 202 */ { 8, CNST_LIMB(0x216d9c96c7d490d4), CNST_LIMB(0xf510118708a8f8dd), CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20) },
+ /* 203 */ { 8, CNST_LIMB(0x2165a86e02cb358c), CNST_LIMB(0xf54a6e8ca5438db1), CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf) },
+ /* 204 */ { 8, CNST_LIMB(0x215dc207a3c20fdf), CNST_LIMB(0xf5848226989d33c3), CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9) },
+ /* 205 */ { 8, CNST_LIMB(0x2155e939e51e8b37), CNST_LIMB(0xf5be4d0cb51434aa), CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff) },
+ /* 206 */ { 8, CNST_LIMB(0x214e1ddbb54cd933), CNST_LIMB(0xf5f7cff41e09aeb8), CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a) },
+ /* 207 */ { 8, CNST_LIMB(0x21465fc4b2d68f98), CNST_LIMB(0xf6310b8f55304840), CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd) },
+ /* 208 */ { 8, CNST_LIMB(0x213eaecd2893dd60), CNST_LIMB(0xf66a008e4788cbcd), CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985) },
+ /* 209 */ { 8, CNST_LIMB(0x21370ace09f681c6), CNST_LIMB(0xf6a2af9e5a0f0a08), CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7) },
+ /* 210 */ { 8, CNST_LIMB(0x212f73a0ef6db7cb), CNST_LIMB(0xf6db196a761949d9), CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175) },
+ /* 211 */ { 8, CNST_LIMB(0x2127e92012e25004), CNST_LIMB(0xf7133e9b156c7be5), CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e) },
+ /* 212 */ { 8, CNST_LIMB(0x21206b264c4a39a7), CNST_LIMB(0xf74b1fd64e0753c6), CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef) },
+ /* 213 */ { 8, CNST_LIMB(0x2118f98f0e52c28f), CNST_LIMB(0xf782bdbfdda6577b), CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9) },
+ /* 214 */ { 8, CNST_LIMB(0x211194366320dc66), CNST_LIMB(0xf7ba18f93502e409), CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc) },
+ /* 215 */ { 8, CNST_LIMB(0x210a3af8e926bb78), CNST_LIMB(0xf7f1322182cf15d1), CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b) },
+ /* 216 */ { 8, CNST_LIMB(0x2102edb3d00e29a6), CNST_LIMB(0xf82809d5be7072db), CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295) },
+ /* 217 */ { 8, CNST_LIMB(0x20fbac44d5b6edc2), CNST_LIMB(0xf85ea0b0b27b2610), CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2) },
+ /* 218 */ { 8, CNST_LIMB(0x20f4768a4348ad08), CNST_LIMB(0xf894f74b06ef8b40), CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049) },
+ /* 219 */ { 8, CNST_LIMB(0x20ed4c62ea57b1f0), CNST_LIMB(0xf8cb0e3b4b3bbdb3), CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada) },
+ /* 220 */ { 8, CNST_LIMB(0x20e62dae221c087a), CNST_LIMB(0xf900e6160002ccfe), CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933) },
+ /* 221 */ { 8, CNST_LIMB(0x20df1a4bc4ba6525), CNST_LIMB(0xf9367f6da0ab2e9c), CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d) },
+ /* 222 */ { 8, CNST_LIMB(0x20d8121c2c9e506e), CNST_LIMB(0xf96bdad2acb5f5ef), CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b) },
+ /* 223 */ { 8, CNST_LIMB(0x20d1150031e51549), CNST_LIMB(0xf9a0f8d3b0e04fde), CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4) },
+ /* 224 */ { 8, CNST_LIMB(0x20ca22d927d8f54d), CNST_LIMB(0xf9d5d9fd5010b366), CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053) },
+ /* 225 */ { 8, CNST_LIMB(0x20c33b88da7c29aa), CNST_LIMB(0xfa0a7eda4c112ce6), CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48) },
+ /* 226 */ { 8, CNST_LIMB(0x20bc5ef18c233bdf), CNST_LIMB(0xfa3ee7f38e181ed0), CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9) },
+ /* 227 */ { 8, CNST_LIMB(0x20b58cf5f31e4526), CNST_LIMB(0xfa7315d02f20c7bd), CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e) },
+ /* 228 */ { 8, CNST_LIMB(0x20aec5793770a74d), CNST_LIMB(0xfaa708f58014d37c), CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b) },
+ /* 229 */ { 8, CNST_LIMB(0x20a8085ef096d530), CNST_LIMB(0xfadac1e711c832d1), CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1) },
+ /* 230 */ { 8, CNST_LIMB(0x20a1558b2359c4b1), CNST_LIMB(0xfb0e4126bcc86bd7), CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762) },
+ /* 231 */ { 8, CNST_LIMB(0x209aace23fafa72e), CNST_LIMB(0xfb418734a9008bd9), CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555) },
+ /* 232 */ { 8, CNST_LIMB(0x20940e491ea988d7), CNST_LIMB(0xfb74948f5532da4b), CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe) },
+ /* 233 */ { 8, CNST_LIMB(0x208d79a5006d7a47), CNST_LIMB(0xfba769b39e49640e), CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f) },
+ /* 234 */ { 8, CNST_LIMB(0x2086eedb8a3cead3), CNST_LIMB(0xfbda071cc67e6db5), CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3) },
+ /* 235 */ { 8, CNST_LIMB(0x20806dd2c486dcc6), CNST_LIMB(0xfc0c6d447c5dd362), CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c) },
+ /* 236 */ { 8, CNST_LIMB(0x2079f67119059fae), CNST_LIMB(0xfc3e9ca2e1a05533), CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773) },
+ /* 237 */ { 8, CNST_LIMB(0x2073889d50e7bf63), CNST_LIMB(0xfc7095ae91e1c760), CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5) },
+ /* 238 */ { 8, CNST_LIMB(0x206d243e9303d929), CNST_LIMB(0xfca258dca9331635), CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f) },
+ /* 239 */ { 8, CNST_LIMB(0x2066c93c62170aa8), CNST_LIMB(0xfcd3e6a0ca8906c2), CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c) },
+ /* 240 */ { 8, CNST_LIMB(0x2060777e9b0db0f6), CNST_LIMB(0xfd053f6d26089673), CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529) },
+ /* 241 */ { 8, CNST_LIMB(0x205a2eed73563032), CNST_LIMB(0xfd3663b27f31d529), CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1) },
+ /* 242 */ { 8, CNST_LIMB(0x2053ef71773d7e6a), CNST_LIMB(0xfd6753e032ea0efe), CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2) },
+ /* 243 */ { 8, CNST_LIMB(0x204db8f388552ea9), CNST_LIMB(0xfd9810643d6614c3), CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d) },
+ /* 244 */ { 8, CNST_LIMB(0x20478b5cdbe2bb2f), CNST_LIMB(0xfdc899ab3ff56c5e), CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6) },
+ /* 245 */ { 8, CNST_LIMB(0x20416696f957cfbf), CNST_LIMB(0xfdf8f02086af2c4b), CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2) },
+ /* 246 */ { 8, CNST_LIMB(0x203b4a8bb8d356e7), CNST_LIMB(0xfe29142e0e01401f), CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47) },
+ /* 247 */ { 8, CNST_LIMB(0x2035372541ab0f0d), CNST_LIMB(0xfe59063c8822ce56), CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016) },
+ /* 248 */ { 8, CNST_LIMB(0x202f2c4e08fd6dcc), CNST_LIMB(0xfe88c6b3626a72aa), CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18) },
+ /* 249 */ { 8, CNST_LIMB(0x202929f0d04b99e9), CNST_LIMB(0xfeb855f8ca88fb0d), CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d) },
+ /* 250 */ { 8, CNST_LIMB(0x20232ff8a41b45eb), CNST_LIMB(0xfee7b471b3a9507d), CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5) },
+ /* 251 */ { 8, CNST_LIMB(0x201d3e50daa036db), CNST_LIMB(0xff16e281db76303b), CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3) },
+ /* 252 */ { 8, CNST_LIMB(0x201754e5126d446d), CNST_LIMB(0xff45e08bcf06554e), CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d) },
+ /* 253 */ { 8, CNST_LIMB(0x201173a1312ca135), CNST_LIMB(0xff74aef0efafadd7), CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac) },
+ /* 254 */ { 8, CNST_LIMB(0x200b9a71625f3b13), CNST_LIMB(0xffa34e1177c23362), CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93) },
+ /* 255 */ { 8, CNST_LIMB(0x2005c94216230568), CNST_LIMB(0xffd1be4c7f2af942), CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f) },
+ /* 256 */ { 8, CNST_LIMB(0x1fffffffffffffff), CNST_LIMB(0xffffffffffffffff), CNST_LIMB(0x8), CNST_LIMB(0x0) },
+};
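A hedged reading of the rows above, inferred from the data itself since the generated table carries no field comments: each entry appears to follow the usual GMP __mp_bases layout of a digits-per-limb count followed by four limb constants, and for power-of-two bases the second-to-last column holds log2(base) with a zero inverse, which is why row 128 ends in 0x7, 0x0 and row 256 in 0x8, 0x0. A minimal C sketch of that power-of-two relationship, checked against the two rows visible above:

    /* Assumed interpretation, not stated in the generated file: for base
       b = 2^k the digits-per-limb column is floor(64 / k) on this 64-bit
       build. */
    #include <assert.h>

    static unsigned chars_per_limb_pow2(unsigned k)
    {
        return 64 / k;                 /* base-2^k digits in one 64-bit limb */
    }

    int main(void)
    {
        assert(chars_per_limb_pow2(7) == 9);   /* matches row 128 above */
        assert(chars_per_limb_pow2(8) == 8);   /* matches row 256 above */
        return 0;
    }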
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mul_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mul_1.s
new file mode 100644
index 0000000..1644074
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mul_1.s
@@ -0,0 +1,205 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mul_1c
+ .type __gmpn_mul_1c,@function
+
+__gmpn_mul_1c:
+
+
+
+
+ push %rbx
+ mov %r8, %r10
+
+ jmp .Lcommon
+ .size __gmpn_mul_1c,.-__gmpn_mul_1c
+
+ .globl __gmpn_mul_1
+ .type __gmpn_mul_1,@function
+
+__gmpn_mul_1:
+
+
+
+
+
+ push %rbx
+ xor %r10, %r10
+.Lcommon:
+ mov (%rsi), %rax
+ mov %rdx, %rbx
+
+ mul %rcx
+ mov %rbx, %r11
+
+ add %r10, %rax
+ adc $0, %rdx
+
+ and $3, %ebx
+ jz .Lb0
+ cmp $2, %ebx
+ jz .Lb2
+ jg .Lb3
+
+.Lb1: dec %r11
+ jne .Lgt1
+ mov %rax, (%rdi)
+ jmp .Lret
+.Lgt1: lea 8(%rsi,%r11,8), %rsi
+ lea -8(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r10, %r10
+ xor %ebx, %ebx
+ mov %rax, %r9
+ mov (%rsi,%r11,8), %rax
+ mov %rdx, %r8
+ jmp .LL1
+
+.Lb0: lea (%rsi,%r11,8), %rsi
+ lea -16(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r10, %r10
+ mov %rax, %r8
+ mov %rdx, %rbx
+ jmp .LL0
+
+.Lb3: lea -8(%rsi,%r11,8), %rsi
+ lea -24(%rdi,%r11,8), %rdi
+ neg %r11
+ mov %rax, %rbx
+ mov %rdx, %r10
+ jmp .LL3
+
+.Lb2: lea -16(%rsi,%r11,8), %rsi
+ lea -32(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r8, %r8
+ xor %ebx, %ebx
+ mov %rax, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mov %rdx, %r9
+ jmp .LL2
+
+ .align 16, 0x90
+.Ltop: mov %r10, (%rdi,%r11,8)
+ add %rax, %r9
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %r8
+ mov $0, %r10d
+.LL1: mul %rcx
+ mov %r9, 8(%rdi,%r11,8)
+ add %rax, %r8
+ adc %rdx, %rbx
+.LL0: mov 8(%rsi,%r11,8), %rax
+ mul %rcx
+ mov %r8, 16(%rdi,%r11,8)
+ add %rax, %rbx
+ adc %rdx, %r10
+.LL3: mov 16(%rsi,%r11,8), %rax
+ mul %rcx
+ mov %rbx, 24(%rdi,%r11,8)
+ mov $0, %r8d
+ mov %r8, %rbx
+ add %rax, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mov %r8, %r9
+ adc %rdx, %r9
+.LL2: mul %rcx
+ add $4, %r11
+ js .Ltop
+
+ mov %r10, (%rdi,%r11,8)
+ add %rax, %r9
+ adc %r8, %rdx
+ mov %r9, 8(%rdi,%r11,8)
+ add %r8, %rdx
+.Lret: mov %rdx, %rax
+
+ pop %rbx
+
+
+ ret
+ .size __gmpn_mul_1,.-__gmpn_mul_1
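For reference, a hedged C sketch of what this unrolled routine computes, going by standard GMP mpn conventions rather than anything stated in the generated file: __gmpn_mul_1 multiplies an n-limb operand by a single limb and returns the carry-out, and __gmpn_mul_1c is the same operation with an initial carry folded in (the extra argument the code moves into %r10 before jumping to the common path).

    #include <stdint.h>

    typedef uint64_t limb;   /* 64-bit limbs, matching this x86_64 build */

    /* Sketch of mpn_mul_1c: rp[0..n-1] = up[0..n-1] * v + cin, returning the
       carry-out limb; unsigned __int128 stands in for the widening mul
       instruction used by the assembly. */
    static limb ref_mul_1c(limb *rp, const limb *up, long n, limb v, limb cin)
    {
        limb carry = cin;
        for (long i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)up[i] * v + carry;
            rp[i] = (limb)t;
            carry = (limb)(t >> 64);
        }
        return carry;
    }

    /* mpn_mul_1 is the same loop with a zero initial carry. */
    static limb ref_mul_1(limb *rp, const limb *up, long n, limb v)
    {
        return ref_mul_1c(rp, up, n, v, 0);
    }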
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mul_2.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mul_2.s
new file mode 100644
index 0000000..0c3310d
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mul_2.s
@@ -0,0 +1,218 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mul_2
+ .type __gmpn_mul_2,@function
+
+__gmpn_mul_2:
+
+
+ push %rbx
+ push %rbp
+
+ mov (%rcx), %r8
+ mov 8(%rcx), %r9
+
+ mov (%rsi), %rax
+
+ mov %rdx, %r11
+ neg %r11
+ lea -8(%rsi,%rdx,8), %rsi
+ lea -8(%rdi,%rdx,8), %rdi
+
+ and $3, %edx
+ jz .Lm2p0
+ cmp $2, %edx
+ jc .Lm2p1
+ jz .Lm2p2
+.Lm2p3:
+ mul %r8
+ xor %r10d, %r10d
+ mov %rax, %rcx
+ mov %rdx, %rbp
+ mov 8(%rsi,%r11,8), %rax
+ add $-1, %r11
+ mul %r9
+ add %rax, %rbp
+ jmp .Lm23
+.Lm2p0:
+ mul %r8
+ xor %ebp, %ebp
+ mov %rax, %rbx
+ mov %rdx, %rcx
+ jmp .Lm20
+.Lm2p1:
+ mul %r8
+ xor %r10d, %r10d
+ xor %ebx, %ebx
+ xor %ecx, %ecx
+ add $1, %r11
+ jmp .Lm2top
+.Lm2p2:
+ mul %r8
+ xor %ebx, %ebx
+ xor %ecx, %ecx
+ mov %rax, %rbp
+ mov %rdx, %r10
+ mov 8(%rsi,%r11,8), %rax
+ add $-2, %r11
+ jmp .Lm22
+
+
+ .align 32, 0x90
+.Lm2top:
+ add %rax, %r10
+ adc %rdx, %rbx
+ mov 0(%rsi,%r11,8), %rax
+ adc $0, %ecx
+ mov $0, %ebp
+ mul %r9
+ add %rax, %rbx
+ mov %r10, 0(%rdi,%r11,8)
+ adc %rdx, %rcx
+ mov 8(%rsi,%r11,8), %rax
+ mul %r8
+ add %rax, %rbx
+ adc %rdx, %rcx
+ adc $0, %ebp
+.Lm20: mov 8(%rsi,%r11,8), %rax
+ mul %r9
+ add %rax, %rcx
+ adc %rdx, %rbp
+ mov 16(%rsi,%r11,8), %rax
+ mov $0, %r10d
+ mul %r8
+ add %rax, %rcx
+ mov 16(%rsi,%r11,8), %rax
+ adc %rdx, %rbp
+ adc $0, %r10d
+ mul %r9
+ add %rax, %rbp
+ mov %rbx, 8(%rdi,%r11,8)
+.Lm23: adc %rdx, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mul %r8
+ mov $0, %ebx
+ add %rax, %rbp
+ adc %rdx, %r10
+ mov %rcx, 16(%rdi,%r11,8)
+ mov 24(%rsi,%r11,8), %rax
+ mov $0, %ecx
+ adc $0, %ebx
+.Lm22: mul %r9
+ add %rax, %r10
+ mov %rbp, 24(%rdi,%r11,8)
+ adc %rdx, %rbx
+ mov 32(%rsi,%r11,8), %rax
+ mul %r8
+ add $4, %r11
+ js .Lm2top
+
+
+ add %rax, %r10
+ adc %rdx, %rbx
+ adc $0, %ecx
+ mov (%rsi), %rax
+ mul %r9
+ mov %r10, (%rdi)
+ add %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, 8(%rdi)
+ mov %rcx, %rax
+
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_mul_2,.-__gmpn_mul_2
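Likewise, a hedged sketch of the assumed mpn_mul_2 contract (standard GMP semantics, not stated in the file): multiply {up, n} by the two limbs at vp, store the low n+1 limbs of the product at rp, and return its most significant limb. The assembly interleaves the two rows in one loop; the sketch below runs them one after the other for clarity.

    #include <stdint.h>

    typedef uint64_t limb;

    /* Sketch of mpn_mul_2: {rp, n+1} = {up, n} * {vp, 2}, returning the top
       limb of the (n+2)-limb product (assumed semantics). */
    static limb ref_mul_2(limb *rp, const limb *up, long n, const limb *vp)
    {
        /* first row: rp[0..n] = {up, n} * vp[0] */
        limb carry = 0;
        for (long i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)up[i] * vp[0] + carry;
            rp[i] = (limb)t;
            carry = (limb)(t >> 64);
        }
        rp[n] = carry;

        /* second row: rp[1..n] += {up, n} * vp[1], return the final carry */
        carry = 0;
        for (long i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)up[i] * vp[1] + rp[i + 1] + carry;
            rp[i + 1] = (limb)t;
            carry = (limb)(t >> 64);
        }
        return carry;
    }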
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mul_basecase.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mul_basecase.s
new file mode 100644
index 0000000..2cfb7aa
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mul_basecase.s
@@ -0,0 +1,483 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mul_basecase
+ .type __gmpn_mul_basecase,@function
+
+__gmpn_mul_basecase:
+
+
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ xor %r13d, %r13d
+ mov (%rsi), %rax
+ mov (%rcx), %r12
+
+ sub %rdx, %r13
+ mov %r13, %r11
+ mov %edx, %ebx
+
+ lea (%rdi,%rdx,8), %rdi
+ lea (%rsi,%rdx,8), %rsi
+
+ mul %r12
+
+ test $1, %r8b
+ jz .Lmul_2
+
+
+
+
+.Lmul_1:
+ and $3, %ebx
+ jz .Lmul_1_prologue_0
+ cmp $2, %ebx
+ jc .Lmul_1_prologue_1
+ jz .Lmul_1_prologue_2
+
+.Lmul_1_prologue_3:
+ add $-1, %r11
+ lea .Laddmul_outer_3(%rip), %r14
+ mov %rax, %r10
+ mov %rdx, %rbx
+ jmp .Lmul_1_entry_3
+
+.Lmul_1_prologue_0:
+ mov %rax, %rbp
+ mov %rdx, %r10
+ lea .Laddmul_outer_0(%rip), %r14
+ jmp .Lmul_1_entry_0
+
+.Lmul_1_prologue_1:
+ cmp $-1, %r13
+ jne 2f
+ mov %rax, -8(%rdi)
+ mov %rdx, (%rdi)
+ jmp .Lret
+2: add $1, %r11
+ lea .Laddmul_outer_1(%rip), %r14
+ mov %rax, %r15
+ mov %rdx, %rbp
+ xor %r10d, %r10d
+ mov (%rsi,%r11,8), %rax
+ jmp .Lmul_1_entry_1
+
+.Lmul_1_prologue_2:
+ add $-2, %r11
+ lea .Laddmul_outer_2(%rip), %r14
+ mov %rax, %rbx
+ mov %rdx, %r15
+ mov 24(%rsi,%r11,8), %rax
+ xor %ebp, %ebp
+ xor %r10d, %r10d
+ jmp .Lmul_1_entry_2
+
+
+
+
+ .align 16, 0x90
+.Lmul_1_top:
+ mov %rbx, -16(%rdi,%r11,8)
+ add %rax, %r15
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rbp
+.Lmul_1_entry_1:
+ xor %ebx, %ebx
+ mul %r12
+ mov %r15, -8(%rdi,%r11,8)
+ add %rax, %rbp
+ adc %rdx, %r10
+.Lmul_1_entry_0:
+ mov 8(%rsi,%r11,8), %rax
+ mul %r12
+ mov %rbp, (%rdi,%r11,8)
+ add %rax, %r10
+ adc %rdx, %rbx
+.Lmul_1_entry_3:
+ mov 16(%rsi,%r11,8), %rax
+ mul %r12
+ mov %r10, 8(%rdi,%r11,8)
+ xor %ebp, %ebp
+ mov %rbp, %r10
+ add %rax, %rbx
+ mov 24(%rsi,%r11,8), %rax
+ mov %rbp, %r15
+ adc %rdx, %r15
+.Lmul_1_entry_2:
+ mul %r12
+ add $4, %r11
+ js .Lmul_1_top
+
+ mov %rbx, -16(%rdi)
+ add %rax, %r15
+ mov %r15, -8(%rdi)
+ adc %rdx, %rbp
+ mov %rbp, (%rdi)
+
+ add $-1, %r8
+ jz .Lret
+
+ mov 8(%rcx), %r12
+ mov 16(%rcx), %r9
+
+ lea 8(%rcx), %rcx
+ lea 8(%rdi), %rdi
+
+ jmp *%r14
+
+
+
+
+ .align 16, 0x90
+.Lmul_2:
+ mov 8(%rcx), %r9
+
+ and $3, %ebx
+ jz .Lmul_2_prologue_0
+ cmp $2, %ebx
+ jz .Lmul_2_prologue_2
+ jc .Lmul_2_prologue_1
+
+.Lmul_2_prologue_3:
+ lea .Laddmul_outer_3(%rip), %r14
+ add $2, %r11
+ mov %rax, -16(%rdi,%r11,8)
+ mov %rdx, %rbp
+ xor %r10d, %r10d
+ xor %ebx, %ebx
+ mov -16(%rsi,%r11,8), %rax
+ jmp .Lmul_2_entry_3
+
+ .align 16, 0x90
+.Lmul_2_prologue_0:
+ add $3, %r11
+ mov %rax, %rbx
+ mov %rdx, %r15
+ xor %ebp, %ebp
+ mov -24(%rsi,%r11,8), %rax
+ lea .Laddmul_outer_0(%rip), %r14
+ jmp .Lmul_2_entry_0
+
+ .align 16, 0x90
+.Lmul_2_prologue_1:
+ mov %rax, %r10
+ mov %rdx, %rbx
+ xor %r15d, %r15d
+ lea .Laddmul_outer_1(%rip), %r14
+ jmp .Lmul_2_entry_1
+
+ .align 16, 0x90
+.Lmul_2_prologue_2:
+ add $1, %r11
+ lea .Laddmul_outer_2(%rip), %r14
+ mov $0, %ebx
+ mov $0, %r15d
+ mov %rax, %rbp
+ mov -8(%rsi,%r11,8), %rax
+ mov %rdx, %r10
+ jmp .Lmul_2_entry_2
+
+
+
+ .align 16, 0x90
+.Lmul_2_top:
+ mov -32(%rsi,%r11,8), %rax
+ mul %r9
+ add %rax, %rbx
+ adc %rdx, %r15
+ mov -24(%rsi,%r11,8), %rax
+ xor %ebp, %ebp
+ mul %r12
+ add %rax, %rbx
+ mov -24(%rsi,%r11,8), %rax
+ adc %rdx, %r15
+ adc $0, %ebp
+.Lmul_2_entry_0:
+ mul %r9
+ add %rax, %r15
+ mov %rbx, -24(%rdi,%r11,8)
+ adc %rdx, %rbp
+ mov -16(%rsi,%r11,8), %rax
+ mul %r12
+ mov $0, %r10d
+ add %rax, %r15
+ adc %rdx, %rbp
+ mov -16(%rsi,%r11,8), %rax
+ adc $0, %r10d
+ mov $0, %ebx
+ mov %r15, -16(%rdi,%r11,8)
+.Lmul_2_entry_3:
+ mul %r9
+ add %rax, %rbp
+ mov -8(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ mov $0, %r15d
+ mul %r12
+ add %rax, %rbp
+ mov -8(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ adc %r15d, %ebx
+.Lmul_2_entry_2:
+ mul %r9
+ add %rax, %r10
+ mov %rbp, -8(%rdi,%r11,8)
+ adc %rdx, %rbx
+ mov (%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %r10
+ adc %rdx, %rbx
+ adc $0, %r15d
+.Lmul_2_entry_1:
+ add $4, %r11
+ mov %r10, -32(%rdi,%r11,8)
+ js .Lmul_2_top
+
+ mov -32(%rsi,%r11,8), %rax
+ mul %r9
+ add %rax, %rbx
+ mov %rbx, (%rdi)
+ adc %rdx, %r15
+ mov %r15, 8(%rdi)
+
+ add $-2, %r8
+ jz .Lret
+
+ mov 16(%rcx), %r12
+ mov 24(%rcx), %r9
+
+ lea 16(%rcx), %rcx
+ lea 16(%rdi), %rdi
+
+ jmp *%r14
+
+
+
+
+
+
+
+
+.Laddmul_outer_0:
+ add $3, %r13
+ lea 0(%rip), %r14
+
+ mov %r13, %r11
+ mov -24(%rsi,%r13,8), %rax
+ mul %r12
+ mov %rax, %rbx
+ mov -24(%rsi,%r13,8), %rax
+ mov %rdx, %r15
+ xor %ebp, %ebp
+ jmp .Laddmul_entry_0
+
+.Laddmul_outer_1:
+ mov %r13, %r11
+ mov (%rsi,%r13,8), %rax
+ mul %r12
+ mov %rax, %r10
+ mov (%rsi,%r13,8), %rax
+ mov %rdx, %rbx
+ xor %r15d, %r15d
+ jmp .Laddmul_entry_1
+
+.Laddmul_outer_2:
+ add $1, %r13
+ lea 0(%rip), %r14
+
+ mov %r13, %r11
+ mov -8(%rsi,%r13,8), %rax
+ mul %r12
+ xor %ebx, %ebx
+ mov %rax, %rbp
+ xor %r15d, %r15d
+ mov %rdx, %r10
+ mov -8(%rsi,%r13,8), %rax
+ jmp .Laddmul_entry_2
+
+.Laddmul_outer_3:
+ add $2, %r13
+ lea 0(%rip), %r14
+
+ mov %r13, %r11
+ mov -16(%rsi,%r13,8), %rax
+ xor %r10d, %r10d
+ mul %r12
+ mov %rax, %r15
+ mov -16(%rsi,%r13,8), %rax
+ mov %rdx, %rbp
+ jmp .Laddmul_entry_3
+
+
+
+ .align 16, 0x90
+.Laddmul_top:
+ add %r10, -32(%rdi,%r11,8)
+ adc %rax, %rbx
+ mov -24(%rsi,%r11,8), %rax
+ adc %rdx, %r15
+ xor %ebp, %ebp
+ mul %r12
+ add %rax, %rbx
+ mov -24(%rsi,%r11,8), %rax
+ adc %rdx, %r15
+ adc %ebp, %ebp
+.Laddmul_entry_0:
+ mul %r9
+ xor %r10d, %r10d
+ add %rbx, -24(%rdi,%r11,8)
+ adc %rax, %r15
+ mov -16(%rsi,%r11,8), %rax
+ adc %rdx, %rbp
+ mul %r12
+ add %rax, %r15
+ mov -16(%rsi,%r11,8), %rax
+ adc %rdx, %rbp
+ adc $0, %r10d
+.Laddmul_entry_3:
+ mul %r9
+ add %r15, -16(%rdi,%r11,8)
+ adc %rax, %rbp
+ mov -8(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ mul %r12
+ xor %ebx, %ebx
+ add %rax, %rbp
+ adc %rdx, %r10
+ mov $0, %r15d
+ mov -8(%rsi,%r11,8), %rax
+ adc %r15d, %ebx
+.Laddmul_entry_2:
+ mul %r9
+ add %rbp, -8(%rdi,%r11,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+ mov (%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %r10
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rbx
+ adc $0, %r15d
+.Laddmul_entry_1:
+ mul %r9
+ add $4, %r11
+ js .Laddmul_top
+
+ add %r10, -8(%rdi)
+ adc %rax, %rbx
+ mov %rbx, (%rdi)
+ adc %rdx, %r15
+ mov %r15, 8(%rdi)
+
+ add $-2, %r8
+ jz .Lret
+
+ lea 16(%rdi), %rdi
+ lea 16(%rcx), %rcx
+
+ mov (%rcx), %r12
+ mov 8(%rcx), %r9
+
+ jmp *%r14
+
+ .align 16, 0x90
+.Lret: pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+
+ .size __gmpn_mul_basecase,.-__gmpn_mul_basecase
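The shape of the routine above, a mul_1 or mul_2 first pass followed by addmul rounds dispatched through the pointer kept in %r14, is ordinary schoolbook multiplication. A hedged C sketch of that overall contract, assuming the usual requirements that un >= vn >= 1 and that the un+vn limbs at rp do not overlap the inputs; it is simplified by zeroing rp and using an addmul for every row instead of a dedicated first pass.

    #include <stdint.h>

    typedef uint64_t limb;

    /* addmul_1 helper: rp[0..n-1] += up[0..n-1] * v, returns the carry-out. */
    static limb ref_addmul_1(limb *rp, const limb *up, long n, limb v)
    {
        limb carry = 0;
        for (long i = 0; i < n; i++) {
            unsigned __int128 t = (unsigned __int128)up[i] * v + rp[i] + carry;
            rp[i] = (limb)t;
            carry = (limb)(t >> 64);
        }
        return carry;
    }

    /* Schoolbook sketch of mpn_mul_basecase:
       {rp, un+vn} = {up, un} * {vp, vn} (assumed contract). */
    static void ref_mul_basecase(limb *rp, const limb *up, long un,
                                 const limb *vp, long vn)
    {
        for (long i = 0; i < un + vn; i++)
            rp[i] = 0;
        for (long j = 0; j < vn; j++)
            rp[un + j] += ref_addmul_1(rp + j, up, un, vp[j]);
    }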
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mullo_basecase.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mullo_basecase.s
new file mode 100644
index 0000000..d76272c
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mullo_basecase.s
@@ -0,0 +1,439 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mullo_basecase
+ .type __gmpn_mullo_basecase,@function
+
+__gmpn_mullo_basecase:
+
+
+ cmp $4, %rcx
+ jge .Lgen
+ mov (%rsi), %rax
+ mov (%rdx), %r8
+
+ lea .Ltab(%rip), %r9
+ movslq (%r9,%rcx,4), %r10
+ add %r10, %r9
+ jmp *%r9
+
+ .section .data.rel.ro.local,"a",@progbits
+ .align 8, 0x90
+.Ltab: .long .Ltab-.Ltab
+ .long .L1-.Ltab
+ .long .L2-.Ltab
+ .long .L3-.Ltab
+ .text
+
+.L1: imul %r8, %rax
+ mov %rax, (%rdi)
+
+ ret
+
+.L2: mov 8(%rdx), %r11
+ imul %rax, %r11
+ mul %r8
+ mov %rax, (%rdi)
+ imul 8(%rsi), %r8
+ lea (%r11, %rdx), %rax
+ add %r8, %rax
+ mov %rax, 8(%rdi)
+
+ ret
+
+.L3: mov 8(%rdx), %r9
+ mov 16(%rdx), %r11
+ mul %r8
+ mov %rax, (%rdi)
+ mov (%rsi), %rax
+ mov %rdx, %rcx
+ mul %r9
+ imul 8(%rsi), %r9
+ mov 16(%rsi), %r10
+ imul %r8, %r10
+ add %rax, %rcx
+ adc %rdx, %r9
+ add %r10, %r9
+ mov 8(%rsi), %rax
+ mul %r8
+ add %rax, %rcx
+ adc %rdx, %r9
+ mov %r11, %rax
+ imul (%rsi), %rax
+ add %rax, %r9
+ mov %rcx, 8(%rdi)
+ mov %r9, 16(%rdi)
+
+ ret
+
+.L0m4:
+.L1m4:
+.L2m4:
+.L3m4:
+.Lgen: push %rbx
+ push %rbp
+ push %r13
+ push %r14
+ push %r15
+
+ mov (%rsi), %rax
+ mov (%rdx), %r13
+ mov %rdx, %r11
+
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ neg %rcx
+
+ mul %r13
+
+ test $1, %cl
+ jz .Lmul_2
+
+.Lmul_1:
+ lea -8(%rdi), %rdi
+ lea -8(%rsi), %rsi
+ test $2, %cl
+ jnz .Lmul_1_prologue_3
+
+.Lmul_1_prologue_2:
+ lea -1(%rcx), %r9
+ lea .Laddmul_outer_1(%rip), %r8
+ mov %rax, %rbx
+ mov %rdx, %r15
+ xor %ebp, %ebp
+ xor %r10d, %r10d
+ mov 16(%rsi,%rcx,8), %rax
+ jmp .Lmul_1_entry_2
+
+.Lmul_1_prologue_3:
+ lea 1(%rcx), %r9
+ lea .Laddmul_outer_3(%rip), %r8
+ mov %rax, %rbp
+ mov %rdx, %r10
+ xor %ebx, %ebx
+ jmp .Lmul_1_entry_0
+
+ .align 16, 0x90
+.Lmul_1_top:
+ mov %rbx, -16(%rdi,%r9,8)
+ add %rax, %r15
+ mov (%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ xor %ebx, %ebx
+ mul %r13
+ mov %r15, -8(%rdi,%r9,8)
+ add %rax, %rbp
+ adc %rdx, %r10
+.Lmul_1_entry_0:
+ mov 8(%rsi,%r9,8), %rax
+ mul %r13
+ mov %rbp, (%rdi,%r9,8)
+ add %rax, %r10
+ adc %rdx, %rbx
+ mov 16(%rsi,%r9,8), %rax
+ mul %r13
+ mov %r10, 8(%rdi,%r9,8)
+ xor %ebp, %ebp
+ mov %rbp, %r10
+ add %rax, %rbx
+ mov 24(%rsi,%r9,8), %rax
+ mov %rbp, %r15
+ adc %rdx, %r15
+.Lmul_1_entry_2:
+ mul %r13
+ add $4, %r9
+ js .Lmul_1_top
+
+ mov %rbx, -16(%rdi)
+ add %rax, %r15
+ mov %r15, -8(%rdi)
+ adc %rdx, %rbp
+
+ imul (%rsi), %r13
+ add %r13, %rbp
+ mov %rbp, (%rdi)
+
+ add $1, %rcx
+ jz .Lret
+
+ mov 8(%r11), %r13
+ mov 16(%r11), %r14
+
+ lea 16(%rsi), %rsi
+ lea 8(%r11), %r11
+ lea 24(%rdi), %rdi
+
+ jmp *%r8
+
+
+.Lmul_2:
+ mov 8(%r11), %r14
+ test $2, %cl
+ jz .Lmul_2_prologue_3
+
+ .align 16, 0x90
+.Lmul_2_prologue_1:
+ lea 0(%rcx), %r9
+ mov %rax, %r10
+ mov %rdx, %rbx
+ xor %r15d, %r15d
+ mov (%rsi,%rcx,8), %rax
+ lea .Laddmul_outer_3(%rip), %r8
+ jmp .Lmul_2_entry_1
+
+ .align 16, 0x90
+.Lmul_2_prologue_3:
+ lea 2(%rcx), %r9
+ mov $0, %r10d
+ mov %rax, %r15
+ mov (%rsi,%rcx,8), %rax
+ mov %rdx, %rbp
+ lea .Laddmul_outer_1(%rip), %r8
+ jmp .Lmul_2_entry_3
+
+ .align 16, 0x90
+.Lmul_2_top:
+ mov -32(%rsi,%r9,8), %rax
+ mul %r14
+ add %rax, %rbx
+ adc %rdx, %r15
+ mov -24(%rsi,%r9,8), %rax
+ xor %ebp, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov -24(%rsi,%r9,8), %rax
+ adc %rdx, %r15
+ adc $0, %ebp
+ mul %r14
+ add %rax, %r15
+ mov %rbx, -24(%rdi,%r9,8)
+ adc %rdx, %rbp
+ mov -16(%rsi,%r9,8), %rax
+ mul %r13
+ mov $0, %r10d
+ add %rax, %r15
+ adc %rdx, %rbp
+ mov -16(%rsi,%r9,8), %rax
+ adc $0, %r10d
+.Lmul_2_entry_3:
+ mov $0, %ebx
+ mov %r15, -16(%rdi,%r9,8)
+ mul %r14
+ add %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ mov $0, %r15d
+ mul %r13
+ add %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ adc %r15d, %ebx
+ mul %r14
+ add %rax, %r10
+ mov %rbp, -8(%rdi,%r9,8)
+ adc %rdx, %rbx
+ mov (%rsi,%r9,8), %rax
+ mul %r13
+ add %rax, %r10
+ adc %rdx, %rbx
+ adc $0, %r15d
+.Lmul_2_entry_1:
+ add $4, %r9
+ mov %r10, -32(%rdi,%r9,8)
+ js .Lmul_2_top
+
+ imul -16(%rsi), %r14
+ add %r14, %rbx
+ imul -8(%rsi), %r13
+ add %r13, %rbx
+ mov %rbx, -8(%rdi)
+
+ add $2, %rcx
+ jz .Lret
+
+ mov 16(%r11), %r13
+ mov 24(%r11), %r14
+
+ lea 16(%r11), %r11
+ lea 16(%rdi), %rdi
+
+ jmp *%r8
+
+
+.Laddmul_outer_1:
+ lea -2(%rcx), %r9
+ mov -16(%rsi,%rcx,8), %rax
+ mul %r13
+ mov %rax, %r10
+ mov -16(%rsi,%rcx,8), %rax
+ mov %rdx, %rbx
+ xor %r15d, %r15d
+ lea .Laddmul_outer_3(%rip), %r8
+ jmp .Laddmul_entry_1
+
+.Laddmul_outer_3:
+ lea 0(%rcx), %r9
+ mov -16(%rsi,%rcx,8), %rax
+ xor %r10d, %r10d
+ mul %r13
+ mov %rax, %r15
+ mov -16(%rsi,%rcx,8), %rax
+ mov %rdx, %rbp
+ lea .Laddmul_outer_1(%rip), %r8
+ jmp .Laddmul_entry_3
+
+ .align 16, 0x90
+.Laddmul_top:
+ add %r10, -32(%rdi,%r9,8)
+ adc %rax, %rbx
+ mov -24(%rsi,%r9,8), %rax
+ adc %rdx, %r15
+ xor %ebp, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov -24(%rsi,%r9,8), %rax
+ adc %rdx, %r15
+ adc %ebp, %ebp
+ mul %r14
+ xor %r10d, %r10d
+ add %rbx, -24(%rdi,%r9,8)
+ adc %rax, %r15
+ mov -16(%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ mul %r13
+ add %rax, %r15
+ mov -16(%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ adc $0, %r10d
+.Laddmul_entry_3:
+ mul %r14
+ add %r15, -16(%rdi,%r9,8)
+ adc %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ mul %r13
+ xor %ebx, %ebx
+ add %rax, %rbp
+ adc %rdx, %r10
+ mov $0, %r15d
+ mov -8(%rsi,%r9,8), %rax
+ adc %r15d, %ebx
+ mul %r14
+ add %rbp, -8(%rdi,%r9,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+ mov (%rsi,%r9,8), %rax
+ mul %r13
+ add %rax, %r10
+ mov (%rsi,%r9,8), %rax
+ adc %rdx, %rbx
+ adc $0, %r15d
+.Laddmul_entry_1:
+ mul %r14
+ add $4, %r9
+ js .Laddmul_top
+
+ add %r10, -32(%rdi)
+ adc %rax, %rbx
+
+ imul -24(%rsi), %r13
+ add %r13, %rbx
+ add %rbx, -24(%rdi)
+
+ add $2, %rcx
+ jns .Lret
+
+ lea 16(%r11), %r11
+
+ mov (%r11), %r13
+ mov 8(%r11), %r14
+
+ lea -16(%rsi), %rsi
+
+ jmp *%r8
+
+.Lret: pop %r15
+ pop %r14
+ pop %r13
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_mullo_basecase,.-__gmpn_mullo_basecase
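As a hedged reference only: mullo keeps just the low n limbs of the n-by-n product, which is why the small cases above (.L1 through .L3) can use imul for partial products whose high halves never reach the kept limbs. A C sketch of the assumed contract:

    #include <stdint.h>

    typedef uint64_t limb;

    /* Sketch of mpn_mullo_basecase: {rp, n} = low n limbs of
       {up, n} * {vp, n}; partial products that cannot reach limb n-1 are
       simply never formed (assumed semantics). */
    static void ref_mullo_basecase(limb *rp, const limb *up, const limb *vp, long n)
    {
        for (long i = 0; i < n; i++)
            rp[i] = 0;
        for (long j = 0; j < n; j++) {
            limb carry = 0;
            for (long i = 0; i + j < n; i++) {
                unsigned __int128 t =
                    (unsigned __int128)up[i] * vp[j] + rp[i + j] + carry;
                rp[i + j] = (limb)t;
                carry = (limb)(t >> 64);   /* carry past limb n-1 is discarded */
            }
        }
    }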
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/mulmid_basecase.s b/vere/ext/gmp/gen/x86_64-linux/mpn/mulmid_basecase.s
new file mode 100644
index 0000000..b607e84
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/mulmid_basecase.s
@@ -0,0 +1,573 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_mulmid_basecase
+ .type __gmpn_mulmid_basecase,@function
+
+__gmpn_mulmid_basecase:
+
+
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov %rcx, %r15
+
+
+ lea 1(%rdx), %r13
+ sub %r8, %r13
+
+ lea (%rdi,%r13,8), %rdi
+
+ cmp $4, %r13
+ jc .Ldiagonal
+
+ lea (%rsi,%rdx,8), %rsi
+
+ test $1, %r8
+ jz .Lmul_2
+
+
+
+
+.Lmul_1:
+ mov %r13d, %ebx
+
+ neg %r13
+ mov (%rsi,%r13,8), %rax
+ mov (%r15), %r12
+ mul %r12
+
+ and $-4, %r13
+ mov %r13, %r11
+
+ and $3, %ebx
+ jz .Lmul_1_prologue_0
+ cmp $2, %ebx
+ jc .Lmul_1_prologue_1
+ jz .Lmul_1_prologue_2
+
+.Lmul_1_prologue_3:
+ mov %rax, %r10
+ mov %rdx, %rbx
+ lea .Laddmul_prologue_3(%rip), %r14
+ jmp .Lmul_1_entry_3
+
+ .align 16, 0x90
+.Lmul_1_prologue_0:
+ mov %rax, %rbp
+ mov %rdx, %r10
+ lea .Laddmul_prologue_0(%rip), %r14
+ jmp .Lmul_1_entry_0
+
+ .align 16, 0x90
+.Lmul_1_prologue_1:
+ add $4, %r11
+ mov %rax, %rcx
+ mov %rdx, %rbp
+ mov $0, %r10d
+ mov (%rsi,%r11,8), %rax
+ lea .Laddmul_prologue_1(%rip), %r14
+ jmp .Lmul_1_entry_1
+
+ .align 16, 0x90
+.Lmul_1_prologue_2:
+ mov %rax, %rbx
+ mov %rdx, %rcx
+ mov 24(%rsi,%r11,8), %rax
+ mov $0, %ebp
+ mov $0, %r10d
+ lea .Laddmul_prologue_2(%rip), %r14
+ jmp .Lmul_1_entry_2
+
+
+
+
+ .align 16, 0x90
+.Lmul_1_top:
+ mov %rbx, -16(%rdi,%r11,8)
+ add %rax, %rcx
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rbp
+.Lmul_1_entry_1:
+ mov $0, %ebx
+ mul %r12
+ mov %rcx, -8(%rdi,%r11,8)
+ add %rax, %rbp
+ adc %rdx, %r10
+.Lmul_1_entry_0:
+ mov 8(%rsi,%r11,8), %rax
+ mul %r12
+ mov %rbp, (%rdi,%r11,8)
+ add %rax, %r10
+ adc %rdx, %rbx
+.Lmul_1_entry_3:
+ mov 16(%rsi,%r11,8), %rax
+ mul %r12
+ mov %r10, 8(%rdi,%r11,8)
+ mov $0, %ebp
+ mov %rbp, %r10
+ add %rax, %rbx
+ mov 24(%rsi,%r11,8), %rax
+ mov %rbp, %rcx
+ adc %rdx, %rcx
+.Lmul_1_entry_2:
+ mul %r12
+ add $4, %r11
+ js .Lmul_1_top
+
+ mov %rbx, -16(%rdi)
+ add %rax, %rcx
+ mov %rcx, -8(%rdi)
+ mov %rbp, 8(%rdi)
+ adc %rdx, %rbp
+ mov %rbp, (%rdi)
+
+ dec %r8
+ jz .Lret
+
+ lea -8(%rsi), %rsi
+ lea 8(%r15), %r15
+
+ mov %r13, %r11
+ mov (%r15), %r12
+ mov 8(%r15), %r9
+
+ jmp *%r14
+
+
+
+
+ .align 16, 0x90
+.Lmul_2:
+ mov %r13d, %ebx
+
+ neg %r13
+ mov -8(%rsi,%r13,8), %rax
+ mov (%r15), %r12
+ mov 8(%r15), %r9
+ mul %r9
+
+ and $-4, %r13
+ mov %r13, %r11
+
+ and $3, %ebx
+ jz .Lmul_2_prologue_0
+ cmp $2, %ebx
+ jc .Lmul_2_prologue_1
+ jz .Lmul_2_prologue_2
+
+.Lmul_2_prologue_3:
+ mov %rax, %rcx
+ mov %rdx, %rbp
+ lea .Laddmul_prologue_3(%rip), %r14
+ jmp .Lmul_2_entry_3
+
+ .align 16, 0x90
+.Lmul_2_prologue_0:
+ mov %rax, %rbx
+ mov %rdx, %rcx
+ lea .Laddmul_prologue_0(%rip), %r14
+ jmp .Lmul_2_entry_0
+
+ .align 16, 0x90
+.Lmul_2_prologue_1:
+ mov %rax, %r10
+ mov %rdx, %rbx
+ mov $0, %ecx
+ lea .Laddmul_prologue_1(%rip), %r14
+ jmp .Lmul_2_entry_1
+
+ .align 16, 0x90
+.Lmul_2_prologue_2:
+ mov %rax, %rbp
+ mov %rdx, %r10
+ mov $0, %ebx
+ mov 16(%rsi,%r11,8), %rax
+ lea .Laddmul_prologue_2(%rip), %r14
+ jmp .Lmul_2_entry_2
+
+
+
+
+ .align 16, 0x90
+.Lmul_2_top:
+ mov -8(%rsi,%r11,8), %rax
+ mul %r9
+ add %rax, %rbx
+ adc %rdx, %rcx
+.Lmul_2_entry_0:
+ mov $0, %ebp
+ mov (%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %rbx
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+ mul %r9
+ add %rax, %rcx
+ mov %rbx, (%rdi,%r11,8)
+ adc %rdx, %rbp
+.Lmul_2_entry_3:
+ mov 8(%rsi,%r11,8), %rax
+ mul %r12
+ mov $0, %r10d
+ add %rax, %rcx
+ adc %rdx, %rbp
+ mov $0, %ebx
+ adc $0, %r10d
+ mov 8(%rsi,%r11,8), %rax
+ mov %rcx, 8(%rdi,%r11,8)
+ mul %r9
+ add %rax, %rbp
+ mov 16(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+.Lmul_2_entry_2:
+ mov $0, %ecx
+ mul %r12
+ add %rax, %rbp
+ mov 16(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ adc $0, %ebx
+ mul %r9
+ add %rax, %r10
+ mov %rbp, 16(%rdi,%r11,8)
+ adc %rdx, %rbx
+.Lmul_2_entry_1:
+ mov 24(%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %r10
+ adc %rdx, %rbx
+ adc $0, %ecx
+ add $4, %r11
+ mov %r10, -8(%rdi,%r11,8)
+ jnz .Lmul_2_top
+
+ mov %rbx, (%rdi)
+ mov %rcx, 8(%rdi)
+
+ sub $2, %r8
+ jz .Lret
+
+ lea 16(%r15), %r15
+ lea -16(%rsi), %rsi
+
+ mov %r13, %r11
+ mov (%r15), %r12
+ mov 8(%r15), %r9
+
+ jmp *%r14
+
+
+
+
+ .align 16, 0x90
+.Laddmul_prologue_0:
+ mov -8(%rsi,%r11,8), %rax
+ mul %r9
+ mov %rax, %rcx
+ mov %rdx, %rbp
+ mov $0, %r10d
+ jmp .Laddmul_entry_0
+
+ .align 16, 0x90
+.Laddmul_prologue_1:
+ mov 16(%rsi,%r11,8), %rax
+ mul %r9
+ mov %rax, %rbx
+ mov %rdx, %rcx
+ mov $0, %ebp
+ mov 24(%rsi,%r11,8), %rax
+ jmp .Laddmul_entry_1
+
+ .align 16, 0x90
+.Laddmul_prologue_2:
+ mov 8(%rsi,%r11,8), %rax
+ mul %r9
+ mov %rax, %r10
+ mov %rdx, %rbx
+ mov $0, %ecx
+ jmp .Laddmul_entry_2
+
+ .align 16, 0x90
+.Laddmul_prologue_3:
+ mov (%rsi,%r11,8), %rax
+ mul %r9
+ mov %rax, %rbp
+ mov %rdx, %r10
+ mov $0, %ebx
+ mov $0, %ecx
+ jmp .Laddmul_entry_3
+
+
+
+ .align 16, 0x90
+.Laddmul_top:
+ mov $0, %r10d
+ add %rax, %rbx
+ mov -8(%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+ mul %r9
+ add %rbx, -8(%rdi,%r11,8)
+ adc %rax, %rcx
+ adc %rdx, %rbp
+.Laddmul_entry_0:
+ mov (%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %rcx
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %rbp
+ adc $0, %r10d
+ mul %r9
+ add %rcx, (%rdi,%r11,8)
+ mov $0, %ecx
+ adc %rax, %rbp
+ mov $0, %ebx
+ adc %rdx, %r10
+.Laddmul_entry_3:
+ mov 8(%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %rbp
+ mov 8(%rsi,%r11,8), %rax
+ adc %rdx, %r10
+ adc $0, %ebx
+ mul %r9
+ add %rbp, 8(%rdi,%r11,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+.Laddmul_entry_2:
+ mov 16(%rsi,%r11,8), %rax
+ mul %r12
+ add %rax, %r10
+ mov 16(%rsi,%r11,8), %rax
+ adc %rdx, %rbx
+ adc $0, %ecx
+ mul %r9
+ add %r10, 16(%rdi,%r11,8)
+ nop
+ adc %rax, %rbx
+ mov $0, %ebp
+ mov 24(%rsi,%r11,8), %rax
+ adc %rdx, %rcx
+.Laddmul_entry_1:
+ mul %r12
+ add $4, %r11
+ jnz .Laddmul_top
+
+ add %rax, %rbx
+ adc %rdx, %rcx
+ adc $0, %ebp
+
+ add %rbx, -8(%rdi)
+ adc %rcx, (%rdi)
+ adc %rbp, 8(%rdi)
+
+ sub $2, %r8
+ jz .Lret
+
+ lea 16(%r15), %r15
+ lea -16(%rsi), %rsi
+
+ mov %r13, %r11
+ mov (%r15), %r12
+ mov 8(%r15), %r9
+
+ jmp *%r14
+
+
+
+
+ .align 16, 0x90
+.Ldiagonal:
+ xor %ebx, %ebx
+ xor %ecx, %ecx
+ xor %ebp, %ebp
+
+ neg %r13
+
+ mov %r8d, %eax
+ and $3, %eax
+ jz .Ldiag_prologue_0
+ cmp $2, %eax
+ jc .Ldiag_prologue_1
+ jz .Ldiag_prologue_2
+
+.Ldiag_prologue_3:
+ lea -8(%r15), %r15
+ mov %r15, %r10
+ add $1, %r8
+ mov %r8, %r11
+ lea .Ldiag_entry_3(%rip), %r14
+ jmp .Ldiag_entry_3
+
+.Ldiag_prologue_0:
+ mov %r15, %r10
+ mov %r8, %r11
+ lea 0(%rip), %r14
+ mov -8(%rsi,%r11,8), %rax
+ jmp .Ldiag_entry_0
+
+.Ldiag_prologue_1:
+ lea 8(%r15), %r15
+ mov %r15, %r10
+ add $3, %r8
+ mov %r8, %r11
+ lea 0(%rip), %r14
+ mov -8(%r10), %rax
+ jmp .Ldiag_entry_1
+
+.Ldiag_prologue_2:
+ lea -16(%r15), %r15
+ mov %r15, %r10
+ add $2, %r8
+ mov %r8, %r11
+ lea 0(%rip), %r14
+ mov 16(%r10), %rax
+ jmp .Ldiag_entry_2
+
+
+
+
+ .align 16, 0x90
+.Ldiag_top:
+ add %rax, %rbx
+ adc %rdx, %rcx
+ mov -8(%rsi,%r11,8), %rax
+ adc $0, %rbp
+.Ldiag_entry_0:
+ mulq (%r10)
+ add %rax, %rbx
+ adc %rdx, %rcx
+ adc $0, %rbp
+.Ldiag_entry_3:
+ mov -16(%rsi,%r11,8), %rax
+ mulq 8(%r10)
+ add %rax, %rbx
+ mov 16(%r10), %rax
+ adc %rdx, %rcx
+ adc $0, %rbp
+.Ldiag_entry_2:
+ mulq -24(%rsi,%r11,8)
+ add %rax, %rbx
+ mov 24(%r10), %rax
+ adc %rdx, %rcx
+ lea 32(%r10), %r10
+ adc $0, %rbp
+.Ldiag_entry_1:
+ mulq -32(%rsi,%r11,8)
+ sub $4, %r11
+ jnz .Ldiag_top
+
+ add %rax, %rbx
+ adc %rdx, %rcx
+ adc $0, %rbp
+
+ mov %rbx, (%rdi,%r13,8)
+
+ inc %r13
+ jz .Ldiag_end
+
+ mov %r8, %r11
+ mov %r15, %r10
+
+ lea 8(%rsi), %rsi
+ mov %rcx, %rbx
+ mov %rbp, %rcx
+ xor %ebp, %ebp
+
+ jmp *%r14
+
+.Ldiag_end:
+ mov %rcx, (%rdi)
+ mov %rbp, 8(%rdi)
+
+.Lret: pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_mulmid_basecase,.-__gmpn_mulmid_basecase
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/nand_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/nand_n.s
new file mode 100644
index 0000000..04593b9
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/nand_n.s
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_nand_n
+ .type __gmpn_nand_n,@function
+
+__gmpn_nand_n:
+
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: and (%rsi,%rcx,8), %r8
+ not %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+ .byte 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+.Lb01: and (%rsi,%rcx,8), %r8
+ not %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ and (%rsi,%rcx,8), %r8
+ not %r8
+ and 8(%rsi,%rcx,8), %r9
+ not %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ and 16(%rsi,%rcx,8), %r8
+ not %r8
+ and 24(%rsi,%rcx,8), %r9
+ not %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_nand_n,.-__gmpn_nand_n
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/nior_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/nior_n.s
new file mode 100644
index 0000000..8ea0437
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/nior_n.s
@@ -0,0 +1,155 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_nior_n
+ .type __gmpn_nior_n,@function
+
+__gmpn_nior_n:
+
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: or (%rsi,%rcx,8), %r8
+ not %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+ .byte 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+.Lb01: or (%rsi,%rcx,8), %r8
+ not %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ or (%rsi,%rcx,8), %r8
+ not %r8
+ or 8(%rsi,%rcx,8), %r9
+ not %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ or 16(%rsi,%rcx,8), %r8
+ not %r8
+ or 24(%rsi,%rcx,8), %r9
+ not %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_nior_n,.-__gmpn_nior_n
+
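The two logical routines above differ only in the boolean operation; per limb they compute the complement of AND and the complement of OR of the operands. A one-loop C sketch of the assumed contracts:

    #include <stdint.h>

    /* Sketches of mpn_nand_n and mpn_nior_n (assumed semantics): elementwise
       ~(u & v) and ~(u | v) over n 64-bit limbs. */
    static void ref_nand_n(uint64_t *rp, const uint64_t *up, const uint64_t *vp, long n)
    {
        for (long i = 0; i < n; i++)
            rp[i] = ~(up[i] & vp[i]);
    }

    static void ref_nior_n(uint64_t *rp, const uint64_t *up, const uint64_t *vp, long n)
    {
        for (long i = 0; i < n; i++)
            rp[i] = ~(up[i] | vp[i]);
    }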
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/perfsqr.h b/vere/ext/gmp/gen/x86_64-linux/mpn/perfsqr.h
new file mode 100644
index 0000000..80c5eb7
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/perfsqr.h
@@ -0,0 +1,46 @@
+/* This file generated by gen-psqr.c - DO NOT EDIT. */
+
+#if GMP_LIMB_BITS != 64 || GMP_NAIL_BITS != 0
+Error, error, this data is for 64 bit limb and 0 bit nail
+#endif
+
+/* Non-zero bit indicates a quadratic residue mod 0x100.
+ This test identifies 82.81% as non-squares (212/256). */
+static const mp_limb_t
+sq_res_0x100[4] = {
+ CNST_LIMB(0x202021202030213),
+ CNST_LIMB(0x202021202020213),
+ CNST_LIMB(0x202021202030212),
+ CNST_LIMB(0x202021202020212),
+};
+
+/* 2^48-1 = 3^2 * 5 * 7 * 13 * 17 * 97 ... */
+#define PERFSQR_MOD_BITS 49
+
+/* This test identifies 97.81% as non-squares. */
+#define PERFSQR_MOD_TEST(up, usize) \
+ do { \
+ mp_limb_t r; \
+ PERFSQR_MOD_34 (r, up, usize); \
+ \
+ /* 69.23% */ \
+ PERFSQR_MOD_2 (r, CNST_LIMB(91), CNST_LIMB(0xfd2fd2fd2fd3), \
+ CNST_LIMB(0x2191240), CNST_LIMB(0x8850a206953820e1)); \
+ \
+ /* 68.24% */ \
+ PERFSQR_MOD_2 (r, CNST_LIMB(85), CNST_LIMB(0xfcfcfcfcfcfd), \
+ CNST_LIMB(0x82158), CNST_LIMB(0x10b48c4b4206a105)); \
+ \
+ /* 55.56% */ \
+ PERFSQR_MOD_1 (r, CNST_LIMB( 9), CNST_LIMB(0xe38e38e38e39), \
+ CNST_LIMB(0x93)); \
+ \
+ /* 49.48% */ \
+ PERFSQR_MOD_2 (r, CNST_LIMB(97), CNST_LIMB(0xfd5c5f02a3a1), \
+ CNST_LIMB(0x1eb628b47), CNST_LIMB(0x6067981b8b451b5f)); \
+ } while (0)
+
+/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, 99.62% non-squares. */
+
+/* helper for tests/mpz/t-perfsqr.c */
+#define PERFSQR_DIVISORS { 256, 91, 85, 9, 97, }
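The comments in this header already describe the filter; as a hedged illustration of how the sq_res_0x100 bitmap would be consulted (the PERFSQR_MOD_* macros it references are defined elsewhere in GMP and are not reproduced here), a candidate square must have the bit for its residue mod 256 set in the 256-bit table:

    #include <stdint.h>

    /* Sketch: bit i of the 256-bit table is set iff i is a quadratic residue
       mod 256, so a number whose low byte's bit is clear cannot be a square.
       The data is copied from the header above; the lookup shape is the
       assumed usage. */
    static const uint64_t sq_res_0x100[4] = {
        0x202021202030213ULL, 0x202021202020213ULL,
        0x202021202030212ULL, 0x202021202020212ULL,
    };

    static int maybe_square_mod_256(uint64_t n)
    {
        unsigned r = (unsigned)(n & 0xff);
        return (int)((sq_res_0x100[r / 64] >> (r % 64)) & 1);
    }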
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/popcount.s b/vere/ext/gmp/gen/x86_64-linux/mpn/popcount.s
new file mode 100644
index 0000000..243219e
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/popcount.s
@@ -0,0 +1,160 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_popcount
+ .type __gmpn_popcount,@function
+
+__gmpn_popcount:
+
+
+ push %rbx
+ mov $0x5555555555555555, %r10
+ push %rbp
+ mov $0x3333333333333333, %r11
+ lea (%rdi,%rsi,8), %rdi
+ mov $0x0f0f0f0f0f0f0f0f, %rcx
+ neg %rsi
+ mov $0x0101010101010101, %rdx
+ xor %eax, %eax
+ test $1, %sil
+ jz .Ltop
+
+ mov (%rdi,%rsi,8), %r8
+
+ mov %r8, %r9
+ shr %r8
+ and %r10, %r8
+ sub %r8, %r9
+
+ mov %r9, %r8
+ shr $2, %r9
+ and %r11, %r8
+ and %r11, %r9
+ add %r8, %r9
+
+ dec %rsi
+ jmp .Lmid
+
+ .align 16, 0x90
+.Ltop: mov (%rdi,%rsi,8), %r8
+ mov 8(%rdi,%rsi,8), %rbx
+
+ mov %r8, %r9
+ mov %rbx, %rbp
+ shr %r8
+ shr %rbx
+ and %r10, %r8
+ and %r10, %rbx
+ sub %r8, %r9
+ sub %rbx, %rbp
+
+ mov %r9, %r8
+ mov %rbp, %rbx
+ shr $2, %r9
+ shr $2, %rbp
+ and %r11, %r8
+ and %r11, %r9
+ and %r11, %rbx
+ and %r11, %rbp
+ add %r8, %r9
+ add %rbx, %rbp
+
+ add %rbp, %r9
+.Lmid: mov %r9, %r8
+ shr $4, %r9
+ and %rcx, %r8
+ and %rcx, %r9
+ add %r8, %r9
+
+ imul %rdx, %r9
+ shr $56, %r9
+
+ add %r9, %rax
+ add $2, %rsi
+ jnc .Ltop
+
+.Lend:
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_popcount,.-__gmpn_popcount
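The constants loaded at the top of this routine (0x5555..., 0x3333..., 0x0f0f..., 0x0101...) are the classic SWAR bit-counting masks. A C sketch of the same per-limb reduction, offered as an illustration rather than a drop-in replacement for the two-limb unrolled loop above:

    #include <stdint.h>

    /* SWAR popcount of one 64-bit limb, using the same masks as the assembly:
       pairwise sums, then 2-bit and 4-bit field sums, then a multiply by
       0x0101...01 that gathers all byte counts into the top byte. */
    static unsigned popcount_limb(uint64_t x)
    {
        x -= (x >> 1) & 0x5555555555555555ULL;
        x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
        x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        return (unsigned)((x * 0x0101010101010101ULL) >> 56);
    }

    /* mpn_popcount sums this over all n limbs of the operand. */
    static uint64_t ref_popcount(const uint64_t *up, long n)
    {
        uint64_t acc = 0;
        for (long i = 0; i < n; i++)
            acc += popcount_limb(up[i]);
        return acc;
    }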
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/redc_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/redc_1.s
new file mode 100644
index 0000000..da7fd88
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/redc_1.s
@@ -0,0 +1,603 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_redc_1
+ .type __gmpn_redc_1,@function
+
+__gmpn_redc_1:
+
+
+
+ push %rbp
+ mov (%rsi), %rbp
+ push %rbx
+ imul %r8, %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov %rcx, %r12
+ neg %r12
+ lea (%rdx,%rcx,8), %r13
+ lea -16(%rsi,%rcx,8), %rsi
+
+ mov %ecx, %eax
+ and $3, %eax
+ lea 4(%rax), %r9
+ cmp $4, %ecx
+ cmovg %r9, %rax
+ lea .Ltab(%rip), %r9
+
+ movslq (%r9,%rax,4), %rax
+ add %r9, %rax
+ jmp *%rax
+
+
+ .section .data.rel.ro.local,"a",@progbits
+ .align 8, 0x90
+.Ltab: .long .L0-.Ltab
+ .long .L1-.Ltab
+ .long .L2-.Ltab
+ .long .L3-.Ltab
+ .long .L0m4-.Ltab
+ .long .L1m4-.Ltab
+ .long .L2m4-.Ltab
+ .long .L3m4-.Ltab
+ .text
+
+ .align 16, 0x90
+.L1: mov (%rdx), %rax
+ mul %rbp
+ add 8(%rsi), %rax
+ adc 16(%rsi), %rdx
+ mov %rdx, (%rdi)
+ mov $0, %eax
+ adc %eax, %eax
+ jmp .Lret
+
+
+ .align 16, 0x90
+.L2: mov (%rdx), %rax
+ mul %rbp
+ xor %r14d, %r14d
+ mov %rax, %r10
+ mov -8(%r13), %rax
+ mov %rdx, %r9
+ mul %rbp
+ add (%rsi), %r10
+ adc %rax, %r9
+ adc %rdx, %r14
+ add 8(%rsi), %r9
+ adc $0, %r14
+ mov %r9, %rbp
+ imul %r8, %rbp
+ mov -16(%r13), %rax
+ mul %rbp
+ xor %ebx, %ebx
+ mov %rax, %r10
+ mov -8(%r13), %rax
+ mov %rdx, %r11
+ mul %rbp
+ add %r9, %r10
+ adc %rax, %r11
+ adc %rdx, %rbx
+ add 16(%rsi), %r11
+ adc $0, %rbx
+ xor %eax, %eax
+ add %r11, %r14
+ adc 24(%rsi), %rbx
+ mov %r14, (%rdi)
+ mov %rbx, 8(%rdi)
+ adc %eax, %eax
+ jmp .Lret
+
+
+.L3: mov (%rdx), %rax
+ mul %rbp
+ mov %rax, %rbx
+ mov %rdx, %r10
+ mov -16(%r13), %rax
+ mul %rbp
+ xor %r9d, %r9d
+ xor %r14d, %r14d
+ add -8(%rsi), %rbx
+ adc %rax, %r10
+ mov -8(%r13), %rax
+ adc %rdx, %r9
+ mul %rbp
+ add (%rsi), %r10
+ mov %r10, (%rsi)
+ adc %rax, %r9
+ adc %rdx, %r14
+ mov %r10, %rbp
+ imul %r8, %rbp
+ add %r9, 8(%rsi)
+ adc $0, %r14
+ mov %r14, -8(%rsi)
+
+ mov -24(%r13), %rax
+ mul %rbp
+ mov %rax, %rbx
+ mov %rdx, %r10
+ mov -16(%r13), %rax
+ mul %rbp
+ xor %r9d, %r9d
+ xor %r14d, %r14d
+ add (%rsi), %rbx
+ adc %rax, %r10
+ mov -8(%r13), %rax
+ adc %rdx, %r9
+ mul %rbp
+ add 8(%rsi), %r10
+ mov %r10, 8(%rsi)
+ adc %rax, %r9
+ adc %rdx, %r14
+ mov %r10, %rbp
+ imul %r8, %rbp
+ add %r9, 16(%rsi)
+ adc $0, %r14
+ mov %r14, (%rsi)
+
+ mov -24(%r13), %rax
+ mul %rbp
+ mov %rax, %rbx
+ mov %rdx, %r10
+ mov -16(%r13), %rax
+ mul %rbp
+ xor %r9d, %r9d
+ xor %r14d, %r14d
+ add 8(%rsi), %rbx
+ adc %rax, %r10
+ mov -8(%r13), %rax
+ adc %rdx, %r9
+ mul %rbp
+ add 16(%rsi), %r10
+ adc %rax, %r9
+ adc %rdx, %r14
+ add 24(%rsi), %r9
+ adc $0, %r14
+
+ xor %eax, %eax
+ add -8(%rsi), %r10
+ adc (%rsi), %r9
+ adc 32(%rsi), %r14
+ mov %r10, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r14, 16(%rdi)
+ adc %eax, %eax
+ jmp .Lret
+
+
+ .align 16, 0x90
+.L2m4:
+.Llo2: mov (%r13,%r12,8), %rax
+ mul %rbp
+ xor %r14d, %r14d
+ xor %ebx, %ebx
+ mov %rax, %r10
+ mov 8(%r13,%r12,8), %rax
+ mov 24(%rsi,%r12,8), %r15
+ mov %rdx, %r9
+ mul %rbp
+ add 16(%rsi,%r12,8), %r10
+ adc %rax, %r9
+ mov 16(%r13,%r12,8), %rax
+ adc %rdx, %r14
+ mul %rbp
+ mov $0, %r10d
+ lea 2(%r12), %r11
+ add %r9, %r15
+ imul %r8, %r15
+ jmp .Le2
+
+ .align 16, 0x90
+.Lli2: add %r10, (%rsi,%r11,8)
+ adc %rax, %r9
+ mov (%r13,%r11,8), %rax
+ adc %rdx, %r14
+ xor %r10d, %r10d
+ mul %rbp
+.Le2: add %r9, 8(%rsi,%r11,8)
+ adc %rax, %r14
+ adc %rdx, %rbx
+ mov 8(%r13,%r11,8), %rax
+ mul %rbp
+ add %r14, 16(%rsi,%r11,8)
+ adc %rax, %rbx
+ adc %rdx, %r10
+ mov 16(%r13,%r11,8), %rax
+ mul %rbp
+ add %rbx, 24(%rsi,%r11,8)
+ mov $0, %r14d
+ mov %r14, %rbx
+ adc %rax, %r10
+ mov 24(%r13,%r11,8), %rax
+ mov %r14, %r9
+ adc %rdx, %r9
+ mul %rbp
+ add $4, %r11
+ js .Lli2
+
+.Lle2: add %r10, (%rsi)
+ adc %rax, %r9
+ adc %r14, %rdx
+ add %r9, 8(%rsi)
+ adc $0, %rdx
+ mov %rdx, 16(%rsi,%r12,8)
+ add $8, %rsi
+ mov %r15, %rbp
+ dec %rcx
+ jnz .Llo2
+
+ mov %r12, %rcx
+ sar $2, %rcx
+ lea 32(%rsi,%r12,8), %rsi
+ lea (%rsi,%r12,8), %rdx
+
+ mov -16(%rsi), %r8
+ mov -8(%rsi), %r9
+ add -16(%rdx), %r8
+ adc -8(%rdx), %r9
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ lea 16(%rdi), %rdi
+ jmp .Laddx
+
+
+ .align 16, 0x90
+.L1m4:
+.Llo1: mov (%r13,%r12,8), %rax
+ xor %r9, %r9
+ xor %ebx, %ebx
+ mul %rbp
+ mov %rax, %r9
+ mov 8(%r13,%r12,8), %rax
+ mov 24(%rsi,%r12,8), %r15
+ mov %rdx, %r14
+ mov $0, %r10d
+ mul %rbp
+ add 16(%rsi,%r12,8), %r9
+ adc %rax, %r14
+ adc %rdx, %rbx
+ mov 16(%r13,%r12,8), %rax
+ mul %rbp
+ lea 1(%r12), %r11
+ add %r14, %r15
+ imul %r8, %r15
+ jmp .Le1
+
+ .align 16, 0x90
+.Lli1: add %r10, (%rsi,%r11,8)
+ adc %rax, %r9
+ mov (%r13,%r11,8), %rax
+ adc %rdx, %r14
+ xor %r10d, %r10d
+ mul %rbp
+ add %r9, 8(%rsi,%r11,8)
+ adc %rax, %r14
+ adc %rdx, %rbx
+ mov 8(%r13,%r11,8), %rax
+ mul %rbp
+.Le1: add %r14, 16(%rsi,%r11,8)
+ adc %rax, %rbx
+ adc %rdx, %r10
+ mov 16(%r13,%r11,8), %rax
+ mul %rbp
+ add %rbx, 24(%rsi,%r11,8)
+ mov $0, %r14d
+ mov %r14, %rbx
+ adc %rax, %r10
+ mov 24(%r13,%r11,8), %rax
+ mov %r14, %r9
+ adc %rdx, %r9
+ mul %rbp
+ add $4, %r11
+ js .Lli1
+
+.Lle1: add %r10, (%rsi)
+ adc %rax, %r9
+ adc %r14, %rdx
+ add %r9, 8(%rsi)
+ adc $0, %rdx
+ mov %rdx, 16(%rsi,%r12,8)
+ add $8, %rsi
+ mov %r15, %rbp
+ dec %rcx
+ jnz .Llo1
+
+ mov %r12, %rcx
+ sar $2, %rcx
+ lea 24(%rsi,%r12,8), %rsi
+ lea (%rsi,%r12,8), %rdx
+
+ mov -8(%rsi), %r8
+ add -8(%rdx), %r8
+ mov %r8, (%rdi)
+ lea 8(%rdi), %rdi
+ jmp .Laddx
+
+
+ .align 16, 0x90
+.L0:
+.L0m4:
+.Llo0: mov (%r13,%r12,8), %rax
+ mov %r12, %r11
+ mul %rbp
+ xor %r10d, %r10d
+ mov %rax, %r14
+ mov %rdx, %rbx
+ mov 8(%r13,%r12,8), %rax
+ mov 24(%rsi,%r12,8), %r15
+ mul %rbp
+ add 16(%rsi,%r12,8), %r14
+ adc %rax, %rbx
+ adc %rdx, %r10
+ add %rbx, %r15
+ imul %r8, %r15
+ jmp .Le0
+
+ .align 16, 0x90
+.Lli0: add %r10, (%rsi,%r11,8)
+ adc %rax, %r9
+ mov (%r13,%r11,8), %rax
+ adc %rdx, %r14
+ xor %r10d, %r10d
+ mul %rbp
+ add %r9, 8(%rsi,%r11,8)
+ adc %rax, %r14
+ adc %rdx, %rbx
+ mov 8(%r13,%r11,8), %rax
+ mul %rbp
+ add %r14, 16(%rsi,%r11,8)
+ adc %rax, %rbx
+ adc %rdx, %r10
+.Le0: mov 16(%r13,%r11,8), %rax
+ mul %rbp
+ add %rbx, 24(%rsi,%r11,8)
+ mov $0, %r14d
+ mov %r14, %rbx
+ adc %rax, %r10
+ mov 24(%r13,%r11,8), %rax
+ mov %r14, %r9
+ adc %rdx, %r9
+ mul %rbp
+ add $4, %r11
+ js .Lli0
+
+.Lle0: add %r10, (%rsi)
+ adc %rax, %r9
+ adc %r14, %rdx
+ add %r9, 8(%rsi)
+ adc $0, %rdx
+ mov %rdx, 16(%rsi,%r12,8)
+ add $8, %rsi
+ mov %r15, %rbp
+ dec %rcx
+ jnz .Llo0
+
+ mov %r12, %rcx
+ sar $2, %rcx
+ clc
+ lea 16(%rsi,%r12,8), %rsi
+ lea (%rsi,%r12,8), %rdx
+ jmp .Laddy
+
+
+ .align 16, 0x90
+.L3m4:
+.Llo3: mov (%r13,%r12,8), %rax
+ mul %rbp
+ mov %rax, %rbx
+ mov %rdx, %r10
+ mov 8(%r13,%r12,8), %rax
+ mov 24(%rsi,%r12,8), %r15
+ mul %rbp
+ add 16(%rsi,%r12,8), %rbx
+ mov $0, %ebx
+ mov %rbx, %r14
+ adc %rax, %r10
+ mov 16(%r13,%r12,8), %rax
+ mov %r14, %r9
+ adc %rdx, %r9
+ add %r10, %r15
+ mul %rbp
+ lea 3(%r12), %r11
+ imul %r8, %r15
+
+
+ .align 16, 0x90
+.Lli3: add %r10, (%rsi,%r11,8)
+ adc %rax, %r9
+ mov (%r13,%r11,8), %rax
+ adc %rdx, %r14
+ xor %r10d, %r10d
+ mul %rbp
+ add %r9, 8(%rsi,%r11,8)
+ adc %rax, %r14
+ adc %rdx, %rbx
+ mov 8(%r13,%r11,8), %rax
+ mul %rbp
+ add %r14, 16(%rsi,%r11,8)
+ adc %rax, %rbx
+ adc %rdx, %r10
+ mov 16(%r13,%r11,8), %rax
+ mul %rbp
+ add %rbx, 24(%rsi,%r11,8)
+ mov $0, %r14d
+ mov %r14, %rbx
+ adc %rax, %r10
+ mov 24(%r13,%r11,8), %rax
+ mov %r14, %r9
+ adc %rdx, %r9
+ mul %rbp
+ add $4, %r11
+ js .Lli3
+
+.Lle3: add %r10, (%rsi)
+ adc %rax, %r9
+ adc %r14, %rdx
+ add %r9, 8(%rsi)
+ adc $0, %rdx
+ mov %rdx, 16(%rsi,%r12,8)
+ mov %r15, %rbp
+ lea 8(%rsi), %rsi
+ dec %rcx
+ jnz .Llo3
+
+
+
+ mov %r12, %rcx
+ sar $2, %rcx
+ lea 40(%rsi,%r12,8), %rsi
+ lea (%rsi,%r12,8), %rdx
+
+ mov -24(%rsi), %r8
+ mov -16(%rsi), %r9
+ mov -8(%rsi), %r10
+ add -24(%rdx), %r8
+ adc -16(%rdx), %r9
+ adc -8(%rdx), %r10
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ lea 24(%rdi), %rdi
+
+.Laddx:inc %rcx
+ jz .Lad3
+
+.Laddy:mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ inc %rcx
+ jmp .Lmid
+
+
+.Lal3: adc (%rdx), %r8
+ adc 8(%rdx), %r9
+ adc 16(%rdx), %r10
+ adc 24(%rdx), %r11
+ mov %r8, (%rdi)
+ lea 32(%rsi), %rsi
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ inc %rcx
+ mov %r11, 24(%rdi)
+ lea 32(%rdx), %rdx
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ lea 32(%rdi), %rdi
+.Lmid: mov 16(%rsi), %r10
+ mov 24(%rsi), %r11
+ jnz .Lal3
+
+.Lae3: adc (%rdx), %r8
+ adc 8(%rdx), %r9
+ adc 16(%rdx), %r10
+ adc 24(%rdx), %r11
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ mov %r11, 24(%rdi)
+
+.Lad3: mov %ecx, %eax
+ adc %eax, %eax
+
+.Lret: pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ pop %rbp
+
+ ret
+ .size __gmpn_redc_1,.-__gmpn_redc_1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh1_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh1_n.s
new file mode 100644
index 0000000..ac1323b
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh1_n.s
@@ -0,0 +1,179 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_rsblsh1_n
+ .type __gmpn_rsblsh1_n,@function
+
+__gmpn_rsblsh1_n:
+
+
+ push %rbp
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,8), %rdx
+ neg %rcx
+ xor %ebp, %ebp
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: add %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ mov 16(%rdx,%rcx,8), %r10
+ adc %r10, %r10
+ sbb %eax, %eax
+ sub (%rsi,%rcx,8), %r8
+ sbb 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+ sbb 16(%rsi,%rcx,8), %r10
+ mov %r10, 16(%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ add $3, %rcx
+ jmp .Lent
+
+.Lb10: add %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ sbb %eax, %eax
+ sub (%rsi,%rcx,8), %r8
+ sbb 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ add $2, %rcx
+ jmp .Lent
+
+.Lb01: add %r8, %r8
+ sbb %eax, %eax
+ sub (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ inc %rcx
+.Lent: jns .Lend
+
+ .align 16, 0x90
+.Ltop: add %eax, %eax
+
+ mov (%rdx,%rcx,8), %r8
+.Lb00: adc %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ mov 16(%rdx,%rcx,8), %r10
+ adc %r10, %r10
+ mov 24(%rdx,%rcx,8), %r11
+ adc %r11, %r11
+
+ sbb %eax, %eax
+ add %ebp, %ebp
+
+ sbb (%rsi,%rcx,8), %r8
+ nop
+ sbb 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+ sbb 16(%rsi,%rcx,8), %r10
+ sbb 24(%rsi,%rcx,8), %r11
+ mov %r10, 16(%rdi,%rcx,8)
+ mov %r11, 24(%rdi,%rcx,8)
+
+ sbb %ebp, %ebp
+ add $4, %rcx
+ js .Ltop
+
+.Lend:
+
+
+ sub %eax, %ebp
+ movslq %ebp, %rax
+
+ pop %rbp
+
+ ret
+ .size __gmpn_rsblsh1_n,.-__gmpn_rsblsh1_n
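Reader's note, not part of the generated file: __gmpn_rsblsh1_n computes {rp,n} = 2*{vp,n} - {up,n} and returns the high limb of that quantity, which for this operation is -1, 0 or 1 wrapped to a limb; rsblsh2_n and rsblsh_n below do the same with a shift of 2 and of a run-time count. A minimal C sketch of those semantics, assuming 64-bit limbs; the ref_ name is made up for illustration.

#include <stddef.h>
typedef unsigned long mp_limb_t;                /* assumes 64-bit limbs */

mp_limb_t ref_rsblsh1_n(mp_limb_t *rp, const mp_limb_t *up,
                        const mp_limb_t *vp, size_t n)
{
    mp_limb_t shc = 0, bw = 0;                  /* doubling carry, borrow */
    for (size_t i = 0; i < n; i++) {
        mp_limb_t tw = (vp[i] << 1) | shc;      /* 2*vp, limb by limb */
        shc = vp[i] >> 63;
        mp_limb_t d  = tw - up[i];
        mp_limb_t b1 = tw < up[i];
        rp[i] = d - bw;
        bw = b1 + (d < bw);
    }
    return shc - bw;                            /* wraps to ~0UL when negative */
}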
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh2_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh2_n.s
new file mode 100644
index 0000000..e9f079a
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh2_n.s
@@ -0,0 +1,204 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_rsblsh2_n
+ .type __gmpn_rsblsh2_n,@function
+
+__gmpn_rsblsh2_n:
+
+
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov (%rdx), %r8
+ lea (,%r8,4), %r12
+ shr $62, %r8
+
+ mov %ecx, %eax
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,8), %rdx
+ neg %rcx
+ and $3, %al
+ je .Lb00
+ cmp $2, %al
+ jc .Lb01
+ je .Lb10
+
+.Lb11: mov 8(%rdx,%rcx,8), %r10
+ lea (%r8,%r10,4), %r14
+ shr $62, %r10
+ mov 16(%rdx,%rcx,8), %r11
+ lea (%r10,%r11,4), %r15
+ shr $62, %r11
+ sub (%rsi,%rcx,8), %r12
+ sbb 8(%rsi,%rcx,8), %r14
+ sbb 16(%rsi,%rcx,8), %r15
+ sbb %eax, %eax
+ mov %r12, (%rdi,%rcx,8)
+ mov %r14, 8(%rdi,%rcx,8)
+ mov %r15, 16(%rdi,%rcx,8)
+ add $3, %rcx
+ js .Ltop
+ jmp .Lend
+
+.Lb01: mov %r8, %r11
+ sub (%rsi,%rcx,8), %r12
+ sbb %eax, %eax
+ mov %r12, (%rdi,%rcx,8)
+ add $1, %rcx
+ js .Ltop
+ jmp .Lend
+
+.Lb10: mov 8(%rdx,%rcx,8), %r11
+ lea (%r8,%r11,4), %r15
+ shr $62, %r11
+ sub (%rsi,%rcx,8), %r12
+ sbb 8(%rsi,%rcx,8), %r15
+ sbb %eax, %eax
+ mov %r12, (%rdi,%rcx,8)
+ mov %r15, 8(%rdi,%rcx,8)
+ add $2, %rcx
+ js .Ltop
+ jmp .Lend
+
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ mov 16(%rdx,%rcx,8), %r10
+ jmp .Le00
+
+ .align 16, 0x90
+.Ltop: mov 16(%rdx,%rcx,8), %r10
+ mov (%rdx,%rcx,8), %r8
+ mov 8(%rdx,%rcx,8), %r9
+ lea (%r11,%r8,4), %r12
+ shr $62, %r8
+.Le00: lea (%r8,%r9,4), %r13
+ shr $62, %r9
+ mov 24(%rdx,%rcx,8), %r11
+ lea (%r9,%r10,4), %r14
+ shr $62, %r10
+ lea (%r10,%r11,4), %r15
+ shr $62, %r11
+ add %eax, %eax
+ sbb (%rsi,%rcx,8), %r12
+ sbb 8(%rsi,%rcx,8), %r13
+ sbb 16(%rsi,%rcx,8), %r14
+ sbb 24(%rsi,%rcx,8), %r15
+ mov %r12, (%rdi,%rcx,8)
+ mov %r13, 8(%rdi,%rcx,8)
+ mov %r14, 16(%rdi,%rcx,8)
+ sbb %eax, %eax
+ mov %r15, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ js .Ltop
+.Lend:
+
+
+ add %r11d, %eax
+ movslq %eax, %rax
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+
+ ret
+ .size __gmpn_rsblsh2_n,.-__gmpn_rsblsh2_n
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh_n.s
new file mode 100644
index 0000000..d439217
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/rsblsh_n.s
@@ -0,0 +1,228 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_rsblsh_n
+ .type __gmpn_rsblsh_n,@function
+
+__gmpn_rsblsh_n:
+
+
+
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov (%rdx), %rax
+
+ mov $0, %ebp
+ sub %rcx, %rbp
+
+ lea -16(%rsi,%rcx,8), %rsi
+ lea -16(%rdi,%rcx,8), %rdi
+ lea 16(%rdx,%rcx,8), %r12
+
+ mov %rcx, %r9
+
+ mov %r8, %rcx
+ mov $1, %r8d
+ shl %cl, %r8
+
+ mul %r8
+
+ and $3, %r9d
+ jz .Lb0
+ cmp $2, %r9d
+ jc .Lb1
+ jz .Lb2
+
+.Lb3: mov %rax, %r11
+ sub 16(%rsi,%rbp,8), %r11
+ mov -8(%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov (%r12,%rbp,8), %rax
+ mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ add $3, %rbp
+ jnz .Llo3
+ jmp .Lcj3
+
+.Lb2: mov %rax, %rbx
+ mov -8(%r12,%rbp,8), %rax
+ mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ add $2, %rbp
+ jz .Lcj2
+ mov %rdx, %r10
+ mov -16(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r10
+ xor %ecx, %ecx
+ jmp .Llo2
+
+.Lb1: mov %rax, %r9
+ mov %rdx, %r10
+ add $1, %rbp
+ jnz .Lgt1
+ sub 8(%rsi,%rbp,8), %r9
+ jmp .Lcj1
+.Lgt1: mov -16(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r10
+ mov %rdx, %r11
+ mov -8(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r11
+ sub 8(%rsi,%rbp,8), %r9
+ sbb 16(%rsi,%rbp,8), %r10
+ sbb 24(%rsi,%rbp,8), %r11
+ mov (%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ jmp .Llo1
+
+.Lb0: mov %rax, %r10
+ mov %rdx, %r11
+ mov -8(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r11
+ sub 16(%rsi,%rbp,8), %r10
+ sbb 24(%rsi,%rbp,8), %r11
+ mov (%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov 8(%r12,%rbp,8), %rax
+ add $4, %rbp
+ jz .Lend
+
+ .align 8, 0x90
+.Ltop: mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ mov %r10, -16(%rdi,%rbp,8)
+.Llo3: mov %rdx, %r10
+ mov -16(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r10
+ mov %r11, -8(%rdi,%rbp,8)
+.Llo2: mov %rdx, %r11
+ mov -8(%r12,%rbp,8), %rax
+ mul %r8
+ or %rax, %r11
+ add %ecx, %ecx
+ sbb (%rsi,%rbp,8), %rbx
+ sbb 8(%rsi,%rbp,8), %r9
+ sbb 16(%rsi,%rbp,8), %r10
+ sbb 24(%rsi,%rbp,8), %r11
+ mov (%r12,%rbp,8), %rax
+ sbb %ecx, %ecx
+ mov %rbx, (%rdi,%rbp,8)
+.Llo1: mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov %r9, 8(%rdi,%rbp,8)
+.Llo0: mov 8(%r12,%rbp,8), %rax
+ add $4, %rbp
+ jnz .Ltop
+
+.Lend: mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ mov %r10, -16(%rdi,%rbp,8)
+.Lcj3: mov %r11, -8(%rdi,%rbp,8)
+.Lcj2: add %ecx, %ecx
+ sbb (%rsi,%rbp,8), %rbx
+ sbb 8(%rsi,%rbp,8), %r9
+ mov %rbx, (%rdi,%rbp,8)
+.Lcj1: mov %r9, 8(%rdi,%rbp,8)
+ mov %rdx, %rax
+ sbb $0, %rax
+ pop %rbx
+ pop %rbp
+ pop %r12
+
+ ret
+ .size __gmpn_rsblsh_n,.-__gmpn_rsblsh_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/rsh1add_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/rsh1add_n.s
new file mode 100644
index 0000000..8554f6f
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/rsh1add_n.s
@@ -0,0 +1,203 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_rsh1add_nc
+ .type __gmpn_rsh1add_nc,@function
+
+__gmpn_rsh1add_nc:
+
+
+
+ push %rbx
+
+ xor %eax, %eax
+ neg %r8
+ mov (%rsi), %rbx
+ adc (%rdx), %rbx
+ jmp .Lent
+ .size __gmpn_rsh1add_nc,.-__gmpn_rsh1add_nc
+
+ .align 16, 0x90
+ .globl __gmpn_rsh1add_n
+ .type __gmpn_rsh1add_n,@function
+
+__gmpn_rsh1add_n:
+
+
+ push %rbx
+
+ xor %eax, %eax
+ mov (%rsi), %rbx
+ add (%rdx), %rbx
+.Lent:
+ rcr %rbx
+ adc %eax, %eax
+
+ mov %ecx, %r11d
+ and $3, %r11d
+
+ cmp $1, %r11d
+ je .Ldo
+
+.Ln1: cmp $2, %r11d
+ jne .Ln2
+ add %rbx, %rbx
+ mov 8(%rsi), %r10
+ adc 8(%rdx), %r10
+ lea 8(%rsi), %rsi
+ lea 8(%rdx), %rdx
+ lea 8(%rdi), %rdi
+ rcr %r10
+ rcr %rbx
+ mov %rbx, -8(%rdi)
+ jmp .Lcj1
+
+.Ln2: cmp $3, %r11d
+ jne .Ln3
+ add %rbx, %rbx
+ mov 8(%rsi), %r9
+ mov 16(%rsi), %r10
+ adc 8(%rdx), %r9
+ adc 16(%rdx), %r10
+ lea 16(%rsi), %rsi
+ lea 16(%rdx), %rdx
+ lea 16(%rdi), %rdi
+ rcr %r10
+ rcr %r9
+ rcr %rbx
+ mov %rbx, -16(%rdi)
+ jmp .Lcj2
+
+.Ln3: dec %rcx
+ add %rbx, %rbx
+ mov 8(%rsi), %r8
+ mov 16(%rsi), %r9
+ adc 8(%rdx), %r8
+ adc 16(%rdx), %r9
+ mov 24(%rsi), %r10
+ adc 24(%rdx), %r10
+ lea 24(%rsi), %rsi
+ lea 24(%rdx), %rdx
+ lea 24(%rdi), %rdi
+ rcr %r10
+ rcr %r9
+ rcr %r8
+ rcr %rbx
+ mov %rbx, -24(%rdi)
+ mov %r8, -16(%rdi)
+.Lcj2: mov %r9, -8(%rdi)
+.Lcj1: mov %r10, %rbx
+
+.Ldo:
+ shr $2, %rcx
+ je .Lend
+ .align 16, 0x90
+.Ltop: add %rbx, %rbx
+
+ mov 8(%rsi), %r8
+ mov 16(%rsi), %r9
+ adc 8(%rdx), %r8
+ adc 16(%rdx), %r9
+ mov 24(%rsi), %r10
+ mov 32(%rsi), %r11
+ adc 24(%rdx), %r10
+ adc 32(%rdx), %r11
+
+ lea 32(%rsi), %rsi
+ lea 32(%rdx), %rdx
+
+ rcr %r11
+ rcr %r10
+ rcr %r9
+ rcr %r8
+
+ rcr %rbx
+ mov %rbx, (%rdi)
+ mov %r8, 8(%rdi)
+ mov %r9, 16(%rdi)
+ mov %r10, 24(%rdi)
+ mov %r11, %rbx
+
+ lea 32(%rdi), %rdi
+ dec %rcx
+ jne .Ltop
+
+.Lend: mov %rbx, (%rdi)
+ pop %rbx
+
+ ret
+ .size __gmpn_rsh1add_n,.-__gmpn_rsh1add_n
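As a reference for the routine above (not part of the generated file): __gmpn_rsh1add_n computes {rp,n} = ({up,n} + {vp,n}) >> 1 and returns the bit shifted out at the low end; the _nc entry point additionally accepts an incoming carry. A simple two-pass C sketch of the same result, whereas the assembly interleaves the add and the shift; 64-bit limbs assumed, ref_ name hypothetical.

#include <stddef.h>
typedef unsigned long mp_limb_t;                /* assumes 64-bit limbs */

mp_limb_t ref_rsh1add_n(mp_limb_t *rp, const mp_limb_t *up,
                        const mp_limb_t *vp, size_t n)
{
    mp_limb_t cy = 0;
    for (size_t i = 0; i < n; i++) {            /* full-width sum first */
        mp_limb_t s  = up[i] + cy;
        mp_limb_t c1 = s < cy;
        rp[i] = s + vp[i];
        cy = c1 + (rp[i] < s);
    }
    mp_limb_t lowbit = rp[0] & 1;               /* the value the asm returns */
    for (size_t i = 0; i + 1 < n; i++)          /* then shift right by one */
        rp[i] = (rp[i] >> 1) | (rp[i + 1] << 63);
    rp[n - 1] = (rp[n - 1] >> 1) | (cy << 63);
    return lowbit;
}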
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/rsh1sub_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/rsh1sub_n.s
new file mode 100644
index 0000000..ff06ece
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/rsh1sub_n.s
@@ -0,0 +1,203 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_rsh1sub_nc
+ .type __gmpn_rsh1sub_nc,@function
+
+__gmpn_rsh1sub_nc:
+
+
+
+ push %rbx
+
+ xor %eax, %eax
+ neg %r8
+ mov (%rsi), %rbx
+ sbb (%rdx), %rbx
+ jmp .Lent
+ .size __gmpn_rsh1sub_nc,.-__gmpn_rsh1sub_nc
+
+ .align 16, 0x90
+ .globl __gmpn_rsh1sub_n
+ .type __gmpn_rsh1sub_n,@function
+
+__gmpn_rsh1sub_n:
+
+
+ push %rbx
+
+ xor %eax, %eax
+ mov (%rsi), %rbx
+ sub (%rdx), %rbx
+.Lent:
+ rcr %rbx
+ adc %eax, %eax
+
+ mov %ecx, %r11d
+ and $3, %r11d
+
+ cmp $1, %r11d
+ je .Ldo
+
+.Ln1: cmp $2, %r11d
+ jne .Ln2
+ add %rbx, %rbx
+ mov 8(%rsi), %r10
+ sbb 8(%rdx), %r10
+ lea 8(%rsi), %rsi
+ lea 8(%rdx), %rdx
+ lea 8(%rdi), %rdi
+ rcr %r10
+ rcr %rbx
+ mov %rbx, -8(%rdi)
+ jmp .Lcj1
+
+.Ln2: cmp $3, %r11d
+ jne .Ln3
+ add %rbx, %rbx
+ mov 8(%rsi), %r9
+ mov 16(%rsi), %r10
+ sbb 8(%rdx), %r9
+ sbb 16(%rdx), %r10
+ lea 16(%rsi), %rsi
+ lea 16(%rdx), %rdx
+ lea 16(%rdi), %rdi
+ rcr %r10
+ rcr %r9
+ rcr %rbx
+ mov %rbx, -16(%rdi)
+ jmp .Lcj2
+
+.Ln3: dec %rcx
+ add %rbx, %rbx
+ mov 8(%rsi), %r8
+ mov 16(%rsi), %r9
+ sbb 8(%rdx), %r8
+ sbb 16(%rdx), %r9
+ mov 24(%rsi), %r10
+ sbb 24(%rdx), %r10
+ lea 24(%rsi), %rsi
+ lea 24(%rdx), %rdx
+ lea 24(%rdi), %rdi
+ rcr %r10
+ rcr %r9
+ rcr %r8
+ rcr %rbx
+ mov %rbx, -24(%rdi)
+ mov %r8, -16(%rdi)
+.Lcj2: mov %r9, -8(%rdi)
+.Lcj1: mov %r10, %rbx
+
+.Ldo:
+ shr $2, %rcx
+ je .Lend
+ .align 16, 0x90
+.Ltop: add %rbx, %rbx
+
+ mov 8(%rsi), %r8
+ mov 16(%rsi), %r9
+ sbb 8(%rdx), %r8
+ sbb 16(%rdx), %r9
+ mov 24(%rsi), %r10
+ mov 32(%rsi), %r11
+ sbb 24(%rdx), %r10
+ sbb 32(%rdx), %r11
+
+ lea 32(%rsi), %rsi
+ lea 32(%rdx), %rdx
+
+ rcr %r11
+ rcr %r10
+ rcr %r9
+ rcr %r8
+
+ rcr %rbx
+ mov %rbx, (%rdi)
+ mov %r8, 8(%rdi)
+ mov %r9, 16(%rdi)
+ mov %r10, 24(%rdi)
+ mov %r11, %rbx
+
+ lea 32(%rdi), %rdi
+ dec %rcx
+ jne .Ltop
+
+.Lend: mov %rbx, (%rdi)
+ pop %rbx
+
+ ret
+ .size __gmpn_rsh1sub_n,.-__gmpn_rsh1sub_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/rshift.s b/vere/ext/gmp/gen/x86_64-linux/mpn/rshift.s
new file mode 100644
index 0000000..8ddd7b5
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/rshift.s
@@ -0,0 +1,191 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_rshift
+ .type __gmpn_rshift,@function
+
+__gmpn_rshift:
+
+
+ neg %ecx
+ mov (%rsi), %rax
+ shl %cl, %rax
+ neg %ecx
+
+ lea 1(%rdx), %r8d
+
+ lea -8(%rsi,%rdx,8), %rsi
+ lea -8(%rdi,%rdx,8), %rdi
+ neg %rdx
+
+ and $3, %r8d
+ je .Lrlx
+
+ dec %r8d
+ jne .L1
+
+ mov 8(%rsi,%rdx,8), %r10
+ shr %cl, %r10
+ neg %ecx
+ mov 16(%rsi,%rdx,8), %r8
+ shl %cl, %r8
+ or %r8, %r10
+ mov %r10, 8(%rdi,%rdx,8)
+ inc %rdx
+ jmp .Lrll
+
+.L1: dec %r8d
+ je .L1x
+
+ mov 8(%rsi,%rdx,8), %r10
+ shr %cl, %r10
+ neg %ecx
+ mov 16(%rsi,%rdx,8), %r8
+ shl %cl, %r8
+ or %r8, %r10
+ mov %r10, 8(%rdi,%rdx,8)
+ inc %rdx
+ neg %ecx
+.L1x:
+ cmp $-1, %rdx
+ je .Last
+ mov 8(%rsi,%rdx,8), %r10
+ shr %cl, %r10
+ mov 16(%rsi,%rdx,8), %r11
+ shr %cl, %r11
+ neg %ecx
+ mov 16(%rsi,%rdx,8), %r8
+ mov 24(%rsi,%rdx,8), %r9
+ shl %cl, %r8
+ or %r8, %r10
+ shl %cl, %r9
+ or %r9, %r11
+ mov %r10, 8(%rdi,%rdx,8)
+ mov %r11, 16(%rdi,%rdx,8)
+ add $2, %rdx
+
+.Lrll: neg %ecx
+.Lrlx: mov 8(%rsi,%rdx,8), %r10
+ shr %cl, %r10
+ mov 16(%rsi,%rdx,8), %r11
+ shr %cl, %r11
+
+ add $4, %rdx
+ jb .Lend
+ .align 16, 0x90
+.Ltop:
+
+ neg %ecx
+ mov -16(%rsi,%rdx,8), %r8
+ mov -8(%rsi,%rdx,8), %r9
+ shl %cl, %r8
+ or %r8, %r10
+ shl %cl, %r9
+ or %r9, %r11
+ mov %r10, -24(%rdi,%rdx,8)
+ mov %r11, -16(%rdi,%rdx,8)
+
+ mov (%rsi,%rdx,8), %r8
+ mov 8(%rsi,%rdx,8), %r9
+ shl %cl, %r8
+ shl %cl, %r9
+
+
+ neg %ecx
+ mov -8(%rsi,%rdx,8), %r10
+ mov 0(%rsi,%rdx,8), %r11
+ shr %cl, %r10
+ or %r10, %r8
+ shr %cl, %r11
+ or %r11, %r9
+ mov %r8, -8(%rdi,%rdx,8)
+ mov %r9, 0(%rdi,%rdx,8)
+
+ mov 8(%rsi,%rdx,8), %r10
+ mov 16(%rsi,%rdx,8), %r11
+ shr %cl, %r10
+ shr %cl, %r11
+
+ add $4, %rdx
+ jae .Ltop
+.Lend:
+ neg %ecx
+ mov -8(%rsi), %r8
+ shl %cl, %r8
+ or %r8, %r10
+ mov (%rsi), %r9
+ shl %cl, %r9
+ or %r9, %r11
+ mov %r10, -16(%rdi)
+ mov %r11, -8(%rdi)
+
+ neg %ecx
+.Last: mov (%rsi), %r10
+ shr %cl, %r10
+ mov %r10, (%rdi)
+
+ ret
+ .size __gmpn_rshift,.-__gmpn_rshift
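For orientation (an added note, not generated output): __gmpn_rshift shifts {up,n} right by cnt bits, with 0 < cnt < 64, stores n limbs at rp, and returns the shifted-out bits left-justified in a limb. A plain C sketch under the same 64-bit-limb assumption:

#include <stddef.h>
typedef unsigned long mp_limb_t;                /* assumes 64-bit limbs */

mp_limb_t ref_rshift(mp_limb_t *rp, const mp_limb_t *up,
                     size_t n, unsigned cnt)    /* requires 0 < cnt < 64 */
{
    mp_limb_t ret = up[0] << (64 - cnt);        /* dropped bits, left-justified */
    for (size_t i = 0; i + 1 < n; i++)
        rp[i] = (up[i] >> cnt) | (up[i + 1] << (64 - cnt));
    rp[n - 1] = up[n - 1] >> cnt;
    return ret;
}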
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sec_tabselect.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sec_tabselect.s
new file mode 100644
index 0000000..7a50a70
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sec_tabselect.s
@@ -0,0 +1,190 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sec_tabselect
+ .type __gmpn_sec_tabselect,@function
+
+__gmpn_sec_tabselect:
+
+
+
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov %rdx, %r9
+ add $-4, %r9
+ js .Louter_end
+
+.Louter_top:
+ mov %rcx, %rbp
+ push %rsi
+ xor %r12d, %r12d
+ xor %r13d, %r13d
+ xor %r14d, %r14d
+ xor %r15d, %r15d
+ mov %r8, %rbx
+
+ .align 16, 0x90
+.Ltop: sub $1, %rbx
+ sbb %rax, %rax
+ mov 0(%rsi), %r10
+ mov 8(%rsi), %r11
+ and %rax, %r10
+ and %rax, %r11
+ or %r10, %r12
+ or %r11, %r13
+ mov 16(%rsi), %r10
+ mov 24(%rsi), %r11
+ and %rax, %r10
+ and %rax, %r11
+ or %r10, %r14
+ or %r11, %r15
+ lea (%rsi,%rdx,8), %rsi
+ add $-1, %rbp
+ jne .Ltop
+
+ mov %r12, 0(%rdi)
+ mov %r13, 8(%rdi)
+ mov %r14, 16(%rdi)
+ mov %r15, 24(%rdi)
+ pop %rsi
+ lea 32(%rsi), %rsi
+ lea 32(%rdi), %rdi
+ add $-4, %r9
+ jns .Louter_top
+.Louter_end:
+
+ test $2, %dl
+ jz .Lb0x
+.Lb1x: mov %rcx, %rbp
+ push %rsi
+ xor %r12d, %r12d
+ xor %r13d, %r13d
+ mov %r8, %rbx
+ .align 16, 0x90
+.Ltp2: sub $1, %rbx
+ sbb %rax, %rax
+ mov 0(%rsi), %r10
+ mov 8(%rsi), %r11
+ and %rax, %r10
+ and %rax, %r11
+ or %r10, %r12
+ or %r11, %r13
+ lea (%rsi,%rdx,8), %rsi
+ add $-1, %rbp
+ jne .Ltp2
+ mov %r12, 0(%rdi)
+ mov %r13, 8(%rdi)
+ pop %rsi
+ lea 16(%rsi), %rsi
+ lea 16(%rdi), %rdi
+
+.Lb0x: test $1, %dl
+ jz .Lb00
+.Lb01: mov %rcx, %rbp
+ xor %r12d, %r12d
+ mov %r8, %rbx
+ .align 16, 0x90
+.Ltp1: sub $1, %rbx
+ sbb %rax, %rax
+ mov 0(%rsi), %r10
+ and %rax, %r10
+ or %r10, %r12
+ lea (%rsi,%rdx,8), %rsi
+ add $-1, %rbp
+ jne .Ltp1
+ mov %r12, 0(%rdi)
+
+.Lb00: pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_sec_tabselect,.-__gmpn_sec_tabselect
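Annotation, not part of the generated file: __gmpn_sec_tabselect copies entry number which from a table of nents entries of n limbs each into rp, reading every entry and combining through masks so the memory access pattern does not depend on which. A functional C sketch follows; the k == which comparison only illustrates the semantics and is not itself guaranteed branch-free.

#include <stddef.h>
typedef unsigned long mp_limb_t;

void ref_sec_tabselect(mp_limb_t *rp, const mp_limb_t *tab,
                       size_t n, size_t nents, size_t which)
{
    for (size_t i = 0; i < n; i++)
        rp[i] = 0;
    for (size_t k = 0; k < nents; k++) {
        mp_limb_t mask = -(mp_limb_t)(k == which);   /* all-ones iff selected */
        for (size_t i = 0; i < n; i++)
            rp[i] |= tab[k * n + i] & mask;          /* every entry is touched */
    }
}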
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sqr_basecase.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sqr_basecase.s
new file mode 100644
index 0000000..eb24851
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sqr_basecase.s
@@ -0,0 +1,818 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sqr_basecase
+ .type __gmpn_sqr_basecase,@function
+
+__gmpn_sqr_basecase:
+
+
+ mov %edx, %ecx
+ mov %edx, %r11d
+
+ add $-40, %rsp
+
+ and $3, %ecx
+ cmp $4, %edx
+ lea 4(%rcx), %r8
+
+ mov %rbx, 32(%rsp)
+ mov %rbp, 24(%rsp)
+ mov %r12, 16(%rsp)
+ mov %r13, 8(%rsp)
+ mov %r14, (%rsp)
+
+ cmovg %r8, %rcx
+
+ lea .Ltab(%rip), %rax
+ movslq (%rax,%rcx,4), %r10
+ add %r10, %rax
+ jmp *%rax
+
+ .section .data.rel.ro.local,"a",@progbits
+ .align 8, 0x90
+.Ltab: .long .L4-.Ltab
+ .long .L1-.Ltab
+ .long .L2-.Ltab
+ .long .L3-.Ltab
+ .long .L0m4-.Ltab
+ .long .L1m4-.Ltab
+ .long .L2m4-.Ltab
+ .long .L3m4-.Ltab
+ .text
+
+.L1: mov (%rsi), %rax
+ mul %rax
+ add $40, %rsp
+ mov %rax, (%rdi)
+ mov %rdx, 8(%rdi)
+
+ ret
+
+.L2: mov (%rsi), %rax
+ mov %rax, %r8
+ mul %rax
+ mov 8(%rsi), %r11
+ mov %rax, (%rdi)
+ mov %r11, %rax
+ mov %rdx, %r9
+ mul %rax
+ add $40, %rsp
+ mov %rax, %r10
+ mov %r11, %rax
+ mov %rdx, %r11
+ mul %r8
+ xor %r8, %r8
+ add %rax, %r9
+ adc %rdx, %r10
+ adc %r8, %r11
+ add %rax, %r9
+ mov %r9, 8(%rdi)
+ adc %rdx, %r10
+ mov %r10, 16(%rdi)
+ adc %r8, %r11
+ mov %r11, 24(%rdi)
+
+ ret
+
+.L3: mov (%rsi), %rax
+ mov %rax, %r10
+ mul %rax
+ mov 8(%rsi), %r11
+ mov %rax, (%rdi)
+ mov %r11, %rax
+ mov %rdx, 8(%rdi)
+ mul %rax
+ mov 16(%rsi), %rcx
+ mov %rax, 16(%rdi)
+ mov %rcx, %rax
+ mov %rdx, 24(%rdi)
+ mul %rax
+ mov %rax, 32(%rdi)
+ mov %rdx, 40(%rdi)
+
+ mov %r11, %rax
+ mul %r10
+ mov %rax, %r8
+ mov %rcx, %rax
+ mov %rdx, %r9
+ mul %r10
+ xor %r10, %r10
+ add %rax, %r9
+ mov %r11, %rax
+ mov %r10, %r11
+ adc %rdx, %r10
+
+ mul %rcx
+ add $40, %rsp
+ add %rax, %r10
+ adc %r11, %rdx
+ add %r8, %r8
+ adc %r9, %r9
+ adc %r10, %r10
+ adc %rdx, %rdx
+ adc %r11, %r11
+ add %r8, 8(%rdi)
+ adc %r9, 16(%rdi)
+ adc %r10, 24(%rdi)
+ adc %rdx, 32(%rdi)
+ adc %r11, 40(%rdi)
+
+ ret
+
+.L4: mov (%rsi), %rax
+ mov %rax, %r11
+ mul %rax
+ mov 8(%rsi), %rbx
+ mov %rax, (%rdi)
+ mov %rbx, %rax
+ mov %rdx, 8(%rdi)
+ mul %rax
+ mov %rax, 16(%rdi)
+ mov %rdx, 24(%rdi)
+ mov 16(%rsi), %rax
+ mul %rax
+ mov %rax, 32(%rdi)
+ mov %rdx, 40(%rdi)
+ mov 24(%rsi), %rax
+ mul %rax
+ mov %rax, 48(%rdi)
+ mov %rbx, %rax
+ mov %rdx, 56(%rdi)
+
+ mul %r11
+ add $32, %rsp
+ mov %rax, %r8
+ mov %rdx, %r9
+ mov 16(%rsi), %rax
+ mul %r11
+ xor %r10, %r10
+ add %rax, %r9
+ adc %rdx, %r10
+ mov 24(%rsi), %rax
+ mul %r11
+ xor %r11, %r11
+ add %rax, %r10
+ adc %rdx, %r11
+ mov 16(%rsi), %rax
+ mul %rbx
+ xor %rcx, %rcx
+ add %rax, %r10
+ adc %rdx, %r11
+ adc $0, %rcx
+ mov 24(%rsi), %rax
+ mul %rbx
+ pop %rbx
+ add %rax, %r11
+ adc %rdx, %rcx
+ mov 16(%rsi), %rdx
+ mov 24(%rsi), %rax
+ mul %rdx
+ add %rax, %rcx
+ adc $0, %rdx
+
+ add %r8, %r8
+ adc %r9, %r9
+ adc %r10, %r10
+ adc %r11, %r11
+ adc %rcx, %rcx
+ mov $0, %eax
+ adc %rdx, %rdx
+
+ adc %rax, %rax
+ add %r8, 8(%rdi)
+ adc %r9, 16(%rdi)
+ adc %r10, 24(%rdi)
+ adc %r11, 32(%rdi)
+ adc %rcx, 40(%rdi)
+ adc %rdx, 48(%rdi)
+ adc %rax, 56(%rdi)
+
+ ret
+
+
+.L0m4:
+ lea -16(%rdi,%r11,8), %r12
+ mov (%rsi), %r13
+ mov 8(%rsi), %rax
+ lea (%rsi,%r11,8), %rsi
+
+ lea -4(%r11), %r8
+
+ xor %r9d, %r9d
+ sub %r11, %r9
+
+ mul %r13
+ xor %ebp, %ebp
+ mov %rax, %rbx
+ mov 16(%rsi,%r9,8), %rax
+ mov %rdx, %r10
+ jmp .LL3
+
+ .align 16, 0x90
+.Lmul_1_m3_top:
+ add %rax, %rbp
+ mov %r10, (%r12,%r9,8)
+ mov (%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ xor %ebx, %ebx
+ mul %r13
+ xor %r10d, %r10d
+ mov %rbp, 8(%r12,%r9,8)
+ add %rax, %rcx
+ adc %rdx, %rbx
+ mov 8(%rsi,%r9,8), %rax
+ mov %rcx, 16(%r12,%r9,8)
+ xor %ebp, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov 16(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+.LL3: xor %ecx, %ecx
+ mul %r13
+ add %rax, %r10
+ mov 24(%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ mov %rbx, 24(%r12,%r9,8)
+ mul %r13
+ add $4, %r9
+ js .Lmul_1_m3_top
+
+ add %rax, %rbp
+ mov %r10, (%r12)
+ adc %rdx, %rcx
+ mov %rbp, 8(%r12)
+ mov %rcx, 16(%r12)
+
+ lea 16(%r12), %r12
+ lea -8(%rsi), %rsi
+ jmp .Ldowhile
+
+
+.L1m4:
+ lea 8(%rdi,%r11,8), %r12
+ mov (%rsi), %r13
+ mov 8(%rsi), %rax
+ lea 8(%rsi,%r11,8), %rsi
+
+ lea -3(%r11), %r8
+
+ lea -3(%r11), %r9
+ neg %r9
+
+ mov %rax, %r14
+ mul %r13
+ mov %rdx, %rcx
+ xor %ebp, %ebp
+ mov %rax, 8(%rdi)
+ jmp .Lm0
+
+ .align 16, 0x90
+.Lmul_2_m0_top:
+ mul %r14
+ add %rax, %rbx
+ adc %rdx, %rcx
+ mov -24(%rsi,%r9,8), %rax
+ mov $0, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov -24(%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+ mul %r14
+ add %rax, %rcx
+ mov %rbx, -24(%r12,%r9,8)
+ adc %rdx, %rbp
+.Lm0: mov -16(%rsi,%r9,8), %rax
+ mul %r13
+ mov $0, %r10d
+ add %rax, %rcx
+ adc %rdx, %rbp
+ mov -16(%rsi,%r9,8), %rax
+ adc $0, %r10d
+ mov $0, %ebx
+ mov %rcx, -16(%r12,%r9,8)
+ mul %r14
+ add %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ mov $0, %ecx
+ mul %r13
+ add %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ adc $0, %ebx
+ mul %r14
+ add %rax, %r10
+ mov %rbp, -8(%r12,%r9,8)
+ adc %rdx, %rbx
+.Lm2x: mov (%rsi,%r9,8), %rax
+ mul %r13
+ add %rax, %r10
+ adc %rdx, %rbx
+ adc $0, %ecx
+ add $4, %r9
+ mov -32(%rsi,%r9,8), %rax
+ mov %r10, -32(%r12,%r9,8)
+ js .Lmul_2_m0_top
+
+ mul %r14
+ add %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, -8(%r12)
+ mov %rcx, (%r12)
+
+ lea -16(%rsi), %rsi
+ lea 0(%r12), %r12
+ jmp .Ldowhile_end
+
+
+.L2m4:
+ lea -16(%rdi,%r11,8), %r12
+ mov (%rsi), %r13
+ mov 8(%rsi), %rax
+ lea (%rsi,%r11,8), %rsi
+
+ lea -4(%r11), %r8
+
+ lea -2(%r11), %r9
+ neg %r9
+
+ mul %r13
+ mov %rax, %rbp
+ mov (%rsi,%r9,8), %rax
+ mov %rdx, %rcx
+ jmp .LL1
+
+ .align 16, 0x90
+.Lmul_1_m1_top:
+ add %rax, %rbp
+ mov %r10, (%r12,%r9,8)
+ mov (%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+.LL1: xor %ebx, %ebx
+ mul %r13
+ xor %r10d, %r10d
+ mov %rbp, 8(%r12,%r9,8)
+ add %rax, %rcx
+ adc %rdx, %rbx
+ mov 8(%rsi,%r9,8), %rax
+ mov %rcx, 16(%r12,%r9,8)
+ xor %ebp, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov 16(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ xor %ecx, %ecx
+ mul %r13
+ add %rax, %r10
+ mov 24(%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ mov %rbx, 24(%r12,%r9,8)
+ mul %r13
+ add $4, %r9
+ js .Lmul_1_m1_top
+
+ add %rax, %rbp
+ mov %r10, (%r12)
+ adc %rdx, %rcx
+ mov %rbp, 8(%r12)
+ mov %rcx, 16(%r12)
+
+ lea 16(%r12), %r12
+ lea -8(%rsi), %rsi
+ jmp .Ldowhile_mid
+
+
+.L3m4:
+ lea 8(%rdi,%r11,8), %r12
+ mov (%rsi), %r13
+ mov 8(%rsi), %rax
+ lea 8(%rsi,%r11,8), %rsi
+
+ lea -5(%r11), %r8
+
+ lea -1(%r11), %r9
+ neg %r9
+
+ mov %rax, %r14
+ mul %r13
+ mov %rdx, %r10
+ xor %ebx, %ebx
+ xor %ecx, %ecx
+ mov %rax, 8(%rdi)
+ jmp .Lm2
+
+ .align 16, 0x90
+.Lmul_2_m2_top:
+ mul %r14
+ add %rax, %rbx
+ adc %rdx, %rcx
+ mov -24(%rsi,%r9,8), %rax
+ mov $0, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov -24(%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+ mul %r14
+ add %rax, %rcx
+ mov %rbx, -24(%r12,%r9,8)
+ adc %rdx, %rbp
+ mov -16(%rsi,%r9,8), %rax
+ mul %r13
+ mov $0, %r10d
+ add %rax, %rcx
+ adc %rdx, %rbp
+ mov -16(%rsi,%r9,8), %rax
+ adc $0, %r10d
+ mov $0, %ebx
+ mov %rcx, -16(%r12,%r9,8)
+ mul %r14
+ add %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ mov $0, %ecx
+ mul %r13
+ add %rax, %rbp
+ mov -8(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ adc $0, %ebx
+ mul %r14
+ add %rax, %r10
+ mov %rbp, -8(%r12,%r9,8)
+ adc %rdx, %rbx
+.Lm2: mov (%rsi,%r9,8), %rax
+ mul %r13
+ add %rax, %r10
+ adc %rdx, %rbx
+ adc $0, %ecx
+ add $4, %r9
+ mov -32(%rsi,%r9,8), %rax
+ mov %r10, -32(%r12,%r9,8)
+ js .Lmul_2_m2_top
+
+ mul %r14
+ add %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, -8(%r12)
+ mov %rcx, (%r12)
+
+ lea -16(%rsi), %rsi
+ jmp .Ldowhile_mid
+
+.Ldowhile:
+
+ lea 4(%r8), %r9
+ neg %r9
+
+ mov 16(%rsi,%r9,8), %r13
+ mov 24(%rsi,%r9,8), %r14
+ mov 24(%rsi,%r9,8), %rax
+ mul %r13
+ xor %r10d, %r10d
+ add %rax, 24(%r12,%r9,8)
+ adc %rdx, %r10
+ xor %ebx, %ebx
+ xor %ecx, %ecx
+ jmp .Lam2
+
+ .align 16, 0x90
+.Laddmul_2_m2_top:
+ add %r10, (%r12,%r9,8)
+ adc %rax, %rbx
+ mov 8(%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ mov $0, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov 8(%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+ mul %r14
+ add %rbx, 8(%r12,%r9,8)
+ adc %rax, %rcx
+ adc %rdx, %rbp
+ mov 16(%rsi,%r9,8), %rax
+ mov $0, %r10d
+ mul %r13
+ add %rax, %rcx
+ mov 16(%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ adc $0, %r10d
+ mul %r14
+ add %rcx, 16(%r12,%r9,8)
+ adc %rax, %rbp
+ mov 24(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ mul %r13
+ mov $0, %ebx
+ add %rax, %rbp
+ adc %rdx, %r10
+ mov $0, %ecx
+ mov 24(%rsi,%r9,8), %rax
+ adc $0, %ebx
+ mul %r14
+ add %rbp, 24(%r12,%r9,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+.Lam2: mov 32(%rsi,%r9,8), %rax
+ mul %r13
+ add %rax, %r10
+ mov 32(%rsi,%r9,8), %rax
+ adc %rdx, %rbx
+ adc $0, %ecx
+ mul %r14
+ add $4, %r9
+ js .Laddmul_2_m2_top
+
+ add %r10, (%r12)
+ adc %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, 8(%r12)
+ mov %rcx, 16(%r12)
+
+ lea 16(%r12), %r12
+
+ add $-2, %r8d
+
+.Ldowhile_mid:
+
+ lea 2(%r8), %r9
+ neg %r9
+
+ mov (%rsi,%r9,8), %r13
+ mov 8(%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %rax
+ mul %r13
+ xor %ecx, %ecx
+ add %rax, 8(%r12,%r9,8)
+ adc %rdx, %rcx
+ xor %ebp, %ebp
+ jmp .L20
+
+ .align 16, 0x90
+.Laddmul_2_m0_top:
+ add %r10, (%r12,%r9,8)
+ adc %rax, %rbx
+ mov 8(%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ mov $0, %ebp
+ mul %r13
+ add %rax, %rbx
+ mov 8(%rsi,%r9,8), %rax
+ adc %rdx, %rcx
+ adc $0, %ebp
+ mul %r14
+ add %rbx, 8(%r12,%r9,8)
+ adc %rax, %rcx
+ adc %rdx, %rbp
+.L20: mov 16(%rsi,%r9,8), %rax
+ mov $0, %r10d
+ mul %r13
+ add %rax, %rcx
+ mov 16(%rsi,%r9,8), %rax
+ adc %rdx, %rbp
+ adc $0, %r10d
+ mul %r14
+ add %rcx, 16(%r12,%r9,8)
+ adc %rax, %rbp
+ mov 24(%rsi,%r9,8), %rax
+ adc %rdx, %r10
+ mul %r13
+ mov $0, %ebx
+ add %rax, %rbp
+ adc %rdx, %r10
+ mov $0, %ecx
+ mov 24(%rsi,%r9,8), %rax
+ adc $0, %ebx
+ mul %r14
+ add %rbp, 24(%r12,%r9,8)
+ adc %rax, %r10
+ adc %rdx, %rbx
+ mov 32(%rsi,%r9,8), %rax
+ mul %r13
+ add %rax, %r10
+ mov 32(%rsi,%r9,8), %rax
+ adc %rdx, %rbx
+ adc $0, %ecx
+ mul %r14
+ add $4, %r9
+ js .Laddmul_2_m0_top
+
+ add %r10, (%r12)
+ adc %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, 8(%r12)
+ mov %rcx, 16(%r12)
+
+ lea 16(%r12), %r12
+.Ldowhile_end:
+
+ add $-2, %r8d
+ jne .Ldowhile
+
+
+ mov -16(%rsi), %r13
+ mov -8(%rsi), %r14
+ mov -8(%rsi), %rax
+ mul %r13
+ xor %r10d, %r10d
+ add %rax, -8(%r12)
+ adc %rdx, %r10
+ xor %ebx, %ebx
+ xor %ecx, %ecx
+ mov (%rsi), %rax
+ mul %r13
+ add %rax, %r10
+ mov (%rsi), %rax
+ adc %rdx, %rbx
+ mul %r14
+ add %r10, (%r12)
+ adc %rax, %rbx
+ adc %rdx, %rcx
+ mov %rbx, 8(%r12)
+ mov %rcx, 16(%r12)
+
+
+ lea -4(%r11,%r11), %r9
+
+ mov 8(%rdi), %r11
+ lea -8(%rsi), %rsi
+ lea (%rdi,%r9,8), %rdi
+ neg %r9
+ mov (%rsi,%r9,4), %rax
+ mul %rax
+ test $2, %r9b
+ jnz .Lodd
+
+.Levn: add %r11, %r11
+ sbb %ebx, %ebx
+ add %rdx, %r11
+ mov %rax, (%rdi,%r9,8)
+ jmp .Ld0
+
+.Lodd: add %r11, %r11
+ sbb %ebp, %ebp
+ add %rdx, %r11
+ mov %rax, (%rdi,%r9,8)
+ lea -2(%r9), %r9
+ jmp .Ld1
+
+ .align 16, 0x90
+.Ltop: mov (%rsi,%r9,4), %rax
+ mul %rax
+ add %ebp, %ebp
+ adc %rax, %r10
+ adc %rdx, %r11
+ mov %r10, (%rdi,%r9,8)
+.Ld0: mov %r11, 8(%rdi,%r9,8)
+ mov 16(%rdi,%r9,8), %r10
+ adc %r10, %r10
+ mov 24(%rdi,%r9,8), %r11
+ adc %r11, %r11
+ nop
+ sbb %ebp, %ebp
+ mov 8(%rsi,%r9,4), %rax
+ mul %rax
+ add %ebx, %ebx
+ adc %rax, %r10
+ adc %rdx, %r11
+ mov %r10, 16(%rdi,%r9,8)
+.Ld1: mov %r11, 24(%rdi,%r9,8)
+ mov 32(%rdi,%r9,8), %r10
+ adc %r10, %r10
+ mov 40(%rdi,%r9,8), %r11
+ adc %r11, %r11
+ sbb %ebx, %ebx
+ add $4, %r9
+ js .Ltop
+
+ mov (%rsi), %rax
+ mul %rax
+ add %ebp, %ebp
+ adc %rax, %r10
+ adc %rdx, %r11
+ mov %r10, (%rdi)
+ mov %r11, 8(%rdi)
+ mov 16(%rdi), %r10
+ adc %r10, %r10
+ sbb %ebp, %ebp
+ neg %ebp
+ mov 8(%rsi), %rax
+ mul %rax
+ add %ebx, %ebx
+ adc %rax, %r10
+ adc %rbp, %rdx
+ mov %r10, 16(%rdi)
+ mov %rdx, 24(%rdi)
+
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_sqr_basecase,.-__gmpn_sqr_basecase
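Annotation, not part of the generated file: __gmpn_sqr_basecase writes the 2n-limb square of {up,n} to rp. The assembly saves roughly half the limb products by forming the off-diagonal triangle once, doubling it, and adding the diagonal squares; the plain schoolbook sketch below produces the same result, assuming 64-bit limbs and the GCC/Clang __int128 extension, with a made-up ref_ name.

#include <stddef.h>
typedef unsigned long mp_limb_t;                /* assumes 64-bit limbs */

void ref_sqr_basecase(mp_limb_t *rp, const mp_limb_t *up, size_t n)
{
    /* schoolbook n*n limb products; same result as the triangle-plus-diagonal
       strategy used by the assembly */
    for (size_t i = 0; i < 2 * n; i++)
        rp[i] = 0;
    for (size_t i = 0; i < n; i++) {
        mp_limb_t cy = 0;
        for (size_t j = 0; j < n; j++) {
            unsigned __int128 t = (unsigned __int128)up[i] * up[j]
                                + rp[i + j] + cy;
            rp[i + j] = (mp_limb_t)t;
            cy = (mp_limb_t)(t >> 64);
        }
        rp[i + n] = cy;
    }
}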
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sqr_diag_addlsh1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sqr_diag_addlsh1.s
new file mode 100644
index 0000000..7203603
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sqr_diag_addlsh1.s
@@ -0,0 +1,130 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_sqr_diag_addlsh1
+ .type __gmpn_sqr_diag_addlsh1,@function
+
+__gmpn_sqr_diag_addlsh1:
+
+
+ push %rbx
+
+ dec %rcx
+ shl %rcx
+
+ mov (%rdx), %rax
+
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,4), %r11
+ neg %rcx
+
+ mul %rax
+ mov %rax, (%rdi,%rcx,8)
+
+ xor %ebx, %ebx
+ jmp .Lmid
+
+ .align 16, 0x90
+.Ltop: add %r10, %r8
+ adc %rax, %r9
+ mov %r8, -8(%rdi,%rcx,8)
+ mov %r9, (%rdi,%rcx,8)
+.Lmid: mov 8(%r11,%rcx,4), %rax
+ mov (%rsi,%rcx,8), %r8
+ mov 8(%rsi,%rcx,8), %r9
+ adc %r8, %r8
+ adc %r9, %r9
+ lea (%rdx,%rbx), %r10
+ setc %bl
+ mul %rax
+ add $2, %rcx
+ js .Ltop
+
+.Lend: add %r10, %r8
+ adc %rax, %r9
+ mov %r8, -8(%rdi)
+ mov %r9, (%rdi)
+ adc %rbx, %rdx
+ mov %rdx, 8(%rdi)
+
+ pop %rbx
+
+ ret
+ .size __gmpn_sqr_diag_addlsh1,.-__gmpn_sqr_diag_addlsh1
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err1_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err1_n.s
new file mode 100644
index 0000000..cbef8af
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err1_n.s
@@ -0,0 +1,237 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sub_err1_n
+ .type __gmpn_sub_err1_n,@function
+
+__gmpn_sub_err1_n:
+
+ mov 8(%rsp), %rax
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ lea (%rsi,%r9,8), %rsi
+ lea (%rdx,%r9,8), %rdx
+ lea (%rdi,%r9,8), %rdi
+
+ mov %r9d, %r10d
+ and $3, %r10d
+ jz .L0mod4
+ cmp $2, %r10d
+ jc .L1mod4
+ jz .L2mod4
+.L3mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ xor %r10d, %r10d
+ xor %r11d, %r11d
+ lea -24(%r8,%r9,8), %r8
+ neg %r9
+
+ shr $1, %al
+ mov (%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %r15
+ sbb (%rdx,%r9,8), %r14
+ mov %r14, (%rdi,%r9,8)
+ cmovc 16(%r8), %rbx
+ sbb 8(%rdx,%r9,8), %r15
+ mov %r15, 8(%rdi,%r9,8)
+ cmovc 8(%r8), %r10
+ mov 16(%rsi,%r9,8), %r14
+ sbb 16(%rdx,%r9,8), %r14
+ mov %r14, 16(%rdi,%r9,8)
+ cmovc (%r8), %r11
+ setc %al
+ add %r10, %rbx
+ adc $0, %rbp
+ add %r11, %rbx
+ adc $0, %rbp
+
+ add $3, %r9
+ jnz .Lloop
+ jmp .Lend
+
+ .align 16, 0x90
+.L0mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ lea (%r8,%r9,8), %r8
+ neg %r9
+ jmp .Lloop
+
+ .align 16, 0x90
+.L1mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ lea -8(%r8,%r9,8), %r8
+ neg %r9
+
+ shr $1, %al
+ mov (%rsi,%r9,8), %r14
+ sbb (%rdx,%r9,8), %r14
+ mov %r14, (%rdi,%r9,8)
+ cmovc (%r8), %rbx
+ setc %al
+
+ add $1, %r9
+ jnz .Lloop
+ jmp .Lend
+
+ .align 16, 0x90
+.L2mod4:
+ xor %ebx, %ebx
+ xor %ebp, %ebp
+ xor %r10d, %r10d
+ lea -16(%r8,%r9,8), %r8
+ neg %r9
+
+ shr $1, %al
+ mov (%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %r15
+ sbb (%rdx,%r9,8), %r14
+ mov %r14, (%rdi,%r9,8)
+ cmovc 8(%r8), %rbx
+ sbb 8(%rdx,%r9,8), %r15
+ mov %r15, 8(%rdi,%r9,8)
+ cmovc (%r8), %r10
+ setc %al
+ add %r10, %rbx
+ adc $0, %rbp
+
+ add $2, %r9
+ jnz .Lloop
+ jmp .Lend
+
+ .align 32, 0x90
+.Lloop:
+ shr $1, %al
+ mov -8(%r8), %r10
+ mov $0, %r13d
+ mov (%rsi,%r9,8), %r14
+ mov 8(%rsi,%r9,8), %r15
+ sbb (%rdx,%r9,8), %r14
+ cmovnc %r13, %r10
+ sbb 8(%rdx,%r9,8), %r15
+ mov -16(%r8), %r11
+ mov %r14, (%rdi,%r9,8)
+ mov 16(%rsi,%r9,8), %r14
+ mov %r15, 8(%rdi,%r9,8)
+ cmovnc %r13, %r11
+ mov -24(%r8), %r12
+ sbb 16(%rdx,%r9,8), %r14
+ cmovnc %r13, %r12
+ mov 24(%rsi,%r9,8), %r15
+ sbb 24(%rdx,%r9,8), %r15
+ cmovc -32(%r8), %r13
+ setc %al
+ add %r10, %rbx
+ adc $0, %rbp
+ add %r11, %rbx
+ adc $0, %rbp
+ add %r12, %rbx
+ adc $0, %rbp
+ mov %r14, 16(%rdi,%r9,8)
+ add %r13, %rbx
+ lea -32(%r8), %r8
+ adc $0, %rbp
+ mov %r15, 24(%rdi,%r9,8)
+ add $4, %r9
+ jnz .Lloop
+
+.Lend:
+ mov %rbx, (%rcx)
+ mov %rbp, 8(%rcx)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+ .size __gmpn_sub_err1_n,.-__gmpn_sub_err1_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err2_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err2_n.s
new file mode 100644
index 0000000..77ebcb7
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err2_n.s
@@ -0,0 +1,184 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sub_err2_n
+ .type __gmpn_sub_err2_n,@function
+
+__gmpn_sub_err2_n:
+
+ mov 16(%rsp), %rax
+ mov 8(%rsp), %r10
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ xor %ebp, %ebp
+ xor %r11d, %r11d
+ xor %r12d, %r12d
+ xor %r13d, %r13d
+
+ sub %r8, %r9
+
+ lea (%rdi,%r10,8), %rdi
+ lea (%rsi,%r10,8), %rsi
+ lea (%rdx,%r10,8), %rdx
+
+ test $1, %r10
+ jnz .Lodd
+
+ lea -8(%r8,%r10,8), %r8
+ neg %r10
+ jmp .Ltop
+
+ .align 16, 0x90
+.Lodd:
+ lea -16(%r8,%r10,8), %r8
+ neg %r10
+ shr $1, %rax
+ mov (%rsi,%r10,8), %rbx
+ sbb (%rdx,%r10,8), %rbx
+ cmovc 8(%r8), %rbp
+ cmovc 8(%r8,%r9), %r12
+ mov %rbx, (%rdi,%r10,8)
+ sbb %rax, %rax
+ inc %r10
+ jz .Lend
+
+ .align 16, 0x90
+.Ltop:
+ mov (%rsi,%r10,8), %rbx
+ shr $1, %rax
+ sbb (%rdx,%r10,8), %rbx
+ mov %rbx, (%rdi,%r10,8)
+ sbb %r14, %r14
+
+ mov 8(%rsi,%r10,8), %rbx
+ sbb 8(%rdx,%r10,8), %rbx
+ mov %rbx, 8(%rdi,%r10,8)
+ sbb %rax, %rax
+
+ mov (%r8), %rbx
+ and %r14, %rbx
+ add %rbx, %rbp
+ adc $0, %r11
+
+ and (%r8,%r9), %r14
+ add %r14, %r12
+ adc $0, %r13
+
+ mov -8(%r8), %rbx
+ and %rax, %rbx
+ add %rbx, %rbp
+ adc $0, %r11
+
+ mov -8(%r8,%r9), %rbx
+ and %rax, %rbx
+ add %rbx, %r12
+ adc $0, %r13
+
+ add $2, %r10
+ lea -16(%r8), %r8
+ jnz .Ltop
+.Lend:
+
+ mov %rbp, (%rcx)
+ mov %r11, 8(%rcx)
+ mov %r12, 16(%rcx)
+ mov %r13, 24(%rcx)
+
+ and $1, %eax
+
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+ .size __gmpn_sub_err2_n,.-__gmpn_sub_err2_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err3_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err3_n.s
new file mode 100644
index 0000000..b995ec7
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_err3_n.s
@@ -0,0 +1,168 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sub_err3_n
+ .type __gmpn_sub_err3_n,@function
+
+__gmpn_sub_err3_n:
+
+ mov 24(%rsp), %rax
+ mov 16(%rsp), %r10
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ push %rcx
+ mov 64(%rsp), %rcx
+
+ xor %ebp, %ebp
+ xor %r11d, %r11d
+ xor %r12d, %r12d
+ xor %r13d, %r13d
+ xor %r14d, %r14d
+ xor %r15d, %r15d
+
+ sub %r8, %r9
+ sub %r8, %rcx
+
+ lea -8(%r8,%r10,8), %r8
+ lea (%rdi,%r10,8), %rdi
+ lea (%rsi,%r10,8), %rsi
+ lea (%rdx,%r10,8), %rdx
+ neg %r10
+
+ .align 16, 0x90
+.Ltop:
+ shr $1, %rax
+ mov (%rsi,%r10,8), %rax
+ sbb (%rdx,%r10,8), %rax
+ mov %rax, (%rdi,%r10,8)
+ sbb %rax, %rax
+
+ mov (%r8), %rbx
+ and %rax, %rbx
+ add %rbx, %rbp
+ adc $0, %r11
+
+ mov (%r8,%r9), %rbx
+ and %rax, %rbx
+ add %rbx, %r12
+ adc $0, %r13
+
+ mov (%r8,%rcx), %rbx
+ and %rax, %rbx
+ add %rbx, %r14
+ adc $0, %r15
+
+ lea -8(%r8), %r8
+ inc %r10
+ jnz .Ltop
+
+.Lend:
+ and $1, %eax
+ pop %rcx
+
+ mov %rbp, (%rcx)
+ mov %r11, 8(%rcx)
+ mov %r12, 16(%rcx)
+ mov %r13, 24(%rcx)
+ mov %r14, 32(%rcx)
+ mov %r15, 40(%rcx)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+ .size __gmpn_sub_err3_n,.-__gmpn_sub_err3_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sub_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_n.s
new file mode 100644
index 0000000..8c1db0a
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sub_n.s
@@ -0,0 +1,194 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sub_nc
+ .type __gmpn_sub_nc,@function
+
+__gmpn_sub_nc:
+
+
+
+ mov %ecx, %eax
+ shr $2, %rcx
+ and $3, %eax
+ bt $0, %r8
+ jrcxz .Llt4
+
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ dec %rcx
+ jmp .Lmid
+
+ .size __gmpn_sub_nc,.-__gmpn_sub_nc
+ .align 16, 0x90
+ .globl __gmpn_sub_n
+ .type __gmpn_sub_n,@function
+
+__gmpn_sub_n:
+
+
+ mov %ecx, %eax
+ shr $2, %rcx
+ and $3, %eax
+ jrcxz .Llt4
+
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ dec %rcx
+ jmp .Lmid
+
+.Llt4: dec %eax
+ mov (%rsi), %r8
+ jnz .L2
+ sbb (%rdx), %r8
+ mov %r8, (%rdi)
+ adc %eax, %eax
+
+ ret
+
+.L2: dec %eax
+ mov 8(%rsi), %r9
+ jnz .L3
+ sbb (%rdx), %r8
+ sbb 8(%rdx), %r9
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ adc %eax, %eax
+
+ ret
+
+.L3: mov 16(%rsi), %r10
+ sbb (%rdx), %r8
+ sbb 8(%rdx), %r9
+ sbb 16(%rdx), %r10
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ setc %al
+
+ ret
+
+ .align 16, 0x90
+.Ltop: sbb (%rdx), %r8
+ sbb 8(%rdx), %r9
+ sbb 16(%rdx), %r10
+ sbb 24(%rdx), %r11
+ mov %r8, (%rdi)
+ lea 32(%rsi), %rsi
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ dec %rcx
+ mov %r11, 24(%rdi)
+ lea 32(%rdx), %rdx
+ mov (%rsi), %r8
+ mov 8(%rsi), %r9
+ lea 32(%rdi), %rdi
+.Lmid: mov 16(%rsi), %r10
+ mov 24(%rsi), %r11
+ jnz .Ltop
+
+.Lend: lea 32(%rsi), %rsi
+ sbb (%rdx), %r8
+ sbb 8(%rdx), %r9
+ sbb 16(%rdx), %r10
+ sbb 24(%rdx), %r11
+ lea 32(%rdx), %rdx
+ mov %r8, (%rdi)
+ mov %r9, 8(%rdi)
+ mov %r10, 16(%rdi)
+ mov %r11, 24(%rdi)
+ lea 32(%rdi), %rdi
+
+ inc %eax
+ dec %eax
+ jnz .Llt4
+ adc %eax, %eax
+
+ ret
+ .size __gmpn_sub_n,.-__gmpn_sub_n
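Reference note, not generated output: __gmpn_sub_n computes {rp,n} = {up,n} - {vp,n} and returns the borrow out (0 or 1); __gmpn_sub_nc takes an explicit borrow-in as a fifth argument. Minimal C sketch, 64-bit limbs assumed, ref_ names hypothetical.

#include <stddef.h>
typedef unsigned long mp_limb_t;

mp_limb_t ref_sub_nc(mp_limb_t *rp, const mp_limb_t *up,
                     const mp_limb_t *vp, size_t n, mp_limb_t borrow)
{
    for (size_t i = 0; i < n; i++) {
        mp_limb_t d  = up[i] - vp[i];
        mp_limb_t b1 = up[i] < vp[i];
        rp[i] = d - borrow;
        borrow = b1 | (d < borrow);
    }
    return borrow;
}
/* ref_sub_n(rp, up, vp, n) is simply ref_sub_nc(rp, up, vp, n, 0) */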
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/sublsh1_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/sublsh1_n.s
new file mode 100644
index 0000000..d257a05
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/sublsh1_n.s
@@ -0,0 +1,175 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_sublsh1_n
+ .type __gmpn_sublsh1_n,@function
+
+__gmpn_sublsh1_n:
+
+
+ push %rbx
+ push %rbp
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdi,%rcx,8), %rdi
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdx,%rcx,8), %rdx
+ neg %rcx
+ xor %ebp, %ebp
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: add %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ mov 16(%rdx,%rcx,8), %r10
+ adc %r10, %r10
+ sbb %eax, %eax
+ mov (%rsi,%rcx,8), %rbp
+ mov 8(%rsi,%rcx,8), %rbx
+ sub %r8, %rbp
+ sbb %r9, %rbx
+ mov %rbp, (%rdi,%rcx,8)
+ mov %rbx, 8(%rdi,%rcx,8)
+ mov 16(%rsi,%rcx,8), %rbp
+ sbb %r10, %rbp
+ mov %rbp, 16(%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ add $3, %rcx
+ jmp .Lent
+
+.Lb10: add %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ sbb %eax, %eax
+ mov (%rsi,%rcx,8), %rbp
+ mov 8(%rsi,%rcx,8), %rbx
+ sub %r8, %rbp
+ sbb %r9, %rbx
+ mov %rbp, (%rdi,%rcx,8)
+ mov %rbx, 8(%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ add $2, %rcx
+ jmp .Lent
+
+.Lb01: add %r8, %r8
+ sbb %eax, %eax
+ mov (%rsi,%rcx,8), %rbp
+ sub %r8, %rbp
+ mov %rbp, (%rdi,%rcx,8)
+ sbb %ebp, %ebp
+ inc %rcx
+.Lent: jns .Lend
+
+ .align 16, 0x90
+.Ltop: add %eax, %eax
+
+ mov (%rdx,%rcx,8), %r8
+.Lb00: adc %r8, %r8
+ mov 8(%rdx,%rcx,8), %r9
+ adc %r9, %r9
+ mov 16(%rdx,%rcx,8), %r10
+ adc %r10, %r10
+ mov 24(%rdx,%rcx,8), %r11
+ adc %r11, %r11
+
+ sbb %eax, %eax
+ add %ebp, %ebp
+
+ mov (%rsi,%rcx,8), %rbp
+ mov 8(%rsi,%rcx,8), %rbx
+ sbb %r8, %rbp
+ sbb %r9, %rbx
+ mov %rbp, (%rdi,%rcx,8)
+ mov %rbx, 8(%rdi,%rcx,8)
+ mov 16(%rsi,%rcx,8), %rbp
+ mov 24(%rsi,%rcx,8), %rbx
+ sbb %r10, %rbp
+ sbb %r11, %rbx
+ mov %rbp, 16(%rdi,%rcx,8)
+ mov %rbx, 24(%rdi,%rcx,8)
+
+ sbb %ebp, %ebp
+ add $4, %rcx
+ js .Ltop
+
+.Lend: add %ebp, %eax
+ neg %eax
+
+ pop %rbp
+ pop %rbx
+
+ ret
+ .size __gmpn_sublsh1_n,.-__gmpn_sublsh1_n
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/submul_1.s b/vere/ext/gmp/gen/x86_64-linux/mpn/submul_1.s
new file mode 100644
index 0000000..5e34932
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/submul_1.s
@@ -0,0 +1,196 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 16, 0x90
+ .globl __gmpn_submul_1
+ .type __gmpn_submul_1,@function
+
+__gmpn_submul_1:
+
+
+
+
+
+
+ mov (%rsi), %rax
+ push %rbx
+ mov %rdx, %rbx
+
+ mul %rcx
+ mov %rbx, %r11
+
+ and $3, %ebx
+ jz .Lb0
+ cmp $2, %ebx
+ jz .Lb2
+ jg .Lb3
+
+.Lb1: dec %r11
+ jne .Lgt1
+ sub %rax, (%rdi)
+ jmp .Lret
+.Lgt1: lea 8(%rsi,%r11,8), %rsi
+ lea -8(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r10, %r10
+ xor %ebx, %ebx
+ mov %rax, %r9
+ mov (%rsi,%r11,8), %rax
+ mov %rdx, %r8
+ jmp .LL1
+
+.Lb0: lea (%rsi,%r11,8), %rsi
+ lea -16(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r10, %r10
+ mov %rax, %r8
+ mov %rdx, %rbx
+ jmp .LL0
+
+.Lb3: lea -8(%rsi,%r11,8), %rsi
+ lea -24(%rdi,%r11,8), %rdi
+ neg %r11
+ mov %rax, %rbx
+ mov %rdx, %r10
+ jmp .LL3
+
+.Lb2: lea -16(%rsi,%r11,8), %rsi
+ lea -32(%rdi,%r11,8), %rdi
+ neg %r11
+ xor %r8, %r8
+ xor %ebx, %ebx
+ mov %rax, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mov %rdx, %r9
+ jmp .LL2
+
+ .align 16, 0x90
+.Ltop: sub %r10, (%rdi,%r11,8)
+ adc %rax, %r9
+ mov (%rsi,%r11,8), %rax
+ adc %rdx, %r8
+ mov $0, %r10d
+.LL1: mul %rcx
+ sub %r9, 8(%rdi,%r11,8)
+ adc %rax, %r8
+ adc %rdx, %rbx
+.LL0: mov 8(%rsi,%r11,8), %rax
+ mul %rcx
+ sub %r8, 16(%rdi,%r11,8)
+ adc %rax, %rbx
+ adc %rdx, %r10
+.LL3: mov 16(%rsi,%r11,8), %rax
+ mul %rcx
+ sub %rbx, 24(%rdi,%r11,8)
+ mov $0, %r8d
+ mov %r8, %rbx
+ adc %rax, %r10
+ mov 24(%rsi,%r11,8), %rax
+ mov %r8, %r9
+ adc %rdx, %r9
+.LL2: mul %rcx
+ add $4, %r11
+ js .Ltop
+
+ sub %r10, (%rdi,%r11,8)
+ adc %rax, %r9
+ adc %r8, %rdx
+ sub %r9, 8(%rdi,%r11,8)
+.Lret: adc $0, %rdx
+ mov %rdx, %rax
+
+ pop %rbx
+
+
+ ret
+ .size __gmpn_submul_1,.-__gmpn_submul_1
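Reference note, not generated output: __gmpn_submul_1 subtracts {up,n} * v from {rp,n} in place and returns the high limb of the subtracted quantity plus the final borrow, so that the old {rp,n} minus {up,n}*v equals the new {rp,n} minus ret*B^n with B = 2^64. A C sketch assuming 64-bit limbs and __int128; the ref_ name is illustrative only.

#include <stddef.h>
typedef unsigned long mp_limb_t;                /* assumes 64-bit limbs */

mp_limb_t ref_submul_1(mp_limb_t *rp, const mp_limb_t *up,
                       size_t n, mp_limb_t v)
{
    mp_limb_t bw = 0;                           /* running borrow / high limb */
    for (size_t i = 0; i < n; i++) {
        unsigned __int128 p = (unsigned __int128)up[i] * v + bw;
        mp_limb_t plo = (mp_limb_t)p;
        mp_limb_t phi = (mp_limb_t)(p >> 64);
        mp_limb_t d   = rp[i] - plo;
        bw = phi + (d > rp[i]);                 /* borrow from the subtraction */
        rp[i] = d;
    }
    return bw;
}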
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/xnor_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/xnor_n.s
new file mode 100644
index 0000000..4db0497
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/xnor_n.s
@@ -0,0 +1,154 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_xnor_n
+ .type __gmpn_xnor_n,@function
+
+__gmpn_xnor_n:
+
+
+ mov (%rdx), %r8
+ not %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: xor (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+ .byte 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
+.Lb01: xor (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+ not %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ not %r9
+ xor (%rsi,%rcx,8), %r8
+ xor 8(%rsi,%rcx,8), %r9
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+ not %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ not %r9
+ xor 16(%rsi,%rcx,8), %r8
+ xor 24(%rsi,%rcx,8), %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_xnor_n,.-__gmpn_xnor_n
+
+
+
diff --git a/vere/ext/gmp/gen/x86_64-linux/mpn/xor_n.s b/vere/ext/gmp/gen/x86_64-linux/mpn/xor_n.s
new file mode 100644
index 0000000..8ef14d0
--- /dev/null
+++ b/vere/ext/gmp/gen/x86_64-linux/mpn/xor_n.s
@@ -0,0 +1,149 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .text
+ .align 32, 0x90
+ .globl __gmpn_xor_n
+ .type __gmpn_xor_n,@function
+
+__gmpn_xor_n:
+
+
+ mov (%rdx), %r8
+ mov %ecx, %eax
+ lea (%rdx,%rcx,8), %rdx
+ lea (%rsi,%rcx,8), %rsi
+ lea (%rdi,%rcx,8), %rdi
+ neg %rcx
+ and $3, %eax
+ je .Lb00
+ cmp $2, %eax
+ jc .Lb01
+ je .Lb10
+
+.Lb11: xor (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ dec %rcx
+ jmp .Le11
+.Lb10: add $-2, %rcx
+ jmp .Le10
+.Lb01: xor (%rsi,%rcx,8), %r8
+ mov %r8, (%rdi,%rcx,8)
+ inc %rcx
+ jz .Lret
+
+.Ltop: mov (%rdx,%rcx,8), %r8
+.Lb00: mov 8(%rdx,%rcx,8), %r9
+ xor (%rsi,%rcx,8), %r8
+ xor 8(%rsi,%rcx,8), %r9
+ nop
+ mov %r8, (%rdi,%rcx,8)
+ mov %r9, 8(%rdi,%rcx,8)
+.Le11: mov 16(%rdx,%rcx,8), %r8
+.Le10: mov 24(%rdx,%rcx,8), %r9
+ xor 16(%rsi,%rcx,8), %r8
+ xor 24(%rsi,%rcx,8), %r9
+ mov %r8, 16(%rdi,%rcx,8)
+ mov %r9, 24(%rdi,%rcx,8)
+ add $4, %rcx
+ jnc .Ltop
+
+.Lret:
+ ret
+ .size __gmpn_xor_n,.-__gmpn_xor_n
+
+
+
+
+