1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
// ---------------------------------------------------------------------------
// ___gmpn_submul_1  (AArch64, leaf routine: no stack use, no calls)
//
// Multiplies a vector of n 64-bit limbs by one 64-bit limb and subtracts the
// product from a destination vector in place:
//
//     {dst, n} = {dst, n} - {src, n} * v
//
// and returns the limb that has to be borrowed from above the top limb
// (high half of the last product plus accumulated carries/borrows).
//
// NOTE(review): the symbol name looks like the Mach-O spelling of GMP's
// mpn_submul_1; operand-overlap rules and the allowed range of n come from
// that contract and are not visible here -- confirm against the caller.
//
// In:    x0 = dst pointer (limbs are read and written back)
//        x1 = src pointer (limbs are only read)
//        x2 = n, number of limbs
//        x3 = v, the single-limb multiplier
// Out:   x0 = final carry/borrow limb
// Modifies: x1, x2, x4-x17, NZCV flags.
//
// Register roles:
//        x4-x7    source limbs
//        x8-x11   low product halves -> accumulated subtrahend -> result limbs
//        x12-x14  high product halves (x12/x13 later reused for dst limbs)
//        x15      running carry limb handed from one group to the next
//        x16/x17  dst limbs inside the 4-limb loop
//
// Shape: n mod 4 limbs are handled first (1 limb if bit 0 of n is set, then
// 2 limbs if bit 1 is set); the remaining n/4 groups go through a 4-limb loop
// whose loads for the next group are issued before the previous group is
// finished. With n == 0 the code touches no memory and returns 0.
// Multiplies, loads and carry-chain steps are interleaved; none of the
// instructions placed between two flag-dependent steps writes the flags.
// ---------------------------------------------------------------------------
.text
// Entry-point alignment (2^3 bytes where .align takes an exponent -- confirm
// for the assembler in use).
.align 3
.globl ___gmpn_submul_1
___gmpn_submul_1:
adds x15, xzr, xzr  // x15 = 0 (initial carry limb); the flags it sets are overwritten before any use
tbz x2, #0, L1  // n even -> no single-limb step

// --- n odd: process one limb -------------------------------------------------
ldr x4, [x1],#8  // x4 = *src, src += 8
mul x8, x4, x3  // low  64 bits of src limb * v
umulh x12, x4, x3  // high 64 bits of src limb * v
ldr x4, [x0]  // x4 = *dst (x4 reused; the source limb is no longer needed)
subs x8, x4, x8  // dst limb - low product; C = 1 means no borrow
csinc x15, x12, x12, cs  // carry limb = high product, +1 if the subtraction borrowed
str x8, [x0],#8  // store result, dst += 8

L1: tbz x2, #1, L2  // bit 1 of n clear -> no two-limb step

// --- bit 1 of n set: process two limbs ---------------------------------------
ldp x4, x5, [x1],#16  // two source limbs, src += 16
mul x8, x4, x3
umulh x12, x4, x3
mul x9, x5, x3
umulh x13, x5, x3
adds x8, x8, x15  // fold the incoming carry limb into the lowest product limb
adcs x9, x9, x12  // lo(limb1) + hi(limb0) + carry
ldp x4, x5, [x0]  // two dst limbs (load leaves the flags alone)
adc x15, x13, xzr  // x15 = hi(limb1) + carry out of the additions
subs x8, x4, x8  // subtract the 2-limb subtrahend from dst ...
sbcs x9, x5, x9  // ... propagating the borrow
csinc x15, x15, x15, cs  // carry limb += 1 if the subtraction borrowed
stp x8, x9, [x0],#16  // store both results, dst += 16

L2: lsr x2, x2, #2  // x2 = number of 4-limb groups left
cbz x2, Lle3  // none -> return the carry limb collected so far
// Preload the first group of four source limbs and enter the loop at Lmid,
// skipping the "finish previous group" part at Ltop.
ldp x4, x5, [x1],#32  // limbs 0,1; src += 32
ldp x6, x7, [x1,#-16]  // limbs 2,3 (src already advanced, hence -16)
b Lmid
Lle3: mov x0, x15  // return value = carry limb
ret

// Loop-head alignment (2^4 bytes where .align takes an exponent).
.align 4
// --- 4-limb loop ------------------------------------------------------------
// Ltop: load the source limbs of the NEXT group, then finish the PREVIOUS
// group (subtract, store, fold the borrow into the carry limb).
Ltop: ldp x4, x5, [x1],#32  // next group's limbs 0,1; src += 32
ldp x6, x7, [x1,#-16]  // next group's limbs 2,3
subs x8, x16, x8  // previous group: dst - subtrahend, limb 0
sbcs x9, x17, x9  // limb 1
stp x8, x9, [x0],#32  // store limbs 0,1; dst += 32 (stores do not touch the flags)
sbcs x10, x12, x10  // limb 2 (x12/x13 hold dst limbs 2,3 here)
sbcs x11, x13, x11  // limb 3
stp x10, x11, [x0,#-16]  // store limbs 2,3 (dst already advanced, hence -16)
csinc x15, x15, x15, cs  // carry limb += 1 if the 4-limb subtraction borrowed
// Lmid: form the 4-limb subtrahend for the current group:
//   x8..x11 = low products + previous high products + incoming carry limb,
//   x15     = last high product + carry out.
Lmid: sub x2, x2, #1  // one group fewer to go (plain sub: flags untouched)
mul x8, x4, x3
umulh x12, x4, x3
mul x9, x5, x3
umulh x13, x5, x3
adds x8, x8, x15  // lo0 + incoming carry limb (last read of the old x15)
mul x10, x6, x3
umulh x14, x6, x3
adcs x9, x9, x12  // lo1 + hi0 + carry
mul x11, x7, x3
umulh x15, x7, x3  // hi3 goes straight into x15, the next carry limb
adcs x10, x10, x13  // lo2 + hi1 + carry
ldp x16, x17, [x0]  // dst limbs 0,1 of this group
adcs x11, x11, x14  // lo3 + hi2 + carry
ldp x12, x13, [x0,#16]  // dst limbs 2,3 (x12/x13 are free: already consumed above)
adc x15, x15, xzr  // x15 = hi3 + carry out of the additions
cbnz x2, Ltop  // more groups: Ltop finishes this one while loading the next

// --- finish the last group and return ----------------------------------------
subs x8, x16, x8
sbcs x9, x17, x9
sbcs x10, x12, x10
sbcs x11, x13, x11
stp x8, x9, [x0]  // final stores, no pointer update needed
stp x10, x11, [x0,#16]
csinc x0, x15, x15, cs  // return carry limb, +1 if the final subtraction borrowed
ret
|