; path: root/CryptoPkg/Library/OpensslLib/OpensslGen/IA32-MSFT/crypto/bn/x86-gf2m.nasm
; blob: 7750777de2bea10722dfc3067a769c347ed00931
; Code-section selection: choose the section declaration that matches the
; object format NASM is currently emitting (__OUTPUT_FORMAT__).
%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
; NOTE(review): $@feat.00 is presumably the marker symbol the Microsoft
; linker inspects for SAFESEH compatibility -- confirm against toolchain docs.
$@feat.00 equ 1
section .text   code align=64
%else
section .text   code
%endif
; The extern below is commented out; _OPENSSL_ia32cap_P is declared as a
; `common` symbol at the end of this file.
;extern _OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; __mul_1x1_mmx -- 32x32 -> 64-bit carry-less (XOR-accumulating) multiply,
;                  accumulating the result in an MMX register.
; File-local helper with a register-based convention (not a stack ABI).
; In:    eax = a, ebx = b
; Out:   mm0 = 64-bit carry-less product of a and b
; Clobb: ebx, ecx, edx, ebp, esi, edi, mm1-mm5, flags (eax is not written)
; Stack: reserves 36 bytes; the first 32 hold an 8-entry dword table
;        tab[i] = carry-less product of i and (a & 0x3FFFFFFF), i = 0..7.
; Method: b is consumed in 3-bit windows at bit offsets 0,3,...,30. Each
;        window indexes tab[]; the entry is zero-extended to 64 bits,
;        shifted left by the window offset and XORed into mm0. Bits 31 and
;        30 of a are masked out of the table and are folded in separately
;        as (b << 31) in mm5 and (b << 30) in mm4.
; NOTE(review): the scalar and MMX instruction streams are interleaved,
; presumably for scheduling -- keep the order when editing.
;-----------------------------------------------------------------------
align   16
__mul_1x1_mmx:
        sub     esp,36
        ; ecx = a1 = a & 0x3FFFFFFF, edx = a2 = (a << 1) & 0x7FFFFFFF,
        ; ebp = a4 = a << 2 (truncated to 32 bits)
        mov     ecx,eax
        lea     edx,[eax*1+eax]
        and     ecx,1073741823
        lea     ebp,[edx*1+edx]
        ; tab[0] = 0
        mov     DWORD [esp],0
        and     edx,2147483647
        ; mm2 = a, mm3 = b (each zero-extended to 64 bits)
        movd    mm2,eax
        movd    mm3,ebx
        ; tab[1] = a1
        mov     DWORD [4+esp],ecx
        xor     ecx,edx
        pxor    mm5,mm5
        pxor    mm4,mm4
        ; tab[2] = a2
        mov     DWORD [8+esp],edx
        xor     edx,ebp
        ; tab[3] = a1 ^ a2
        mov     DWORD [12+esp],ecx
        ; mm5 low dword = all-ones when 0 > a (signed), i.e. bit 31 of a is set
        pcmpgtd mm5,mm2
        ; mm2 = a + a, so its sign bit is bit 30 of a
        paddd   mm2,mm2
        xor     ecx,edx
        ; tab[4] = a4
        mov     DWORD [16+esp],ebp
        xor     ebp,edx
        ; mm5 = b if bit 31 of a is set, else 0
        pand    mm5,mm3
        ; mm4 low dword = all-ones when bit 30 of a is set
        pcmpgtd mm4,mm2
        ; tab[5] = a1 ^ a4
        mov     DWORD [20+esp],ecx
        xor     ebp,ecx
        ; mm5 = (bit 31 of a) ? b << 31 : 0
        psllq   mm5,31
        ; mm4 = b if bit 30 of a is set, else 0
        pand    mm4,mm3
        ; tab[6] = a2 ^ a4
        mov     DWORD [24+esp],edx
        mov     esi,7
        ; tab[7] = a1 ^ a2 ^ a4
        mov     DWORD [28+esp],ebp
        ; ebp = 7: window mask kept in a register for the rest of the routine
        mov     ebp,esi
        ; esi / edi alternate as the index of the current / next window of b
        and     esi,ebx
        shr     ebx,3
        mov     edi,ebp
        ; mm4 = (bit 30 of a) ? b << 30 : 0
        psllq   mm4,30
        and     edi,ebx
        shr     ebx,3
        ; window at bit offset 0: mm0 = tab[b & 7]
        movd    mm0,DWORD [esi*4+esp]
        mov     esi,ebp
        and     esi,ebx
        shr     ebx,3
        ; window at bit offset 3
        movd    mm2,DWORD [edi*4+esp]
        mov     edi,ebp
        psllq   mm2,3
        and     edi,ebx
        shr     ebx,3
        pxor    mm0,mm2
        ; window at bit offset 6
        movd    mm1,DWORD [esi*4+esp]
        mov     esi,ebp
        psllq   mm1,6
        and     esi,ebx
        shr     ebx,3
        pxor    mm0,mm1
        ; window at bit offset 9
        movd    mm2,DWORD [edi*4+esp]
        mov     edi,ebp
        psllq   mm2,9
        and     edi,ebx
        shr     ebx,3
        pxor    mm0,mm2
        ; window at bit offset 12
        movd    mm1,DWORD [esi*4+esp]
        mov     esi,ebp
        psllq   mm1,12
        and     esi,ebx
        shr     ebx,3
        pxor    mm0,mm1
        ; window at bit offset 15
        movd    mm2,DWORD [edi*4+esp]
        mov     edi,ebp
        psllq   mm2,15
        and     edi,ebx
        shr     ebx,3
        pxor    mm0,mm2
        ; window at bit offset 18
        movd    mm1,DWORD [esi*4+esp]
        mov     esi,ebp
        psllq   mm1,18
        and     esi,ebx
        shr     ebx,3
        pxor    mm0,mm1
        ; window at bit offset 21
        movd    mm2,DWORD [edi*4+esp]
        mov     edi,ebp
        psllq   mm2,21
        and     edi,ebx
        shr     ebx,3
        pxor    mm0,mm2
        ; window at bit offset 24
        movd    mm1,DWORD [esi*4+esp]
        mov     esi,ebp
        psllq   mm1,24
        and     esi,ebx
        shr     ebx,3
        pxor    mm0,mm1
        ; window at bit offset 27; also fold in the bit-30-of-a correction (mm4)
        movd    mm2,DWORD [edi*4+esp]
        pxor    mm0,mm4
        psllq   mm2,27
        pxor    mm0,mm2
        ; window at bit offset 30 (only two bits of b remain); also fold in
        ; the bit-31-of-a correction (mm5)
        movd    mm1,DWORD [esi*4+esp]
        pxor    mm0,mm5
        psllq   mm1,30
        ; release the table before the final accumulate; result stays in mm0
        add     esp,36
        pxor    mm0,mm1
        ret
;-----------------------------------------------------------------------
; __mul_1x1_ialu -- 32x32 -> 64-bit carry-less (XOR-accumulating) multiply
;                   using only general-purpose integer registers.
; File-local helper with a register-based convention (not a stack ABI).
; In:    eax = a, ebx = b
; Out:   edx:eax = 64-bit carry-less product (edx = high dword, eax = low)
; Clobb: ebx, ecx, ebp, esi, edi, flags
; Stack: reserves 36 bytes; the first 32 hold an 8-entry dword table
;        tab[i] = carry-less product of i and (a & 0x3FFFFFFF), i = 0..7.
; Method: b is consumed in 3-bit windows at bit offsets k = 0,3,...,30.
;        For each window v = tab[window]; low ^= v << k, high ^= v >> (32-k).
;        Bits 31 and 30 of a are masked out of the table and are folded in
;        first as (b << 31) and (b << 30), split across edx:eax.
; NOTE(review): table construction, correction terms and window extraction
; are interleaved, presumably for scheduling -- keep the order when editing.
;-----------------------------------------------------------------------
align   16
__mul_1x1_ialu:
        sub     esp,36
        ; ecx = a1 = a & 0x3FFFFFFF, edx = a2 = (a << 1) & 0x7FFFFFFF,
        ; ebp = a4 = a << 2 (truncated to 32 bits)
        mov     ecx,eax
        lea     edx,[eax*1+eax]
        lea     ebp,[eax*4]
        and     ecx,1073741823
        ; edi = a << 1, so that its sign bit is bit 30 of a
        lea     edi,[eax*1+eax]
        ; eax = all-ones if bit 31 of a is set, else 0
        sar     eax,31
        ; tab[0] = 0
        mov     DWORD [esp],0
        and     edx,2147483647
        ; tab[1] = a1
        mov     DWORD [4+esp],ecx
        xor     ecx,edx
        ; tab[2] = a2
        mov     DWORD [8+esp],edx
        xor     edx,ebp
        ; tab[3] = a1 ^ a2
        mov     DWORD [12+esp],ecx
        xor     ecx,edx
        ; tab[4] = a4
        mov     DWORD [16+esp],ebp
        xor     ebp,edx
        ; tab[5] = a1 ^ a4
        mov     DWORD [20+esp],ecx
        xor     ebp,ecx
        ; edi = all-ones if bit 30 of a is set, else 0
        sar     edi,31
        ; eax = b if bit 31 of a is set, else 0
        and     eax,ebx
        ; tab[6] = a2 ^ a4
        mov     DWORD [24+esp],edx
        ; edi = b if bit 30 of a is set, else 0
        and     edi,ebx
        ; tab[7] = a1 ^ a2 ^ a4
        mov     DWORD [28+esp],ebp
        ; edx:eax = (bit 31 term) << 31, split into high (>> 1) and low (<< 31)
        mov     edx,eax
        shl     eax,31
        ; ecx:edi = (bit 30 term) << 30, split into high (>> 2) and low (<< 30)
        mov     ecx,edi
        shr     edx,1
        mov     esi,7
        shl     edi,30
        ; esi / edi alternate as the index of the current / next window of b
        and     esi,ebx
        shr     ecx,2
        xor     eax,edi
        shr     ebx,3
        mov     edi,7
        and     edi,ebx
        shr     ebx,3
        xor     edx,ecx
        ; window at bit offset 0: low ^= tab[b & 7]
        xor     eax,DWORD [esi*4+esp]
        mov     esi,7
        and     esi,ebx
        shr     ebx,3
        ; window at bit offset 3
        mov     ebp,DWORD [edi*4+esp]
        mov     edi,7
        mov     ecx,ebp
        shl     ebp,3
        and     edi,ebx
        shr     ecx,29
        xor     eax,ebp
        shr     ebx,3
        xor     edx,ecx
        ; window at bit offset 6
        mov     ecx,DWORD [esi*4+esp]
        mov     esi,7
        mov     ebp,ecx
        shl     ecx,6
        and     esi,ebx
        shr     ebp,26
        xor     eax,ecx
        shr     ebx,3
        xor     edx,ebp
        ; window at bit offset 9
        mov     ebp,DWORD [edi*4+esp]
        mov     edi,7
        mov     ecx,ebp
        shl     ebp,9
        and     edi,ebx
        shr     ecx,23
        xor     eax,ebp
        shr     ebx,3
        xor     edx,ecx
        ; window at bit offset 12
        mov     ecx,DWORD [esi*4+esp]
        mov     esi,7
        mov     ebp,ecx
        shl     ecx,12
        and     esi,ebx
        shr     ebp,20
        xor     eax,ecx
        shr     ebx,3
        xor     edx,ebp
        ; window at bit offset 15
        mov     ebp,DWORD [edi*4+esp]
        mov     edi,7
        mov     ecx,ebp
        shl     ebp,15
        and     edi,ebx
        shr     ecx,17
        xor     eax,ebp
        shr     ebx,3
        xor     edx,ecx
        ; window at bit offset 18
        mov     ecx,DWORD [esi*4+esp]
        mov     esi,7
        mov     ebp,ecx
        shl     ecx,18
        and     esi,ebx
        shr     ebp,14
        xor     eax,ecx
        shr     ebx,3
        xor     edx,ebp
        ; window at bit offset 21
        mov     ebp,DWORD [edi*4+esp]
        mov     edi,7
        mov     ecx,ebp
        shl     ebp,21
        and     edi,ebx
        shr     ecx,11
        xor     eax,ebp
        shr     ebx,3
        xor     edx,ecx
        ; window at bit offset 24
        mov     ecx,DWORD [esi*4+esp]
        mov     esi,7
        mov     ebp,ecx
        shl     ecx,24
        and     esi,ebx
        shr     ebp,8
        xor     eax,ecx
        shr     ebx,3
        xor     edx,ebp
        ; window at bit offset 27
        mov     ebp,DWORD [edi*4+esp]
        mov     ecx,ebp
        shl     ebp,27
        ; window at bit offset 30 (only two bits of b remain)
        mov     edi,DWORD [esi*4+esp]
        shr     ecx,5
        mov     esi,edi
        xor     eax,ebp
        shl     edi,30
        xor     edx,ecx
        shr     esi,2
        xor     eax,edi
        xor     edx,esi
        ; release the table; result is returned in edx:eax
        add     esp,36
        ret
;-----------------------------------------------------------------------
; _bn_GF2m_mul_2x2 -- 64x64 -> 128-bit carry-less multiply of two operands,
;                     each passed as a pair of 32-bit words.
; ABI:  32-bit, all arguments on the stack. Offsets at entry:
;         [esp+4]  = r, pointer to four result dwords (least significant first)
;         [esp+8]  = high dword of the first operand
;         [esp+12] = low  dword of the first operand
;         [esp+16] = high dword of the second operand
;         [esp+20] = low  dword of the second operand
;       NOTE(review): presumably matches the C prototype
;       bn_GF2m_mul_2x2(BN_ULONG *r, a1, a0, b1, b0) -- confirm at the caller.
; Out:  r[0..3] receive the 128-bit product; no value is set up in eax.
; Regs: ebp, ebx, esi, edi are saved and restored on the MMX and integer
;       paths; the first path touches only eax, edx and xmm0.
; Dispatch on the first two dwords of _OPENSSL_ia32cap_P:
;   dword0 bit 23 clear                      -> integer path (L$000ialu)
;   dword0 bit 24 clear or dword1 bit 1 clear -> MMX path (L$001mmx)
;   otherwise                                -> single carry-less multiply
;                                               instruction on xmm0
;   NOTE(review): these bits are presumably the MMX, FXSR and PCLMULQDQ
;   CPUID feature flags in OpenSSL's ia32cap layout -- confirm.
; The MMX and integer paths use Karatsuba: three 32x32 products
;   P1 = hi*hi, P0 = lo*lo, M = (hi^lo)*(hi^lo), combined as
;   P1 << 64  ^  (M ^ P1 ^ P0) << 32  ^  P0   (all operations are XOR).
;-----------------------------------------------------------------------
global  _bn_GF2m_mul_2x2
align   16
_bn_GF2m_mul_2x2:
L$_bn_GF2m_mul_2x2_begin:
        ; eax = capability dword 0, edx = capability dword 1
        lea     edx,[_OPENSSL_ia32cap_P]
        mov     eax,DWORD [edx]
        mov     edx,DWORD [4+edx]
        ; 8388608 = 1 << 23
        test    eax,8388608
        jz      NEAR L$000ialu
        ; 16777216 = 1 << 24
        test    eax,16777216
        jz      NEAR L$001mmx
        test    edx,2
        jz      NEAR L$001mmx
        ; Load all four operand dwords at once (unaligned load from the stack).
        movups  xmm0,[8+esp]
        ; 177 = 0xB1: swap the dwords within each 64-bit half so that each
        ; half holds one operand with its low dword first.
        shufps  xmm0,xmm0,177
        ; Hand-encoded 66 0F 3A 44 C0 01 = pclmulqdq xmm0,xmm0,1
        ; (high qword of xmm0 times low qword of xmm0).
db      102,15,58,68,192,1
        ; Store the 128-bit product to r (unaligned store).
        mov     eax,DWORD [4+esp]
        movups  [eax],xmm0
        ret
align   16
L$001mmx:
        ; Four pushes shift the arguments by 16: r is now at [esp+20],
        ; the operand dwords at [esp+24] .. [esp+36].
        push    ebp
        push    ebx
        push    esi
        push    edi
        ; mm7 = P1 = hi(first) * hi(second)
        mov     eax,DWORD [24+esp]
        mov     ebx,DWORD [32+esp]
        call    __mul_1x1_mmx
        movq    mm7,mm0
        ; mm6 = P0 = lo(first) * lo(second)
        mov     eax,DWORD [28+esp]
        mov     ebx,DWORD [36+esp]
        call    __mul_1x1_mmx
        movq    mm6,mm0
        ; mm0 = M = (hi ^ lo of first) * (hi ^ lo of second)
        mov     eax,DWORD [24+esp]
        mov     ebx,DWORD [32+esp]
        xor     eax,DWORD [28+esp]
        xor     ebx,DWORD [36+esp]
        call    __mul_1x1_mmx
        ; mm0 = middle term M ^ P1 ^ P0; eax = r
        pxor    mm0,mm7
        mov     eax,DWORD [20+esp]
        pxor    mm0,mm6
        ; Split the middle term across the two 64-bit result halves
        ; (register restores are interleaved with the MMX work).
        movq    mm2,mm0
        psllq   mm0,32
        pop     edi
        psrlq   mm2,32
        pop     esi
        ; low half  = P0 ^ (middle << 32)
        pxor    mm0,mm6
        pop     ebx
        ; high half = P1 ^ (middle >> 32)
        pxor    mm2,mm7
        movq    [eax],mm0
        pop     ebp
        movq    [8+eax],mm2
        ; Leave MMX state before returning to the caller.
        emms
        ret
align   16
L$000ialu:
        ; Four pushes plus 20 bytes of locals shift the arguments by 36:
        ; r is now at [esp+40], the operand dwords at [esp+44] .. [esp+56].
        push    ebp
        push    ebx
        push    esi
        push    edi
        sub     esp,20
        ; P1 = hi(first) * hi(second) -> [esp+8] (low dword), [esp+12] (high)
        mov     eax,DWORD [44+esp]
        mov     ebx,DWORD [52+esp]
        call    __mul_1x1_ialu
        mov     DWORD [8+esp],eax
        mov     DWORD [12+esp],edx
        ; P0 = lo(first) * lo(second) -> [esp+0] (low dword), [esp+4] (high)
        mov     eax,DWORD [48+esp]
        mov     ebx,DWORD [56+esp]
        call    __mul_1x1_ialu
        mov     DWORD [esp],eax
        mov     DWORD [4+esp],edx
        ; edx:eax = M = (hi ^ lo of first) * (hi ^ lo of second)
        mov     eax,DWORD [44+esp]
        mov     ebx,DWORD [52+esp]
        xor     eax,DWORD [48+esp]
        xor     ebx,DWORD [56+esp]
        call    __mul_1x1_ialu
        ; ebp = r; ebx = P0.lo, ecx = P0.hi, edi = P1.lo, esi = P1.hi
        mov     ebp,DWORD [40+esp]
        mov     ebx,DWORD [esp]
        mov     ecx,DWORD [4+esp]
        mov     edi,DWORD [8+esp]
        mov     esi,DWORD [12+esp]
        ; Recombine (stack cleanup and register restores are interleaved):
        ;   r[0] = P0.lo
        ;   r[1] = M.lo ^ P0.lo ^ P0.hi ^ P1.lo
        ;   r[2] = M.hi ^ P0.hi ^ P1.lo ^ P1.hi
        ;   r[3] = P1.hi
        xor     eax,edx
        xor     edx,ecx
        xor     eax,ebx
        mov     DWORD [ebp],ebx
        xor     edx,edi
        mov     DWORD [12+ebp],esi
        xor     eax,esi
        add     esp,20
        xor     edx,esi
        pop     edi
        xor     eax,edx
        pop     esi
        mov     DWORD [8+ebp],edx
        pop     ebx
        mov     DWORD [4+ebp],eax
        ; ebp still holds r for the store above; it is restored last
        pop     ebp
        ret
; NUL-terminated ASCII identification string, emitted right after the code:
; "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
db      71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
db      99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
db      67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
db      112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
db      62,0
; 16-byte common symbol; _bn_GF2m_mul_2x2 reads its first two dwords to
; choose an implementation.
segment .bss
common  _OPENSSL_ia32cap_P 16