1 | default rel
|
---|
2 | %define XMMWORD
|
---|
3 | %define YMMWORD
|
---|
4 | %define ZMMWORD
|
---|
5 | section .text code align=64
|
---|
6 |
|
---|
7 | EXTERN OPENSSL_ia32cap_P
|
---|
8 |
|
---|
9 | global gcm_gmult_4bit
|
---|
10 |
|
---|
11 | ALIGN 16
|
---|
; gcm_gmult_4bit -- Win64 entry point; the two arguments arrive in rcx and rdx.
;   rcx: base of a 16-byte buffer. It is read one byte at a time (offset 15
;        down to 0) and finally overwritten with the two byte-swapped result
;        qwords.
;   rdx: base of a lookup table addressed as 16-byte entries by (nibble << 4).
; $L$rem_4bit (defined outside this block) is read as 8-byte entries indexed
; by the 4 bits shifted out of r8 on every step.
; Working state: r9 = upper qword, r8 = lower qword of a 128-bit accumulator.
; NOTE(review): presumably the GHASH "Xi = Xi * H" 4-bit table multiply --
; confirm against the C prototype.
12 | gcm_gmult_4bit:
|
---|
13 | mov QWORD[8+rsp],rdi ;WIN64 prologue: park rdi in the slot just above the return address
|
---|
14 | mov QWORD[16+rsp],rsi ; park rsi in the next slot; both are reloaded in the epilogue
|
---|
15 | mov rax,rsp ; rax is overwritten below before being read; NOTE(review): presumably kept for the unwind handler -- confirm
|
---|
16 | $L$SEH_begin_gcm_gmult_4bit:
|
---|
17 | mov rdi,rcx ; 1st argument -> rdi
|
---|
18 | mov rsi,rdx ; 2nd argument -> rsi
|
---|
19 |
|
---|
20 |
|
---|
21 |
|
---|
22 | DB 243,15,30,250 ; bytes F3 0F 1E FA = endbr64
|
---|
23 | push rbx
|
---|
24 |
|
---|
25 | push rbp
|
---|
26 |
|
---|
27 | push r12
|
---|
28 |
|
---|
29 | push r13
|
---|
30 |
|
---|
31 | push r14
|
---|
32 |
|
---|
33 | push r15
|
---|
34 |
|
---|
35 | sub rsp,280 ; reserve 280 bytes; this function never addresses them
|
---|
36 |
|
---|
37 | $L$gmult_prologue:
|
---|
38 |
|
---|
39 | movzx r8,BYTE[15+rdi] ; start with the last byte of the buffer
|
---|
40 | lea r11,[$L$rem_4bit] ; r11 = base of the 8-byte-entry table used for the shifted-out bits
|
---|
41 | xor rax,rax
|
---|
42 | xor rbx,rbx
|
---|
43 | mov al,r8b
|
---|
44 | mov bl,r8b
|
---|
45 | shl al,4 ; al = low nibble * 16 = byte offset of a 16-byte table entry
|
---|
46 | mov rcx,14 ; rcx = index of the next buffer byte to fetch
|
---|
47 | mov r8,QWORD[8+rax*1+rsi] ; accumulator = table entry selected by the low nibble
|
---|
48 | mov r9,QWORD[rax*1+rsi]
|
---|
49 | and bl,0xf0 ; bl = high nibble * 16
|
---|
50 | mov rdx,r8 ; copy whose low 4 bits become the $L$rem_4bit index
|
---|
51 | jmp NEAR $L$oop1
|
---|
52 |
|
---|
53 | ALIGN 16
; Each pass consumes one buffer byte. First half: shift the accumulator right
; by 4 and fold in the entry for bl (high nibble of the previous byte) while
; fetching the next byte. Second half: the same shift, folding in the entry
; for al (low nibble of the byte just fetched).
54 | $L$oop1:
|
---|
55 | shr r8,4
|
---|
56 | and rdx,0xf ; rdx = the 4 bits about to leave r8
|
---|
57 | mov r10,r9
|
---|
58 | mov al,BYTE[rcx*1+rdi] ; fetch next byte (upper bits of rax are still zero)
|
---|
59 | shr r9,4
|
---|
60 | xor r8,QWORD[8+rbx*1+rsi]
|
---|
61 | shl r10,60 ; low 4 bits of r9 move to the top of r8 (xor below)
|
---|
62 | xor r9,QWORD[rbx*1+rsi]
|
---|
63 | mov bl,al
|
---|
64 | xor r9,QWORD[rdx*8+r11] ; fold the table value for the shifted-out bits into the upper qword
|
---|
65 | mov rdx,r8 ; taken before the r10 xor: r10 only carries bits 60..63
|
---|
66 | shl al,4
|
---|
67 | xor r8,r10
|
---|
68 | dec rcx
|
---|
69 | js NEAR $L$break1 ; byte 0 has just been fetched: finish outside the loop
|
---|
70 |
|
---|
71 | shr r8,4
|
---|
72 | and rdx,0xf
|
---|
73 | mov r10,r9
|
---|
74 | shr r9,4
|
---|
75 | xor r8,QWORD[8+rax*1+rsi]
|
---|
76 | shl r10,60
|
---|
77 | xor r9,QWORD[rax*1+rsi]
|
---|
78 | and bl,0xf0
|
---|
79 | xor r9,QWORD[rdx*8+r11]
|
---|
80 | mov rdx,r8
|
---|
81 | xor r8,r10
|
---|
82 | jmp NEAR $L$oop1
|
---|
83 |
|
---|
84 | ALIGN 16
; Tail: the two nibbles of buffer byte 0 (al first, then bl) are still pending.
85 | $L$break1:
|
---|
86 | shr r8,4
|
---|
87 | and rdx,0xf
|
---|
88 | mov r10,r9
|
---|
89 | shr r9,4
|
---|
90 | xor r8,QWORD[8+rax*1+rsi]
|
---|
91 | shl r10,60
|
---|
92 | xor r9,QWORD[rax*1+rsi]
|
---|
93 | and bl,0xf0
|
---|
94 | xor r9,QWORD[rdx*8+r11]
|
---|
95 | mov rdx,r8
|
---|
96 | xor r8,r10
|
---|
97 |
|
---|
98 | shr r8,4
|
---|
99 | and rdx,0xf
|
---|
100 | mov r10,r9
|
---|
101 | shr r9,4
|
---|
102 | xor r8,QWORD[8+rbx*1+rsi]
|
---|
103 | shl r10,60
|
---|
104 | xor r9,QWORD[rbx*1+rsi]
|
---|
105 | xor r8,r10
|
---|
106 | xor r9,QWORD[rdx*8+r11]
|
---|
107 |
|
---|
108 | bswap r8 ; reverse the byte order of each half before storing
|
---|
109 | bswap r9
|
---|
110 | mov QWORD[8+rdi],r8 ; write the result back over the input buffer
|
---|
111 | mov QWORD[rdi],r9
|
---|
112 |
|
---|
113 | lea rsi,[((280+48))+rsp] ; rsi = rsp as it was before the six pushes (6*8 = 48) and the 280-byte reservation
|
---|
114 |
|
---|
115 | mov rbx,QWORD[((-8))+rsi] ; only rbx is reloaded; rbp and r12-r15 were pushed but are never written here
|
---|
116 |
|
---|
117 | lea rsp,[rsi]
|
---|
118 |
|
---|
119 | $L$gmult_epilogue:
|
---|
120 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi/rsi from the slots filled at entry
|
---|
121 | mov rsi,QWORD[16+rsp]
|
---|
122 | DB 0F3h,0C3h ;repret
|
---|
123 |
|
---|
124 | $L$SEH_end_gcm_gmult_4bit:
|
---|
125 | global gcm_ghash_4bit
|
---|
126 |
|
---|
127 | ALIGN 16
|
---|
; gcm_ghash_4bit -- Win64 entry point; four arguments arrive in rcx, rdx, r8, r9.
;   rcx -> rdi: 16-byte state buffer, loaded before the loop and stored after it
;               (it is also used as scratch for the xored block inside the loop).
;   rdx -> rsi: lookup table of 16 entries, 16 bytes each.
;   r8  -> r14: input pointer, advanced by 16 per outer-loop pass.
;   r9  -> r15: byte count; turned into an end pointer (r14 + r15).
; Stack frame built before the loop (rbp = rsp+144):
;   [rsp+i]         i = 0..15 : (low byte of entry i's second qword) << 4, one byte each
;   [rbp-128+8*i]             : low  qword of (entry i >> 4)
;   [rbp+8*i]                 : high qword of (entry i >> 4)
; $L$rem_8bit (defined outside this block) is read as 2-byte entries indexed by a byte.
; The outer loop is bottom-tested: its body runs once before r14 is first
; compared with r15.
; NOTE(review): presumably the multi-block GHASH update -- confirm against the C prototype.
128 | gcm_ghash_4bit:
|
---|
129 | mov QWORD[8+rsp],rdi ;WIN64 prologue: park rdi in the slot just above the return address
|
---|
130 | mov QWORD[16+rsp],rsi ; park rsi in the next slot; both are reloaded in the epilogue
|
---|
131 | mov rax,rsp ; rax is overwritten below before being read; NOTE(review): presumably kept for the unwind handler -- confirm
|
---|
132 | $L$SEH_begin_gcm_ghash_4bit:
|
---|
133 | mov rdi,rcx ; 1st argument -> rdi
|
---|
134 | mov rsi,rdx ; 2nd argument -> rsi
|
---|
135 | mov rdx,r8 ; 3rd argument -> rdx
|
---|
136 | mov rcx,r9 ; 4th argument -> rcx
|
---|
137 |
|
---|
138 |
|
---|
139 |
|
---|
140 | DB 243,15,30,250 ; bytes F3 0F 1E FA = endbr64
|
---|
141 | push rbx
|
---|
142 |
|
---|
143 | push rbp
|
---|
144 |
|
---|
145 | push r12
|
---|
146 |
|
---|
147 | push r13
|
---|
148 |
|
---|
149 | push r14
|
---|
150 |
|
---|
151 | push r15
|
---|
152 |
|
---|
153 | sub rsp,280 ; room for the 16-byte + 2*128-byte tables built below
|
---|
154 |
|
---|
155 | $L$ghash_prologue:
|
---|
156 | mov r14,rdx ; r14 = input pointer
|
---|
157 | mov r15,rcx ; r15 = byte count (becomes the end pointer below)
|
---|
158 | sub rsi,-128 ; bias rsi by +128; the table loads below subtract it again in their displacement
|
---|
159 | lea rbp,[((16+128))+rsp] ; rbp = rsp+144, midpoint of the two qword tables
|
---|
160 | xor edx,edx
|
---|
; Table build, fully unrolled and interleaved for entries 0..15. For each
; 16-byte entry: the 128-bit value is shifted right by 4 and its two qwords
; are stored at [rbp+8*i] / [rbp-128+8*i]; the low byte of its second qword,
; shifted left by 4, is stored at [rsp+i].
161 | mov r8,QWORD[((0+0-128))+rsi] ; entry 0, first qword
|
---|
162 | mov rax,QWORD[((0+8-128))+rsi] ; entry 0, second qword
|
---|
163 | mov dl,al
|
---|
164 | shr rax,4
|
---|
165 | mov r10,r8
|
---|
166 | shr r8,4
|
---|
167 | mov r9,QWORD[((16+0-128))+rsi] ; entry 1 is fetched while entry 0 is still being processed
|
---|
168 | shl dl,4
|
---|
169 | mov rbx,QWORD[((16+8-128))+rsi]
|
---|
170 | shl r10,60 ; low 4 bits of the first qword, moved to the top
|
---|
171 | mov BYTE[rsp],dl
|
---|
172 | or rax,r10 ; second qword of (entry >> 4)
|
---|
173 | mov dl,bl
|
---|
174 | shr rbx,4
|
---|
175 | mov r10,r9
|
---|
176 | shr r9,4
|
---|
177 | mov QWORD[rbp],r8
|
---|
178 | mov r8,QWORD[((32+0-128))+rsi]
|
---|
179 | shl dl,4
|
---|
180 | mov QWORD[((0-128))+rbp],rax
|
---|
181 | mov rax,QWORD[((32+8-128))+rsi]
|
---|
182 | shl r10,60
|
---|
183 | mov BYTE[1+rsp],dl
|
---|
184 | or rbx,r10
|
---|
185 | mov dl,al
|
---|
186 | shr rax,4
|
---|
187 | mov r10,r8
|
---|
188 | shr r8,4
|
---|
189 | mov QWORD[8+rbp],r9
|
---|
190 | mov r9,QWORD[((48+0-128))+rsi]
|
---|
191 | shl dl,4
|
---|
192 | mov QWORD[((8-128))+rbp],rbx
|
---|
193 | mov rbx,QWORD[((48+8-128))+rsi]
|
---|
194 | shl r10,60
|
---|
195 | mov BYTE[2+rsp],dl
|
---|
196 | or rax,r10
|
---|
197 | mov dl,bl
|
---|
198 | shr rbx,4
|
---|
199 | mov r10,r9
|
---|
200 | shr r9,4
|
---|
201 | mov QWORD[16+rbp],r8
|
---|
202 | mov r8,QWORD[((64+0-128))+rsi]
|
---|
203 | shl dl,4
|
---|
204 | mov QWORD[((16-128))+rbp],rax
|
---|
205 | mov rax,QWORD[((64+8-128))+rsi]
|
---|
206 | shl r10,60
|
---|
207 | mov BYTE[3+rsp],dl
|
---|
208 | or rbx,r10
|
---|
209 | mov dl,al
|
---|
210 | shr rax,4
|
---|
211 | mov r10,r8
|
---|
212 | shr r8,4
|
---|
213 | mov QWORD[24+rbp],r9
|
---|
214 | mov r9,QWORD[((80+0-128))+rsi]
|
---|
215 | shl dl,4
|
---|
216 | mov QWORD[((24-128))+rbp],rbx
|
---|
217 | mov rbx,QWORD[((80+8-128))+rsi]
|
---|
218 | shl r10,60
|
---|
219 | mov BYTE[4+rsp],dl
|
---|
220 | or rax,r10
|
---|
221 | mov dl,bl
|
---|
222 | shr rbx,4
|
---|
223 | mov r10,r9
|
---|
224 | shr r9,4
|
---|
225 | mov QWORD[32+rbp],r8
|
---|
226 | mov r8,QWORD[((96+0-128))+rsi]
|
---|
227 | shl dl,4
|
---|
228 | mov QWORD[((32-128))+rbp],rax
|
---|
229 | mov rax,QWORD[((96+8-128))+rsi]
|
---|
230 | shl r10,60
|
---|
231 | mov BYTE[5+rsp],dl
|
---|
232 | or rbx,r10
|
---|
233 | mov dl,al
|
---|
234 | shr rax,4
|
---|
235 | mov r10,r8
|
---|
236 | shr r8,4
|
---|
237 | mov QWORD[40+rbp],r9
|
---|
238 | mov r9,QWORD[((112+0-128))+rsi]
|
---|
239 | shl dl,4
|
---|
240 | mov QWORD[((40-128))+rbp],rbx
|
---|
241 | mov rbx,QWORD[((112+8-128))+rsi]
|
---|
242 | shl r10,60
|
---|
243 | mov BYTE[6+rsp],dl
|
---|
244 | or rax,r10
|
---|
245 | mov dl,bl
|
---|
246 | shr rbx,4
|
---|
247 | mov r10,r9
|
---|
248 | shr r9,4
|
---|
249 | mov QWORD[48+rbp],r8
|
---|
250 | mov r8,QWORD[((128+0-128))+rsi]
|
---|
251 | shl dl,4
|
---|
252 | mov QWORD[((48-128))+rbp],rax
|
---|
253 | mov rax,QWORD[((128+8-128))+rsi]
|
---|
254 | shl r10,60
|
---|
255 | mov BYTE[7+rsp],dl
|
---|
256 | or rbx,r10
|
---|
257 | mov dl,al
|
---|
258 | shr rax,4
|
---|
259 | mov r10,r8
|
---|
260 | shr r8,4
|
---|
261 | mov QWORD[56+rbp],r9
|
---|
262 | mov r9,QWORD[((144+0-128))+rsi]
|
---|
263 | shl dl,4
|
---|
264 | mov QWORD[((56-128))+rbp],rbx
|
---|
265 | mov rbx,QWORD[((144+8-128))+rsi]
|
---|
266 | shl r10,60
|
---|
267 | mov BYTE[8+rsp],dl
|
---|
268 | or rax,r10
|
---|
269 | mov dl,bl
|
---|
270 | shr rbx,4
|
---|
271 | mov r10,r9
|
---|
272 | shr r9,4
|
---|
273 | mov QWORD[64+rbp],r8
|
---|
274 | mov r8,QWORD[((160+0-128))+rsi]
|
---|
275 | shl dl,4
|
---|
276 | mov QWORD[((64-128))+rbp],rax
|
---|
277 | mov rax,QWORD[((160+8-128))+rsi]
|
---|
278 | shl r10,60
|
---|
279 | mov BYTE[9+rsp],dl
|
---|
280 | or rbx,r10
|
---|
281 | mov dl,al
|
---|
282 | shr rax,4
|
---|
283 | mov r10,r8
|
---|
284 | shr r8,4
|
---|
285 | mov QWORD[72+rbp],r9
|
---|
286 | mov r9,QWORD[((176+0-128))+rsi]
|
---|
287 | shl dl,4
|
---|
288 | mov QWORD[((72-128))+rbp],rbx
|
---|
289 | mov rbx,QWORD[((176+8-128))+rsi]
|
---|
290 | shl r10,60
|
---|
291 | mov BYTE[10+rsp],dl
|
---|
292 | or rax,r10
|
---|
293 | mov dl,bl
|
---|
294 | shr rbx,4
|
---|
295 | mov r10,r9
|
---|
296 | shr r9,4
|
---|
297 | mov QWORD[80+rbp],r8
|
---|
298 | mov r8,QWORD[((192+0-128))+rsi]
|
---|
299 | shl dl,4
|
---|
300 | mov QWORD[((80-128))+rbp],rax
|
---|
301 | mov rax,QWORD[((192+8-128))+rsi]
|
---|
302 | shl r10,60
|
---|
303 | mov BYTE[11+rsp],dl
|
---|
304 | or rbx,r10
|
---|
305 | mov dl,al
|
---|
306 | shr rax,4
|
---|
307 | mov r10,r8
|
---|
308 | shr r8,4
|
---|
309 | mov QWORD[88+rbp],r9
|
---|
310 | mov r9,QWORD[((208+0-128))+rsi]
|
---|
311 | shl dl,4
|
---|
312 | mov QWORD[((88-128))+rbp],rbx
|
---|
313 | mov rbx,QWORD[((208+8-128))+rsi]
|
---|
314 | shl r10,60
|
---|
315 | mov BYTE[12+rsp],dl
|
---|
316 | or rax,r10
|
---|
317 | mov dl,bl
|
---|
318 | shr rbx,4
|
---|
319 | mov r10,r9
|
---|
320 | shr r9,4
|
---|
321 | mov QWORD[96+rbp],r8
|
---|
322 | mov r8,QWORD[((224+0-128))+rsi]
|
---|
323 | shl dl,4
|
---|
324 | mov QWORD[((96-128))+rbp],rax
|
---|
325 | mov rax,QWORD[((224+8-128))+rsi]
|
---|
326 | shl r10,60
|
---|
327 | mov BYTE[13+rsp],dl
|
---|
328 | or rbx,r10
|
---|
329 | mov dl,al
|
---|
330 | shr rax,4
|
---|
331 | mov r10,r8
|
---|
332 | shr r8,4
|
---|
333 | mov QWORD[104+rbp],r9
|
---|
334 | mov r9,QWORD[((240+0-128))+rsi]
|
---|
335 | shl dl,4
|
---|
336 | mov QWORD[((104-128))+rbp],rbx
|
---|
337 | mov rbx,QWORD[((240+8-128))+rsi]
|
---|
338 | shl r10,60
|
---|
339 | mov BYTE[14+rsp],dl
|
---|
340 | or rax,r10
|
---|
341 | mov dl,bl
|
---|
342 | shr rbx,4
|
---|
343 | mov r10,r9
|
---|
344 | shr r9,4
|
---|
345 | mov QWORD[112+rbp],r8
|
---|
346 | shl dl,4
|
---|
347 | mov QWORD[((112-128))+rbp],rax
|
---|
348 | shl r10,60
|
---|
349 | mov BYTE[15+rsp],dl
|
---|
350 | or rbx,r10
|
---|
351 | mov QWORD[120+rbp],r9
|
---|
352 | mov QWORD[((120-128))+rbp],rbx
|
---|
353 | add rsi,-128 ; undo the +128 bias: rsi is the table base again
|
---|
354 | mov r8,QWORD[8+rdi] ; load the current state (second qword in r8, first in r9)
|
---|
355 | mov r9,QWORD[rdi]
|
---|
356 | add r15,r14 ; r15 = input pointer + byte count = end pointer
|
---|
357 | lea r11,[$L$rem_8bit] ; r11 = base of the 2-byte-entry table used for shifted-out bytes
|
---|
358 | jmp NEAR $L$outer_loop
|
---|
359 | ALIGN 16
; One pass per 16-byte input block. The block is xored into the state and the
; result is spilled to [rdi]; its bytes are then consumed from offset 15 down
; to 0, four at a time through edx (rol edx,8 brings the next byte into dl).
; Per byte: al = low nibble << 4 selects a 16-byte entry at rsi; ebx/ecx =
; high nibble selects the pre-shifted entry at rbp / rbp-128 and the byte at
; rsp; the accumulator r9:r8 is shifted right by 8 and the byte that leaves
; it selects a $L$rem_8bit word that is xored into the top 16 bits of r9.
360 | $L$outer_loop:
|
---|
361 | xor r9,QWORD[r14]
|
---|
362 | mov rdx,QWORD[8+r14]
|
---|
363 | lea r14,[16+r14] ; advance the input pointer without touching flags
|
---|
364 | xor rdx,r8
|
---|
365 | mov QWORD[rdi],r9 ; spill state^block so later dwords can be reloaded from memory
|
---|
366 | mov QWORD[8+rdi],rdx
|
---|
367 | shr rdx,32 ; edx = bytes 12..15 of the xored block
|
---|
368 | xor rax,rax
|
---|
369 | rol edx,8 ; dl = byte 15
|
---|
370 | mov al,dl
|
---|
371 | movzx ebx,dl
|
---|
372 | shl al,4
|
---|
373 | shr ebx,4
|
---|
374 | rol edx,8
|
---|
375 | mov r8,QWORD[8+rax*1+rsi]
|
---|
376 | mov r9,QWORD[rax*1+rsi]
|
---|
377 | mov al,dl
|
---|
378 | movzx ecx,dl
|
---|
379 | shl al,4
|
---|
380 | movzx r12,BYTE[rbx*1+rsp]
|
---|
381 | shr ecx,4
|
---|
382 | xor r12,r8
|
---|
383 | mov r10,r9
|
---|
384 | shr r8,8
|
---|
385 | movzx r12,r12b ; keep only the low byte as the $L$rem_8bit index
|
---|
386 | shr r9,8
|
---|
387 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
388 | shl r10,56 ; low byte of r9 moves to the top of r8 (xor below)
|
---|
389 | xor r9,QWORD[rbx*8+rbp]
|
---|
390 | rol edx,8
|
---|
391 | xor r8,QWORD[8+rax*1+rsi]
|
---|
392 | xor r9,QWORD[rax*1+rsi]
|
---|
393 | mov al,dl
|
---|
394 | xor r8,r10
|
---|
395 | movzx r12,WORD[r12*2+r11]
|
---|
396 | movzx ebx,dl
|
---|
397 | shl al,4
|
---|
398 | movzx r13,BYTE[rcx*1+rsp]
|
---|
399 | shr ebx,4
|
---|
400 | shl r12,48
|
---|
401 | xor r13,r8
|
---|
402 | mov r10,r9
|
---|
403 | xor r9,r12
|
---|
404 | shr r8,8
|
---|
405 | movzx r13,r13b
|
---|
406 | shr r9,8
|
---|
407 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
408 | shl r10,56
|
---|
409 | xor r9,QWORD[rcx*8+rbp]
|
---|
410 | rol edx,8
|
---|
411 | xor r8,QWORD[8+rax*1+rsi]
|
---|
412 | xor r9,QWORD[rax*1+rsi]
|
---|
413 | mov al,dl
|
---|
414 | xor r8,r10
|
---|
415 | movzx r13,WORD[r13*2+r11]
|
---|
416 | movzx ecx,dl
|
---|
417 | shl al,4
|
---|
418 | movzx r12,BYTE[rbx*1+rsp]
|
---|
419 | shr ecx,4
|
---|
420 | shl r13,48
|
---|
421 | xor r12,r8
|
---|
422 | mov r10,r9
|
---|
423 | xor r9,r13
|
---|
424 | shr r8,8
|
---|
425 | movzx r12,r12b
|
---|
426 | mov edx,DWORD[8+rdi] ; reload bytes 8..11 of the spilled block
|
---|
427 | shr r9,8
|
---|
428 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
429 | shl r10,56
|
---|
430 | xor r9,QWORD[rbx*8+rbp]
|
---|
431 | rol edx,8
|
---|
432 | xor r8,QWORD[8+rax*1+rsi]
|
---|
433 | xor r9,QWORD[rax*1+rsi]
|
---|
434 | mov al,dl
|
---|
435 | xor r8,r10
|
---|
436 | movzx r12,WORD[r12*2+r11]
|
---|
437 | movzx ebx,dl
|
---|
438 | shl al,4
|
---|
439 | movzx r13,BYTE[rcx*1+rsp]
|
---|
440 | shr ebx,4
|
---|
441 | shl r12,48
|
---|
442 | xor r13,r8
|
---|
443 | mov r10,r9
|
---|
444 | xor r9,r12
|
---|
445 | shr r8,8
|
---|
446 | movzx r13,r13b
|
---|
447 | shr r9,8
|
---|
448 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
449 | shl r10,56
|
---|
450 | xor r9,QWORD[rcx*8+rbp]
|
---|
451 | rol edx,8
|
---|
452 | xor r8,QWORD[8+rax*1+rsi]
|
---|
453 | xor r9,QWORD[rax*1+rsi]
|
---|
454 | mov al,dl
|
---|
455 | xor r8,r10
|
---|
456 | movzx r13,WORD[r13*2+r11]
|
---|
457 | movzx ecx,dl
|
---|
458 | shl al,4
|
---|
459 | movzx r12,BYTE[rbx*1+rsp]
|
---|
460 | shr ecx,4
|
---|
461 | shl r13,48
|
---|
462 | xor r12,r8
|
---|
463 | mov r10,r9
|
---|
464 | xor r9,r13
|
---|
465 | shr r8,8
|
---|
466 | movzx r12,r12b
|
---|
467 | shr r9,8
|
---|
468 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
469 | shl r10,56
|
---|
470 | xor r9,QWORD[rbx*8+rbp]
|
---|
471 | rol edx,8
|
---|
472 | xor r8,QWORD[8+rax*1+rsi]
|
---|
473 | xor r9,QWORD[rax*1+rsi]
|
---|
474 | mov al,dl
|
---|
475 | xor r8,r10
|
---|
476 | movzx r12,WORD[r12*2+r11]
|
---|
477 | movzx ebx,dl
|
---|
478 | shl al,4
|
---|
479 | movzx r13,BYTE[rcx*1+rsp]
|
---|
480 | shr ebx,4
|
---|
481 | shl r12,48
|
---|
482 | xor r13,r8
|
---|
483 | mov r10,r9
|
---|
484 | xor r9,r12
|
---|
485 | shr r8,8
|
---|
486 | movzx r13,r13b
|
---|
487 | shr r9,8
|
---|
488 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
489 | shl r10,56
|
---|
490 | xor r9,QWORD[rcx*8+rbp]
|
---|
491 | rol edx,8
|
---|
492 | xor r8,QWORD[8+rax*1+rsi]
|
---|
493 | xor r9,QWORD[rax*1+rsi]
|
---|
494 | mov al,dl
|
---|
495 | xor r8,r10
|
---|
496 | movzx r13,WORD[r13*2+r11]
|
---|
497 | movzx ecx,dl
|
---|
498 | shl al,4
|
---|
499 | movzx r12,BYTE[rbx*1+rsp]
|
---|
500 | shr ecx,4
|
---|
501 | shl r13,48
|
---|
502 | xor r12,r8
|
---|
503 | mov r10,r9
|
---|
504 | xor r9,r13
|
---|
505 | shr r8,8
|
---|
506 | movzx r12,r12b
|
---|
507 | mov edx,DWORD[4+rdi] ; reload bytes 4..7 of the spilled block
|
---|
508 | shr r9,8
|
---|
509 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
510 | shl r10,56
|
---|
511 | xor r9,QWORD[rbx*8+rbp]
|
---|
512 | rol edx,8
|
---|
513 | xor r8,QWORD[8+rax*1+rsi]
|
---|
514 | xor r9,QWORD[rax*1+rsi]
|
---|
515 | mov al,dl
|
---|
516 | xor r8,r10
|
---|
517 | movzx r12,WORD[r12*2+r11]
|
---|
518 | movzx ebx,dl
|
---|
519 | shl al,4
|
---|
520 | movzx r13,BYTE[rcx*1+rsp]
|
---|
521 | shr ebx,4
|
---|
522 | shl r12,48
|
---|
523 | xor r13,r8
|
---|
524 | mov r10,r9
|
---|
525 | xor r9,r12
|
---|
526 | shr r8,8
|
---|
527 | movzx r13,r13b
|
---|
528 | shr r9,8
|
---|
529 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
530 | shl r10,56
|
---|
531 | xor r9,QWORD[rcx*8+rbp]
|
---|
532 | rol edx,8
|
---|
533 | xor r8,QWORD[8+rax*1+rsi]
|
---|
534 | xor r9,QWORD[rax*1+rsi]
|
---|
535 | mov al,dl
|
---|
536 | xor r8,r10
|
---|
537 | movzx r13,WORD[r13*2+r11]
|
---|
538 | movzx ecx,dl
|
---|
539 | shl al,4
|
---|
540 | movzx r12,BYTE[rbx*1+rsp]
|
---|
541 | shr ecx,4
|
---|
542 | shl r13,48
|
---|
543 | xor r12,r8
|
---|
544 | mov r10,r9
|
---|
545 | xor r9,r13
|
---|
546 | shr r8,8
|
---|
547 | movzx r12,r12b
|
---|
548 | shr r9,8
|
---|
549 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
550 | shl r10,56
|
---|
551 | xor r9,QWORD[rbx*8+rbp]
|
---|
552 | rol edx,8
|
---|
553 | xor r8,QWORD[8+rax*1+rsi]
|
---|
554 | xor r9,QWORD[rax*1+rsi]
|
---|
555 | mov al,dl
|
---|
556 | xor r8,r10
|
---|
557 | movzx r12,WORD[r12*2+r11]
|
---|
558 | movzx ebx,dl
|
---|
559 | shl al,4
|
---|
560 | movzx r13,BYTE[rcx*1+rsp]
|
---|
561 | shr ebx,4
|
---|
562 | shl r12,48
|
---|
563 | xor r13,r8
|
---|
564 | mov r10,r9
|
---|
565 | xor r9,r12
|
---|
566 | shr r8,8
|
---|
567 | movzx r13,r13b
|
---|
568 | shr r9,8
|
---|
569 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
570 | shl r10,56
|
---|
571 | xor r9,QWORD[rcx*8+rbp]
|
---|
572 | rol edx,8
|
---|
573 | xor r8,QWORD[8+rax*1+rsi]
|
---|
574 | xor r9,QWORD[rax*1+rsi]
|
---|
575 | mov al,dl
|
---|
576 | xor r8,r10
|
---|
577 | movzx r13,WORD[r13*2+r11]
|
---|
578 | movzx ecx,dl
|
---|
579 | shl al,4
|
---|
580 | movzx r12,BYTE[rbx*1+rsp]
|
---|
581 | shr ecx,4
|
---|
582 | shl r13,48
|
---|
583 | xor r12,r8
|
---|
584 | mov r10,r9
|
---|
585 | xor r9,r13
|
---|
586 | shr r8,8
|
---|
587 | movzx r12,r12b
|
---|
588 | mov edx,DWORD[rdi] ; reload bytes 0..3 of the spilled block
|
---|
589 | shr r9,8
|
---|
590 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
591 | shl r10,56
|
---|
592 | xor r9,QWORD[rbx*8+rbp]
|
---|
593 | rol edx,8
|
---|
594 | xor r8,QWORD[8+rax*1+rsi]
|
---|
595 | xor r9,QWORD[rax*1+rsi]
|
---|
596 | mov al,dl
|
---|
597 | xor r8,r10
|
---|
598 | movzx r12,WORD[r12*2+r11]
|
---|
599 | movzx ebx,dl
|
---|
600 | shl al,4
|
---|
601 | movzx r13,BYTE[rcx*1+rsp]
|
---|
602 | shr ebx,4
|
---|
603 | shl r12,48
|
---|
604 | xor r13,r8
|
---|
605 | mov r10,r9
|
---|
606 | xor r9,r12
|
---|
607 | shr r8,8
|
---|
608 | movzx r13,r13b
|
---|
609 | shr r9,8
|
---|
610 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
611 | shl r10,56
|
---|
612 | xor r9,QWORD[rcx*8+rbp]
|
---|
613 | rol edx,8
|
---|
614 | xor r8,QWORD[8+rax*1+rsi]
|
---|
615 | xor r9,QWORD[rax*1+rsi]
|
---|
616 | mov al,dl
|
---|
617 | xor r8,r10
|
---|
618 | movzx r13,WORD[r13*2+r11]
|
---|
619 | movzx ecx,dl
|
---|
620 | shl al,4
|
---|
621 | movzx r12,BYTE[rbx*1+rsp]
|
---|
622 | shr ecx,4
|
---|
623 | shl r13,48
|
---|
624 | xor r12,r8
|
---|
625 | mov r10,r9
|
---|
626 | xor r9,r13
|
---|
627 | shr r8,8
|
---|
628 | movzx r12,r12b
|
---|
629 | shr r9,8
|
---|
630 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
631 | shl r10,56
|
---|
632 | xor r9,QWORD[rbx*8+rbp]
|
---|
633 | rol edx,8
|
---|
634 | xor r8,QWORD[8+rax*1+rsi]
|
---|
635 | xor r9,QWORD[rax*1+rsi]
|
---|
636 | mov al,dl
|
---|
637 | xor r8,r10
|
---|
638 | movzx r12,WORD[r12*2+r11]
|
---|
639 | movzx ebx,dl
|
---|
640 | shl al,4
|
---|
641 | movzx r13,BYTE[rcx*1+rsp]
|
---|
642 | shr ebx,4
|
---|
643 | shl r12,48
|
---|
644 | xor r13,r8
|
---|
645 | mov r10,r9
|
---|
646 | xor r9,r12
|
---|
647 | shr r8,8
|
---|
648 | movzx r13,r13b
|
---|
649 | shr r9,8
|
---|
650 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
651 | shl r10,56
|
---|
652 | xor r9,QWORD[rcx*8+rbp]
|
---|
653 | rol edx,8
|
---|
654 | xor r8,QWORD[8+rax*1+rsi]
|
---|
655 | xor r9,QWORD[rax*1+rsi]
|
---|
656 | mov al,dl
|
---|
657 | xor r8,r10
|
---|
658 | movzx r13,WORD[r13*2+r11]
|
---|
659 | movzx ecx,dl
|
---|
660 | shl al,4
|
---|
661 | movzx r12,BYTE[rbx*1+rsp]
|
---|
662 | and ecx,240 ; last byte: keep the high nibble in place (0xf0 mask) as a 16-byte entry offset into rsi
|
---|
663 | shl r13,48
|
---|
664 | xor r12,r8
|
---|
665 | mov r10,r9
|
---|
666 | xor r9,r13
|
---|
667 | shr r8,8
|
---|
668 | movzx r12,r12b
|
---|
669 | mov edx,DWORD[((-4))+rdi] ; NOTE(review): reads the 4 bytes just below rdi; edx is not consumed before rdx is reloaded at $L$outer_loop or the function returns -- confirm this dead load is always in bounds
|
---|
670 | shr r9,8
|
---|
671 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
672 | shl r10,56
|
---|
673 | xor r9,QWORD[rbx*8+rbp]
|
---|
674 | movzx r12,WORD[r12*2+r11]
|
---|
675 | xor r8,QWORD[8+rax*1+rsi]
|
---|
676 | xor r9,QWORD[rax*1+rsi]
|
---|
677 | shl r12,48
|
---|
678 | xor r8,r10
|
---|
679 | xor r9,r12
|
---|
680 | movzx r13,r8b
|
---|
681 | shr r8,4 ; final step shifts by 4, not 8: one nibble is left
|
---|
682 | mov r10,r9
|
---|
683 | shl r13b,4 ; (4 bits leaving r8) << 4, used as the $L$rem_8bit index
|
---|
684 | shr r9,4
|
---|
685 | xor r8,QWORD[8+rcx*1+rsi]
|
---|
686 | movzx r13,WORD[r13*2+r11]
|
---|
687 | shl r10,60
|
---|
688 | xor r9,QWORD[rcx*1+rsi]
|
---|
689 | xor r8,r10
|
---|
690 | shl r13,48
|
---|
691 | bswap r8 ; reverse the byte order of each half
|
---|
692 | xor r9,r13
|
---|
693 | bswap r9
|
---|
694 | cmp r14,r15
|
---|
695 | jb NEAR $L$outer_loop ; unsigned compare: more input remains
|
---|
696 | mov QWORD[8+rdi],r8 ; store the final state
|
---|
697 | mov QWORD[rdi],r9
|
---|
698 |
|
---|
699 | lea rsi,[((280+48))+rsp] ; rsi = rsp as it was before the six pushes (6*8 = 48) and the 280-byte reservation
|
---|
700 |
|
---|
701 | mov r15,QWORD[((-48))+rsi] ; reload the pushed registers, last pushed at the lowest address
|
---|
702 |
|
---|
703 | mov r14,QWORD[((-40))+rsi]
|
---|
704 |
|
---|
705 | mov r13,QWORD[((-32))+rsi]
|
---|
706 |
|
---|
707 | mov r12,QWORD[((-24))+rsi]
|
---|
708 |
|
---|
709 | mov rbp,QWORD[((-16))+rsi]
|
---|
710 |
|
---|
711 | mov rbx,QWORD[((-8))+rsi]
|
---|
712 |
|
---|
713 | lea rsp,[rsi]
|
---|
714 |
|
---|
715 | $L$ghash_epilogue:
|
---|
716 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi/rsi from the slots filled at entry
|
---|
717 | mov rsi,QWORD[16+rsp]
|
---|
718 | DB 0F3h,0C3h ;repret
|
---|
719 |
|
---|
720 | $L$SEH_end_gcm_ghash_4bit:
|
---|
721 | global gcm_init_clmul
|
---|
722 |
|
---|
723 | ALIGN 16
|
---|
; gcm_init_clmul -- uses the Win64 argument registers directly.
;   rcx: destination; six 16-byte values are stored at offsets 0,16,32,48,64,80.
;   rdx: source; one 16-byte value is loaded (unaligned).
; xmm6 is saved to the stack on entry and reloaded before returning.
; Flow: the source is half-swapped, shifted left by one bit with a conditional
; xor of $L$0x1c2_polynomial, then run through three identical
; "three-product carry-less multiply + shift/xor fold" sequences, each
; multiplying the running value in xmm0 by the prepared value in xmm2.
; NOTE(review): presumably this precomputes H, H^2, H^3, H^4 (plus xor-folded
; halves for Karatsuba) for the CLMUL GHASH routines -- confirm against the
; consumers of this table.
724 | gcm_init_clmul:
|
---|
725 |
|
---|
726 | $L$_init_clmul:
|
---|
727 | $L$SEH_begin_gcm_init_clmul:
|
---|
728 |
|
---|
729 | DB 0x48,0x83,0xec,0x18 ; bytes encode: sub rsp,0x18
|
---|
730 | DB 0x0f,0x29,0x34,0x24 ; bytes encode: movaps [rsp],xmm6 (reloaded before the return)
|
---|
731 | movdqu xmm2,XMMWORD[rdx]
|
---|
732 | pshufd xmm2,xmm2,78 ; 78 = 01001110b: swap the two 64-bit halves
|
---|
733 |
|
---|
734 |
|
---|
735 | pshufd xmm4,xmm2,255 ; broadcast the top dword to all lanes
|
---|
736 | movdqa xmm3,xmm2
|
---|
737 | psllq xmm2,1
|
---|
738 | pxor xmm5,xmm5
|
---|
739 | psrlq xmm3,63 ; bit 63 of each qword = bit carried out by the shift
|
---|
740 | pcmpgtd xmm5,xmm4 ; xmm5 = all-ones if the top bit of the value was set, else zero
|
---|
741 | pslldq xmm3,8 ; carry of the low qword moves into the high qword
|
---|
742 | por xmm2,xmm3 ; xmm2 = value << 1 across all 128 bits
|
---|
743 |
|
---|
744 |
|
---|
745 | pand xmm5,XMMWORD[$L$0x1c2_polynomial]
|
---|
746 | pxor xmm2,xmm5 ; xor in the constant only when the top bit was set
|
---|
747 |
|
---|
748 |
|
---|
749 | pshufd xmm6,xmm2,78
|
---|
750 | movdqa xmm0,xmm2
|
---|
751 | pxor xmm6,xmm2 ; xmm6 = (high half ^ low half) of xmm2, in both halves
|
---|
; --- multiply #1: xmm0 (= xmm2) times xmm2 ---
752 | movdqa xmm1,xmm0
|
---|
753 | pshufd xmm3,xmm0,78
|
---|
754 | pxor xmm3,xmm0 ; xmm3 = (high half ^ low half) of xmm0
|
---|
755 | DB 102,15,58,68,194,0 ; bytes encode: pclmulqdq xmm0,xmm2,0x00 (low * low)
|
---|
756 | DB 102,15,58,68,202,17 ; bytes encode: pclmulqdq xmm1,xmm2,0x11 (high * high)
|
---|
757 | DB 102,15,58,68,222,0 ; bytes encode: pclmulqdq xmm3,xmm6,0x00 (folded * folded)
|
---|
758 | pxor xmm3,xmm0
|
---|
759 | pxor xmm3,xmm1 ; xmm3 = middle term of the three-product multiply
|
---|
760 |
|
---|
761 | movdqa xmm4,xmm3
|
---|
762 | psrldq xmm3,8
|
---|
763 | pslldq xmm4,8
|
---|
764 | pxor xmm1,xmm3 ; spread the middle term over the high (xmm1) and low (xmm0) halves
|
---|
765 | pxor xmm0,xmm4
|
---|
766 |
|
---|
; fold xmm1:xmm0 into xmm0 with shifts and xors
; NOTE(review): presumably the GHASH modular reduction -- confirm
767 | movdqa xmm4,xmm0
|
---|
768 | movdqa xmm3,xmm0
|
---|
769 | psllq xmm0,5
|
---|
770 | pxor xmm3,xmm0
|
---|
771 | psllq xmm0,1
|
---|
772 | pxor xmm0,xmm3
|
---|
773 | psllq xmm0,57
|
---|
774 | movdqa xmm3,xmm0
|
---|
775 | pslldq xmm0,8
|
---|
776 | psrldq xmm3,8
|
---|
777 | pxor xmm0,xmm4
|
---|
778 | pxor xmm1,xmm3
|
---|
779 |
|
---|
780 |
|
---|
781 | movdqa xmm4,xmm0
|
---|
782 | psrlq xmm0,1
|
---|
783 | pxor xmm1,xmm4
|
---|
784 | pxor xmm4,xmm0
|
---|
785 | psrlq xmm0,5
|
---|
786 | pxor xmm0,xmm4
|
---|
787 | psrlq xmm0,1
|
---|
788 | pxor xmm0,xmm1
|
---|
789 | pshufd xmm3,xmm2,78
|
---|
790 | pshufd xmm4,xmm0,78
|
---|
791 | pxor xmm3,xmm2
|
---|
792 | movdqu XMMWORD[rcx],xmm2 ; slot 0: the prepared input value
|
---|
793 | pxor xmm4,xmm0
|
---|
794 | movdqu XMMWORD[16+rcx],xmm0 ; slot 1: result of multiply #1
|
---|
795 | DB 102,15,58,15,227,8 ; bytes encode: palignr xmm4,xmm3,8 (pack the two xor-folded halves)
|
---|
796 | movdqu XMMWORD[32+rcx],xmm4 ; slot 2: xor-folded halves of slots 0 and 1
|
---|
; --- multiply #2: xmm0 times xmm2 (same sequence as above) ---
797 | movdqa xmm1,xmm0
|
---|
798 | pshufd xmm3,xmm0,78
|
---|
799 | pxor xmm3,xmm0
|
---|
800 | DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
|
---|
801 | DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
|
---|
802 | DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,0x00
|
---|
803 | pxor xmm3,xmm0
|
---|
804 | pxor xmm3,xmm1
|
---|
805 |
|
---|
806 | movdqa xmm4,xmm3
|
---|
807 | psrldq xmm3,8
|
---|
808 | pslldq xmm4,8
|
---|
809 | pxor xmm1,xmm3
|
---|
810 | pxor xmm0,xmm4
|
---|
811 |
|
---|
812 | movdqa xmm4,xmm0
|
---|
813 | movdqa xmm3,xmm0
|
---|
814 | psllq xmm0,5
|
---|
815 | pxor xmm3,xmm0
|
---|
816 | psllq xmm0,1
|
---|
817 | pxor xmm0,xmm3
|
---|
818 | psllq xmm0,57
|
---|
819 | movdqa xmm3,xmm0
|
---|
820 | pslldq xmm0,8
|
---|
821 | psrldq xmm3,8
|
---|
822 | pxor xmm0,xmm4
|
---|
823 | pxor xmm1,xmm3
|
---|
824 |
|
---|
825 |
|
---|
826 | movdqa xmm4,xmm0
|
---|
827 | psrlq xmm0,1
|
---|
828 | pxor xmm1,xmm4
|
---|
829 | pxor xmm4,xmm0
|
---|
830 | psrlq xmm0,5
|
---|
831 | pxor xmm0,xmm4
|
---|
832 | psrlq xmm0,1
|
---|
833 | pxor xmm0,xmm1
|
---|
834 | movdqa xmm5,xmm0 ; keep the result of multiply #2 for slot 3
|
---|
; --- multiply #3: xmm0 times xmm2 (same sequence as above) ---
835 | movdqa xmm1,xmm0
|
---|
836 | pshufd xmm3,xmm0,78
|
---|
837 | pxor xmm3,xmm0
|
---|
838 | DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
|
---|
839 | DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
|
---|
840 | DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,0x00
|
---|
841 | pxor xmm3,xmm0
|
---|
842 | pxor xmm3,xmm1
|
---|
843 |
|
---|
844 | movdqa xmm4,xmm3
|
---|
845 | psrldq xmm3,8
|
---|
846 | pslldq xmm4,8
|
---|
847 | pxor xmm1,xmm3
|
---|
848 | pxor xmm0,xmm4
|
---|
849 |
|
---|
850 | movdqa xmm4,xmm0
|
---|
851 | movdqa xmm3,xmm0
|
---|
852 | psllq xmm0,5
|
---|
853 | pxor xmm3,xmm0
|
---|
854 | psllq xmm0,1
|
---|
855 | pxor xmm0,xmm3
|
---|
856 | psllq xmm0,57
|
---|
857 | movdqa xmm3,xmm0
|
---|
858 | pslldq xmm0,8
|
---|
859 | psrldq xmm3,8
|
---|
860 | pxor xmm0,xmm4
|
---|
861 | pxor xmm1,xmm3
|
---|
862 |
|
---|
863 |
|
---|
864 | movdqa xmm4,xmm0
|
---|
865 | psrlq xmm0,1
|
---|
866 | pxor xmm1,xmm4
|
---|
867 | pxor xmm4,xmm0
|
---|
868 | psrlq xmm0,5
|
---|
869 | pxor xmm0,xmm4
|
---|
870 | psrlq xmm0,1
|
---|
871 | pxor xmm0,xmm1
|
---|
872 | pshufd xmm3,xmm5,78
|
---|
873 | pshufd xmm4,xmm0,78
|
---|
874 | pxor xmm3,xmm5
|
---|
875 | movdqu XMMWORD[48+rcx],xmm5 ; slot 3: result of multiply #2
|
---|
876 | pxor xmm4,xmm0
|
---|
877 | movdqu XMMWORD[64+rcx],xmm0 ; slot 4: result of multiply #3
|
---|
878 | DB 102,15,58,15,227,8 ; palignr xmm4,xmm3,8
|
---|
879 | movdqu XMMWORD[80+rcx],xmm4 ; slot 5: xor-folded halves of slots 3 and 4
|
---|
880 | movaps xmm6,XMMWORD[rsp] ; reload the xmm6 value saved on entry
|
---|
881 | lea rsp,[24+rsp] ; release the 0x18 bytes taken on entry
|
---|
882 | $L$SEH_end_gcm_init_clmul:
|
---|
883 | DB 0F3h,0C3h ;repret
|
---|
884 |
|
---|
885 |
|
---|
886 | global gcm_gmult_clmul
|
---|
887 |
|
---|
888 | ALIGN 16
|
---|
; gcm_gmult_clmul -- uses the Win64 argument registers directly; no stack frame.
;   rcx: 16-byte buffer, loaded, transformed and stored back in place.
;   rdx: table; the 16-byte values at offsets 0 and 32 are read.
; The buffer is byte-shuffled with $L$bswap_mask on the way in and out. In
; between, it is multiplied by [rdx] using three carry-less products ([32+rdx]
; supplies the operand for the middle product) and folded back to 128 bits.
; Only xmm0-xmm5 are written.
; NOTE(review): presumably "Xi = Xi * H" using the table produced by
; gcm_init_clmul -- confirm.
889 | gcm_gmult_clmul:
|
---|
890 |
|
---|
891 | DB 243,15,30,250 ; bytes F3 0F 1E FA = endbr64
|
---|
892 | $L$_gmult_clmul:
|
---|
893 | movdqu xmm0,XMMWORD[rcx]
|
---|
894 | movdqa xmm5,XMMWORD[$L$bswap_mask] ; aligned load: the mask must sit on a 16-byte boundary
|
---|
895 | movdqu xmm2,XMMWORD[rdx]
|
---|
896 | movdqu xmm4,XMMWORD[32+rdx]
|
---|
897 | DB 102,15,56,0,197 ; bytes encode: pshufb xmm0,xmm5
|
---|
898 | movdqa xmm1,xmm0
|
---|
899 | pshufd xmm3,xmm0,78 ; 78 = 01001110b: swap the two 64-bit halves
|
---|
900 | pxor xmm3,xmm0 ; xmm3 = (high half ^ low half) of xmm0
|
---|
901 | DB 102,15,58,68,194,0 ; bytes encode: pclmulqdq xmm0,xmm2,0x00 (low * low)
|
---|
902 | DB 102,15,58,68,202,17 ; bytes encode: pclmulqdq xmm1,xmm2,0x11 (high * high)
|
---|
903 | DB 102,15,58,68,220,0 ; bytes encode: pclmulqdq xmm3,xmm4,0x00 (folded * low qword of [32+rdx])
|
---|
904 | pxor xmm3,xmm0
|
---|
905 | pxor xmm3,xmm1 ; xmm3 = middle term of the three-product multiply
|
---|
906 |
|
---|
907 | movdqa xmm4,xmm3
|
---|
908 | psrldq xmm3,8
|
---|
909 | pslldq xmm4,8
|
---|
910 | pxor xmm1,xmm3 ; spread the middle term over the high (xmm1) and low (xmm0) halves
|
---|
911 | pxor xmm0,xmm4
|
---|
912 |
|
---|
; fold xmm1:xmm0 into xmm0 with shifts and xors
; NOTE(review): presumably the GHASH modular reduction -- confirm
913 | movdqa xmm4,xmm0
|
---|
914 | movdqa xmm3,xmm0
|
---|
915 | psllq xmm0,5
|
---|
916 | pxor xmm3,xmm0
|
---|
917 | psllq xmm0,1
|
---|
918 | pxor xmm0,xmm3
|
---|
919 | psllq xmm0,57
|
---|
920 | movdqa xmm3,xmm0
|
---|
921 | pslldq xmm0,8
|
---|
922 | psrldq xmm3,8
|
---|
923 | pxor xmm0,xmm4
|
---|
924 | pxor xmm1,xmm3
|
---|
925 |
|
---|
926 |
|
---|
927 | movdqa xmm4,xmm0
|
---|
928 | psrlq xmm0,1
|
---|
929 | pxor xmm1,xmm4
|
---|
930 | pxor xmm4,xmm0
|
---|
931 | psrlq xmm0,5
|
---|
932 | pxor xmm0,xmm4
|
---|
933 | psrlq xmm0,1
|
---|
934 | pxor xmm0,xmm1
|
---|
935 | DB 102,15,56,0,197 ; pshufb xmm0,xmm5: shuffle with the same mask before storing
|
---|
936 | movdqu XMMWORD[rcx],xmm0 ; overwrite the input buffer with the result
|
---|
937 | DB 0F3h,0C3h ;repret
|
---|
938 |
|
---|
939 |
|
---|
940 | global gcm_ghash_clmul
|
---|
941 |
|
---|
942 | ALIGN 32
|
---|
943 | gcm_ghash_clmul:
|
---|
944 |
|
---|
945 | DB 243,15,30,250
|
---|
946 | $L$_ghash_clmul:
|
---|
947 | lea rax,[((-136))+rsp]
|
---|
948 | $L$SEH_begin_gcm_ghash_clmul:
|
---|
949 |
|
---|
950 | DB 0x48,0x8d,0x60,0xe0
|
---|
951 | DB 0x0f,0x29,0x70,0xe0
|
---|
952 | DB 0x0f,0x29,0x78,0xf0
|
---|
953 | DB 0x44,0x0f,0x29,0x00
|
---|
954 | DB 0x44,0x0f,0x29,0x48,0x10
|
---|
955 | DB 0x44,0x0f,0x29,0x50,0x20
|
---|
956 | DB 0x44,0x0f,0x29,0x58,0x30
|
---|
957 | DB 0x44,0x0f,0x29,0x60,0x40
|
---|
958 | DB 0x44,0x0f,0x29,0x68,0x50
|
---|
959 | DB 0x44,0x0f,0x29,0x70,0x60
|
---|
960 | DB 0x44,0x0f,0x29,0x78,0x70
|
---|
961 | movdqa xmm10,XMMWORD[$L$bswap_mask]
|
---|
962 |
|
---|
963 | movdqu xmm0,XMMWORD[rcx]
|
---|
964 | movdqu xmm2,XMMWORD[rdx]
|
---|
965 | movdqu xmm7,XMMWORD[32+rdx]
|
---|
966 | DB 102,65,15,56,0,194
|
---|
967 |
|
---|
968 | sub r9,0x10
|
---|
969 | jz NEAR $L$odd_tail
|
---|
970 |
|
---|
971 | movdqu xmm6,XMMWORD[16+rdx]
|
---|
972 | mov eax,DWORD[((OPENSSL_ia32cap_P+4))]
|
---|
973 | cmp r9,0x30
|
---|
974 | jb NEAR $L$skip4x
|
---|
975 |
|
---|
976 | and eax,71303168
|
---|
977 | cmp eax,4194304
|
---|
978 | je NEAR $L$skip4x
|
---|
979 |
|
---|
980 | sub r9,0x30
|
---|
981 | mov rax,0xA040608020C0E000
|
---|
982 | movdqu xmm14,XMMWORD[48+rdx]
|
---|
983 | movdqu xmm15,XMMWORD[64+rdx]
|
---|
984 |
|
---|
985 |
|
---|
986 |
|
---|
987 |
|
---|
988 | movdqu xmm3,XMMWORD[48+r8]
|
---|
989 | movdqu xmm11,XMMWORD[32+r8]
|
---|
990 | DB 102,65,15,56,0,218
|
---|
991 | DB 102,69,15,56,0,218
|
---|
992 | movdqa xmm5,xmm3
|
---|
993 | pshufd xmm4,xmm3,78
|
---|
994 | pxor xmm4,xmm3
|
---|
995 | DB 102,15,58,68,218,0
|
---|
996 | DB 102,15,58,68,234,17
|
---|
997 | DB 102,15,58,68,231,0
|
---|
998 |
|
---|
999 | movdqa xmm13,xmm11
|
---|
1000 | pshufd xmm12,xmm11,78
|
---|
1001 | pxor xmm12,xmm11
|
---|
1002 | DB 102,68,15,58,68,222,0
|
---|
1003 | DB 102,68,15,58,68,238,17
|
---|
1004 | DB 102,68,15,58,68,231,16
|
---|
1005 | xorps xmm3,xmm11
|
---|
1006 | xorps xmm5,xmm13
|
---|
1007 | movups xmm7,XMMWORD[80+rdx]
|
---|
1008 | xorps xmm4,xmm12
|
---|
1009 |
|
---|
1010 | movdqu xmm11,XMMWORD[16+r8]
|
---|
1011 | movdqu xmm8,XMMWORD[r8]
|
---|
1012 | DB 102,69,15,56,0,218
|
---|
1013 | DB 102,69,15,56,0,194
|
---|
1014 | movdqa xmm13,xmm11
|
---|
1015 | pshufd xmm12,xmm11,78
|
---|
1016 | pxor xmm0,xmm8
|
---|
1017 | pxor xmm12,xmm11
|
---|
1018 | DB 102,69,15,58,68,222,0
|
---|
1019 | movdqa xmm1,xmm0
|
---|
1020 | pshufd xmm8,xmm0,78
|
---|
1021 | pxor xmm8,xmm0
|
---|
1022 | DB 102,69,15,58,68,238,17
|
---|
1023 | DB 102,68,15,58,68,231,0
|
---|
1024 | xorps xmm3,xmm11
|
---|
1025 | xorps xmm5,xmm13
|
---|
1026 |
|
---|
1027 | lea r8,[64+r8]
|
---|
1028 | sub r9,0x40
|
---|
1029 | jc NEAR $L$tail4x
|
---|
1030 |
|
---|
1031 | jmp NEAR $L$mod4_loop
|
---|
1032 | ALIGN 32
|
---|
1033 | $L$mod4_loop:
|
---|
1034 | DB 102,65,15,58,68,199,0
|
---|
1035 | xorps xmm4,xmm12
|
---|
1036 | movdqu xmm11,XMMWORD[48+r8]
|
---|
1037 | DB 102,69,15,56,0,218
|
---|
1038 | DB 102,65,15,58,68,207,17
|
---|
1039 | xorps xmm0,xmm3
|
---|
1040 | movdqu xmm3,XMMWORD[32+r8]
|
---|
1041 | movdqa xmm13,xmm11
|
---|
1042 | DB 102,68,15,58,68,199,16
|
---|
1043 | pshufd xmm12,xmm11,78
|
---|
1044 | xorps xmm1,xmm5
|
---|
1045 | pxor xmm12,xmm11
|
---|
1046 | DB 102,65,15,56,0,218
|
---|
1047 | movups xmm7,XMMWORD[32+rdx]
|
---|
1048 | xorps xmm8,xmm4
|
---|
1049 | DB 102,68,15,58,68,218,0
|
---|
1050 | pshufd xmm4,xmm3,78
|
---|
1051 |
|
---|
1052 | pxor xmm8,xmm0
|
---|
1053 | movdqa xmm5,xmm3
|
---|
1054 | pxor xmm8,xmm1
|
---|
1055 | pxor xmm4,xmm3
|
---|
1056 | movdqa xmm9,xmm8
|
---|
1057 | DB 102,68,15,58,68,234,17
|
---|
1058 | pslldq xmm8,8
|
---|
1059 | psrldq xmm9,8
|
---|
1060 | pxor xmm0,xmm8
|
---|
1061 | movdqa xmm8,XMMWORD[$L$7_mask]
|
---|
1062 | pxor xmm1,xmm9
|
---|
1063 | DB 102,76,15,110,200
|
---|
1064 |
|
---|
1065 | pand xmm8,xmm0
|
---|
1066 | DB 102,69,15,56,0,200
|
---|
1067 | pxor xmm9,xmm0
|
---|
1068 | DB 102,68,15,58,68,231,0
|
---|
1069 | psllq xmm9,57
|
---|
1070 | movdqa xmm8,xmm9
|
---|
1071 | pslldq xmm9,8
|
---|
1072 | DB 102,15,58,68,222,0
|
---|
1073 | psrldq xmm8,8
|
---|
1074 | pxor xmm0,xmm9
|
---|
1075 | pxor xmm1,xmm8
|
---|
1076 | movdqu xmm8,XMMWORD[r8]
|
---|
1077 |
|
---|
1078 | movdqa xmm9,xmm0
|
---|
1079 | psrlq xmm0,1
|
---|
1080 | DB 102,15,58,68,238,17
|
---|
1081 | xorps xmm3,xmm11
|
---|
1082 | movdqu xmm11,XMMWORD[16+r8]
|
---|
1083 | DB 102,69,15,56,0,218
|
---|
1084 | DB 102,15,58,68,231,16
|
---|
1085 | xorps xmm5,xmm13
|
---|
1086 | movups xmm7,XMMWORD[80+rdx]
|
---|
1087 | DB 102,69,15,56,0,194
|
---|
1088 | pxor xmm1,xmm9
|
---|
1089 | pxor xmm9,xmm0
|
---|
1090 | psrlq xmm0,5
|
---|
1091 |
|
---|
1092 | movdqa xmm13,xmm11
|
---|
1093 | pxor xmm4,xmm12
|
---|
1094 | pshufd xmm12,xmm11,78
|
---|
1095 | pxor xmm0,xmm9
|
---|
1096 | pxor xmm1,xmm8
|
---|
1097 | pxor xmm12,xmm11
|
---|
1098 | DB 102,69,15,58,68,222,0
|
---|
1099 | psrlq xmm0,1
|
---|
1100 | pxor xmm0,xmm1
|
---|
1101 | movdqa xmm1,xmm0
|
---|
1102 | DB 102,69,15,58,68,238,17
|
---|
1103 | xorps xmm3,xmm11
|
---|
1104 | pshufd xmm8,xmm0,78
|
---|
1105 | pxor xmm8,xmm0
|
---|
1106 |
|
---|
1107 | DB 102,68,15,58,68,231,0
|
---|
1108 | xorps xmm5,xmm13
|
---|
1109 |
|
---|
1110 | lea r8,[64+r8]
|
---|
1111 | sub r9,0x40
|
---|
1112 | jnc NEAR $L$mod4_loop
|
---|
1113 |
|
---|
1114 | $L$tail4x:
|
---|
1115 | DB 102,65,15,58,68,199,0
|
---|
1116 | DB 102,65,15,58,68,207,17
|
---|
1117 | DB 102,68,15,58,68,199,16
|
---|
1118 | xorps xmm4,xmm12
|
---|
1119 | xorps xmm0,xmm3
|
---|
1120 | xorps xmm1,xmm5
|
---|
1121 | pxor xmm1,xmm0
|
---|
1122 | pxor xmm8,xmm4
|
---|
1123 |
|
---|
1124 | pxor xmm8,xmm1
|
---|
1125 | pxor xmm1,xmm0
|
---|
1126 |
|
---|
1127 | movdqa xmm9,xmm8
|
---|
1128 | psrldq xmm8,8
|
---|
1129 | pslldq xmm9,8
|
---|
1130 | pxor xmm1,xmm8
|
---|
1131 | pxor xmm0,xmm9
|
---|
1132 |
|
---|
1133 | movdqa xmm4,xmm0
|
---|
1134 | movdqa xmm3,xmm0
|
---|
1135 | psllq xmm0,5
|
---|
1136 | pxor xmm3,xmm0
|
---|
1137 | psllq xmm0,1
|
---|
1138 | pxor xmm0,xmm3
|
---|
1139 | psllq xmm0,57
|
---|
1140 | movdqa xmm3,xmm0
|
---|
1141 | pslldq xmm0,8
|
---|
1142 | psrldq xmm3,8
|
---|
1143 | pxor xmm0,xmm4
|
---|
1144 | pxor xmm1,xmm3
|
---|
1145 |
|
---|
1146 |
|
---|
1147 | movdqa xmm4,xmm0
|
---|
1148 | psrlq xmm0,1
|
---|
1149 | pxor xmm1,xmm4
|
---|
1150 | pxor xmm4,xmm0
|
---|
1151 | psrlq xmm0,5
|
---|
1152 | pxor xmm0,xmm4
|
---|
1153 | psrlq xmm0,1
|
---|
1154 | pxor xmm0,xmm1
|
---|
1155 | add r9,0x40
|
---|
1156 | jz NEAR $L$done
|
---|
1157 | movdqu xmm7,XMMWORD[32+rdx]
|
---|
1158 | sub r9,0x10
|
---|
1159 | jz NEAR $L$odd_tail
|
---|
1160 | $L$skip4x:
|
---|
1161 |
|
---|
1162 |
|
---|
1163 |
|
---|
1164 |
|
---|
1165 |
|
---|
1166 | movdqu xmm8,XMMWORD[r8]
|
---|
1167 | movdqu xmm3,XMMWORD[16+r8]
|
---|
1168 | DB 102,69,15,56,0,194
|
---|
1169 | DB 102,65,15,56,0,218
|
---|
1170 | pxor xmm0,xmm8
|
---|
1171 |
|
---|
1172 | movdqa xmm5,xmm3
|
---|
1173 | pshufd xmm4,xmm3,78
|
---|
1174 | pxor xmm4,xmm3
|
---|
1175 | DB 102,15,58,68,218,0
|
---|
1176 | DB 102,15,58,68,234,17
|
---|
1177 | DB 102,15,58,68,231,0
|
---|
1178 |
|
---|
1179 | lea r8,[32+r8]
|
---|
1180 | nop
|
---|
1181 | sub r9,0x20
|
---|
1182 | jbe NEAR $L$even_tail
|
---|
1183 | nop
|
---|
1184 | jmp NEAR $L$mod_loop
|
---|
1185 |
|
---|
1186 | ALIGN 32
|
---|
1187 | $L$mod_loop:
|
---|
1188 | movdqa xmm1,xmm0
|
---|
1189 | movdqa xmm8,xmm4
|
---|
1190 | pshufd xmm4,xmm0,78
|
---|
1191 | pxor xmm4,xmm0
|
---|
1192 |
|
---|
1193 | DB 102,15,58,68,198,0
|
---|
1194 | DB 102,15,58,68,206,17
|
---|
1195 | DB 102,15,58,68,231,16
|
---|
1196 |
|
---|
1197 | pxor xmm0,xmm3
|
---|
1198 | pxor xmm1,xmm5
|
---|
1199 | movdqu xmm9,XMMWORD[r8]
|
---|
1200 | pxor xmm8,xmm0
|
---|
1201 | DB 102,69,15,56,0,202
|
---|
1202 | movdqu xmm3,XMMWORD[16+r8]
|
---|
1203 |
|
---|
1204 | pxor xmm8,xmm1
|
---|
1205 | pxor xmm1,xmm9
|
---|
1206 | pxor xmm4,xmm8
|
---|
1207 | DB 102,65,15,56,0,218
|
---|
1208 | movdqa xmm8,xmm4
|
---|
1209 | psrldq xmm8,8
|
---|
1210 | pslldq xmm4,8
|
---|
1211 | pxor xmm1,xmm8
|
---|
1212 | pxor xmm0,xmm4
|
---|
1213 |
|
---|
1214 | movdqa xmm5,xmm3
|
---|
1215 |
|
---|
1216 | movdqa xmm9,xmm0
|
---|
1217 | movdqa xmm8,xmm0
|
---|
1218 | psllq xmm0,5
|
---|
1219 | pxor xmm8,xmm0
|
---|
1220 | DB 102,15,58,68,218,0
|
---|
1221 | psllq xmm0,1
|
---|
1222 | pxor xmm0,xmm8
|
---|
1223 | psllq xmm0,57
|
---|
1224 | movdqa xmm8,xmm0
|
---|
1225 | pslldq xmm0,8
|
---|
1226 | psrldq xmm8,8
|
---|
1227 | pxor xmm0,xmm9
|
---|
1228 | pshufd xmm4,xmm5,78
|
---|
1229 | pxor xmm1,xmm8
|
---|
1230 | pxor xmm4,xmm5
|
---|
1231 |
|
---|
1232 | movdqa xmm9,xmm0
|
---|
1233 | psrlq xmm0,1
|
---|
1234 | DB 102,15,58,68,234,17
|
---|
1235 | pxor xmm1,xmm9
|
---|
1236 | pxor xmm9,xmm0
|
---|
1237 | psrlq xmm0,5
|
---|
1238 | pxor xmm0,xmm9
|
---|
1239 | lea r8,[32+r8]
|
---|
1240 | psrlq xmm0,1
|
---|
1241 | DB 102,15,58,68,231,0
|
---|
1242 | pxor xmm0,xmm1
|
---|
1243 |
|
---|
1244 | sub r9,0x20
|
---|
1245 | ja NEAR $L$mod_loop
|
---|
1246 |
|
---|
1247 | $L$even_tail:
|
---|
1248 | movdqa xmm1,xmm0
|
---|
1249 | movdqa xmm8,xmm4
|
---|
1250 | pshufd xmm4,xmm0,78
|
---|
1251 | pxor xmm4,xmm0
|
---|
1252 |
|
---|
1253 | DB 102,15,58,68,198,0
|
---|
1254 | DB 102,15,58,68,206,17
|
---|
1255 | DB 102,15,58,68,231,16
|
---|
1256 |
|
---|
1257 | pxor xmm0,xmm3
|
---|
1258 | pxor xmm1,xmm5
|
---|
1259 | pxor xmm8,xmm0
|
---|
1260 | pxor xmm8,xmm1
|
---|
1261 | pxor xmm4,xmm8
|
---|
1262 | movdqa xmm8,xmm4
|
---|
1263 | psrldq xmm8,8
|
---|
1264 | pslldq xmm4,8
|
---|
1265 | pxor xmm1,xmm8
|
---|
1266 | pxor xmm0,xmm4
|
---|
1267 |
|
---|
1268 | movdqa xmm4,xmm0
|
---|
1269 | movdqa xmm3,xmm0
|
---|
1270 | psllq xmm0,5
|
---|
1271 | pxor xmm3,xmm0
|
---|
1272 | psllq xmm0,1
|
---|
1273 | pxor xmm0,xmm3
|
---|
1274 | psllq xmm0,57
|
---|
1275 | movdqa xmm3,xmm0
|
---|
1276 | pslldq xmm0,8
|
---|
1277 | psrldq xmm3,8
|
---|
1278 | pxor xmm0,xmm4
|
---|
1279 | pxor xmm1,xmm3
|
---|
1280 |
|
---|
1281 |
|
---|
1282 | movdqa xmm4,xmm0
|
---|
1283 | psrlq xmm0,1
|
---|
1284 | pxor xmm1,xmm4
|
---|
1285 | pxor xmm4,xmm0
|
---|
1286 | psrlq xmm0,5
|
---|
1287 | pxor xmm0,xmm4
|
---|
1288 | psrlq xmm0,1
|
---|
1289 | pxor xmm0,xmm1
|
---|
1290 | test r9,r9
|
---|
1291 | jnz NEAR $L$done
|
---|
1292 |
|
---|
1293 | $L$odd_tail:
|
---|
1294 | movdqu xmm8,XMMWORD[r8]
|
---|
1295 | DB 102,69,15,56,0,194
|
---|
1296 | pxor xmm0,xmm8
|
---|
1297 | movdqa xmm1,xmm0
|
---|
1298 | pshufd xmm3,xmm0,78
|
---|
1299 | pxor xmm3,xmm0
|
---|
1300 | DB 102,15,58,68,194,0
|
---|
1301 | DB 102,15,58,68,202,17
|
---|
1302 | DB 102,15,58,68,223,0
|
---|
1303 | pxor xmm3,xmm0
|
---|
1304 | pxor xmm3,xmm1
|
---|
1305 |
|
---|
1306 | movdqa xmm4,xmm3
|
---|
1307 | psrldq xmm3,8
|
---|
1308 | pslldq xmm4,8
|
---|
1309 | pxor xmm1,xmm3
|
---|
1310 | pxor xmm0,xmm4
|
---|
1311 |
|
---|
1312 | movdqa xmm4,xmm0
|
---|
1313 | movdqa xmm3,xmm0
|
---|
1314 | psllq xmm0,5
|
---|
1315 | pxor xmm3,xmm0
|
---|
1316 | psllq xmm0,1
|
---|
1317 | pxor xmm0,xmm3
|
---|
1318 | psllq xmm0,57
|
---|
1319 | movdqa xmm3,xmm0
|
---|
1320 | pslldq xmm0,8
|
---|
1321 | psrldq xmm3,8
|
---|
1322 | pxor xmm0,xmm4
|
---|
1323 | pxor xmm1,xmm3
|
---|
1324 |
|
---|
1325 |
|
---|
1326 | movdqa xmm4,xmm0
|
---|
1327 | psrlq xmm0,1
|
---|
1328 | pxor xmm1,xmm4
|
---|
1329 | pxor xmm4,xmm0
|
---|
1330 | psrlq xmm0,5
|
---|
1331 | pxor xmm0,xmm4
|
---|
1332 | psrlq xmm0,1
|
---|
1333 | pxor xmm0,xmm1
|
---|
1334 | $L$done:
|
---|
1335 | DB 102,65,15,56,0,194
|
---|
1336 | movdqu XMMWORD[rcx],xmm0
|
---|
1337 | movaps xmm6,XMMWORD[rsp]
|
---|
1338 | movaps xmm7,XMMWORD[16+rsp]
|
---|
1339 | movaps xmm8,XMMWORD[32+rsp]
|
---|
1340 | movaps xmm9,XMMWORD[48+rsp]
|
---|
1341 | movaps xmm10,XMMWORD[64+rsp]
|
---|
1342 | movaps xmm11,XMMWORD[80+rsp]
|
---|
1343 | movaps xmm12,XMMWORD[96+rsp]
|
---|
1344 | movaps xmm13,XMMWORD[112+rsp]
|
---|
1345 | movaps xmm14,XMMWORD[128+rsp]
|
---|
1346 | movaps xmm15,XMMWORD[144+rsp]
|
---|
1347 | lea rsp,[168+rsp]
|
---|
1348 | $L$SEH_end_gcm_ghash_clmul:
|
---|
1349 | DB 0F3h,0C3h ;repret
|
---|
1350 |
|
---|
1351 |
|
---|
1352 | global gcm_init_avx ; exported symbol
|
---|
1353 | ; gcm_init_avx: fills a 192-byte table at [rcx] from the 16-byte value at [rdx] (Win64: rcx/rdx are args 1 and 2).
|
---|
1354 | ALIGN 32
|
---|
1355 | gcm_init_avx: ; table layout: 4 groups of 48 bytes = {power 2i+1, power 2i+2, packed hi^lo halves of both}
|
---|
1356 | ; Clobbers xmm0-xmm5, r10, rcx (advanced by 192), flags; xmm6 is saved/restored on the stack.
|
---|
1357 | $L$SEH_begin_gcm_init_avx: ; start of the range registered in .pdata
|
---|
1358 | ; The two prologue instructions are hand-encoded; the unwind codes in .xdata use their byte offsets (4 and 8).
|
---|
1359 | DB 0x48,0x83,0xec,0x18 ; sub rsp,0x18
|
---|
1360 | DB 0x0f,0x29,0x34,0x24 ; movaps [rsp],xmm6 (xmm6 is callee-saved on Win64)
|
---|
1361 | vzeroupper
|
---|
1362 |
|
---|
1363 | vmovdqu xmm2,XMMWORD[rdx] ; load the 16-byte input value
|
---|
1364 | vpshufd xmm2,xmm2,78 ; imm 78 (0x4E): swap the two 64-bit halves
|
---|
1365 |
|
---|
1366 | ; xmm2 <<= 1 as a 128-bit value, then conditionally xor in the 0x1c2 constant
|
---|
1367 | vpshufd xmm4,xmm2,255 ; broadcast the top dword (carries the pre-shift MSB)
|
---|
1368 | vpsrlq xmm3,xmm2,63 ; xmm3 = bit 63 of each qword (the per-qword carry)
|
---|
1369 | vpsllq xmm2,xmm2,1 ; shift each qword left by one
|
---|
1370 | vpxor xmm5,xmm5,xmm5 ; xmm5 = 0
|
---|
1371 | vpcmpgtd xmm5,xmm5,xmm4 ; xmm5 = all-ones iff the top dword was negative, i.e. the MSB was set
|
---|
1372 | vpslldq xmm3,xmm3,8 ; move the low qword's carry into the high qword lane
|
---|
1373 | vpor xmm2,xmm2,xmm3 ; complete the 128-bit shift
|
---|
1374 |
|
---|
1375 |
|
---|
1376 | vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial] ; keep the constant only when the mask is set
|
---|
1377 | vpxor xmm2,xmm2,xmm5 ; xmm2 = multiplier used for every product below
|
---|
1378 |
|
---|
1379 | vpunpckhqdq xmm6,xmm2,xmm2 ; xmm6 = high qword of xmm2 in both lanes
|
---|
1380 | vmovdqa xmm0,xmm2 ; xmm0 = running power, starts at the first power
|
---|
1381 | vpxor xmm6,xmm6,xmm2 ; xmm6.lo = hi^lo of xmm2 (Karatsuba middle-term operand)
|
---|
1382 | mov r10,4 ; loop counter: 4 iterations, two powers stored per iteration
|
---|
1383 | jmp NEAR $L$init_start_avx ; first pass skips the deferred store and the extra multiply
|
---|
1384 | ALIGN 32
|
---|
1385 | $L$init_loop_avx:
|
---|
1386 | vpalignr xmm5,xmm4,xmm3,8 ; xmm5 = {xmm3.hi, xmm4.lo}: pack the two hi^lo values from the previous pass
|
---|
1387 | vmovdqu XMMWORD[(-16)+rcx],xmm5 ; third slot of the previous 48-byte group (rcx was already advanced)
|
---|
1388 | vpunpckhqdq xmm3,xmm0,xmm0 ; --- xmm0 = xmm0 * xmm2 (carry-less, Karatsuba) ---
|
---|
1389 | vpxor xmm3,xmm3,xmm0 ; xmm3.lo = hi^lo of xmm0
|
---|
1390 | vpclmulqdq xmm1,xmm0,xmm2,0x11 ; xmm1 = hi * hi
|
---|
1391 | vpclmulqdq xmm0,xmm0,xmm2,0x00 ; xmm0 = lo * lo
|
---|
1392 | vpclmulqdq xmm3,xmm3,xmm6,0x00 ; xmm3 = (hi^lo) * (hi^lo)
|
---|
1393 | vpxor xmm4,xmm1,xmm0
|
---|
1394 | vpxor xmm3,xmm3,xmm4 ; xmm3 = middle term
|
---|
1395 |
|
---|
1396 | vpslldq xmm4,xmm3,8
|
---|
1397 | vpsrldq xmm3,xmm3,8
|
---|
1398 | vpxor xmm0,xmm0,xmm4 ; fold the middle term into the 256-bit product xmm1:xmm0
|
---|
1399 | vpxor xmm1,xmm1,xmm3
|
---|
1400 | vpsllq xmm3,xmm0,57 ; reduction, first phase (shifts by 57/62/63) - presumably modulo the GHASH polynomial
|
---|
1401 | vpsllq xmm4,xmm0,62
|
---|
1402 | vpxor xmm4,xmm4,xmm3
|
---|
1403 | vpsllq xmm3,xmm0,63
|
---|
1404 | vpxor xmm4,xmm4,xmm3
|
---|
1405 | vpslldq xmm3,xmm4,8
|
---|
1406 | vpsrldq xmm4,xmm4,8
|
---|
1407 | vpxor xmm0,xmm0,xmm3
|
---|
1408 | vpxor xmm1,xmm1,xmm4
|
---|
1409 |
|
---|
1410 | vpsrlq xmm4,xmm0,1 ; reduction, second phase (right shifts by 1, 5, 1)
|
---|
1411 | vpxor xmm1,xmm1,xmm0
|
---|
1412 | vpxor xmm0,xmm0,xmm4
|
---|
1413 | vpsrlq xmm4,xmm4,5
|
---|
1414 | vpxor xmm0,xmm0,xmm4
|
---|
1415 | vpsrlq xmm0,xmm0,1
|
---|
1416 | vpxor xmm0,xmm0,xmm1 ; xmm0 = reduced 128-bit product
|
---|
1417 | $L$init_start_avx:
|
---|
1418 | vmovdqa xmm5,xmm0 ; xmm5 = odd power of this group (stored at [rcx] below)
|
---|
1419 | vpunpckhqdq xmm3,xmm0,xmm0 ; --- xmm0 = xmm0 * xmm2 again: same multiply/reduce sequence as above ---
|
---|
1420 | vpxor xmm3,xmm3,xmm0
|
---|
1421 | vpclmulqdq xmm1,xmm0,xmm2,0x11 ; hi * hi
|
---|
1422 | vpclmulqdq xmm0,xmm0,xmm2,0x00 ; lo * lo
|
---|
1423 | vpclmulqdq xmm3,xmm3,xmm6,0x00 ; (hi^lo) * (hi^lo)
|
---|
1424 | vpxor xmm4,xmm1,xmm0
|
---|
1425 | vpxor xmm3,xmm3,xmm4 ; middle term
|
---|
1426 |
|
---|
1427 | vpslldq xmm4,xmm3,8
|
---|
1428 | vpsrldq xmm3,xmm3,8
|
---|
1429 | vpxor xmm0,xmm0,xmm4
|
---|
1430 | vpxor xmm1,xmm1,xmm3
|
---|
1431 | vpsllq xmm3,xmm0,57 ; reduction, first phase
|
---|
1432 | vpsllq xmm4,xmm0,62
|
---|
1433 | vpxor xmm4,xmm4,xmm3
|
---|
1434 | vpsllq xmm3,xmm0,63
|
---|
1435 | vpxor xmm4,xmm4,xmm3
|
---|
1436 | vpslldq xmm3,xmm4,8
|
---|
1437 | vpsrldq xmm4,xmm4,8
|
---|
1438 | vpxor xmm0,xmm0,xmm3
|
---|
1439 | vpxor xmm1,xmm1,xmm4
|
---|
1440 |
|
---|
1441 | vpsrlq xmm4,xmm0,1 ; reduction, second phase
|
---|
1442 | vpxor xmm1,xmm1,xmm0
|
---|
1443 | vpxor xmm0,xmm0,xmm4
|
---|
1444 | vpsrlq xmm4,xmm4,5
|
---|
1445 | vpxor xmm0,xmm0,xmm4
|
---|
1446 | vpsrlq xmm0,xmm0,1
|
---|
1447 | vpxor xmm0,xmm0,xmm1 ; xmm0 = even power of this group
|
---|
1448 | vpshufd xmm3,xmm5,78 ; xmm3 = hi^lo of the odd power (in both lanes after the xor below)
|
---|
1449 | vpshufd xmm4,xmm0,78 ; xmm4 = hi^lo of the even power (in both lanes after the xor below)
|
---|
1450 | vpxor xmm3,xmm3,xmm5
|
---|
1451 | vmovdqu XMMWORD[rcx],xmm5 ; slot 0: odd power
|
---|
1452 | vpxor xmm4,xmm4,xmm0
|
---|
1453 | vmovdqu XMMWORD[16+rcx],xmm0 ; slot 1: even power
|
---|
1454 | lea rcx,[48+rcx] ; advance to the next group; slot 2 is written later via [-16+rcx]
|
---|
1455 | sub r10,1
|
---|
1456 | jnz NEAR $L$init_loop_avx
|
---|
1457 |
|
---|
1458 | vpalignr xmm5,xmm3,xmm4,8 ; last packed slot: source operands are swapped relative to the in-loop vpalignr
|
---|
1459 | vmovdqu XMMWORD[(-16)+rcx],xmm5 ; slot 2 of the final group
|
---|
1460 |
|
---|
1461 | vzeroupper
|
---|
1462 | movaps xmm6,XMMWORD[rsp] ; restore callee-saved xmm6
|
---|
1463 | lea rsp,[24+rsp] ; undo the 0x18-byte allocation
|
---|
1464 | $L$SEH_end_gcm_init_avx:
|
---|
1465 | DB 0F3h,0C3h ;repret
|
---|
1466 |
|
---|
1467 |
|
---|
1468 | global gcm_gmult_avx ; exported symbol
|
---|
1469 | ; gcm_gmult_avx: thin alias - no AVX-specific body, it forwards to the CLMUL implementation.
|
---|
1470 | ALIGN 32
|
---|
1471 | gcm_gmult_avx:
|
---|
1472 |
|
---|
1473 | DB 243,15,30,250 ; endbr64 (F3 0F 1E FA)
|
---|
1474 | jmp NEAR $L$_gmult_clmul ; tail-jump with all argument registers untouched; target label is defined outside this chunk
|
---|
1475 |
|
---|
1476 |
|
---|
1477 | global gcm_ghash_avx ; exported symbol
|
---|
1478 | ; gcm_ghash_avx (Win64 args as used below): rcx = 16-byte accumulator (read at entry, written at exit), rdx = precomputed table, r8 = input, r9 = byte count.
|
---|
1479 | ALIGN 32
|
---|
1480 | gcm_ghash_avx: ; NOTE(review): the length handling only terminates cleanly for r9 = non-zero multiple of 16 - confirm callers guarantee this
|
---|
1481 | ; Table offsets match the 48-byte groups written by gcm_init_avx - presumably the same table; verify against caller.
|
---|
1482 | DB 243,15,30,250 ; endbr64
|
---|
1483 | lea rax,[((-136))+rsp] ; rax = frame base for the xmm save area
|
---|
1484 | $L$SEH_begin_gcm_ghash_avx:
|
---|
1485 | ; Hand-encoded prologue: byte offsets are referenced by the unwind codes in .xdata, do not re-encode.
|
---|
1486 | DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20] -> rsp = entry rsp - 168
|
---|
1487 | DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6 (= [rsp+0])
|
---|
1488 | DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7 (= [rsp+16])
|
---|
1489 | DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8 (= [rsp+32])
|
---|
1490 | DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
|
---|
1491 | DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
|
---|
1492 | DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
|
---|
1493 | DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
|
---|
1494 | DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
|
---|
1495 | DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
|
---|
1496 | DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15 (= [rsp+144])
|
---|
1497 | vzeroupper
|
---|
1498 |
|
---|
1499 | vmovdqu xmm10,XMMWORD[rcx] ; xmm10 = current accumulator
|
---|
1500 | lea r10,[$L$0x1c2_polynomial] ; r10 = address of the reduction constant
|
---|
1501 | lea rdx,[64+rdx] ; bias the table pointer by 64; every table access below is written as (offset-64)
|
---|
1502 | vmovdqu xmm13,XMMWORD[$L$bswap_mask] ; xmm13 = byte-reversal shuffle mask, live for the whole function
|
---|
1503 | vpshufb xmm10,xmm10,xmm13 ; byte-reverse the accumulator
|
---|
1504 | cmp r9,0x80
|
---|
1505 | jb NEAR $L$short_avx ; fewer than 128 bytes: 1..7 block path (unsigned compare)
|
---|
1506 | sub r9,0x80
|
---|
1507 | ; First 8-block pass. Blocks are consumed from offset 112 down to 0, pairing the last block with table slot 0.
|
---|
1508 | vmovdqu xmm14,XMMWORD[112+r8] ; block 7
|
---|
1509 | vmovdqu xmm6,XMMWORD[((0-64))+rdx] ; table slot at offset 0
|
---|
1510 | vpshufb xmm14,xmm14,xmm13
|
---|
1511 | vmovdqu xmm7,XMMWORD[((32-64))+rdx] ; packed hi^lo slot of group 0
|
---|
1512 | ; Accumulators alternate between xmm0/1/2 and xmm3/4/5 = (lo*lo, hi*hi, middle) sums.
|
---|
1513 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1514 | vmovdqu xmm15,XMMWORD[96+r8] ; block 6
|
---|
1515 | vpclmulqdq xmm0,xmm14,xmm6,0x00 ; lo * lo
|
---|
1516 | vpxor xmm9,xmm9,xmm14 ; xmm9.lo = hi^lo of block 7
|
---|
1517 | vpshufb xmm15,xmm15,xmm13
|
---|
1518 | vpclmulqdq xmm1,xmm14,xmm6,0x11 ; hi * hi
|
---|
1519 | vmovdqu xmm6,XMMWORD[((16-64))+rdx] ; table slot at offset 16
|
---|
1520 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1521 | vmovdqu xmm14,XMMWORD[80+r8] ; block 5
|
---|
1522 | vpclmulqdq xmm2,xmm9,xmm7,0x00 ; middle term, low qword of the packed slot
|
---|
1523 | vpxor xmm8,xmm8,xmm15
|
---|
1524 |
|
---|
1525 | vpshufb xmm14,xmm14,xmm13
|
---|
1526 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1527 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1528 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1529 | vmovdqu xmm6,XMMWORD[((48-64))+rdx] ; table slot at offset 48
|
---|
1530 | vpxor xmm9,xmm9,xmm14
|
---|
1531 | vmovdqu xmm15,XMMWORD[64+r8] ; block 4
|
---|
1532 | vpclmulqdq xmm5,xmm8,xmm7,0x10 ; middle term, high qword of the packed slot
|
---|
1533 | vmovdqu xmm7,XMMWORD[((80-64))+rdx] ; packed slot of group 1
|
---|
1534 |
|
---|
1535 | vpshufb xmm15,xmm15,xmm13
|
---|
1536 | vpxor xmm3,xmm3,xmm0 ; fold previous partial sums
|
---|
1537 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1538 | vpxor xmm4,xmm4,xmm1
|
---|
1539 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1540 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1541 | vmovdqu xmm6,XMMWORD[((64-64))+rdx] ; table slot at offset 64
|
---|
1542 | vpxor xmm5,xmm5,xmm2
|
---|
1543 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1544 | vpxor xmm8,xmm8,xmm15
|
---|
1545 |
|
---|
1546 | vmovdqu xmm14,XMMWORD[48+r8] ; block 3
|
---|
1547 | vpxor xmm0,xmm0,xmm3
|
---|
1548 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1549 | vpxor xmm1,xmm1,xmm4
|
---|
1550 | vpshufb xmm14,xmm14,xmm13
|
---|
1551 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1552 | vmovdqu xmm6,XMMWORD[((96-64))+rdx] ; table slot at offset 96
|
---|
1553 | vpxor xmm2,xmm2,xmm5
|
---|
1554 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1555 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1556 | vmovdqu xmm7,XMMWORD[((128-64))+rdx] ; packed slot of group 2
|
---|
1557 | vpxor xmm9,xmm9,xmm14
|
---|
1558 |
|
---|
1559 | vmovdqu xmm15,XMMWORD[32+r8] ; block 2
|
---|
1560 | vpxor xmm3,xmm3,xmm0
|
---|
1561 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1562 | vpxor xmm4,xmm4,xmm1
|
---|
1563 | vpshufb xmm15,xmm15,xmm13
|
---|
1564 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1565 | vmovdqu xmm6,XMMWORD[((112-64))+rdx] ; table slot at offset 112
|
---|
1566 | vpxor xmm5,xmm5,xmm2
|
---|
1567 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1568 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1569 | vpxor xmm8,xmm8,xmm15
|
---|
1570 |
|
---|
1571 | vmovdqu xmm14,XMMWORD[16+r8] ; block 1
|
---|
1572 | vpxor xmm0,xmm0,xmm3
|
---|
1573 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1574 | vpxor xmm1,xmm1,xmm4
|
---|
1575 | vpshufb xmm14,xmm14,xmm13
|
---|
1576 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1577 | vmovdqu xmm6,XMMWORD[((144-64))+rdx] ; table slot at offset 144
|
---|
1578 | vpxor xmm2,xmm2,xmm5
|
---|
1579 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1580 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1581 | vmovdqu xmm7,XMMWORD[((176-64))+rdx] ; packed slot of group 3
|
---|
1582 | vpxor xmm9,xmm9,xmm14
|
---|
1583 |
|
---|
1584 | vmovdqu xmm15,XMMWORD[r8] ; block 0 - its multiplication is deferred to the loop or the tail
|
---|
1585 | vpxor xmm3,xmm3,xmm0
|
---|
1586 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1587 | vpxor xmm4,xmm4,xmm1
|
---|
1588 | vpshufb xmm15,xmm15,xmm13
|
---|
1589 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1590 | vmovdqu xmm6,XMMWORD[((160-64))+rdx] ; table slot at offset 160, pairs with block 0
|
---|
1591 | vpxor xmm5,xmm5,xmm2
|
---|
1592 | vpclmulqdq xmm2,xmm9,xmm7,0x10 ; group 3 uses the high qword for the odd slot (its packed slot is stored flipped by gcm_init_avx)
|
---|
1593 |
|
---|
1594 | lea r8,[128+r8] ; consumed 128 input bytes
|
---|
1595 | cmp r9,0x80
|
---|
1596 | jb NEAR $L$tail_avx ; fewer than 8 further blocks: finish this batch (tail xors the accumulator into block 0)
|
---|
1597 |
|
---|
1598 | vpxor xmm15,xmm15,xmm10 ; fold the accumulator into block 0 before entering the loop
|
---|
1599 | sub r9,0x80
|
---|
1600 | jmp NEAR $L$oop8x_avx
|
---|
1601 |
|
---|
1602 | ALIGN 32
|
---|
1603 | $L$oop8x_avx: ; steady state: finish the previous 8-block batch (multiply block 0, reduce) while starting the next one
|
---|
1604 | vpunpckhqdq xmm8,xmm15,xmm15 ; xmm15 = previous block 0 with the accumulator already xored in
|
---|
1605 | vmovdqu xmm14,XMMWORD[112+r8] ; next batch, block 7
|
---|
1606 | vpxor xmm3,xmm3,xmm0 ; close the previous batch's lo*lo sum
|
---|
1607 | vpxor xmm8,xmm8,xmm15
|
---|
1608 | vpclmulqdq xmm10,xmm15,xmm6,0x00 ; xmm10 = lo*lo of the deferred block 0
|
---|
1609 | vpshufb xmm14,xmm14,xmm13
|
---|
1610 | vpxor xmm4,xmm4,xmm1 ; close the hi*hi sum
|
---|
1611 | vpclmulqdq xmm11,xmm15,xmm6,0x11 ; xmm11 = hi*hi of block 0
|
---|
1612 | vmovdqu xmm6,XMMWORD[((0-64))+rdx]
|
---|
1613 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1614 | vpxor xmm5,xmm5,xmm2 ; close the middle-term sum
|
---|
1615 | vpclmulqdq xmm12,xmm8,xmm7,0x00 ; xmm12 = middle term of block 0
|
---|
1616 | vmovdqu xmm7,XMMWORD[((32-64))+rdx]
|
---|
1617 | vpxor xmm9,xmm9,xmm14
|
---|
1618 |
|
---|
1619 | vmovdqu xmm15,XMMWORD[96+r8] ; block 6
|
---|
1620 | vpclmulqdq xmm0,xmm14,xmm6,0x00 ; new batch starts fresh sums in xmm0/1/2
|
---|
1621 | vpxor xmm10,xmm10,xmm3 ; xmm10 = total lo*lo of the previous batch
|
---|
1622 | vpshufb xmm15,xmm15,xmm13
|
---|
1623 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1624 | vxorps xmm11,xmm11,xmm4 ; xmm11 = total hi*hi
|
---|
1625 | vmovdqu xmm6,XMMWORD[((16-64))+rdx]
|
---|
1626 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1627 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1628 | vpxor xmm12,xmm12,xmm5 ; xmm12 = total middle sum
|
---|
1629 | vxorps xmm8,xmm8,xmm15
|
---|
1630 |
|
---|
1631 | vmovdqu xmm14,XMMWORD[80+r8] ; block 5
|
---|
1632 | vpxor xmm12,xmm12,xmm10 ; Karatsuba fix-up: middle ^= lo ^ hi
|
---|
1633 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1634 | vpxor xmm12,xmm12,xmm11
|
---|
1635 | vpslldq xmm9,xmm12,8
|
---|
1636 | vpxor xmm3,xmm3,xmm0
|
---|
1637 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1638 | vpsrldq xmm12,xmm12,8
|
---|
1639 | vpxor xmm10,xmm10,xmm9 ; xmm11:xmm10 = 256-bit product of the previous batch
|
---|
1640 | vmovdqu xmm6,XMMWORD[((48-64))+rdx]
|
---|
1641 | vpshufb xmm14,xmm14,xmm13
|
---|
1642 | vxorps xmm11,xmm11,xmm12
|
---|
1643 | vpxor xmm4,xmm4,xmm1
|
---|
1644 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1645 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1646 | vmovdqu xmm7,XMMWORD[((80-64))+rdx]
|
---|
1647 | vpxor xmm9,xmm9,xmm14
|
---|
1648 | vpxor xmm5,xmm5,xmm2
|
---|
1649 |
|
---|
1650 | vmovdqu xmm15,XMMWORD[64+r8] ; block 4
|
---|
1651 | vpalignr xmm12,xmm10,xmm10,8 ; xmm12 = xmm10 with its qwords swapped (reduction step 1)
|
---|
1652 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1653 | vpshufb xmm15,xmm15,xmm13
|
---|
1654 | vpxor xmm0,xmm0,xmm3
|
---|
1655 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1656 | vmovdqu xmm6,XMMWORD[((64-64))+rdx]
|
---|
1657 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1658 | vpxor xmm1,xmm1,xmm4
|
---|
1659 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1660 | vxorps xmm8,xmm8,xmm15
|
---|
1661 | vpxor xmm2,xmm2,xmm5
|
---|
1662 |
|
---|
1663 | vmovdqu xmm14,XMMWORD[48+r8] ; block 3
|
---|
1664 | vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 ; xmm10.lo * high qword of $L$0x1c2_polynomial (reduction step 1)
|
---|
1665 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1666 | vpshufb xmm14,xmm14,xmm13
|
---|
1667 | vpxor xmm3,xmm3,xmm0
|
---|
1668 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1669 | vmovdqu xmm6,XMMWORD[((96-64))+rdx]
|
---|
1670 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1671 | vpxor xmm4,xmm4,xmm1
|
---|
1672 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1673 | vmovdqu xmm7,XMMWORD[((128-64))+rdx]
|
---|
1674 | vpxor xmm9,xmm9,xmm14
|
---|
1675 | vpxor xmm5,xmm5,xmm2
|
---|
1676 |
|
---|
1677 | vmovdqu xmm15,XMMWORD[32+r8] ; block 2
|
---|
1678 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1679 | vpshufb xmm15,xmm15,xmm13
|
---|
1680 | vpxor xmm0,xmm0,xmm3
|
---|
1681 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1682 | vmovdqu xmm6,XMMWORD[((112-64))+rdx]
|
---|
1683 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1684 | vpxor xmm1,xmm1,xmm4
|
---|
1685 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1686 | vpxor xmm8,xmm8,xmm15
|
---|
1687 | vpxor xmm2,xmm2,xmm5
|
---|
1688 | vxorps xmm10,xmm10,xmm12 ; finish reduction step 1
|
---|
1689 |
|
---|
1690 | vmovdqu xmm14,XMMWORD[16+r8] ; block 1
|
---|
1691 | vpalignr xmm12,xmm10,xmm10,8 ; reduction step 2: qword swap again
|
---|
1692 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1693 | vpshufb xmm14,xmm14,xmm13
|
---|
1694 | vpxor xmm3,xmm3,xmm0
|
---|
1695 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1696 | vmovdqu xmm6,XMMWORD[((144-64))+rdx]
|
---|
1697 | vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 ; reduction step 2: multiply by the constant's high qword
|
---|
1698 | vxorps xmm12,xmm12,xmm11 ; fold in the upper 128 bits of the product
|
---|
1699 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1700 | vpxor xmm4,xmm4,xmm1
|
---|
1701 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1702 | vmovdqu xmm7,XMMWORD[((176-64))+rdx]
|
---|
1703 | vpxor xmm9,xmm9,xmm14
|
---|
1704 | vpxor xmm5,xmm5,xmm2
|
---|
1705 |
|
---|
1706 | vmovdqu xmm15,XMMWORD[r8] ; block 0 of the new batch
|
---|
1707 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1708 | vpshufb xmm15,xmm15,xmm13
|
---|
1709 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1710 | vmovdqu xmm6,XMMWORD[((160-64))+rdx]
|
---|
1711 | vpxor xmm15,xmm15,xmm12 ; xor the reduced previous result (xmm12 ^ xmm10) into the new block 0
|
---|
1712 | vpclmulqdq xmm2,xmm9,xmm7,0x10
|
---|
1713 | vpxor xmm15,xmm15,xmm10
|
---|
1714 |
|
---|
1715 | lea r8,[128+r8] ; consumed another 128 bytes
|
---|
1716 | sub r9,0x80
|
---|
1717 | jnc NEAR $L$oop8x_avx ; loop while the subtraction did not borrow (at least 128 bytes were left)
|
---|
1718 |
|
---|
1719 | add r9,0x80 ; undo the borrow: r9 = leftover byte count (< 128)
|
---|
1720 | jmp NEAR $L$tail_no_xor_avx ; accumulator is already folded into xmm15, so skip the xor at $L$tail_avx
|
---|
1721 |
|
---|
1722 | ALIGN 32
|
---|
1723 | $L$short_avx: ; handles 1..7 remaining blocks (r9 < 0x80), walking the input backwards from its end
|
---|
1724 | vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8] ; last 16-byte block
|
---|
1725 | lea r8,[r9*1+r8] ; r8 = end of input; earlier blocks are addressed as negative offsets
|
---|
1726 | vmovdqu xmm6,XMMWORD[((0-64))+rdx] ; table slot at offset 0 pairs with the last block
|
---|
1727 | vmovdqu xmm7,XMMWORD[((32-64))+rdx] ; packed hi^lo slot of group 0
|
---|
1728 | vpshufb xmm15,xmm14,xmm13
|
---|
1729 | ; Copy xmm0..2 into xmm3..5 so that the next "vpxor xmm3,xmm3,xmm0" (and friends) yields zeroed accumulators.
|
---|
1730 | vmovdqa xmm3,xmm0
|
---|
1731 | vmovdqa xmm4,xmm1
|
---|
1732 | vmovdqa xmm5,xmm2
|
---|
1733 | sub r9,0x10
|
---|
1734 | jz NEAR $L$tail_avx ; exactly one block: the tail performs its multiplication
|
---|
1735 | ; Each step below: accumulate the previous products, multiply the current block, load the next block and table slot.
|
---|
1736 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1737 | vpxor xmm3,xmm3,xmm0
|
---|
1738 | vpclmulqdq xmm0,xmm15,xmm6,0x00 ; lo * lo
|
---|
1739 | vpxor xmm8,xmm8,xmm15
|
---|
1740 | vmovdqu xmm14,XMMWORD[((-32))+r8] ; second-to-last block
|
---|
1741 | vpxor xmm4,xmm4,xmm1
|
---|
1742 | vpclmulqdq xmm1,xmm15,xmm6,0x11 ; hi * hi
|
---|
1743 | vmovdqu xmm6,XMMWORD[((16-64))+rdx]
|
---|
1744 | vpshufb xmm15,xmm14,xmm13
|
---|
1745 | vpxor xmm5,xmm5,xmm2
|
---|
1746 | vpclmulqdq xmm2,xmm8,xmm7,0x00 ; middle term
|
---|
1747 | vpsrldq xmm7,xmm7,8 ; bring the packed slot's high qword down for the next 0x00 multiply
|
---|
1748 | sub r9,0x10
|
---|
1749 | jz NEAR $L$tail_avx
|
---|
1750 |
|
---|
1751 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1752 | vpxor xmm3,xmm3,xmm0
|
---|
1753 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1754 | vpxor xmm8,xmm8,xmm15
|
---|
1755 | vmovdqu xmm14,XMMWORD[((-48))+r8]
|
---|
1756 | vpxor xmm4,xmm4,xmm1
|
---|
1757 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1758 | vmovdqu xmm6,XMMWORD[((48-64))+rdx]
|
---|
1759 | vpshufb xmm15,xmm14,xmm13
|
---|
1760 | vpxor xmm5,xmm5,xmm2
|
---|
1761 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1762 | vmovdqu xmm7,XMMWORD[((80-64))+rdx] ; packed slot of group 1
|
---|
1763 | sub r9,0x10
|
---|
1764 | jz NEAR $L$tail_avx
|
---|
1765 |
|
---|
1766 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1767 | vpxor xmm3,xmm3,xmm0
|
---|
1768 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1769 | vpxor xmm8,xmm8,xmm15
|
---|
1770 | vmovdqu xmm14,XMMWORD[((-64))+r8]
|
---|
1771 | vpxor xmm4,xmm4,xmm1
|
---|
1772 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1773 | vmovdqu xmm6,XMMWORD[((64-64))+rdx]
|
---|
1774 | vpshufb xmm15,xmm14,xmm13
|
---|
1775 | vpxor xmm5,xmm5,xmm2
|
---|
1776 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1777 | vpsrldq xmm7,xmm7,8 ; high qword of the group-1 packed slot -> low
|
---|
1778 | sub r9,0x10
|
---|
1779 | jz NEAR $L$tail_avx
|
---|
1780 |
|
---|
1781 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1782 | vpxor xmm3,xmm3,xmm0
|
---|
1783 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1784 | vpxor xmm8,xmm8,xmm15
|
---|
1785 | vmovdqu xmm14,XMMWORD[((-80))+r8]
|
---|
1786 | vpxor xmm4,xmm4,xmm1
|
---|
1787 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1788 | vmovdqu xmm6,XMMWORD[((96-64))+rdx]
|
---|
1789 | vpshufb xmm15,xmm14,xmm13
|
---|
1790 | vpxor xmm5,xmm5,xmm2
|
---|
1791 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1792 | vmovdqu xmm7,XMMWORD[((128-64))+rdx] ; packed slot of group 2
|
---|
1793 | sub r9,0x10
|
---|
1794 | jz NEAR $L$tail_avx
|
---|
1795 |
|
---|
1796 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1797 | vpxor xmm3,xmm3,xmm0
|
---|
1798 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1799 | vpxor xmm8,xmm8,xmm15
|
---|
1800 | vmovdqu xmm14,XMMWORD[((-96))+r8]
|
---|
1801 | vpxor xmm4,xmm4,xmm1
|
---|
1802 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1803 | vmovdqu xmm6,XMMWORD[((112-64))+rdx]
|
---|
1804 | vpshufb xmm15,xmm14,xmm13
|
---|
1805 | vpxor xmm5,xmm5,xmm2
|
---|
1806 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1807 | vpsrldq xmm7,xmm7,8 ; high qword of the group-2 packed slot -> low
|
---|
1808 | sub r9,0x10
|
---|
1809 | jz NEAR $L$tail_avx
|
---|
1810 |
|
---|
1811 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1812 | vpxor xmm3,xmm3,xmm0
|
---|
1813 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1814 | vpxor xmm8,xmm8,xmm15
|
---|
1815 | vmovdqu xmm14,XMMWORD[((-112))+r8] ; seventh block from the end
|
---|
1816 | vpxor xmm4,xmm4,xmm1
|
---|
1817 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1818 | vmovdqu xmm6,XMMWORD[((144-64))+rdx]
|
---|
1819 | vpshufb xmm15,xmm14,xmm13
|
---|
1820 | vpxor xmm5,xmm5,xmm2
|
---|
1821 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1822 | vmovq xmm7,QWORD[((184-64))+rdx] ; only the high qword of the group-3 packed slot (offset 176+8) is needed
|
---|
1823 | sub r9,0x10
|
---|
1824 | jmp NEAR $L$tail_avx ; seventh block is multiplied in the tail; r9 is re-checked there
|
---|
1825 |
|
---|
1826 | ALIGN 32
|
---|
1827 | $L$tail_avx: ; multiply the last pending block, combine all partial sums, reduce, then loop or return
|
---|
1828 | vpxor xmm15,xmm15,xmm10 ; fold the running accumulator into the pending block
|
---|
1829 | $L$tail_no_xor_avx: ; entry used by the 8x loop, where the accumulator is already folded in
|
---|
1830 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1831 | vpxor xmm3,xmm3,xmm0
|
---|
1832 | vpclmulqdq xmm0,xmm15,xmm6,0x00 ; lo * lo
|
---|
1833 | vpxor xmm8,xmm8,xmm15
|
---|
1834 | vpxor xmm4,xmm4,xmm1
|
---|
1835 | vpclmulqdq xmm1,xmm15,xmm6,0x11 ; hi * hi
|
---|
1836 | vpxor xmm5,xmm5,xmm2
|
---|
1837 | vpclmulqdq xmm2,xmm8,xmm7,0x00 ; middle term
|
---|
1838 |
|
---|
1839 | vmovdqu xmm12,XMMWORD[r10] ; xmm12 = $L$0x1c2_polynomial (r10 was set at function entry)
|
---|
1840 |
|
---|
1841 | vpxor xmm10,xmm3,xmm0 ; xmm10 = total lo*lo
|
---|
1842 | vpxor xmm11,xmm4,xmm1 ; xmm11 = total hi*hi
|
---|
1843 | vpxor xmm5,xmm5,xmm2 ; xmm5 = total middle sum
|
---|
1844 |
|
---|
1845 | vpxor xmm5,xmm5,xmm10 ; Karatsuba fix-up: middle ^= lo ^ hi
|
---|
1846 | vpxor xmm5,xmm5,xmm11
|
---|
1847 | vpslldq xmm9,xmm5,8
|
---|
1848 | vpsrldq xmm5,xmm5,8
|
---|
1849 | vpxor xmm10,xmm10,xmm9 ; xmm11:xmm10 = 256-bit product
|
---|
1850 | vpxor xmm11,xmm11,xmm5
|
---|
1851 |
|
---|
1852 | vpclmulqdq xmm9,xmm10,xmm12,0x10 ; reduction step 1: xmm10.lo * high qword of the constant
|
---|
1853 | vpalignr xmm10,xmm10,xmm10,8 ; swap qwords
|
---|
1854 | vpxor xmm10,xmm10,xmm9
|
---|
1855 |
|
---|
1856 | vpclmulqdq xmm9,xmm10,xmm12,0x10 ; reduction step 2
|
---|
1857 | vpalignr xmm10,xmm10,xmm10,8
|
---|
1858 | vpxor xmm10,xmm10,xmm11 ; fold in the upper 128 bits
|
---|
1859 | vpxor xmm10,xmm10,xmm9 ; xmm10 = updated accumulator
|
---|
1860 |
|
---|
1861 | cmp r9,0
|
---|
1862 | jne NEAR $L$short_avx ; leftover bytes after an 8-block pass: process them on the short path
|
---|
1863 |
|
---|
1864 | vpshufb xmm10,xmm10,xmm13 ; undo the byte reversal applied at entry
|
---|
1865 | vmovdqu XMMWORD[rcx],xmm10 ; write the result back over the input accumulator
|
---|
1866 | vzeroupper
|
---|
1867 | movaps xmm6,XMMWORD[rsp] ; restore callee-saved xmm6..xmm15 from the prologue's save area
|
---|
1868 | movaps xmm7,XMMWORD[16+rsp]
|
---|
1869 | movaps xmm8,XMMWORD[32+rsp]
|
---|
1870 | movaps xmm9,XMMWORD[48+rsp]
|
---|
1871 | movaps xmm10,XMMWORD[64+rsp]
|
---|
1872 | movaps xmm11,XMMWORD[80+rsp]
|
---|
1873 | movaps xmm12,XMMWORD[96+rsp]
|
---|
1874 | movaps xmm13,XMMWORD[112+rsp]
|
---|
1875 | movaps xmm14,XMMWORD[128+rsp]
|
---|
1876 | movaps xmm15,XMMWORD[144+rsp]
|
---|
1877 | lea rsp,[168+rsp] ; release the 168-byte frame
|
---|
1878 | $L$SEH_end_gcm_ghash_avx:
|
---|
1879 | DB 0F3h,0C3h ;repret
|
---|
1880 |
|
---|
1881 |
|
---|
1882 | ALIGN 64 ; read-only constants; no section switch precedes them, so they sit in the current (.text) section
|
---|
1883 | $L$bswap_mask: ; pshufb/vpshufb control that reverses all 16 bytes
|
---|
1884 | DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
---|
1885 | $L$0x1c2_polynomial: ; 128-bit constant: low byte 1, top byte 0xc2 (high qword = 0xc200000000000000)
|
---|
1886 | DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
---|
1887 | $L$7_mask: ; two qwords, each = 7; loaded by the 4x CLMUL loop
|
---|
1888 | DD 7,0,7,0
|
---|
1889 | $L$7_mask_poly: ; qwords 7 and 450 (= 0x1c2); not referenced in this chunk
|
---|
1890 | DD 7,0,450,0
|
---|
1891 | ALIGN 64
|
---|
1892 |
|
---|
1893 | $L$rem_4bit: ; 16 qwords (low dword 0, high dword = value); indexed as QWORD[idx*8+table] by the 4-bit code
|
---|
1894 | DD 0,0,0,471859200,0,943718400,0,610271232
|
---|
1895 | DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
---|
1896 | DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
---|
1897 | DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
---|
1898 |
|
---|
1899 | $L$rem_8bit: ; 256 16-bit entries (32 rows of 8); its users are outside this chunk
|
---|
1900 | DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
|
---|
1901 | DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
|
---|
1902 | DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
|
---|
1903 | DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
|
---|
1904 | DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
|
---|
1905 | DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
|
---|
1906 | DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
|
---|
1907 | DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
|
---|
1908 | DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
|
---|
1909 | DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
|
---|
1910 | DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
|
---|
1911 | DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
|
---|
1912 | DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
|
---|
1913 | DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
|
---|
1914 | DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
|
---|
1915 | DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
|
---|
1916 | DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
|
---|
1917 | DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
|
---|
1918 | DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
|
---|
1919 | DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
|
---|
1920 | DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
|
---|
1921 | DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
|
---|
1922 | DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
|
---|
1923 | DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
|
---|
1924 | DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
|
---|
1925 | DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
|
---|
1926 | DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
|
---|
1927 | DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
|
---|
1928 | DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
|
---|
1929 | DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
|
---|
1930 | DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
|
---|
1931 | DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
|
---|
1932 | ; NUL-terminated ASCII identification string: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
|
---|
1933 | DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
|
---|
1934 | DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
|
---|
1935 | DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
---|
1936 | DB 114,103,62,0
|
---|
1937 | ALIGN 64
|
---|
1938 | EXTERN __imp_RtlVirtualUnwind ; import-table slot, called indirectly below
|
---|
1939 | ; se_handler: Win64 language-specific exception handler named in the UNWIND_INFO of the 4-bit routines.
|
---|
1940 | ALIGN 16
|
---|
1941 | se_handler: ; uses r8 as the CONTEXT record and r9 as the DISPATCHER_CONTEXT; field names below follow the Win64 struct layouts
|
---|
1942 | push rsi ; save every non-volatile GPR plus flags; the handler clobbers them freely
|
---|
1943 | push rdi
|
---|
1944 | push rbx
|
---|
1945 | push rbp
|
---|
1946 | push r12
|
---|
1947 | push r13
|
---|
1948 | push r14
|
---|
1949 | push r15
|
---|
1950 | pushfq
|
---|
1951 | sub rsp,64 ; 32-byte shadow space + 4 stack arguments for RtlVirtualUnwind
|
---|
1952 |
|
---|
1953 | mov rax,QWORD[120+r8] ; context->Rax (the guarded prologues copy the entry rsp into rax)
|
---|
1954 | mov rbx,QWORD[248+r8] ; context->Rip
|
---|
1955 |
|
---|
1956 | mov rsi,QWORD[8+r9] ; disp->ImageBase
|
---|
1957 | mov r11,QWORD[56+r9] ; disp->HandlerData = the two label RVAs following the handler RVA in .xdata
|
---|
1958 |
|
---|
1959 | mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the end-of-prologue label
|
---|
1960 | lea r10,[r10*1+rsi] ; convert the RVA to an absolute address
|
---|
1961 | cmp rbx,r10
|
---|
1962 | jb NEAR $L$in_prologue ; fault before the prologue finished: rax still holds the entry rsp
|
---|
1963 |
|
---|
1964 | mov rax,QWORD[152+r8] ; context->Rsp
|
---|
1965 |
|
---|
1966 | mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the epilogue label
|
---|
1967 | lea r10,[r10*1+rsi]
|
---|
1968 | cmp rbx,r10
|
---|
1969 | jae NEAR $L$in_prologue ; at or past the epilogue label: presumably the frame is already torn down, so Rsp is used as is
|
---|
1970 |
|
---|
1971 | lea rax,[((48+280))+rax] ; in the body: skip 280 bytes of locals + 6 pushed registers (48) to reach the entry rsp
|
---|
1972 |
|
---|
1973 | mov rbx,QWORD[((-8))+rax] ; reload the registers pushed by the prologue (push order rbx, rbp, r12..r15)
|
---|
1974 | mov rbp,QWORD[((-16))+rax]
|
---|
1975 | mov r12,QWORD[((-24))+rax]
|
---|
1976 | mov r13,QWORD[((-32))+rax]
|
---|
1977 | mov r14,QWORD[((-40))+rax]
|
---|
1978 | mov r15,QWORD[((-48))+rax]
|
---|
1979 | mov QWORD[144+r8],rbx ; context->Rbx
|
---|
1980 | mov QWORD[160+r8],rbp ; context->Rbp
|
---|
1981 | mov QWORD[216+r8],r12 ; context->R12
|
---|
1982 | mov QWORD[224+r8],r13 ; context->R13
|
---|
1983 | mov QWORD[232+r8],r14 ; context->R14
|
---|
1984 | mov QWORD[240+r8],r15 ; context->R15
|
---|
1985 |
|
---|
1986 | $L$in_prologue: ; here rax = the guarded function's rsp at entry
|
---|
1987 | mov rdi,QWORD[8+rax] ; rdi/rsi were spilled to [8+rsp]/[16+rsp] by the WIN64 prologue
|
---|
1988 | mov rsi,QWORD[16+rax]
|
---|
1989 | mov QWORD[152+r8],rax ; context->Rsp
|
---|
1990 | mov QWORD[168+r8],rsi ; context->Rsi
|
---|
1991 | mov QWORD[176+r8],rdi ; context->Rdi
|
---|
1992 |
|
---|
1993 | mov rdi,QWORD[40+r9] ; destination: disp->ContextRecord
|
---|
1994 | mov rsi,r8 ; source: the patched context
|
---|
1995 | mov ecx,154 ; 154 qwords = 1232 bytes (presumably sizeof(CONTEXT))
|
---|
1996 | DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
|
---|
1997 |
|
---|
1998 | mov rsi,r9 ; rsi = disp (r9 is about to be reused as an argument register)
|
---|
1999 | xor rcx,rcx ; arg1 HandlerType = 0 (UNW_FLAG_NHANDLER)
|
---|
2000 | mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
|
---|
2001 | mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
|
---|
2002 | mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
|
---|
2003 | mov r10,QWORD[40+rsi] ; disp->ContextRecord
|
---|
2004 | lea r11,[56+rsi] ; &disp->HandlerData
|
---|
2005 | lea r12,[24+rsi] ; &disp->EstablisherFrame
|
---|
2006 | mov QWORD[32+rsp],r10 ; arg5 = ContextRecord
|
---|
2007 | mov QWORD[40+rsp],r11 ; arg6 = &HandlerData
|
---|
2008 | mov QWORD[48+rsp],r12 ; arg7 = &EstablisherFrame
|
---|
2009 | mov QWORD[56+rsp],rcx ; arg8 = NULL (rcx is still zero)
|
---|
2010 | call QWORD[__imp_RtlVirtualUnwind]
|
---|
2011 |
|
---|
2012 | mov eax,1 ; return 1 (ExceptionContinueSearch)
|
---|
2013 | add rsp,64
|
---|
2014 | popfq
|
---|
2015 | pop r15
|
---|
2016 | pop r14
|
---|
2017 | pop r13
|
---|
2018 | pop r12
|
---|
2019 | pop rbp
|
---|
2020 | pop rbx
|
---|
2021 | pop rdi
|
---|
2022 | pop rsi
|
---|
2023 | DB 0F3h,0C3h ;repret
|
---|
2024 |
|
---|
2025 |
|
---|
2026 | section .pdata rdata align=4 ; function table: one (begin RVA, end RVA, unwind-info RVA) triple per function
|
---|
2027 | ALIGN 4
|
---|
2028 | DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase ; gcm_gmult_4bit
|
---|
2029 | DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase
|
---|
2030 | DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase
|
---|
2031 |
|
---|
2032 | DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase ; gcm_ghash_4bit
|
---|
2033 | DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase
|
---|
2034 | DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase
|
---|
2035 |
|
---|
2036 | DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase ; gcm_init_clmul
|
---|
2037 | DD $L$SEH_end_gcm_init_clmul wrt ..imagebase
|
---|
2038 | DD $L$SEH_info_gcm_init_clmul wrt ..imagebase
|
---|
2039 |
|
---|
2040 | DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase ; gcm_ghash_clmul
|
---|
2041 | DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase
|
---|
2042 | DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
|
---|
2043 | DD $L$SEH_begin_gcm_init_avx wrt ..imagebase ; gcm_init_avx
|
---|
2044 | DD $L$SEH_end_gcm_init_avx wrt ..imagebase
|
---|
2045 | DD $L$SEH_info_gcm_init_clmul wrt ..imagebase ; shares gcm_init_clmul's unwind info (same sub rsp,0x18 / save xmm6 prologue)
|
---|
2046 |
|
---|
2047 | DD $L$SEH_begin_gcm_ghash_avx wrt ..imagebase ; gcm_ghash_avx
|
---|
2048 | DD $L$SEH_end_gcm_ghash_avx wrt ..imagebase
|
---|
2049 | DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase ; shares gcm_ghash_clmul's unwind info (same 168-byte frame, xmm6..xmm15 saves)
|
---|
2050 | section .xdata rdata align=8 ; unwind information referenced from .pdata
|
---|
2051 | ALIGN 8
|
---|
2052 | $L$SEH_info_gcm_gmult_4bit:
|
---|
2053 | DB 9,0,0,0 ; version 1 + EHANDLER flag (1<<3), no unwind codes: unwinding is delegated to the handler
|
---|
2054 | DD se_handler wrt ..imagebase
|
---|
2055 | DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase ; handler data read by se_handler as DWORD[r11], DWORD[4+r11]
|
---|
2056 | $L$SEH_info_gcm_ghash_4bit:
|
---|
2057 | DB 9,0,0,0 ; same header as above
|
---|
2058 | DD se_handler wrt ..imagebase
|
---|
2059 | DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase ; handler data: prologue-end and epilogue label RVAs
|
---|
2060 | $L$SEH_info_gcm_init_clmul: ; also used by gcm_init_avx
|
---|
2061 | DB 0x01,0x08,0x03,0x00 ; version 1, prologue size 8 bytes, 3 unwind-code slots, no frame register
|
---|
2062 | DB 0x08,0x68,0x00,0x00 ; at offset 8: UWOP_SAVE_XMM128 xmm6 at [rsp+0]
|
---|
2063 | DB 0x04,0x22,0x00,0x00 ; at offset 4: UWOP_ALLOC_SMALL, 2*8+8 = 24 bytes
|
---|
2064 | $L$SEH_info_gcm_ghash_clmul: ; also used by gcm_ghash_avx
|
---|
2065 | DB 0x01,0x33,0x16,0x00 ; version 1, prologue size 0x33 bytes, 0x16 (22) unwind-code slots
|
---|
2066 | DB 0x33,0xf8,0x09,0x00 ; UWOP_SAVE_XMM128 xmm15 at [rsp+9*16]
|
---|
2067 | DB 0x2e,0xe8,0x08,0x00 ; xmm14 at [rsp+8*16]
|
---|
2068 | DB 0x29,0xd8,0x07,0x00 ; xmm13 at [rsp+7*16]
|
---|
2069 | DB 0x24,0xc8,0x06,0x00 ; xmm12 at [rsp+6*16]
|
---|
2070 | DB 0x1f,0xb8,0x05,0x00 ; xmm11 at [rsp+5*16]
|
---|
2071 | DB 0x1a,0xa8,0x04,0x00 ; xmm10 at [rsp+4*16]
|
---|
2072 | DB 0x15,0x98,0x03,0x00 ; xmm9 at [rsp+3*16]
|
---|
2073 | DB 0x10,0x88,0x02,0x00 ; xmm8 at [rsp+2*16]
|
---|
2074 | DB 0x0c,0x78,0x01,0x00 ; xmm7 at [rsp+1*16]
|
---|
2075 | DB 0x08,0x68,0x00,0x00 ; xmm6 at [rsp+0]
|
---|
2076 | DB 0x04,0x01,0x15,0x00 ; at offset 4: UWOP_ALLOC_LARGE, 0x15*8 = 168 bytes
|
---|