VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.7/crypto/genasm-nasm/ghash-x86_64.S@ 107278

此檔案自 107278 以來的最後變更為 99371,由 vboxsync 於 23 個月前提交

openssl-3.1.0: After generating headers and asm (kmk recreate-headers recreate-headers)

檔案大小: 38.7 KB
 
; File-level NASM setup for the GHASH routines below.
; (Viewer line numbers that had been fused onto each directive are removed;
; with them in place none of these lines is a valid NASM directive.)

; Make memory references RIP-relative unless stated otherwise.
default rel

; Size keywords used on vector memory operands are defined as empty macros,
; so e.g. XMMWORD[rdx] expands to a plain [rdx] operand.
%define XMMWORD
%define YMMWORD
%define ZMMWORD

section .text code align=64

; Capability vector defined outside this file; gcm_ghash_clmul reads its
; second dword to choose a code path.
EXTERN OPENSSL_ia32cap_P
8
global gcm_gmult_4bit

;-----------------------------------------------------------------------
; gcm_gmult_4bit
; ABI:   Microsoft x64 entry. The two arguments arrive in rcx/rdx and are
;        copied to rdi/rsi; the body only uses rdi/rsi.
; In:    rcx = pointer to a 16-byte value, read one byte at a time
;              (bytes 15 down to 0) and overwritten on exit
;        rdx = base of a lookup table addressed in 16-byte entries,
;              indexed by a 4-bit nibble
;              (presumably the GHASH Htable -- confirm against the caller)
; Out:   the 16 bytes at [rcx] are replaced by the byte-swapped result
; Clobb: rax, rcx, rdx, r8-r11, flags
; Saved: rdi/rsi (non-volatile on Win64) are parked in the caller's home
;        slots [rsp+8]/[rsp+16] and reloaded in the epilogue; rbx is
;        reloaded from the stack. rbp and r12-r15 are pushed but never
;        written here, so they are not reloaded.
; Stack: six pushes (48 bytes) plus 280 bytes; the 280-byte area is not
;        accessed in this function.
;        NOTE(review): presumably the frame mirrors gcm_ghash_4bit so both
;        can share unwind handling -- confirm against the SEH tables.
;
; (Viewer line numbers that had been fused onto every statement are
; removed; instruction order and operands are unchanged.)
;-----------------------------------------------------------------------
ALIGN 16
gcm_gmult_4bit:
	mov QWORD[8+rsp],rdi ;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_gcm_gmult_4bit:
	mov rdi,rcx
	mov rsi,rdx

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	sub rsp,280

$L$gmult_prologue:

	; Seed the accumulator (r9:r8) from the table entry selected by the
	; low nibble of byte 15. al = low nibble * 16, bl = high nibble * 16;
	; both are byte offsets into the 16-byte-entry table at rsi.
	movzx r8,BYTE[15+rdi]
	lea r11,[$L$rem_4bit]	; r11 = table of 8-byte reduction constants
	xor rax,rax
	xor rbx,rbx
	mov al,r8b
	mov bl,r8b
	shl al,4
	mov rcx,14	; rcx = index of the next input byte
	mov r8,QWORD[8+rax*1+rsi]
	mov r9,QWORD[rax*1+rsi]
	and bl,0xf0
	mov rdx,r8	; rdx keeps the 4 bits about to be shifted out
	jmp NEAR $L$oop1	; hop over the alignment padding

ALIGN 16
$L$oop1:
	; First half-step: shift r9:r8 right by 4, fold in the table entry
	; for the high nibble (bl) and the reduction word for the bits that
	; fell off (rdx). The next input byte is fetched along the way.
	shr r8,4
	and rdx,0xf
	mov r10,r9
	mov al,BYTE[rcx*1+rdi]
	shr r9,4
	xor r8,QWORD[8+rbx*1+rsi]
	shl r10,60	; carry the low 4 bits of r9 into the top of r8
	xor r9,QWORD[rbx*1+rsi]
	mov bl,al
	xor r9,QWORD[rdx*8+r11]
	mov rdx,r8
	shl al,4
	xor r8,r10
	dec rcx
	js NEAR $L$break1	; byte 0 has been fetched -> finish outside the loop

	; Second half-step: same shift/fold for the low nibble (al) of the
	; byte just fetched.
	shr r8,4
	and rdx,0xf
	mov r10,r9
	shr r9,4
	xor r8,QWORD[8+rax*1+rsi]
	shl r10,60
	xor r9,QWORD[rax*1+rsi]
	and bl,0xf0
	xor r9,QWORD[rdx*8+r11]
	mov rdx,r8
	xor r8,r10
	jmp NEAR $L$oop1

ALIGN 16
$L$break1:
	; Final two half-steps for the last byte: low nibble (al) ...
	shr r8,4
	and rdx,0xf
	mov r10,r9
	shr r9,4
	xor r8,QWORD[8+rax*1+rsi]
	shl r10,60
	xor r9,QWORD[rax*1+rsi]
	and bl,0xf0
	xor r9,QWORD[rdx*8+r11]
	mov rdx,r8
	xor r8,r10

	; ... then high nibble (bl).
	shr r8,4
	and rdx,0xf
	mov r10,r9
	shr r9,4
	xor r8,QWORD[8+rbx*1+rsi]
	shl r10,60
	xor r9,QWORD[rbx*1+rsi]
	xor r8,r10
	xor r9,QWORD[rdx*8+r11]

	; Store the result byte-swapped back over the input.
	bswap r8
	bswap r9
	mov QWORD[8+rdi],r8
	mov QWORD[rdi],r9

	; rsi = stack pointer as it was before the six pushes.
	lea rsi,[((280+48))+rsp]

	mov rbx,QWORD[((-8))+rsi]	; only callee-saved GPR modified above

	lea rsp,[rsi]

$L$gmult_epilogue:
	mov rdi,QWORD[8+rsp] ;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
	DB 0F3h,0C3h ;repret

$L$SEH_end_gcm_gmult_4bit:
global gcm_ghash_4bit

;-----------------------------------------------------------------------
; gcm_ghash_4bit
; ABI:   Microsoft x64 entry. Arguments arrive in rcx, rdx, r8, r9 and
;        are copied to rdi, rsi, rdx, rcx before the body runs.
; In:    rcx = pointer to the 16-byte accumulator (read and rewritten)
;        rdx = base of a 16-entry table of 16-byte values
;              (presumably the GHASH Htable -- confirm against the caller)
;        r8  = input pointer, consumed 16 bytes per iteration
;        r9  = input length in bytes
; Out:   accumulator at [rcx] updated in place
; Clobb: rax, rcx, rdx, r8-r11, flags
; Saved: rbx, rbp, r12-r15 are pushed and reloaded; rdi/rsi are parked in
;        the caller's home slots and reloaded in the epilogue.
; Stack: six pushes (48 bytes) + 280 bytes of locals:
;          [rsp+0  .. rsp+15 ]  16 bytes : (low byte of each entry's
;                                          second qword) << 4
;          [rbp-128.. rbp-1  ]  16 qwords: each entry's second qword >> 4,
;                                          with the first qword's low
;                                          4 bits moved into the top
;          [rbp    .. rbp+127]  16 qwords: each entry's first qword >> 4
;        where rbp = rsp + 16 + 128.
;
; NOTE(review): the loop is bottom-tested, so one 16-byte block is always
; consumed even if r9 is 0 -- presumably callers guarantee a non-zero
; multiple of 16; confirm.
;
; (Viewer line numbers that had been fused onto every statement are
; removed; instruction order and operands are unchanged.)
;-----------------------------------------------------------------------
ALIGN 16
gcm_ghash_4bit:
	mov QWORD[8+rsp],rdi ;WIN64 prologue
	mov QWORD[16+rsp],rsi
	mov rax,rsp
$L$SEH_begin_gcm_ghash_4bit:
	mov rdi,rcx
	mov rsi,rdx
	mov rdx,r8
	mov rcx,r9

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	sub rsp,280

$L$ghash_prologue:
	mov r14,rdx	; r14 = input cursor
	mov r15,rcx	; r15 = length (turned into an end pointer below)
	sub rsi,-128	; bias the table pointer by +128 for the build phase
	lea rbp,[((16+128))+rsp]
	xor edx,edx

	; Build the three on-stack tables described in the header. The code
	; is software-pipelined: loads for entry k+1 are interleaved with
	; the shifts and stores for entry k.
	mov r8,QWORD[((0+0-128))+rsi]
	mov rax,QWORD[((0+8-128))+rsi]
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov r9,QWORD[((16+0-128))+rsi]
	shl dl,4
	mov rbx,QWORD[((16+8-128))+rsi]
	shl r10,60
	mov BYTE[rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[rbp],r8
	mov r8,QWORD[((32+0-128))+rsi]
	shl dl,4
	mov QWORD[((0-128))+rbp],rax
	mov rax,QWORD[((32+8-128))+rsi]
	shl r10,60
	mov BYTE[1+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[8+rbp],r9
	mov r9,QWORD[((48+0-128))+rsi]
	shl dl,4
	mov QWORD[((8-128))+rbp],rbx
	mov rbx,QWORD[((48+8-128))+rsi]
	shl r10,60
	mov BYTE[2+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[16+rbp],r8
	mov r8,QWORD[((64+0-128))+rsi]
	shl dl,4
	mov QWORD[((16-128))+rbp],rax
	mov rax,QWORD[((64+8-128))+rsi]
	shl r10,60
	mov BYTE[3+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[24+rbp],r9
	mov r9,QWORD[((80+0-128))+rsi]
	shl dl,4
	mov QWORD[((24-128))+rbp],rbx
	mov rbx,QWORD[((80+8-128))+rsi]
	shl r10,60
	mov BYTE[4+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[32+rbp],r8
	mov r8,QWORD[((96+0-128))+rsi]
	shl dl,4
	mov QWORD[((32-128))+rbp],rax
	mov rax,QWORD[((96+8-128))+rsi]
	shl r10,60
	mov BYTE[5+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[40+rbp],r9
	mov r9,QWORD[((112+0-128))+rsi]
	shl dl,4
	mov QWORD[((40-128))+rbp],rbx
	mov rbx,QWORD[((112+8-128))+rsi]
	shl r10,60
	mov BYTE[6+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[48+rbp],r8
	mov r8,QWORD[((128+0-128))+rsi]
	shl dl,4
	mov QWORD[((48-128))+rbp],rax
	mov rax,QWORD[((128+8-128))+rsi]
	shl r10,60
	mov BYTE[7+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[56+rbp],r9
	mov r9,QWORD[((144+0-128))+rsi]
	shl dl,4
	mov QWORD[((56-128))+rbp],rbx
	mov rbx,QWORD[((144+8-128))+rsi]
	shl r10,60
	mov BYTE[8+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[64+rbp],r8
	mov r8,QWORD[((160+0-128))+rsi]
	shl dl,4
	mov QWORD[((64-128))+rbp],rax
	mov rax,QWORD[((160+8-128))+rsi]
	shl r10,60
	mov BYTE[9+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[72+rbp],r9
	mov r9,QWORD[((176+0-128))+rsi]
	shl dl,4
	mov QWORD[((72-128))+rbp],rbx
	mov rbx,QWORD[((176+8-128))+rsi]
	shl r10,60
	mov BYTE[10+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[80+rbp],r8
	mov r8,QWORD[((192+0-128))+rsi]
	shl dl,4
	mov QWORD[((80-128))+rbp],rax
	mov rax,QWORD[((192+8-128))+rsi]
	shl r10,60
	mov BYTE[11+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[88+rbp],r9
	mov r9,QWORD[((208+0-128))+rsi]
	shl dl,4
	mov QWORD[((88-128))+rbp],rbx
	mov rbx,QWORD[((208+8-128))+rsi]
	shl r10,60
	mov BYTE[12+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[96+rbp],r8
	mov r8,QWORD[((224+0-128))+rsi]
	shl dl,4
	mov QWORD[((96-128))+rbp],rax
	mov rax,QWORD[((224+8-128))+rsi]
	shl r10,60
	mov BYTE[13+rsp],dl
	or rbx,r10
	mov dl,al
	shr rax,4
	mov r10,r8
	shr r8,4
	mov QWORD[104+rbp],r9
	mov r9,QWORD[((240+0-128))+rsi]
	shl dl,4
	mov QWORD[((104-128))+rbp],rbx
	mov rbx,QWORD[((240+8-128))+rsi]
	shl r10,60
	mov BYTE[14+rsp],dl
	or rax,r10
	mov dl,bl
	shr rbx,4
	mov r10,r9
	shr r9,4
	mov QWORD[112+rbp],r8
	shl dl,4
	mov QWORD[((112-128))+rbp],rax
	shl r10,60
	mov BYTE[15+rsp],dl
	or rbx,r10
	mov QWORD[120+rbp],r9
	mov QWORD[((120-128))+rbp],rbx

	add rsi,-128	; undo the +128 bias: rsi = table base again
	mov r8,QWORD[8+rdi]
	mov r9,QWORD[rdi]
	add r15,r14	; r15 = one past the last input byte
	lea r11,[$L$rem_8bit]	; r11 = table of 16-bit reduction constants
	jmp NEAR $L$outer_loop	; hop over the alignment padding
ALIGN 16
$L$outer_loop:
	; XOR the next 16 input bytes into the accumulator and park the sum
	; at [rdi] so it can be re-read 32 bits at a time below.
	xor r9,QWORD[r14]
	mov rdx,QWORD[8+r14]
	lea r14,[16+r14]
	xor rdx,r8
	mov QWORD[rdi],r9
	mov QWORD[8+rdi],rdx
	shr rdx,32	; edx = bytes 12..15 of the sum

	; Unrolled byte-at-a-time multiply. "rol edx,8" rotates the next
	; byte into dl; its low nibble (al, scaled by 16) indexes the table
	; at rsi, its high nibble (ebx or ecx) indexes the on-stack tables.
	; r12/r13 alternate as the index into the reduction table at r11.
	xor rax,rax
	rol edx,8
	mov al,dl
	movzx ebx,dl
	shl al,4
	shr ebx,4
	rol edx,8
	mov r8,QWORD[8+rax*1+rsi]
	mov r9,QWORD[rax*1+rsi]
	mov al,dl
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	xor r12,r8
	mov r10,r9
	shr r8,8
	movzx r12,r12b
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	mov edx,DWORD[8+rdi]	; edx = bytes 8..11 of the parked sum
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	mov edx,DWORD[4+rdi]	; edx = bytes 4..7 of the parked sum
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	mov edx,DWORD[rdi]	; edx = bytes 0..3 of the parked sum
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	shr ecx,4
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r12,WORD[r12*2+r11]
	movzx ebx,dl
	shl al,4
	movzx r13,BYTE[rcx*1+rsp]
	shr ebx,4
	shl r12,48
	xor r13,r8
	mov r10,r9
	xor r9,r12
	shr r8,8
	movzx r13,r13b
	shr r9,8
	xor r8,QWORD[((-128))+rcx*8+rbp]
	shl r10,56
	xor r9,QWORD[rcx*8+rbp]
	rol edx,8
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	mov al,dl
	xor r8,r10
	movzx r13,WORD[r13*2+r11]
	movzx ecx,dl
	shl al,4
	movzx r12,BYTE[rbx*1+rsp]
	and ecx,240	; last byte: keep the high nibble as a *16 offset into [rsi]
	shl r13,48
	xor r12,r8
	mov r10,r9
	xor r9,r13
	shr r8,8
	movzx r12,r12b
	; NOTE(review): this reads the 4 bytes just below the accumulator.
	; edx is not read again before "mov rdx,QWORD[8+r14]" at the top of
	; the loop overwrites it, so the loaded value is dead -- presumably an
	; artifact of the code generator's unrolling; confirm that [rdi-4]
	; is always mapped for every caller.
	mov edx,DWORD[((-4))+rdi]
	shr r9,8
	xor r8,QWORD[((-128))+rbx*8+rbp]
	shl r10,56
	xor r9,QWORD[rbx*8+rbp]
	movzx r12,WORD[r12*2+r11]
	xor r8,QWORD[8+rax*1+rsi]
	xor r9,QWORD[rax*1+rsi]
	shl r12,48
	xor r8,r10
	xor r9,r12
	; Final 4-bit step using the direct table at rsi.
	movzx r13,r8b
	shr r8,4
	mov r10,r9
	shl r13b,4
	shr r9,4
	xor r8,QWORD[8+rcx*1+rsi]
	movzx r13,WORD[r13*2+r11]
	shl r10,60
	xor r9,QWORD[rcx*1+rsi]
	xor r8,r10
	shl r13,48
	bswap r8
	xor r9,r13
	bswap r9
	cmp r14,r15
	jb NEAR $L$outer_loop	; unsigned: more input remains
	mov QWORD[8+rdi],r8
	mov QWORD[rdi],r9

	; rsi = stack pointer as it was before the six pushes.
	lea rsi,[((280+48))+rsp]

	mov r15,QWORD[((-48))+rsi]

	mov r14,QWORD[((-40))+rsi]

	mov r13,QWORD[((-32))+rsi]

	mov r12,QWORD[((-24))+rsi]

	mov rbp,QWORD[((-16))+rsi]

	mov rbx,QWORD[((-8))+rsi]

	lea rsp,[rsi]

$L$ghash_epilogue:
	mov rdi,QWORD[8+rsp] ;WIN64 epilogue
	mov rsi,QWORD[16+rsp]
	DB 0F3h,0C3h ;repret

$L$SEH_end_gcm_ghash_4bit:
global gcm_init_clmul

;-----------------------------------------------------------------------
; gcm_init_clmul
; ABI:   Microsoft x64; arguments are used directly from rcx/rdx.
; In:    rcx = destination table; 96 bytes are written ([rcx] .. [rcx+95])
;        rdx = pointer to a 16-byte source value
;              (presumably the raw hash key H -- confirm against caller)
; Out:   table at rcx filled; layout, with H' = the adjusted key in xmm2:
;          +0   H'
;          +16  H' * H'               (reduced)
;          +32  xor-folded halves of the two values above, packed
;          +48  third power of H'
;          +64  fourth power of H'
;          +80  xor-folded halves of the two values above, packed
; Clobb: xmm0-xmm5, flags. xmm6 (non-volatile on Win64) is saved in a
;        24-byte stack slot and restored before returning.
;
; Changes from the scraped text:
;   * viewer line numbers fused onto every statement are removed;
;   * an ENDBR64 landing pad is added at the entry, matching
;     gcm_gmult_4bit, gcm_ghash_4bit, gcm_gmult_clmul and gcm_ghash_clmul
;     in this file. It precedes $L$SEH_begin_gcm_init_clmul, so prologue
;     offsets measured from that label are unaffected.
; The DB sequences are instruction encodings spelled out byte by byte;
; each one is decoded in its trailing comment.
;-----------------------------------------------------------------------
ALIGN 16
gcm_init_clmul:

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
$L$_init_clmul:
$L$SEH_begin_gcm_init_clmul:

	DB 0x48,0x83,0xec,0x18	; sub rsp,0x18
	DB 0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6
	movdqu xmm2,XMMWORD[rdx]
	pshufd xmm2,xmm2,78	; swap the two 64-bit halves


	; xmm2 <<= 1 as a 128-bit value; if the bit shifted out was set,
	; XOR in the constant at $L$0x1c2_polynomial.
	pshufd xmm4,xmm2,255	; broadcast the top dword
	movdqa xmm3,xmm2
	psllq xmm2,1
	pxor xmm5,xmm5
	psrlq xmm3,63
	pcmpgtd xmm5,xmm4	; all-ones when the top dword is negative
	pslldq xmm3,8	; carry from the low qword into the high qword
	por xmm2,xmm3


	pand xmm5,XMMWORD[$L$0x1c2_polynomial]
	pxor xmm2,xmm5


	; xmm0 = xmm2 * xmm2, Karatsuba-style: three carry-less multiplies
	; (low*low, high*high, folded*folded) followed by reduction.
	pshufd xmm6,xmm2,78
	movdqa xmm0,xmm2
	pxor xmm6,xmm2	; xmm6 = high half ^ low half of H'
	movdqa xmm1,xmm0
	pshufd xmm3,xmm0,78
	pxor xmm3,xmm0
	DB 102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
	DB 102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
	DB 102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
	pxor xmm3,xmm0
	pxor xmm3,xmm1

	movdqa xmm4,xmm3
	psrldq xmm3,8
	pslldq xmm4,8
	pxor xmm1,xmm3
	pxor xmm0,xmm4

	; Reduction, phase 1.
	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	; Reduction, phase 2.
	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
	pshufd xmm3,xmm2,78
	pshufd xmm4,xmm0,78
	pxor xmm3,xmm2
	movdqu XMMWORD[rcx],xmm2
	pxor xmm4,xmm0
	movdqu XMMWORD[16+rcx],xmm0
	DB 102,15,58,15,227,8	; palignr xmm4,xmm3,8
	movdqu XMMWORD[32+rcx],xmm4

	; xmm0 = xmm0 * xmm2 (third power), same multiply/reduce sequence.
	movdqa xmm1,xmm0
	pshufd xmm3,xmm0,78
	pxor xmm3,xmm0
	DB 102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
	DB 102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
	DB 102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
	pxor xmm3,xmm0
	pxor xmm3,xmm1

	movdqa xmm4,xmm3
	psrldq xmm3,8
	pslldq xmm4,8
	pxor xmm1,xmm3
	pxor xmm0,xmm4

	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
	movdqa xmm5,xmm0	; keep the third power for the stores below

	; xmm0 = xmm0 * xmm2 (fourth power).
	movdqa xmm1,xmm0
	pshufd xmm3,xmm0,78
	pxor xmm3,xmm0
	DB 102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
	DB 102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
	DB 102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
	pxor xmm3,xmm0
	pxor xmm3,xmm1

	movdqa xmm4,xmm3
	psrldq xmm3,8
	pslldq xmm4,8
	pxor xmm1,xmm3
	pxor xmm0,xmm4

	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
	pshufd xmm3,xmm5,78
	pshufd xmm4,xmm0,78
	pxor xmm3,xmm5
	movdqu XMMWORD[48+rcx],xmm5
	pxor xmm4,xmm0
	movdqu XMMWORD[64+rcx],xmm0
	DB 102,15,58,15,227,8	; palignr xmm4,xmm3,8
	movdqu XMMWORD[80+rcx],xmm4
	movaps xmm6,XMMWORD[rsp]	; restore the non-volatile xmm6
	lea rsp,[24+rsp]
$L$SEH_end_gcm_init_clmul:
	DB 0F3h,0C3h ;repret
884
885
global gcm_gmult_clmul

;-----------------------------------------------------------------------
; gcm_gmult_clmul
; ABI:   Microsoft x64; arguments are used directly from rcx/rdx.
; In:    rcx = pointer to a 16-byte value, read and overwritten
;        rdx = pointer to a table; the 16 bytes at [rdx] and at [rdx+32]
;              are read (presumably the table built by gcm_init_clmul --
;              confirm against the caller)
; Out:   [rcx] = value * [rdx], reduced; bytes are reversed with the
;        $L$bswap_mask shuffle on load and again before the store
; Clobb: xmm0-xmm5, flags (all volatile on Win64, so nothing is saved)
; Stack: none
;
; $L$_gmult_clmul is also the jump target used by gcm_gmult_avx.
; (Viewer line numbers that had been fused onto every statement are
; removed; instruction order and operands are unchanged. DB sequences
; are decoded in their trailing comments.)
;-----------------------------------------------------------------------
ALIGN 16
gcm_gmult_clmul:

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
$L$_gmult_clmul:
	movdqu xmm0,XMMWORD[rcx]
	movdqa xmm5,XMMWORD[$L$bswap_mask]
	movdqu xmm2,XMMWORD[rdx]
	movdqu xmm4,XMMWORD[32+rdx]
	DB 102,15,56,0,197	; pshufb xmm0,xmm5

	; Three carry-less multiplies: low*low, high*high, and the folded
	; halves of the input against the low qword of [rdx+32].
	movdqa xmm1,xmm0
	pshufd xmm3,xmm0,78
	pxor xmm3,xmm0
	DB 102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
	DB 102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
	DB 102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
	pxor xmm3,xmm0
	pxor xmm3,xmm1

	; Distribute the middle term across the 256-bit product xmm1:xmm0.
	movdqa xmm4,xmm3
	psrldq xmm3,8
	pslldq xmm4,8
	pxor xmm1,xmm3
	pxor xmm0,xmm4

	; Reduction, phase 1.
	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	; Reduction, phase 2.
	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
	DB 102,15,56,0,197	; pshufb xmm0,xmm5
	movdqu XMMWORD[rcx],xmm0
	DB 0F3h,0C3h ;repret
938
939
global gcm_ghash_clmul

;-----------------------------------------------------------------------
; gcm_ghash_clmul
; ABI:   Microsoft x64; arguments are used directly from rcx/rdx/r8/r9.
; In:    rcx = pointer to the 16-byte accumulator (read and rewritten)
;        rdx = pointer to a key table; entries at +0, +16, +32, +48, +64
;              and +80 are read (presumably the table built by
;              gcm_init_clmul -- confirm against the caller)
;        r8  = input pointer
;        r9  = input length in bytes
;              NOTE(review): the dispatch arithmetic below only makes
;              sense for a non-zero multiple of 16 -- confirm callers.
; Out:   accumulator at [rcx] updated in place
; Clobb: rax, r8, r9, xmm0-xmm5, flags
; Saved: xmm6-xmm15 (non-volatile on Win64) are stored in a 168-byte
;        frame at [rsp+0] .. [rsp+159] and reloaded before returning.
;
; Paths:
;   $L$mod4_loop / $L$tail4x   four blocks per iteration
;   $L$mod_loop / $L$even_tail two blocks per iteration
;   $L$odd_tail                one trailing block
;
; (Viewer line numbers that had been fused onto every statement are
; removed; instruction order and operands are unchanged. DB sequences
; are instruction encodings; each is decoded in its trailing comment.)
;-----------------------------------------------------------------------
ALIGN 32
gcm_ghash_clmul:

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
$L$_ghash_clmul:
	lea rax,[((-136))+rsp]
$L$SEH_begin_gcm_ghash_clmul:

	DB 0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp = entry rsp - 168)
	DB 0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
	DB 0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
	DB 0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
	DB 0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
	DB 0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
	DB 0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
	DB 0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
	DB 0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
	DB 0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
	DB 0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
	movdqa xmm10,XMMWORD[$L$bswap_mask]	; xmm10 = byte-reversal shuffle mask

	movdqu xmm0,XMMWORD[rcx]
	movdqu xmm2,XMMWORD[rdx]
	movdqu xmm7,XMMWORD[32+rdx]
	DB 102,65,15,56,0,194	; pshufb xmm0,xmm10

	sub r9,0x10
	jz NEAR $L$odd_tail	; exactly one block

	movdqu xmm6,XMMWORD[16+rdx]
	mov eax,DWORD[((OPENSSL_ia32cap_P+4))]
	cmp r9,0x30
	jb NEAR $L$skip4x	; fewer than four blocks in total

	; Keep bits 22 and 26 of the second capability dword (0x04400000)
	; and skip the 4x path when only bit 22 is set.
	; NOTE(review): presumably "MOVBE without XSAVE" in OpenSSL's
	; capability layout -- confirm against the OPENSSL_ia32cap docs.
	and eax,71303168
	cmp eax,4194304
	je NEAR $L$skip4x

	sub r9,0x30
	mov rax,0xA040608020C0E000	; byte table consumed via xmm9 in the loop
	movdqu xmm14,XMMWORD[48+rdx]
	movdqu xmm15,XMMWORD[64+rdx]




	; Prime the pipeline with the first four input blocks.
	movdqu xmm3,XMMWORD[48+r8]
	movdqu xmm11,XMMWORD[32+r8]
	DB 102,65,15,56,0,218	; pshufb xmm3,xmm10
	DB 102,69,15,56,0,218	; pshufb xmm11,xmm10
	movdqa xmm5,xmm3
	pshufd xmm4,xmm3,78
	pxor xmm4,xmm3
	DB 102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
	DB 102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
	DB 102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00

	movdqa xmm13,xmm11
	pshufd xmm12,xmm11,78
	pxor xmm12,xmm11
	DB 102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
	DB 102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
	DB 102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
	xorps xmm3,xmm11
	xorps xmm5,xmm13
	movups xmm7,XMMWORD[80+rdx]
	xorps xmm4,xmm12

	movdqu xmm11,XMMWORD[16+r8]
	movdqu xmm8,XMMWORD[r8]
	DB 102,69,15,56,0,218	; pshufb xmm11,xmm10
	DB 102,69,15,56,0,194	; pshufb xmm8,xmm10
	movdqa xmm13,xmm11
	pshufd xmm12,xmm11,78
	pxor xmm0,xmm8
	pxor xmm12,xmm11
	DB 102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
	movdqa xmm1,xmm0
	pshufd xmm8,xmm0,78
	pxor xmm8,xmm0
	DB 102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
	DB 102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
	xorps xmm3,xmm11
	xorps xmm5,xmm13

	lea r8,[64+r8]
	sub r9,0x40
	jc NEAR $L$tail4x	; fewer than four more blocks: finish up

	jmp NEAR $L$mod4_loop	; hop over the alignment padding
ALIGN 32
$L$mod4_loop:
	; Finish the multiply for the previous group, reduce it, and start
	; the multiplies for the next four blocks, all interleaved.
	DB 102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
	xorps xmm4,xmm12
	movdqu xmm11,XMMWORD[48+r8]
	DB 102,69,15,56,0,218	; pshufb xmm11,xmm10
	DB 102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
	xorps xmm0,xmm3
	movdqu xmm3,XMMWORD[32+r8]
	movdqa xmm13,xmm11
	DB 102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
	pshufd xmm12,xmm11,78
	xorps xmm1,xmm5
	pxor xmm12,xmm11
	DB 102,65,15,56,0,218	; pshufb xmm3,xmm10
	movups xmm7,XMMWORD[32+rdx]
	xorps xmm8,xmm4
	DB 102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
	pshufd xmm4,xmm3,78

	pxor xmm8,xmm0
	movdqa xmm5,xmm3
	pxor xmm8,xmm1
	pxor xmm4,xmm3
	movdqa xmm9,xmm8
	DB 102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
	pslldq xmm8,8
	psrldq xmm9,8
	pxor xmm0,xmm8
	movdqa xmm8,XMMWORD[$L$7_mask]
	pxor xmm1,xmm9
	DB 102,76,15,110,200	; movq xmm9,rax

	pand xmm8,xmm0
	DB 102,69,15,56,0,200	; pshufb xmm9,xmm8
	pxor xmm9,xmm0
	DB 102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
	psllq xmm9,57
	movdqa xmm8,xmm9
	pslldq xmm9,8
	DB 102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
	psrldq xmm8,8
	pxor xmm0,xmm9
	pxor xmm1,xmm8
	movdqu xmm8,XMMWORD[r8]

	movdqa xmm9,xmm0
	psrlq xmm0,1
	DB 102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
	xorps xmm3,xmm11
	movdqu xmm11,XMMWORD[16+r8]
	DB 102,69,15,56,0,218	; pshufb xmm11,xmm10
	DB 102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
	xorps xmm5,xmm13
	movups xmm7,XMMWORD[80+rdx]
	DB 102,69,15,56,0,194	; pshufb xmm8,xmm10
	pxor xmm1,xmm9
	pxor xmm9,xmm0
	psrlq xmm0,5

	movdqa xmm13,xmm11
	pxor xmm4,xmm12
	pshufd xmm12,xmm11,78
	pxor xmm0,xmm9
	pxor xmm1,xmm8
	pxor xmm12,xmm11
	DB 102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
	psrlq xmm0,1
	pxor xmm0,xmm1
	movdqa xmm1,xmm0
	DB 102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
	xorps xmm3,xmm11
	pshufd xmm8,xmm0,78
	pxor xmm8,xmm0

	DB 102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
	xorps xmm5,xmm13

	lea r8,[64+r8]
	sub r9,0x40
	jnc NEAR $L$mod4_loop

$L$tail4x:
	; Drain the 4x pipeline: last multiply by the key in xmm15, combine
	; the partial products, reduce.
	DB 102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
	DB 102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
	DB 102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
	xorps xmm4,xmm12
	xorps xmm0,xmm3
	xorps xmm1,xmm5
	pxor xmm1,xmm0
	pxor xmm8,xmm4

	pxor xmm8,xmm1
	pxor xmm1,xmm0

	movdqa xmm9,xmm8
	psrldq xmm8,8
	pslldq xmm9,8
	pxor xmm1,xmm8
	pxor xmm0,xmm9

	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
	add r9,0x40	; r9 = bytes still unprocessed
	jz NEAR $L$done
	movdqu xmm7,XMMWORD[32+rdx]	; xmm7 was repurposed in the 4x path
	sub r9,0x10
	jz NEAR $L$odd_tail
$L$skip4x:





	; Two-block path: load two blocks, fold the first into the
	; accumulator, start multiplying the second by the key in xmm2.
	movdqu xmm8,XMMWORD[r8]
	movdqu xmm3,XMMWORD[16+r8]
	DB 102,69,15,56,0,194	; pshufb xmm8,xmm10
	DB 102,65,15,56,0,218	; pshufb xmm3,xmm10
	pxor xmm0,xmm8

	movdqa xmm5,xmm3
	pshufd xmm4,xmm3,78
	pxor xmm4,xmm3
	DB 102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
	DB 102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
	DB 102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00

	lea r8,[32+r8]
	nop
	sub r9,0x20
	jbe NEAR $L$even_tail
	nop
	jmp NEAR $L$mod_loop	; hop over the alignment padding

ALIGN 32
$L$mod_loop:
	movdqa xmm1,xmm0
	movdqa xmm8,xmm4
	pshufd xmm4,xmm0,78
	pxor xmm4,xmm0

	DB 102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
	DB 102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
	DB 102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10

	pxor xmm0,xmm3
	pxor xmm1,xmm5
	movdqu xmm9,XMMWORD[r8]
	pxor xmm8,xmm0
	DB 102,69,15,56,0,202	; pshufb xmm9,xmm10
	movdqu xmm3,XMMWORD[16+r8]

	pxor xmm8,xmm1
	pxor xmm1,xmm9
	pxor xmm4,xmm8
	DB 102,65,15,56,0,218	; pshufb xmm3,xmm10
	movdqa xmm8,xmm4
	psrldq xmm8,8
	pslldq xmm4,8
	pxor xmm1,xmm8
	pxor xmm0,xmm4

	movdqa xmm5,xmm3

	movdqa xmm9,xmm0
	movdqa xmm8,xmm0
	psllq xmm0,5
	pxor xmm8,xmm0
	DB 102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
	psllq xmm0,1
	pxor xmm0,xmm8
	psllq xmm0,57
	movdqa xmm8,xmm0
	pslldq xmm0,8
	psrldq xmm8,8
	pxor xmm0,xmm9
	pshufd xmm4,xmm5,78
	pxor xmm1,xmm8
	pxor xmm4,xmm5

	movdqa xmm9,xmm0
	psrlq xmm0,1
	DB 102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
	pxor xmm1,xmm9
	pxor xmm9,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm9
	lea r8,[32+r8]
	psrlq xmm0,1
	DB 102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
	pxor xmm0,xmm1

	sub r9,0x20
	ja NEAR $L$mod_loop

$L$even_tail:
	; Finish the pending pair: accumulator times the key in xmm6, plus
	; the products already started for the second block, then reduce.
	movdqa xmm1,xmm0
	movdqa xmm8,xmm4
	pshufd xmm4,xmm0,78
	pxor xmm4,xmm0

	DB 102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
	DB 102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
	DB 102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10

	pxor xmm0,xmm3
	pxor xmm1,xmm5
	pxor xmm8,xmm0
	pxor xmm8,xmm1
	pxor xmm4,xmm8
	movdqa xmm8,xmm4
	psrldq xmm8,8
	pslldq xmm4,8
	pxor xmm1,xmm8
	pxor xmm0,xmm4

	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
	; r9 == 0 here means exactly one block is left; any other value
	; means the input is exhausted.
	test r9,r9
	jnz NEAR $L$done

$L$odd_tail:
	; Single block: accumulator ^= block, then multiply by the key in
	; xmm2 (xmm7 supplies the folded-key operand) and reduce.
	movdqu xmm8,XMMWORD[r8]
	DB 102,69,15,56,0,194	; pshufb xmm8,xmm10
	pxor xmm0,xmm8
	movdqa xmm1,xmm0
	pshufd xmm3,xmm0,78
	pxor xmm3,xmm0
	DB 102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
	DB 102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
	DB 102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
	pxor xmm3,xmm0
	pxor xmm3,xmm1

	movdqa xmm4,xmm3
	psrldq xmm3,8
	pslldq xmm4,8
	pxor xmm1,xmm3
	pxor xmm0,xmm4

	movdqa xmm4,xmm0
	movdqa xmm3,xmm0
	psllq xmm0,5
	pxor xmm3,xmm0
	psllq xmm0,1
	pxor xmm0,xmm3
	psllq xmm0,57
	movdqa xmm3,xmm0
	pslldq xmm0,8
	psrldq xmm3,8
	pxor xmm0,xmm4
	pxor xmm1,xmm3


	movdqa xmm4,xmm0
	psrlq xmm0,1
	pxor xmm1,xmm4
	pxor xmm4,xmm0
	psrlq xmm0,5
	pxor xmm0,xmm4
	psrlq xmm0,1
	pxor xmm0,xmm1
$L$done:
	DB 102,65,15,56,0,194	; pshufb xmm0,xmm10
	movdqu XMMWORD[rcx],xmm0
	; Reload the non-volatile vector registers saved in the prologue.
	movaps xmm6,XMMWORD[rsp]
	movaps xmm7,XMMWORD[16+rsp]
	movaps xmm8,XMMWORD[32+rsp]
	movaps xmm9,XMMWORD[48+rsp]
	movaps xmm10,XMMWORD[64+rsp]
	movaps xmm11,XMMWORD[80+rsp]
	movaps xmm12,XMMWORD[96+rsp]
	movaps xmm13,XMMWORD[112+rsp]
	movaps xmm14,XMMWORD[128+rsp]
	movaps xmm15,XMMWORD[144+rsp]
	lea rsp,[168+rsp]
$L$SEH_end_gcm_ghash_clmul:
	DB 0F3h,0C3h ;repret
1350
1351
global gcm_init_avx

;-----------------------------------------------------------------------
; gcm_init_avx
; ABI:   Microsoft x64; arguments are used directly from rcx/rdx.
; In:    rcx = destination table; 192 bytes are written
;        rdx = pointer to a 16-byte source value
;              (presumably the raw hash key H -- confirm against caller)
; Out:   four 48-byte groups at the original rcx. Each group holds two
;        successive powers of the adjusted key (at +0 and +16) followed
;        by their xor-folded halves packed into one 16-byte word (+32).
; Clobb: rcx (advanced by 192), r10, xmm0-xmm5, flags. xmm6
;        (non-volatile on Win64) is saved in a 24-byte stack slot and
;        restored before returning.
;
; Changes from the scraped text:
;   * viewer line numbers fused onto every statement are removed;
;   * an ENDBR64 landing pad is added at the entry, matching
;     gcm_gmult_avx, gcm_ghash_avx and the non-AVX entry points in this
;     file. It precedes $L$SEH_begin_gcm_init_avx, so prologue offsets
;     measured from that label are unaffected.
;-----------------------------------------------------------------------
ALIGN 32
gcm_init_avx:

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
$L$SEH_begin_gcm_init_avx:

	DB 0x48,0x83,0xec,0x18	; sub rsp,0x18
	DB 0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6
	vzeroupper

	vmovdqu xmm2,XMMWORD[rdx]
	vpshufd xmm2,xmm2,78	; swap the two 64-bit halves


	; xmm2 <<= 1 as a 128-bit value; if the bit shifted out was set,
	; XOR in the constant at $L$0x1c2_polynomial.
	vpshufd xmm4,xmm2,255	; broadcast the top dword
	vpsrlq xmm3,xmm2,63
	vpsllq xmm2,xmm2,1
	vpxor xmm5,xmm5,xmm5
	vpcmpgtd xmm5,xmm5,xmm4	; all-ones when the top dword is negative
	vpslldq xmm3,xmm3,8
	vpor xmm2,xmm2,xmm3


	vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
	vpxor xmm2,xmm2,xmm5

	vpunpckhqdq xmm6,xmm2,xmm2
	vmovdqa xmm0,xmm2	; xmm0 = running power, starts at the first
	vpxor xmm6,xmm6,xmm2	; xmm6 = high half ^ low half of the key
	mov r10,4	; four 48-byte groups to emit
	jmp NEAR $L$init_start_avx
ALIGN 32
$L$init_loop_avx:
	; Complete the previous group: pack its folded halves into the third
	; slot (rcx has already been advanced past it).
	vpalignr xmm5,xmm4,xmm3,8
	vmovdqu XMMWORD[(-16)+rcx],xmm5

	; xmm0 = xmm0 * xmm2 (next odd power), then reduce.
	vpunpckhqdq xmm3,xmm0,xmm0
	vpxor xmm3,xmm3,xmm0
	vpclmulqdq xmm1,xmm0,xmm2,0x11
	vpclmulqdq xmm0,xmm0,xmm2,0x00
	vpclmulqdq xmm3,xmm3,xmm6,0x00
	vpxor xmm4,xmm1,xmm0
	vpxor xmm3,xmm3,xmm4

	vpslldq xmm4,xmm3,8
	vpsrldq xmm3,xmm3,8
	vpxor xmm0,xmm0,xmm4
	vpxor xmm1,xmm1,xmm3
	vpsllq xmm3,xmm0,57
	vpsllq xmm4,xmm0,62
	vpxor xmm4,xmm4,xmm3
	vpsllq xmm3,xmm0,63
	vpxor xmm4,xmm4,xmm3
	vpslldq xmm3,xmm4,8
	vpsrldq xmm4,xmm4,8
	vpxor xmm0,xmm0,xmm3
	vpxor xmm1,xmm1,xmm4

	vpsrlq xmm4,xmm0,1
	vpxor xmm1,xmm1,xmm0
	vpxor xmm0,xmm0,xmm4
	vpsrlq xmm4,xmm4,5
	vpxor xmm0,xmm0,xmm4
	vpsrlq xmm0,xmm0,1
	vpxor xmm0,xmm0,xmm1
$L$init_start_avx:
	vmovdqa xmm5,xmm0	; xmm5 = first power of this group

	; xmm0 = xmm0 * xmm2 (second power of this group), then reduce.
	vpunpckhqdq xmm3,xmm0,xmm0
	vpxor xmm3,xmm3,xmm0
	vpclmulqdq xmm1,xmm0,xmm2,0x11
	vpclmulqdq xmm0,xmm0,xmm2,0x00
	vpclmulqdq xmm3,xmm3,xmm6,0x00
	vpxor xmm4,xmm1,xmm0
	vpxor xmm3,xmm3,xmm4

	vpslldq xmm4,xmm3,8
	vpsrldq xmm3,xmm3,8
	vpxor xmm0,xmm0,xmm4
	vpxor xmm1,xmm1,xmm3
	vpsllq xmm3,xmm0,57
	vpsllq xmm4,xmm0,62
	vpxor xmm4,xmm4,xmm3
	vpsllq xmm3,xmm0,63
	vpxor xmm4,xmm4,xmm3
	vpslldq xmm3,xmm4,8
	vpsrldq xmm4,xmm4,8
	vpxor xmm0,xmm0,xmm3
	vpxor xmm1,xmm1,xmm4

	vpsrlq xmm4,xmm0,1
	vpxor xmm1,xmm1,xmm0
	vpxor xmm0,xmm0,xmm4
	vpsrlq xmm4,xmm4,5
	vpxor xmm0,xmm0,xmm4
	vpsrlq xmm0,xmm0,1
	vpxor xmm0,xmm0,xmm1

	; Store both powers; their folded halves stay in xmm3/xmm4 and are
	; packed into the third slot by whichever code runs next.
	vpshufd xmm3,xmm5,78
	vpshufd xmm4,xmm0,78
	vpxor xmm3,xmm3,xmm5
	vmovdqu XMMWORD[rcx],xmm5
	vpxor xmm4,xmm4,xmm0
	vmovdqu XMMWORD[16+rcx],xmm0
	lea rcx,[48+rcx]
	sub r10,1
	jnz NEAR $L$init_loop_avx

	; Third slot of the last group. Operand order is as in the scraped
	; source; it differs from the in-loop vpalignr above (xmm4,xmm3).
	; NOTE(review): confirm against upstream before relying on the
	; byte layout of this final slot.
	vpalignr xmm5,xmm3,xmm4,8
	vmovdqu XMMWORD[(-16)+rcx],xmm5

	vzeroupper
	movaps xmm6,XMMWORD[rsp]	; restore the non-volatile xmm6
	lea rsp,[24+rsp]
$L$SEH_end_gcm_init_avx:
	DB 0F3h,0C3h ;repret
1466
1467
global gcm_gmult_avx

;-----------------------------------------------------------------------
; gcm_gmult_avx
; Thin entry point: after its own ENDBR64 landing pad it tail-jumps to
; $L$_gmult_clmul, the body of gcm_gmult_clmul, so arguments, results
; and clobbers are exactly those of that routine (rcx/rdx are passed
; through untouched and the final return goes straight to our caller).
;
; (Viewer line numbers that had been fused onto every statement are
; removed; instruction order and operands are unchanged.)
;-----------------------------------------------------------------------
ALIGN 32
gcm_gmult_avx:

	DB 243,15,30,250	; F3 0F 1E FA = endbr64
	jmp NEAR $L$_gmult_clmul
1475
1476
1477global gcm_ghash_avx
1478
1479ALIGN 32
1480gcm_ghash_avx:
1481
1482DB 243,15,30,250
1483 lea rax,[((-136))+rsp]
1484$L$SEH_begin_gcm_ghash_avx:
1485
1486DB 0x48,0x8d,0x60,0xe0
1487DB 0x0f,0x29,0x70,0xe0
1488DB 0x0f,0x29,0x78,0xf0
1489DB 0x44,0x0f,0x29,0x00
1490DB 0x44,0x0f,0x29,0x48,0x10
1491DB 0x44,0x0f,0x29,0x50,0x20
1492DB 0x44,0x0f,0x29,0x58,0x30
1493DB 0x44,0x0f,0x29,0x60,0x40
1494DB 0x44,0x0f,0x29,0x68,0x50
1495DB 0x44,0x0f,0x29,0x70,0x60
1496DB 0x44,0x0f,0x29,0x78,0x70
1497 vzeroupper
1498
1499 vmovdqu xmm10,XMMWORD[rcx]
1500 lea r10,[$L$0x1c2_polynomial]
1501 lea rdx,[64+rdx]
1502 vmovdqu xmm13,XMMWORD[$L$bswap_mask]
1503 vpshufb xmm10,xmm10,xmm13
1504 cmp r9,0x80
1505 jb NEAR $L$short_avx
1506 sub r9,0x80
1507
1508 vmovdqu xmm14,XMMWORD[112+r8]
1509 vmovdqu xmm6,XMMWORD[((0-64))+rdx]
1510 vpshufb xmm14,xmm14,xmm13
1511 vmovdqu xmm7,XMMWORD[((32-64))+rdx]
1512
1513 vpunpckhqdq xmm9,xmm14,xmm14
1514 vmovdqu xmm15,XMMWORD[96+r8]
1515 vpclmulqdq xmm0,xmm14,xmm6,0x00
1516 vpxor xmm9,xmm9,xmm14
1517 vpshufb xmm15,xmm15,xmm13
1518 vpclmulqdq xmm1,xmm14,xmm6,0x11
1519 vmovdqu xmm6,XMMWORD[((16-64))+rdx]
1520 vpunpckhqdq xmm8,xmm15,xmm15
1521 vmovdqu xmm14,XMMWORD[80+r8]
1522 vpclmulqdq xmm2,xmm9,xmm7,0x00
1523 vpxor xmm8,xmm8,xmm15
1524
1525 vpshufb xmm14,xmm14,xmm13
1526 vpclmulqdq xmm3,xmm15,xmm6,0x00
1527 vpunpckhqdq xmm9,xmm14,xmm14
1528 vpclmulqdq xmm4,xmm15,xmm6,0x11
1529 vmovdqu xmm6,XMMWORD[((48-64))+rdx]
1530 vpxor xmm9,xmm9,xmm14
1531 vmovdqu xmm15,XMMWORD[64+r8]
1532 vpclmulqdq xmm5,xmm8,xmm7,0x10
1533 vmovdqu xmm7,XMMWORD[((80-64))+rdx]
1534
1535 vpshufb xmm15,xmm15,xmm13
1536 vpxor xmm3,xmm3,xmm0
1537 vpclmulqdq xmm0,xmm14,xmm6,0x00
1538 vpxor xmm4,xmm4,xmm1
1539 vpunpckhqdq xmm8,xmm15,xmm15
1540 vpclmulqdq xmm1,xmm14,xmm6,0x11
1541 vmovdqu xmm6,XMMWORD[((64-64))+rdx]
1542 vpxor xmm5,xmm5,xmm2
1543 vpclmulqdq xmm2,xmm9,xmm7,0x00
1544 vpxor xmm8,xmm8,xmm15
1545
1546 vmovdqu xmm14,XMMWORD[48+r8]
1547 vpxor xmm0,xmm0,xmm3
1548 vpclmulqdq xmm3,xmm15,xmm6,0x00
1549 vpxor xmm1,xmm1,xmm4
1550 vpshufb xmm14,xmm14,xmm13
1551 vpclmulqdq xmm4,xmm15,xmm6,0x11
1552 vmovdqu xmm6,XMMWORD[((96-64))+rdx]
1553 vpxor xmm2,xmm2,xmm5
1554 vpunpckhqdq xmm9,xmm14,xmm14
1555 vpclmulqdq xmm5,xmm8,xmm7,0x10
1556 vmovdqu xmm7,XMMWORD[((128-64))+rdx]
1557 vpxor xmm9,xmm9,xmm14
1558
1559 vmovdqu xmm15,XMMWORD[32+r8]
1560 vpxor xmm3,xmm3,xmm0
1561 vpclmulqdq xmm0,xmm14,xmm6,0x00
1562 vpxor xmm4,xmm4,xmm1
1563 vpshufb xmm15,xmm15,xmm13
1564 vpclmulqdq xmm1,xmm14,xmm6,0x11
1565 vmovdqu xmm6,XMMWORD[((112-64))+rdx]
1566 vpxor xmm5,xmm5,xmm2
1567 vpunpckhqdq xmm8,xmm15,xmm15
1568 vpclmulqdq xmm2,xmm9,xmm7,0x00
1569 vpxor xmm8,xmm8,xmm15
1570
1571 vmovdqu xmm14,XMMWORD[16+r8]
1572 vpxor xmm0,xmm0,xmm3
1573 vpclmulqdq xmm3,xmm15,xmm6,0x00
1574 vpxor xmm1,xmm1,xmm4
1575 vpshufb xmm14,xmm14,xmm13
1576 vpclmulqdq xmm4,xmm15,xmm6,0x11
1577 vmovdqu xmm6,XMMWORD[((144-64))+rdx]
1578 vpxor xmm2,xmm2,xmm5
1579 vpunpckhqdq xmm9,xmm14,xmm14
1580 vpclmulqdq xmm5,xmm8,xmm7,0x10
1581 vmovdqu xmm7,XMMWORD[((176-64))+rdx]
1582 vpxor xmm9,xmm9,xmm14
1583
1584 vmovdqu xmm15,XMMWORD[r8]
1585 vpxor xmm3,xmm3,xmm0
1586 vpclmulqdq xmm0,xmm14,xmm6,0x00
1587 vpxor xmm4,xmm4,xmm1
1588 vpshufb xmm15,xmm15,xmm13
1589 vpclmulqdq xmm1,xmm14,xmm6,0x11
1590 vmovdqu xmm6,XMMWORD[((160-64))+rdx]
1591 vpxor xmm5,xmm5,xmm2
1592 vpclmulqdq xmm2,xmm9,xmm7,0x10
1593
1594 lea r8,[128+r8]
1595 cmp r9,0x80
1596 jb NEAR $L$tail_avx
1597
1598 vpxor xmm15,xmm15,xmm10
1599 sub r9,0x80
1600 jmp NEAR $L$oop8x_avx
1601
1602ALIGN 32
1603$L$oop8x_avx:
1604 vpunpckhqdq xmm8,xmm15,xmm15
1605 vmovdqu xmm14,XMMWORD[112+r8]
1606 vpxor xmm3,xmm3,xmm0
1607 vpxor xmm8,xmm8,xmm15
1608 vpclmulqdq xmm10,xmm15,xmm6,0x00
1609 vpshufb xmm14,xmm14,xmm13
1610 vpxor xmm4,xmm4,xmm1
1611 vpclmulqdq xmm11,xmm15,xmm6,0x11
1612 vmovdqu xmm6,XMMWORD[((0-64))+rdx]
1613 vpunpckhqdq xmm9,xmm14,xmm14
1614 vpxor xmm5,xmm5,xmm2
1615 vpclmulqdq xmm12,xmm8,xmm7,0x00
1616 vmovdqu xmm7,XMMWORD[((32-64))+rdx]
1617 vpxor xmm9,xmm9,xmm14
1618
1619 vmovdqu xmm15,XMMWORD[96+r8]
1620 vpclmulqdq xmm0,xmm14,xmm6,0x00
1621 vpxor xmm10,xmm10,xmm3
1622 vpshufb xmm15,xmm15,xmm13
1623 vpclmulqdq xmm1,xmm14,xmm6,0x11
1624 vxorps xmm11,xmm11,xmm4
1625 vmovdqu xmm6,XMMWORD[((16-64))+rdx]
1626 vpunpckhqdq xmm8,xmm15,xmm15
1627 vpclmulqdq xmm2,xmm9,xmm7,0x00
1628 vpxor xmm12,xmm12,xmm5
1629 vxorps xmm8,xmm8,xmm15
1630
1631 vmovdqu xmm14,XMMWORD[80+r8]
1632 vpxor xmm12,xmm12,xmm10
1633 vpclmulqdq xmm3,xmm15,xmm6,0x00
1634 vpxor xmm12,xmm12,xmm11
1635 vpslldq xmm9,xmm12,8
1636 vpxor xmm3,xmm3,xmm0
1637 vpclmulqdq xmm4,xmm15,xmm6,0x11
1638 vpsrldq xmm12,xmm12,8
1639 vpxor xmm10,xmm10,xmm9
1640 vmovdqu xmm6,XMMWORD[((48-64))+rdx]
1641 vpshufb xmm14,xmm14,xmm13
1642 vxorps xmm11,xmm11,xmm12
1643 vpxor xmm4,xmm4,xmm1
1644 vpunpckhqdq xmm9,xmm14,xmm14
1645 vpclmulqdq xmm5,xmm8,xmm7,0x10
1646 vmovdqu xmm7,XMMWORD[((80-64))+rdx]
1647 vpxor xmm9,xmm9,xmm14
1648 vpxor xmm5,xmm5,xmm2
1649
1650 vmovdqu xmm15,XMMWORD[64+r8]
1651 vpalignr xmm12,xmm10,xmm10,8
1652 vpclmulqdq xmm0,xmm14,xmm6,0x00
1653 vpshufb xmm15,xmm15,xmm13
1654 vpxor xmm0,xmm0,xmm3
1655 vpclmulqdq xmm1,xmm14,xmm6,0x11
1656 vmovdqu xmm6,XMMWORD[((64-64))+rdx]
1657 vpunpckhqdq xmm8,xmm15,xmm15
1658 vpxor xmm1,xmm1,xmm4
1659 vpclmulqdq xmm2,xmm9,xmm7,0x00
1660 vxorps xmm8,xmm8,xmm15
1661 vpxor xmm2,xmm2,xmm5
1662
1663 vmovdqu xmm14,XMMWORD[48+r8]
1664 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10
1665 vpclmulqdq xmm3,xmm15,xmm6,0x00
1666 vpshufb xmm14,xmm14,xmm13
1667 vpxor xmm3,xmm3,xmm0
1668 vpclmulqdq xmm4,xmm15,xmm6,0x11
1669 vmovdqu xmm6,XMMWORD[((96-64))+rdx]
1670 vpunpckhqdq xmm9,xmm14,xmm14
1671 vpxor xmm4,xmm4,xmm1
1672 vpclmulqdq xmm5,xmm8,xmm7,0x10
1673 vmovdqu xmm7,XMMWORD[((128-64))+rdx]
1674 vpxor xmm9,xmm9,xmm14
1675 vpxor xmm5,xmm5,xmm2
1676
1677 vmovdqu xmm15,XMMWORD[32+r8]
1678 vpclmulqdq xmm0,xmm14,xmm6,0x00
1679 vpshufb xmm15,xmm15,xmm13
1680 vpxor xmm0,xmm0,xmm3
1681 vpclmulqdq xmm1,xmm14,xmm6,0x11
1682 vmovdqu xmm6,XMMWORD[((112-64))+rdx]
1683 vpunpckhqdq xmm8,xmm15,xmm15
1684 vpxor xmm1,xmm1,xmm4
1685 vpclmulqdq xmm2,xmm9,xmm7,0x00
1686 vpxor xmm8,xmm8,xmm15
1687 vpxor xmm2,xmm2,xmm5
1688 vxorps xmm10,xmm10,xmm12
1689
1690 vmovdqu xmm14,XMMWORD[16+r8]
1691 vpalignr xmm12,xmm10,xmm10,8
1692 vpclmulqdq xmm3,xmm15,xmm6,0x00
1693 vpshufb xmm14,xmm14,xmm13
1694 vpxor xmm3,xmm3,xmm0
1695 vpclmulqdq xmm4,xmm15,xmm6,0x11
1696 vmovdqu xmm6,XMMWORD[((144-64))+rdx]
1697 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10
1698 vxorps xmm12,xmm12,xmm11
1699 vpunpckhqdq xmm9,xmm14,xmm14
1700 vpxor xmm4,xmm4,xmm1
1701 vpclmulqdq xmm5,xmm8,xmm7,0x10
1702 vmovdqu xmm7,XMMWORD[((176-64))+rdx]
1703 vpxor xmm9,xmm9,xmm14
1704 vpxor xmm5,xmm5,xmm2
1705
1706 vmovdqu xmm15,XMMWORD[r8]
1707 vpclmulqdq xmm0,xmm14,xmm6,0x00
1708 vpshufb xmm15,xmm15,xmm13
1709 vpclmulqdq xmm1,xmm14,xmm6,0x11
1710 vmovdqu xmm6,XMMWORD[((160-64))+rdx]
1711 vpxor xmm15,xmm15,xmm12
1712 vpclmulqdq xmm2,xmm9,xmm7,0x10
1713 vpxor xmm15,xmm15,xmm10
1714
1715 lea r8,[128+r8]
1716 sub r9,0x80
1717 jnc NEAR $L$oop8x_avx
1718
1719 add r9,0x80
1720 jmp NEAR $L$tail_no_xor_avx
1721
1722ALIGN 32
1723$L$short_avx:
1724 vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8]
1725 lea r8,[r9*1+r8]
1726 vmovdqu xmm6,XMMWORD[((0-64))+rdx]
1727 vmovdqu xmm7,XMMWORD[((32-64))+rdx]
1728 vpshufb xmm15,xmm14,xmm13
1729
1730 vmovdqa xmm3,xmm0
1731 vmovdqa xmm4,xmm1
1732 vmovdqa xmm5,xmm2
1733 sub r9,0x10
1734 jz NEAR $L$tail_avx
1735
1736 vpunpckhqdq xmm8,xmm15,xmm15
1737 vpxor xmm3,xmm3,xmm0
1738 vpclmulqdq xmm0,xmm15,xmm6,0x00
1739 vpxor xmm8,xmm8,xmm15
1740 vmovdqu xmm14,XMMWORD[((-32))+r8]
1741 vpxor xmm4,xmm4,xmm1
1742 vpclmulqdq xmm1,xmm15,xmm6,0x11
1743 vmovdqu xmm6,XMMWORD[((16-64))+rdx]
1744 vpshufb xmm15,xmm14,xmm13
1745 vpxor xmm5,xmm5,xmm2
1746 vpclmulqdq xmm2,xmm8,xmm7,0x00
1747 vpsrldq xmm7,xmm7,8
1748 sub r9,0x10
1749 jz NEAR $L$tail_avx
1750
1751 vpunpckhqdq xmm8,xmm15,xmm15
1752 vpxor xmm3,xmm3,xmm0
1753 vpclmulqdq xmm0,xmm15,xmm6,0x00
1754 vpxor xmm8,xmm8,xmm15
1755 vmovdqu xmm14,XMMWORD[((-48))+r8]
1756 vpxor xmm4,xmm4,xmm1
1757 vpclmulqdq xmm1,xmm15,xmm6,0x11
1758 vmovdqu xmm6,XMMWORD[((48-64))+rdx]
1759 vpshufb xmm15,xmm14,xmm13
1760 vpxor xmm5,xmm5,xmm2
1761 vpclmulqdq xmm2,xmm8,xmm7,0x00
1762 vmovdqu xmm7,XMMWORD[((80-64))+rdx]
1763 sub r9,0x10
1764 jz NEAR $L$tail_avx
1765
1766 vpunpckhqdq xmm8,xmm15,xmm15
1767 vpxor xmm3,xmm3,xmm0
1768 vpclmulqdq xmm0,xmm15,xmm6,0x00
1769 vpxor xmm8,xmm8,xmm15
1770 vmovdqu xmm14,XMMWORD[((-64))+r8]
1771 vpxor xmm4,xmm4,xmm1
1772 vpclmulqdq xmm1,xmm15,xmm6,0x11
1773 vmovdqu xmm6,XMMWORD[((64-64))+rdx]
1774 vpshufb xmm15,xmm14,xmm13
1775 vpxor xmm5,xmm5,xmm2
1776 vpclmulqdq xmm2,xmm8,xmm7,0x00
1777 vpsrldq xmm7,xmm7,8
1778 sub r9,0x10
1779 jz NEAR $L$tail_avx
1780
1781 vpunpckhqdq xmm8,xmm15,xmm15
1782 vpxor xmm3,xmm3,xmm0
1783 vpclmulqdq xmm0,xmm15,xmm6,0x00
1784 vpxor xmm8,xmm8,xmm15
1785 vmovdqu xmm14,XMMWORD[((-80))+r8]
1786 vpxor xmm4,xmm4,xmm1
1787 vpclmulqdq xmm1,xmm15,xmm6,0x11
1788 vmovdqu xmm6,XMMWORD[((96-64))+rdx]
1789 vpshufb xmm15,xmm14,xmm13
1790 vpxor xmm5,xmm5,xmm2
1791 vpclmulqdq xmm2,xmm8,xmm7,0x00
1792 vmovdqu xmm7,XMMWORD[((128-64))+rdx]
1793 sub r9,0x10
1794 jz NEAR $L$tail_avx
1795
1796 vpunpckhqdq xmm8,xmm15,xmm15
1797 vpxor xmm3,xmm3,xmm0
1798 vpclmulqdq xmm0,xmm15,xmm6,0x00
1799 vpxor xmm8,xmm8,xmm15
1800 vmovdqu xmm14,XMMWORD[((-96))+r8]
1801 vpxor xmm4,xmm4,xmm1
1802 vpclmulqdq xmm1,xmm15,xmm6,0x11
1803 vmovdqu xmm6,XMMWORD[((112-64))+rdx]
1804 vpshufb xmm15,xmm14,xmm13
1805 vpxor xmm5,xmm5,xmm2
1806 vpclmulqdq xmm2,xmm8,xmm7,0x00
1807 vpsrldq xmm7,xmm7,8
1808 sub r9,0x10
1809 jz NEAR $L$tail_avx
1810
1811 vpunpckhqdq xmm8,xmm15,xmm15
1812 vpxor xmm3,xmm3,xmm0
1813 vpclmulqdq xmm0,xmm15,xmm6,0x00
1814 vpxor xmm8,xmm8,xmm15
1815 vmovdqu xmm14,XMMWORD[((-112))+r8]
1816 vpxor xmm4,xmm4,xmm1
1817 vpclmulqdq xmm1,xmm15,xmm6,0x11
1818 vmovdqu xmm6,XMMWORD[((144-64))+rdx]
1819 vpshufb xmm15,xmm14,xmm13
1820 vpxor xmm5,xmm5,xmm2
1821 vpclmulqdq xmm2,xmm8,xmm7,0x00
1822 vmovq xmm7,QWORD[((184-64))+rdx]
1823 sub r9,0x10
1824 jmp NEAR $L$tail_avx
1825
1826ALIGN 32
1827$L$tail_avx:
1828 vpxor xmm15,xmm15,xmm10
1829$L$tail_no_xor_avx:
1830 vpunpckhqdq xmm8,xmm15,xmm15
1831 vpxor xmm3,xmm3,xmm0
1832 vpclmulqdq xmm0,xmm15,xmm6,0x00
1833 vpxor xmm8,xmm8,xmm15
1834 vpxor xmm4,xmm4,xmm1
1835 vpclmulqdq xmm1,xmm15,xmm6,0x11
1836 vpxor xmm5,xmm5,xmm2
1837 vpclmulqdq xmm2,xmm8,xmm7,0x00
1838
1839 vmovdqu xmm12,XMMWORD[r10]
1840
1841 vpxor xmm10,xmm3,xmm0
1842 vpxor xmm11,xmm4,xmm1
1843 vpxor xmm5,xmm5,xmm2
1844
1845 vpxor xmm5,xmm5,xmm10
1846 vpxor xmm5,xmm5,xmm11
1847 vpslldq xmm9,xmm5,8
1848 vpsrldq xmm5,xmm5,8
1849 vpxor xmm10,xmm10,xmm9
1850 vpxor xmm11,xmm11,xmm5
1851
1852 vpclmulqdq xmm9,xmm10,xmm12,0x10
1853 vpalignr xmm10,xmm10,xmm10,8
1854 vpxor xmm10,xmm10,xmm9
1855
1856 vpclmulqdq xmm9,xmm10,xmm12,0x10
1857 vpalignr xmm10,xmm10,xmm10,8
1858 vpxor xmm10,xmm10,xmm11
1859 vpxor xmm10,xmm10,xmm9
1860
1861 cmp r9,0
1862 jne NEAR $L$short_avx
1863
1864 vpshufb xmm10,xmm10,xmm13
1865 vmovdqu XMMWORD[rcx],xmm10
1866 vzeroupper
1867 movaps xmm6,XMMWORD[rsp]
1868 movaps xmm7,XMMWORD[16+rsp]
1869 movaps xmm8,XMMWORD[32+rsp]
1870 movaps xmm9,XMMWORD[48+rsp]
1871 movaps xmm10,XMMWORD[64+rsp]
1872 movaps xmm11,XMMWORD[80+rsp]
1873 movaps xmm12,XMMWORD[96+rsp]
1874 movaps xmm13,XMMWORD[112+rsp]
1875 movaps xmm14,XMMWORD[128+rsp]
1876 movaps xmm15,XMMWORD[144+rsp]
1877 lea rsp,[168+rsp]
1878$L$SEH_end_gcm_ghash_avx:
1879 DB 0F3h,0C3h ;repret
1880
1881
1882ALIGN 64
; 16-byte constants for the vector code paths, starting on a 64-byte boundary.
; NOTE(review): the instructions that load these labels are outside the
; reviewed chunk; usage remarks below are inferred from names/values only.
;
; $L$bswap_mask: byte indices 15..0 in descending order. Used as a
; (v)pshufb control vector this pattern reverses the 16 bytes of an XMM
; register (presumably what xmm13 holds in gcm_ghash_avx -- confirm).
1883$L$bswap_mask:
1884DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; $L$0x1c2_polynomial: 128-bit little-endian value with lowest byte 0x01 and
; highest byte 0xc2, all other bytes zero.
1885$L$0x1c2_polynomial:
1886DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; $L$7_mask: four dwords = two qwords, each equal to 7.
1887$L$7_mask:
1888 DD 7,0,7,0
; $L$7_mask_poly: low qword 7, high qword 450 (450 = 0x1c2).
1889$L$7_mask_poly:
1890 DD 7,0,450,0
1891ALIGN 64
1892
; $L$rem_4bit: 16 qword entries (each written as a pair of dwords, low dword
; first). The low dword of every entry is 0, so the value lives entirely in
; the upper 32 bits (e.g. entry 1 = 471859200 << 32 = 0x1C200000 << 32).
; gcm_gmult_4bit addresses this table as QWORD[rdx*8+r11] with r11 pointing
; here and rdx masked to 0..15 (`and rdx,0xf`), and XORs the entry into the
; upper half of the accumulator (r9).
1893$L$rem_4bit:
1894 DD 0,0,0,471859200,0,943718400,0,610271232
1895 DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
1896 DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
1897 DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
1898
; $L$rem_8bit: 256 16-bit entries (32 rows of 8 words), 512 bytes in total.
; Entry 1 is 0x01C2; entries 16, 32 and 48 (0x1C20, 0x3840, 0x2460) equal
; the upper 16 bits of entries 1, 2 and 3 of $L$rem_4bit.
; NOTE(review): the code that indexes this table is outside the reviewed
; chunk -- presumably the 8-bit-at-a-time reduction in gcm_ghash_4bit; confirm.
1899$L$rem_8bit:
1900 DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1901 DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1902 DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1903 DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1904 DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1905 DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1906 DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1907 DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1908 DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1909 DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1910 DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1911 DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1912 DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1913 DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1914 DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1915 DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1916 DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1917 DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1918 DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1919 DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1920 DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1921 DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1922 DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1923 DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1924 DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1925 DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1926 DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1927 DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1928 DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1929 DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1930 DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1931 DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1932
; NUL-terminated ASCII identification string embedded in the object:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1933DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1934DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1935DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1936DB 114,103,62,0
1937ALIGN 64
1938EXTERN __imp_RtlVirtualUnwind
1939
1940ALIGN 16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced from
; the .xdata records of gcm_gmult_4bit and gcm_ghash_4bit.
;
; ABI:  Microsoft x64. Only r8 and r9 of the incoming arguments are read:
;         r8 = pointer to the CONTEXT record being unwound
;         r9 = pointer to the DISPATCHER_CONTEXT
; Out:  eax = 1 (ExceptionContinueSearch in the Windows
;       EXCEPTION_DISPOSITION enum).
; Saves/restores rsi, rdi, rbx, rbp, r12-r15 and rflags around its body.
;
; What it does: reconstructs the caller-visible values of rsp, rsi, rdi
; (and, when the fault is inside the function body, rbx/rbp/r12-r15) in
; the CONTEXT record, copies that record to disp->ContextRecord, and then
; calls RtlVirtualUnwind to continue unwinding.
;
; NOTE(review): the structure field names in the comments below follow the
; Windows SDK layouts of CONTEXT and DISPATCHER_CONTEXT; this file only
; contains the raw numeric offsets -- verify against winnt.h.
;-----------------------------------------------------------------------
1941se_handler:
1942 push rsi
1943 push rdi
1944 push rbx
1945 push rbp
1946 push r12
1947 push r13
1948 push r14
1949 push r15
1950 pushfq
1951 sub rsp,64                 ; 32 bytes shadow space + 4 stack argument slots for the call below
1952
1953 mov rax,QWORD[120+r8]      ; context->Rax (the covered functions keep the entry rsp in rax during their prologue)
1954 mov rbx,QWORD[248+r8]      ; context->Rip
1955
1956 mov rsi,QWORD[8+r9]        ; disp->ImageBase
1957 mov r11,QWORD[56+r9]       ; disp->HandlerData: the two DWORD RVAs stored after the handler RVA in .xdata
1958
1959 mov r10d,DWORD[r11]        ; HandlerData[0] = RVA of the function's prologue label
1960 lea r10,[r10*1+rsi]        ; convert RVA to absolute address
1961 cmp rbx,r10
1962 jb NEAR $L$in_prologue     ; Rip below prologue label: frame not set up yet, rax = entry rsp
1963
1964 mov rax,QWORD[152+r8]      ; context->Rsp
1965
1966 mov r10d,DWORD[4+r11]      ; HandlerData[1] = RVA of the function's epilogue label
1967 lea r10,[r10*1+rsi]        ; convert RVA to absolute address
1968 cmp rbx,r10
1969 jae NEAR $L$in_prologue    ; Rip at/after epilogue label: Rsp is used as-is (presumably frame already released -- confirm)
1970
1971 lea rax,[((48+280))+rax]   ; step over the body frame: 280 bytes of locals + 6 pushed registers (see gcm_gmult_4bit prologue)
1972
1973 mov rbx,QWORD[((-8))+rax]  ; reload the saved non-volatile registers in the order they were pushed
1974 mov rbp,QWORD[((-16))+rax]
1975 mov r12,QWORD[((-24))+rax]
1976 mov r13,QWORD[((-32))+rax]
1977 mov r14,QWORD[((-40))+rax]
1978 mov r15,QWORD[((-48))+rax]
1979 mov QWORD[144+r8],rbx      ; context->Rbx
1980 mov QWORD[160+r8],rbp      ; context->Rbp
1981 mov QWORD[216+r8],r12      ; context->R12
1982 mov QWORD[224+r8],r13      ; context->R13
1983 mov QWORD[232+r8],r14      ; context->R14
1984 mov QWORD[240+r8],r15      ; context->R15
1985
; Here rax = value rsp had on entry to the faulting function.
1986$L$in_prologue:
1987 mov rdi,QWORD[8+rax]       ; rdi/rsi were spilled to [8+rsp]/[16+rsp] by the "WIN64 prologue"
1988 mov rsi,QWORD[16+rax]
1989 mov QWORD[152+r8],rax      ; context->Rsp
1990 mov QWORD[168+r8],rsi      ; context->Rsi
1991 mov QWORD[176+r8],rdi      ; context->Rdi
1992
1993 mov rdi,QWORD[40+r9]       ; destination = disp->ContextRecord
1994 mov rsi,r8                 ; source = the context we just patched
1995 mov ecx,154                ; 154 qwords = 1232 bytes
1996 DD 0xa548f3fc              ; raw encoding FC F3 48 A5 = cld ; rep movsq
1997
; Call RtlVirtualUnwind with 8 arguments (4 in registers, 4 on the stack
; above the 32-byte shadow area).
1998 mov rsi,r9                 ; keep the DISPATCHER_CONTEXT pointer; r9 is reused as arg4
1999 xor rcx,rcx                ; arg1 = 0 (UNW_FLAG_NHANDLER)
2000 mov rdx,QWORD[8+rsi]       ; arg2 = disp->ImageBase
2001 mov r8,QWORD[rsi]          ; arg3 = disp->ControlPc
2002 mov r9,QWORD[16+rsi]       ; arg4 = disp->FunctionEntry
2003 mov r10,QWORD[40+rsi]      ; disp->ContextRecord
2004 lea r11,[56+rsi]           ; &disp->HandlerData
2005 lea r12,[24+rsi]           ; &disp->EstablisherFrame
2006 mov QWORD[32+rsp],r10      ; arg5
2007 mov QWORD[40+rsp],r11      ; arg6
2008 mov QWORD[48+rsp],r12      ; arg7
2009 mov QWORD[56+rsp],rcx      ; arg8 = 0 (NULL)
2010 call QWORD[__imp_RtlVirtualUnwind]
2011
2012 mov eax,1                  ; return value 1
2013 add rsp,64
2014 popfq
2015 pop r15
2016 pop r14
2017 pop r13
2018 pop r12
2019 pop rbp
2020 pop rbx
2021 pop rdi
2022 pop rsi
2023 DB 0F3h,0C3h ;repret
2024
2025
2026section .pdata rdata align=4
2027ALIGN 4
; Function table: one 3-DWORD record per function, each field an
; image-relative address (`wrt ..imagebase`):
;   begin label, end label, unwind-info label (defined in .xdata).
; gcm_gmult_4bit / gcm_ghash_4bit use handler-based unwind info.
2028 DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
2029 DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase
2030 DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase
2031
2032 DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
2033 DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase
2034 DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase
2035
2036 DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase
2037 DD $L$SEH_end_gcm_init_clmul wrt ..imagebase
2038 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase
2039
2040 DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
2041 DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase
2042 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; The AVX variants have no unwind info of their own: gcm_init_avx points at
; the gcm_init_clmul record and gcm_ghash_avx at the gcm_ghash_clmul record.
; This is only valid while their prologues match the clmul ones
; (the gcm_ghash_avx epilogue restores xmm6..xmm15 from 0..144 and releases
; 168 bytes, which agrees with the gcm_ghash_clmul unwind codes).
2043 DD $L$SEH_begin_gcm_init_avx wrt ..imagebase
2044 DD $L$SEH_end_gcm_init_avx wrt ..imagebase
2045 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase
2046
2047 DD $L$SEH_begin_gcm_ghash_avx wrt ..imagebase
2048 DD $L$SEH_end_gcm_ghash_avx wrt ..imagebase
2049 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
2050section .xdata rdata align=8
2051ALIGN 8
; Unwind information records referenced from .pdata.
; Header bytes follow the Win64 UNWIND_INFO layout:
;   byte0 = version | flags<<3, byte1 = prologue size,
;   byte2 = number of unwind-code slots, byte3 = frame register/offset.
;
; 4-bit functions: header 9 = version 1 with flag value 1 (exception handler
; present), no unwind codes. It is followed by the handler RVA and two DWORDs
; of handler data that se_handler reads as HandlerData[0] (prologue label)
; and HandlerData[1] (epilogue label).
2052$L$SEH_info_gcm_gmult_4bit:
2053DB 9,0,0,0
2054 DD se_handler wrt ..imagebase
2055 DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
2056$L$SEH_info_gcm_ghash_4bit:
2057DB 9,0,0,0
2058 DD se_handler wrt ..imagebase
2059 DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
; Table-driven records. Each unwind code is: prologue offset, (info<<4)|op,
; followed by a 16-bit operand where the op needs one.
; gcm_init_clmul: version 1, 8-byte prologue, 3 code slots.
2060$L$SEH_info_gcm_init_clmul:
2061DB 0x01,0x08,0x03,0x00
2062DB 0x08,0x68,0x00,0x00      ; op 8 (save XMM128), xmm6 at rsp+0*16
2063DB 0x04,0x22,0x00,0x00      ; op 2 (small alloc), info 2 -> 2*8+8 = 24 bytes; last two bytes pad the odd slot count
; gcm_ghash_clmul: version 1, 0x33-byte prologue, 0x16 = 22 code slots
; (10 XMM saves x 2 slots + 1 large alloc x 2 slots).
2064$L$SEH_info_gcm_ghash_clmul:
2065DB 0x01,0x33,0x16,0x00
2066DB 0x33,0xf8,0x09,0x00      ; save xmm15 at rsp+9*16 (144)
2067DB 0x2e,0xe8,0x08,0x00      ; save xmm14 at rsp+128
2068DB 0x29,0xd8,0x07,0x00      ; save xmm13 at rsp+112
2069DB 0x24,0xc8,0x06,0x00      ; save xmm12 at rsp+96
2070DB 0x1f,0xb8,0x05,0x00      ; save xmm11 at rsp+80
2071DB 0x1a,0xa8,0x04,0x00      ; save xmm10 at rsp+64
2072DB 0x15,0x98,0x03,0x00      ; save xmm9 at rsp+48
2073DB 0x10,0x88,0x02,0x00      ; save xmm8 at rsp+32
2074DB 0x0c,0x78,0x01,0x00      ; save xmm7 at rsp+16
2075DB 0x08,0x68,0x00,0x00      ; save xmm6 at rsp+0
2076DB 0x04,0x01,0x15,0x00      ; op 1 (large alloc), 0x15*8 = 168 bytes
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette