VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.7/crypto/genasm-nasm/x86_64-mont5.S@ 107278

最後變更 在這個檔案從107278是 99371,由 vboxsync 提交於 23 月 前

openssl-3.1.0: After generating headers and asm (kmk recreate-headers recreate-headers)

檔案大小: 59.7 KB
 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9
;-----------------------------------------------------------------------
; bn_mul_mont_gather5 (exported)
;
; Word-by-word multiply/reduce loop whose multiplier words are fetched
; from a table with a mask-and-OR "gather" (every table entry of a row is
; read; compare masks select the wanted one).
;
; ABI: entered with the Microsoft x64 convention (rcx, rdx, r8, r9 plus
; stack slots); the prologue remaps the arguments as follows:
;   rdi = arg1 (rcx)       destination words (written in $L$sub/$L$copy)
;   rsi = arg2 (rdx)       first operand words (multiplied by rbx)
;   rdx = arg3 (r8)        table base (gathered via r12 = rdx+128)
;   rcx = arg4 (r9)        second operand words (multiplied by rbp and
;                          subtracted in $L$sub; presumably the modulus)
;   r8  = arg5 [40+rsp]    pointer to one qword, loaded once into r8
;                          (presumably the Montgomery constant n0 - confirm)
;   r9  = arg6 [48+rsp]    word count (only the low 32 bits are used)
;   arg7 [56+rsp]          32-bit selector used to build the gather masks
; Returns rax = 1.  rbx, rbp, r12-r15 are pushed and restored; rdi/rsi are
; saved in and restored from the caller's home slots [8+rsp]/[16+rsp].
; When the word count is a multiple of 8, control is transferred to
; $L$mul4x_enter (inside bn_mul4x_mont_gather5) instead.
;-----------------------------------------------------------------------
10global bn_mul_mont_gather5
11
12ALIGN 64
13bn_mul_mont_gather5:
; Save rdi/rsi (callee-saved on Win64) in the caller-provided home slots
; and remap the Win64 argument registers/stack slots.
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_bn_mul_mont_gather5:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
26
; Writing r9d zero-extends into r9; rax keeps the entry rsp for the epilogue.
27 mov r9d,r9d
28 mov rax,rsp
29
; Word count not a multiple of 8 -> generic path below.  Otherwise load the
; capability dword at OPENSSL_ia32cap_P+8 into r11d (tested at
; $L$mul4x_enter) and continue in the 4x code.
30 test r9d,7
31 jnz NEAR $L$mul_enter
32 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
33 jmp NEAR $L$mul4x_enter
34
35ALIGN 16
36$L$mul_enter:
; xmm5 = 32-bit selector taken from the 7th argument slot (read before the
; pushes move rsp).
37 movd xmm5,DWORD[56+rsp]
38 push rbx
39
40 push rbp
41
42 push r12
43
44 push r13
45
46 push r14
47
48 push r15
49
50
; Frame: r10 = (rsp - 280 - 8*r9) rounded down to a 1024-byte boundary.
51 neg r9
52 mov r11,rsp
53 lea r10,[((-280))+r9*8+rsp]
54 neg r9
55 and r10,-1024
56
57
58
59
60
61
62
63
64
; Move rsp down to r10 in steps of at most 4096 bytes, reading one qword at
; every step (presumably so each stack page is touched in order - confirm).
65 sub r11,r10
66 and r11,-4096
67 lea rsp,[r11*1+r10]
68 mov r11,QWORD[rsp]
69 cmp rsp,r10
70 ja NEAR $L$mul_page_walk
71 jmp NEAR $L$mul_page_walk_done
72
73$L$mul_page_walk:
74 lea rsp,[((-4096))+rsp]
75 mov r11,QWORD[rsp]
76 cmp rsp,r10
77 ja NEAR $L$mul_page_walk
78$L$mul_page_walk_done:
79
; r10 -> $L$inc (defined outside this view; presumably the initial counter
; vector followed by the increment vector).  The entry rsp (rax) is parked
; at [8+r9*8+rsp] for the epilogue.
80 lea r10,[$L$inc]
81 mov QWORD[8+r9*8+rsp],rax
82
83$L$mul_body:
84
; r12 = table base + 128 so one row is addressed with offsets -128..+112.
; r10 is re-pointed so that the 16 masks live at [112+r10]..[352+r10],
; 16-byte aligned.
85 lea r12,[128+rdx]
86 movdqa xmm0,XMMWORD[r10]
87 movdqa xmm1,XMMWORD[16+r10]
88 lea r10,[((24-112))+r9*8+rsp]
89 and r10,-16
90
; Mask generation: xmm5 = selector broadcast to all four dwords, xmm4 = the
; vector loaded from $L$inc+16.  Each step adds to a running counter and
; pcmpeqd turns it into an all-ones/all-zeros mask, which is stored.
; NOTE(review): the bare "DB 0x67" bytes put a 0x67 prefix in front of a
; register-only instruction; presumably code-size padding - confirm.
91 pshufd xmm5,xmm5,0
92 movdqa xmm4,xmm1
93 movdqa xmm2,xmm1
94 paddd xmm1,xmm0
95 pcmpeqd xmm0,xmm5
96DB 0x67
97 movdqa xmm3,xmm4
98 paddd xmm2,xmm1
99 pcmpeqd xmm1,xmm5
100 movdqa XMMWORD[112+r10],xmm0
101 movdqa xmm0,xmm4
102
103 paddd xmm3,xmm2
104 pcmpeqd xmm2,xmm5
105 movdqa XMMWORD[128+r10],xmm1
106 movdqa xmm1,xmm4
107
108 paddd xmm0,xmm3
109 pcmpeqd xmm3,xmm5
110 movdqa XMMWORD[144+r10],xmm2
111 movdqa xmm2,xmm4
112
113 paddd xmm1,xmm0
114 pcmpeqd xmm0,xmm5
115 movdqa XMMWORD[160+r10],xmm3
116 movdqa xmm3,xmm4
117 paddd xmm2,xmm1
118 pcmpeqd xmm1,xmm5
119 movdqa XMMWORD[176+r10],xmm0
120 movdqa xmm0,xmm4
121
122 paddd xmm3,xmm2
123 pcmpeqd xmm2,xmm5
124 movdqa XMMWORD[192+r10],xmm1
125 movdqa xmm1,xmm4
126
127 paddd xmm0,xmm3
128 pcmpeqd xmm3,xmm5
129 movdqa XMMWORD[208+r10],xmm2
130 movdqa xmm2,xmm4
131
132 paddd xmm1,xmm0
133 pcmpeqd xmm0,xmm5
134 movdqa XMMWORD[224+r10],xmm3
135 movdqa xmm3,xmm4
136 paddd xmm2,xmm1
137 pcmpeqd xmm1,xmm5
138 movdqa XMMWORD[240+r10],xmm0
139 movdqa xmm0,xmm4
140
141 paddd xmm3,xmm2
142 pcmpeqd xmm2,xmm5
143 movdqa XMMWORD[256+r10],xmm1
144 movdqa xmm1,xmm4
145
146 paddd xmm0,xmm3
147 pcmpeqd xmm3,xmm5
148 movdqa XMMWORD[272+r10],xmm2
149 movdqa xmm2,xmm4
150
151 paddd xmm1,xmm0
152 pcmpeqd xmm0,xmm5
153 movdqa XMMWORD[288+r10],xmm3
154 movdqa xmm3,xmm4
155 paddd xmm2,xmm1
156 pcmpeqd xmm1,xmm5
157 movdqa XMMWORD[304+r10],xmm0
158
159 paddd xmm3,xmm2
160DB 0x67
161 pcmpeqd xmm2,xmm5
162 movdqa XMMWORD[320+r10],xmm1
163
164 pcmpeqd xmm3,xmm5
165 movdqa XMMWORD[336+r10],xmm2
; First gather: the last four masks are still live in xmm0-xmm3 and are
; applied to row offsets +64..+112; the twelve stored masks cover offsets
; -128..+48.  All 256 bytes of the row are read and OR-ed together.
166 pand xmm0,XMMWORD[64+r12]
167
168 pand xmm1,XMMWORD[80+r12]
169 pand xmm2,XMMWORD[96+r12]
170 movdqa XMMWORD[352+r10],xmm3
171 pand xmm3,XMMWORD[112+r12]
172 por xmm0,xmm2
173 por xmm1,xmm3
174 movdqa xmm4,XMMWORD[((-128))+r12]
175 movdqa xmm5,XMMWORD[((-112))+r12]
176 movdqa xmm2,XMMWORD[((-96))+r12]
177 pand xmm4,XMMWORD[112+r10]
178 movdqa xmm3,XMMWORD[((-80))+r12]
179 pand xmm5,XMMWORD[128+r10]
180 por xmm0,xmm4
181 pand xmm2,XMMWORD[144+r10]
182 por xmm1,xmm5
183 pand xmm3,XMMWORD[160+r10]
184 por xmm0,xmm2
185 por xmm1,xmm3
186 movdqa xmm4,XMMWORD[((-64))+r12]
187 movdqa xmm5,XMMWORD[((-48))+r12]
188 movdqa xmm2,XMMWORD[((-32))+r12]
189 pand xmm4,XMMWORD[176+r10]
190 movdqa xmm3,XMMWORD[((-16))+r12]
191 pand xmm5,XMMWORD[192+r10]
192 por xmm0,xmm4
193 pand xmm2,XMMWORD[208+r10]
194 por xmm1,xmm5
195 pand xmm3,XMMWORD[224+r10]
196 por xmm0,xmm2
197 por xmm1,xmm3
198 movdqa xmm4,XMMWORD[r12]
199 movdqa xmm5,XMMWORD[16+r12]
200 movdqa xmm2,XMMWORD[32+r12]
201 pand xmm4,XMMWORD[240+r10]
202 movdqa xmm3,XMMWORD[48+r12]
203 pand xmm5,XMMWORD[256+r10]
204 por xmm0,xmm4
205 pand xmm2,XMMWORD[272+r10]
206 por xmm1,xmm5
207 pand xmm3,XMMWORD[288+r10]
208 por xmm0,xmm2
209 por xmm1,xmm3
; Fold the two accumulators, then OR the upper 64-bit half onto the lower
; (pshufd 0x4e swaps the halves).  r12 advances to the next 256-byte row.
; The DB bytes 66 48 0F 7E C3 encode "movq rbx,xmm0".
210 por xmm0,xmm1
211 pshufd xmm1,xmm0,0x4e
212 por xmm0,xmm1
213 lea r12,[256+r12]
214DB 102,72,15,126,195
215
; r8 = qword pointed to by arg5; rax = first word at rsi.
216 mov r8,QWORD[r8]
217 mov rax,QWORD[rsi]
218
; r14 = outer (row) counter, r15 = inner (word) counter.
219 xor r14,r14
220 xor r15,r15
221
; First pass: rbx = gathered word.  rbp = r8 * low64(rsi[0]*rbx) is the
; per-row factor applied to the words at rcx.
222 mov rbp,r8
223 mul rbx
224 mov r10,rax
225 mov rax,QWORD[rcx]
226
227 imul rbp,r10
228 mov r11,rdx
229
230 mul rbp
231 add r10,rax
232 mov rax,QWORD[8+rsi]
233 adc rdx,0
234 mov r13,rdx
235
236 lea r15,[1+r15]
237 jmp NEAR $L$1st_enter
238
239ALIGN 16
; $L$1st: for each word j, combine rsi[j]*rbx and rcx[j]*rbp with the
; running carries (r11/r10 and r13) and store the result two slots behind
; the index, at [rsp + 8*j - 16].
240$L$1st:
241 add r13,rax
242 mov rax,QWORD[r15*8+rsi]
243 adc rdx,0
244 add r13,r11
245 mov r11,r10
246 adc rdx,0
247 mov QWORD[((-16))+r15*8+rsp],r13
248 mov r13,rdx
249
250$L$1st_enter:
251 mul rbx
252 add r11,rax
253 mov rax,QWORD[r15*8+rcx]
254 adc rdx,0
255 lea r15,[1+r15]
256 mov r10,rdx
257
258 mul rbp
259 cmp r15,r9
260 jne NEAR $L$1st
261
262
; Loop tail: flush the last product and store the two top words plus the
; final carry at slots r9-2, r9-1 and r9 of the scratch area.
263 add r13,rax
264 adc rdx,0
265 add r13,r11
266 adc rdx,0
267 mov QWORD[((-16))+r9*8+rsp],r13
268 mov r13,rdx
269 mov r11,r10
270
271 xor rdx,rdx
272 add r13,r11
273 adc rdx,0
274 mov QWORD[((-8))+r9*8+rsp],r13
275 mov QWORD[r9*8+rsp],rdx
276
277 lea r14,[1+r14]
278 jmp NEAR $L$outer
279ALIGN 16
; $L$outer: one iteration per remaining row.  rdx is recomputed to address
; the same mask array that was stored above (rdx-128 equals r10+112 there),
; and the next row at r12 is gathered exactly as before.
280$L$outer:
281 lea rdx,[((24+128))+r9*8+rsp]
282 and rdx,-16
283 pxor xmm4,xmm4
284 pxor xmm5,xmm5
285 movdqa xmm0,XMMWORD[((-128))+r12]
286 movdqa xmm1,XMMWORD[((-112))+r12]
287 movdqa xmm2,XMMWORD[((-96))+r12]
288 movdqa xmm3,XMMWORD[((-80))+r12]
289 pand xmm0,XMMWORD[((-128))+rdx]
290 pand xmm1,XMMWORD[((-112))+rdx]
291 por xmm4,xmm0
292 pand xmm2,XMMWORD[((-96))+rdx]
293 por xmm5,xmm1
294 pand xmm3,XMMWORD[((-80))+rdx]
295 por xmm4,xmm2
296 por xmm5,xmm3
297 movdqa xmm0,XMMWORD[((-64))+r12]
298 movdqa xmm1,XMMWORD[((-48))+r12]
299 movdqa xmm2,XMMWORD[((-32))+r12]
300 movdqa xmm3,XMMWORD[((-16))+r12]
301 pand xmm0,XMMWORD[((-64))+rdx]
302 pand xmm1,XMMWORD[((-48))+rdx]
303 por xmm4,xmm0
304 pand xmm2,XMMWORD[((-32))+rdx]
305 por xmm5,xmm1
306 pand xmm3,XMMWORD[((-16))+rdx]
307 por xmm4,xmm2
308 por xmm5,xmm3
309 movdqa xmm0,XMMWORD[r12]
310 movdqa xmm1,XMMWORD[16+r12]
311 movdqa xmm2,XMMWORD[32+r12]
312 movdqa xmm3,XMMWORD[48+r12]
313 pand xmm0,XMMWORD[rdx]
314 pand xmm1,XMMWORD[16+rdx]
315 por xmm4,xmm0
316 pand xmm2,XMMWORD[32+rdx]
317 por xmm5,xmm1
318 pand xmm3,XMMWORD[48+rdx]
319 por xmm4,xmm2
320 por xmm5,xmm3
321 movdqa xmm0,XMMWORD[64+r12]
322 movdqa xmm1,XMMWORD[80+r12]
323 movdqa xmm2,XMMWORD[96+r12]
324 movdqa xmm3,XMMWORD[112+r12]
325 pand xmm0,XMMWORD[64+rdx]
326 pand xmm1,XMMWORD[80+rdx]
327 por xmm4,xmm0
328 pand xmm2,XMMWORD[96+rdx]
329 por xmm5,xmm1
330 pand xmm3,XMMWORD[112+rdx]
331 por xmm4,xmm2
332 por xmm5,xmm3
333 por xmm4,xmm5
334 pshufd xmm0,xmm4,0x4e
335 por xmm0,xmm4
336 lea r12,[256+r12]
337
; rbx = gathered word (DB bytes encode "movq rbx,xmm0").
338 mov rax,QWORD[rsi]
339DB 102,72,15,126,195
340
; Same shape as the first pass, but the previous scratch words at [rsp+..]
; are added in (r10 is preloaded from the scratch area).
341 xor r15,r15
342 mov rbp,r8
343 mov r10,QWORD[rsp]
344
345 mul rbx
346 add r10,rax
347 mov rax,QWORD[rcx]
348 adc rdx,0
349
350 imul rbp,r10
351 mov r11,rdx
352
353 mul rbp
354 add r10,rax
355 mov rax,QWORD[8+rsi]
356 adc rdx,0
357 mov r10,QWORD[8+rsp]
358 mov r13,rdx
359
360 lea r15,[1+r15]
361 jmp NEAR $L$inner_enter
362
363ALIGN 16
364$L$inner:
365 add r13,rax
366 mov rax,QWORD[r15*8+rsi]
367 adc rdx,0
368 add r13,r10
369 mov r10,QWORD[r15*8+rsp]
370 adc rdx,0
371 mov QWORD[((-16))+r15*8+rsp],r13
372 mov r13,rdx
373
374$L$inner_enter:
375 mul rbx
376 add r11,rax
377 mov rax,QWORD[r15*8+rcx]
378 adc rdx,0
379 add r10,r11
380 mov r11,rdx
381 adc r11,0
382 lea r15,[1+r15]
383
384 mul rbp
385 cmp r15,r9
386 jne NEAR $L$inner
387
; Inner-loop tail: also folds in the previous top word at slot r9.
388 add r13,rax
389 adc rdx,0
390 add r13,r10
391 mov r10,QWORD[r9*8+rsp]
392 adc rdx,0
393 mov QWORD[((-16))+r9*8+rsp],r13
394 mov r13,rdx
395
396 xor rdx,rdx
397 add r13,r11
398 adc rdx,0
399 add r13,r10
400 adc rdx,0
401 mov QWORD[((-8))+r9*8+rsp],r13
402 mov QWORD[r9*8+rsp],rdx
403
404 lea r14,[1+r14]
405 cmp r14,r9
406 jb NEAR $L$outer
407
; Final subtraction: destination[i] = scratch[i] - rcx[i] with borrow.
; "xor r14,r14" clears CF; the mov/lea/jmp/dec instructions in between do
; not modify CF, so the sbb chain carries the borrow across iterations.
408 xor r14,r14
409 mov rax,QWORD[rsp]
410 lea rsi,[rsp]
411 mov r15,r9
412 jmp NEAR $L$sub
413ALIGN 16
414$L$sub: sbb rax,QWORD[r14*8+rcx]
415 mov QWORD[r14*8+rdi],rax
416 mov rax,QWORD[8+r14*8+rsi]
417 lea r14,[1+r14]
418 dec r15
419 jnz NEAR $L$sub
420
; rax = top scratch word minus the final borrow, used as a select mask;
; rbx = its bitwise complement.
421 sbb rax,0
422 mov rbx,-1
423 xor rbx,rax
424 xor r14,r14
425 mov r15,r9
426
; Branch-free select: destination[i] = (destination[i] & rbx) |
; (scratch[i] & rax).  Each scratch word is overwritten with the loop index
; after it has been read.
427$L$copy:
428 mov rcx,QWORD[r14*8+rdi]
429 mov rdx,QWORD[r14*8+rsp]
430 and rcx,rbx
431 and rdx,rax
432 mov QWORD[r14*8+rsp],r14
433 or rdx,rcx
434 mov QWORD[r14*8+rdi],rdx
435 lea r14,[1+r14]
436 sub r15,1
437 jnz NEAR $L$copy
438
; Epilogue: rsi = entry rsp saved in $L$mul_body's frame; reload the six
; pushed registers relative to it, set the return value to 1 and drop the
; frame.
439 mov rsi,QWORD[8+r9*8+rsp]
440
441 mov rax,1
442
443 mov r15,QWORD[((-48))+rsi]
444
445 mov r14,QWORD[((-40))+rsi]
446
447 mov r13,QWORD[((-32))+rsi]
448
449 mov r12,QWORD[((-24))+rsi]
450
451 mov rbp,QWORD[((-16))+rsi]
452
453 mov rbx,QWORD[((-8))+rsi]
454
455 lea rsp,[rsi]
456
457$L$mul_epilogue:
458 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
459 mov rsi,QWORD[16+rsp]
460 DB 0F3h,0C3h ;repret
461
462$L$SEH_end_bn_mul_mont_gather5:
463
;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5 (not declared global in the visible code)
;
; Frame set-up and tear-down around mul4x_internal.  Same Win64 argument
; remapping as bn_mul_mont_gather5 (rdi, rsi, rdx, rcx, r8, r9 <- rcx, rdx,
; r8, r9, [40+rsp], [48+rsp]).  Returns rax = 1.
;
; NOTE(review): $L$mul4x_enter tests r11d, but r11d is only loaded by
; bn_mul_mont_gather5 before it jumps here; the fall-through from this
; function's own label does not load it.  Presumably the label is never
; called directly - confirm.
;-----------------------------------------------------------------------
464ALIGN 32
465bn_mul4x_mont_gather5:
466 mov QWORD[8+rsp],rdi ;WIN64 prologue
467 mov QWORD[16+rsp],rsi
468 mov rax,rsp
469$L$SEH_begin_bn_mul4x_mont_gather5:
470 mov rdi,rcx
471 mov rsi,rdx
472 mov rdx,r8
473 mov rcx,r9
474 mov r8,QWORD[40+rsp]
475 mov r9,QWORD[48+rsp]
476
477
478
; rax = entry rsp (0x67 prefix byte precedes the mov; presumably padding).
479DB 0x67
480 mov rax,rsp
481
482$L$mul4x_enter:
; If all three bits of mask 0x80108 are set in the capability dword, go to
; $L$mulx4x_enter (outside this view).  NOTE(review): per the OpenSSL
; OPENSSL_ia32cap documentation these would be BMI1|BMI2|ADX - confirm.
483 and r11d,0x80108
484 cmp r11d,0x80108
485 je NEAR $L$mulx4x_enter
486 push rbx
487
488 push rbp
489
490 push r12
491
492 push r13
493
494 push r14
495
496 push r15
497
498$L$mul4x_prologue:
499
; r9 = byte length (word count * 8), r10 = 3 * byte length, then r9 is
; negated so "r9*2" below subtracts twice the byte length.
500DB 0x67
501 shl r9d,3
502 lea r10,[r9*2+r9]
503 neg r9
504
505
506
507
508
509
510
511
512
513
; Pick the frame base rbp from the distance between the tentative frame
; (rsp - 320 - 2*bytes) and the destination pointer rdi, taken modulo 4096
; and compared against 3*bytes.  NOTE(review): presumably this keeps the
; frame from sharing page offsets with the caller's buffers - confirm.
514 lea r11,[((-320))+r9*2+rsp]
515 mov rbp,rsp
516 sub r11,rdi
517 and r11,4095
518 cmp r10,r11
519 jb NEAR $L$mul4xsp_alt
520 sub rbp,r11
521 lea rbp,[((-320))+r9*2+rbp]
522 jmp NEAR $L$mul4xsp_done
523
524ALIGN 32
525$L$mul4xsp_alt:
; Alternative placement: r11 = max(r11 - (4096 - 320 - 2*bytes), 0) via
; cmovc on the borrow of the subtraction.
526 lea r10,[((4096-320))+r9*2]
527 lea rbp,[((-320))+r9*2+rbp]
528 sub r11,r10
529 mov r10,0
530 cmovc r11,r10
531 sub rbp,r11
532$L$mul4xsp_done:
; Align the frame to 64 bytes, then lower rsp to it in steps of at most
; 4096 bytes, reading one qword per step.
533 and rbp,-64
534 mov r11,rsp
535 sub r11,rbp
536 and r11,-4096
537 lea rsp,[rbp*1+r11]
538 mov r10,QWORD[rsp]
539 cmp rsp,rbp
540 ja NEAR $L$mul4x_page_walk
541 jmp NEAR $L$mul4x_page_walk_done
542
543$L$mul4x_page_walk:
544 lea rsp,[((-4096))+rsp]
545 mov r10,QWORD[rsp]
546 cmp rsp,rbp
547 ja NEAR $L$mul4x_page_walk
548$L$mul4x_page_walk_done:
549
; r9 back to the positive byte length; entry rsp saved at [40+rsp].
550 neg r9
551
552 mov QWORD[40+rsp],rax
553
554$L$mul4x_body:
555
556 call mul4x_internal
557
; Epilogue: reload the pushed registers relative to the saved entry rsp,
; return 1.
558 mov rsi,QWORD[40+rsp]
559
560 mov rax,1
561
562 mov r15,QWORD[((-48))+rsi]
563
564 mov r14,QWORD[((-40))+rsi]
565
566 mov r13,QWORD[((-32))+rsi]
567
568 mov r12,QWORD[((-24))+rsi]
569
570 mov rbp,QWORD[((-16))+rsi]
571
572 mov rbx,QWORD[((-8))+rsi]
573
574 lea rsp,[rsi]
575
576$L$mul4x_epilogue:
577 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
578 mov rsi,QWORD[16+rsp]
579 DB 0F3h,0C3h ;repret
580
581$L$SEH_end_bn_mul4x_mont_gather5:
582
583
;-----------------------------------------------------------------------
; mul4x_internal (file-local helper; called from bn_mul4x_mont_gather5 and
; bn_power5)
;
; 4-way unrolled variant of the gather + multiply/reduce loop.
; Register inputs as used by the visible code:
;   rax = caller's entry rsp (the selector is read from [56+rax])
;   rdx = table base, r9 = byte length of the operands
;   rsi = first operand, rcx = second operand, r8 = pointer to one qword
;   rdi = destination pointer (parked at [(56+8)+rsp] for later)
; Stack offsets are written as "(N+8)"; presumably the +8 accounts for the
; return address pushed by the call - confirm.
; The routine does not return by itself: it ends with a jump to
; $L$sqr4x_sub_entry, which lies outside this view.
;-----------------------------------------------------------------------
584ALIGN 32
585mul4x_internal:
586
; r13 = rdx + 128 + 32*r9: the value r12 is compared against at the bottom
; of $L$outer4x to end the row loop.  r10 is set so the 16 masks land at
; [112+r10]..[352+r10]; r12 = table base + 128.
587 shl r9,5
588 movd xmm5,DWORD[56+rax]
589 lea rax,[$L$inc]
590 lea r13,[128+r9*1+rdx]
591 shr r9,5
592 movdqa xmm0,XMMWORD[rax]
593 movdqa xmm1,XMMWORD[16+rax]
594 lea r10,[((88-112))+r9*1+rsp]
595 lea r12,[128+rdx]
596
; Mask generation, same scheme as in bn_mul_mont_gather5: counters are
; advanced with paddd and compared against the broadcast selector.
; NOTE(review): the bare 0x67 bytes are prefixes on register-only
; instructions; presumably padding - confirm.
597 pshufd xmm5,xmm5,0
598 movdqa xmm4,xmm1
599DB 0x67,0x67
600 movdqa xmm2,xmm1
601 paddd xmm1,xmm0
602 pcmpeqd xmm0,xmm5
603DB 0x67
604 movdqa xmm3,xmm4
605 paddd xmm2,xmm1
606 pcmpeqd xmm1,xmm5
607 movdqa XMMWORD[112+r10],xmm0
608 movdqa xmm0,xmm4
609
610 paddd xmm3,xmm2
611 pcmpeqd xmm2,xmm5
612 movdqa XMMWORD[128+r10],xmm1
613 movdqa xmm1,xmm4
614
615 paddd xmm0,xmm3
616 pcmpeqd xmm3,xmm5
617 movdqa XMMWORD[144+r10],xmm2
618 movdqa xmm2,xmm4
619
620 paddd xmm1,xmm0
621 pcmpeqd xmm0,xmm5
622 movdqa XMMWORD[160+r10],xmm3
623 movdqa xmm3,xmm4
624 paddd xmm2,xmm1
625 pcmpeqd xmm1,xmm5
626 movdqa XMMWORD[176+r10],xmm0
627 movdqa xmm0,xmm4
628
629 paddd xmm3,xmm2
630 pcmpeqd xmm2,xmm5
631 movdqa XMMWORD[192+r10],xmm1
632 movdqa xmm1,xmm4
633
634 paddd xmm0,xmm3
635 pcmpeqd xmm3,xmm5
636 movdqa XMMWORD[208+r10],xmm2
637 movdqa xmm2,xmm4
638
639 paddd xmm1,xmm0
640 pcmpeqd xmm0,xmm5
641 movdqa XMMWORD[224+r10],xmm3
642 movdqa xmm3,xmm4
643 paddd xmm2,xmm1
644 pcmpeqd xmm1,xmm5
645 movdqa XMMWORD[240+r10],xmm0
646 movdqa xmm0,xmm4
647
648 paddd xmm3,xmm2
649 pcmpeqd xmm2,xmm5
650 movdqa XMMWORD[256+r10],xmm1
651 movdqa xmm1,xmm4
652
653 paddd xmm0,xmm3
654 pcmpeqd xmm3,xmm5
655 movdqa XMMWORD[272+r10],xmm2
656 movdqa xmm2,xmm4
657
658 paddd xmm1,xmm0
659 pcmpeqd xmm0,xmm5
660 movdqa XMMWORD[288+r10],xmm3
661 movdqa xmm3,xmm4
662 paddd xmm2,xmm1
663 pcmpeqd xmm1,xmm5
664 movdqa XMMWORD[304+r10],xmm0
665
666 paddd xmm3,xmm2
667DB 0x67
668 pcmpeqd xmm2,xmm5
669 movdqa XMMWORD[320+r10],xmm1
670
671 pcmpeqd xmm3,xmm5
672 movdqa XMMWORD[336+r10],xmm2
; First gather: AND every 16-byte entry of the row with its mask and OR the
; results together.
673 pand xmm0,XMMWORD[64+r12]
674
675 pand xmm1,XMMWORD[80+r12]
676 pand xmm2,XMMWORD[96+r12]
677 movdqa XMMWORD[352+r10],xmm3
678 pand xmm3,XMMWORD[112+r12]
679 por xmm0,xmm2
680 por xmm1,xmm3
681 movdqa xmm4,XMMWORD[((-128))+r12]
682 movdqa xmm5,XMMWORD[((-112))+r12]
683 movdqa xmm2,XMMWORD[((-96))+r12]
684 pand xmm4,XMMWORD[112+r10]
685 movdqa xmm3,XMMWORD[((-80))+r12]
686 pand xmm5,XMMWORD[128+r10]
687 por xmm0,xmm4
688 pand xmm2,XMMWORD[144+r10]
689 por xmm1,xmm5
690 pand xmm3,XMMWORD[160+r10]
691 por xmm0,xmm2
692 por xmm1,xmm3
693 movdqa xmm4,XMMWORD[((-64))+r12]
694 movdqa xmm5,XMMWORD[((-48))+r12]
695 movdqa xmm2,XMMWORD[((-32))+r12]
696 pand xmm4,XMMWORD[176+r10]
697 movdqa xmm3,XMMWORD[((-16))+r12]
698 pand xmm5,XMMWORD[192+r10]
699 por xmm0,xmm4
700 pand xmm2,XMMWORD[208+r10]
701 por xmm1,xmm5
702 pand xmm3,XMMWORD[224+r10]
703 por xmm0,xmm2
704 por xmm1,xmm3
705 movdqa xmm4,XMMWORD[r12]
706 movdqa xmm5,XMMWORD[16+r12]
707 movdqa xmm2,XMMWORD[32+r12]
708 pand xmm4,XMMWORD[240+r10]
709 movdqa xmm3,XMMWORD[48+r12]
710 pand xmm5,XMMWORD[256+r10]
711 por xmm0,xmm4
712 pand xmm2,XMMWORD[272+r10]
713 por xmm1,xmm5
714 pand xmm3,XMMWORD[288+r10]
715 por xmm0,xmm2
716 por xmm1,xmm3
; Fold halves, step r12 to the next row; DB bytes encode "movq rbx,xmm0".
717 por xmm0,xmm1
718 pshufd xmm1,xmm0,0x4e
719 por xmm0,xmm1
720 lea r12,[256+r12]
721DB 102,72,15,126,195
722
; Park the row-loop end marker and the destination pointer in the frame.
723 mov QWORD[((16+8))+rsp],r13
724 mov QWORD[((56+8))+rsp],rdi
725
; r8 = qword at [r8]; rsi is advanced by the byte length and r9 negated, so
; [K+r9+rsi] addresses the operand from its start while r15 (negative,
; counting up to zero) serves as the loop index.
726 mov r8,QWORD[r8]
727 mov rax,QWORD[rsi]
728 lea rsi,[r9*1+rsi]
729 neg r9
730
; rbp = r8 * low64(first word * rbx): per-row factor for the rcx operand.
; r14 = output cursor into the scratch area at [(64+8)+rsp].
731 mov rbp,r8
732 mul rbx
733 mov r10,rax
734 mov rax,QWORD[rcx]
735
736 imul rbp,r10
737 lea r14,[((64+8))+rsp]
738 mov r11,rdx
739
740 mul rbp
741 add r10,rax
742 mov rax,QWORD[8+r9*1+rsi]
743 adc rdx,0
744 mov rdi,rdx
745
746 mul rbx
747 add r11,rax
748 mov rax,QWORD[8+rcx]
749 adc rdx,0
750 mov r10,rdx
751
752 mul rbp
753 add rdi,rax
754 mov rax,QWORD[16+r9*1+rsi]
755 adc rdx,0
756 add rdi,r11
757 lea r15,[32+r9]
758 lea rcx,[32+rcx]
759 adc rdx,0
760 mov QWORD[r14],rdi
761 mov r13,rdx
762 jmp NEAR $L$1st4x
763
764ALIGN 32
; $L$1st4x: first row, four words per iteration.  r10/r11 alternate as the
; rsi*rbx carry, rdi/r13 alternate as the rcx*rbp accumulator; r15 runs
; from negative up to zero in steps of 32 bytes.
765$L$1st4x:
766 mul rbx
767 add r10,rax
768 mov rax,QWORD[((-16))+rcx]
769 lea r14,[32+r14]
770 adc rdx,0
771 mov r11,rdx
772
773 mul rbp
774 add r13,rax
775 mov rax,QWORD[((-8))+r15*1+rsi]
776 adc rdx,0
777 add r13,r10
778 adc rdx,0
779 mov QWORD[((-24))+r14],r13
780 mov rdi,rdx
781
782 mul rbx
783 add r11,rax
784 mov rax,QWORD[((-8))+rcx]
785 adc rdx,0
786 mov r10,rdx
787
788 mul rbp
789 add rdi,rax
790 mov rax,QWORD[r15*1+rsi]
791 adc rdx,0
792 add rdi,r11
793 adc rdx,0
794 mov QWORD[((-16))+r14],rdi
795 mov r13,rdx
796
797 mul rbx
798 add r10,rax
799 mov rax,QWORD[rcx]
800 adc rdx,0
801 mov r11,rdx
802
803 mul rbp
804 add r13,rax
805 mov rax,QWORD[8+r15*1+rsi]
806 adc rdx,0
807 add r13,r10
808 adc rdx,0
809 mov QWORD[((-8))+r14],r13
810 mov rdi,rdx
811
812 mul rbx
813 add r11,rax
814 mov rax,QWORD[8+rcx]
815 adc rdx,0
816 mov r10,rdx
817
818 mul rbp
819 add rdi,rax
820 mov rax,QWORD[16+r15*1+rsi]
821 adc rdx,0
822 add rdi,r11
823 lea rcx,[32+rcx]
824 adc rdx,0
825 mov QWORD[r14],rdi
826 mov r13,rdx
827
828 add r15,32
829 jnz NEAR $L$1st4x
830
; Tail of the first row: last two words, then rewind rcx by the byte length
; (r9 is negative) and keep the top carry in rdi.
831 mul rbx
832 add r10,rax
833 mov rax,QWORD[((-16))+rcx]
834 lea r14,[32+r14]
835 adc rdx,0
836 mov r11,rdx
837
838 mul rbp
839 add r13,rax
840 mov rax,QWORD[((-8))+rsi]
841 adc rdx,0
842 add r13,r10
843 adc rdx,0
844 mov QWORD[((-24))+r14],r13
845 mov rdi,rdx
846
847 mul rbx
848 add r11,rax
849 mov rax,QWORD[((-8))+rcx]
850 adc rdx,0
851 mov r10,rdx
852
853 mul rbp
854 add rdi,rax
855 mov rax,QWORD[r9*1+rsi]
856 adc rdx,0
857 add rdi,r11
858 adc rdx,0
859 mov QWORD[((-16))+r14],rdi
860 mov r13,rdx
861
862 lea rcx,[r9*1+rcx]
863
864 xor rdi,rdi
865 add r13,r10
866 adc rdi,0
867 mov QWORD[((-8))+r14],r13
868
869 jmp NEAR $L$outer4x
870
871ALIGN 32
; $L$outer4x: one iteration per remaining table row.  rdx is derived from
; the output cursor r14 to address the mask array; the row at r12 is
; gathered with the same AND/OR scheme.
872$L$outer4x:
873 lea rdx,[((16+128))+r14]
874 pxor xmm4,xmm4
875 pxor xmm5,xmm5
876 movdqa xmm0,XMMWORD[((-128))+r12]
877 movdqa xmm1,XMMWORD[((-112))+r12]
878 movdqa xmm2,XMMWORD[((-96))+r12]
879 movdqa xmm3,XMMWORD[((-80))+r12]
880 pand xmm0,XMMWORD[((-128))+rdx]
881 pand xmm1,XMMWORD[((-112))+rdx]
882 por xmm4,xmm0
883 pand xmm2,XMMWORD[((-96))+rdx]
884 por xmm5,xmm1
885 pand xmm3,XMMWORD[((-80))+rdx]
886 por xmm4,xmm2
887 por xmm5,xmm3
888 movdqa xmm0,XMMWORD[((-64))+r12]
889 movdqa xmm1,XMMWORD[((-48))+r12]
890 movdqa xmm2,XMMWORD[((-32))+r12]
891 movdqa xmm3,XMMWORD[((-16))+r12]
892 pand xmm0,XMMWORD[((-64))+rdx]
893 pand xmm1,XMMWORD[((-48))+rdx]
894 por xmm4,xmm0
895 pand xmm2,XMMWORD[((-32))+rdx]
896 por xmm5,xmm1
897 pand xmm3,XMMWORD[((-16))+rdx]
898 por xmm4,xmm2
899 por xmm5,xmm3
900 movdqa xmm0,XMMWORD[r12]
901 movdqa xmm1,XMMWORD[16+r12]
902 movdqa xmm2,XMMWORD[32+r12]
903 movdqa xmm3,XMMWORD[48+r12]
904 pand xmm0,XMMWORD[rdx]
905 pand xmm1,XMMWORD[16+rdx]
906 por xmm4,xmm0
907 pand xmm2,XMMWORD[32+rdx]
908 por xmm5,xmm1
909 pand xmm3,XMMWORD[48+rdx]
910 por xmm4,xmm2
911 por xmm5,xmm3
912 movdqa xmm0,XMMWORD[64+r12]
913 movdqa xmm1,XMMWORD[80+r12]
914 movdqa xmm2,XMMWORD[96+r12]
915 movdqa xmm3,XMMWORD[112+r12]
916 pand xmm0,XMMWORD[64+rdx]
917 pand xmm1,XMMWORD[80+rdx]
918 por xmm4,xmm0
919 pand xmm2,XMMWORD[96+rdx]
920 por xmm5,xmm1
921 pand xmm3,XMMWORD[112+rdx]
922 por xmm4,xmm2
923 por xmm5,xmm3
924 por xmm4,xmm5
925 pshufd xmm0,xmm4,0x4e
926 por xmm0,xmm4
927 lea r12,[256+r12]
; rbx = gathered word (DB bytes encode "movq rbx,xmm0").
928DB 102,72,15,126,195
929
; Start of the row: r10 = first scratch word of the previous result; the
; carried-over top word (rdi) is stored at [r14] before r14 is rewound by
; the byte length.
930 mov r10,QWORD[r9*1+r14]
931 mov rbp,r8
932 mul rbx
933 add r10,rax
934 mov rax,QWORD[rcx]
935 adc rdx,0
936
937 imul rbp,r10
938 mov r11,rdx
939 mov QWORD[r14],rdi
940
941 lea r14,[r9*1+r14]
942
943 mul rbp
944 add r10,rax
945 mov rax,QWORD[8+r9*1+rsi]
946 adc rdx,0
947 mov rdi,rdx
948
949 mul rbx
950 add r11,rax
951 mov rax,QWORD[8+rcx]
952 adc rdx,0
953 add r11,QWORD[8+r14]
954 adc rdx,0
955 mov r10,rdx
956
957 mul rbp
958 add rdi,rax
959 mov rax,QWORD[16+r9*1+rsi]
960 adc rdx,0
961 add rdi,r11
962 lea r15,[32+r9]
963 lea rcx,[32+rcx]
964 adc rdx,0
965 mov r13,rdx
966 jmp NEAR $L$inner4x
967
968ALIGN 32
; $L$inner4x: like $L$1st4x, but each step also adds the previous scratch
; word read from [..+r14]; results are stored one slot behind.
969$L$inner4x:
970 mul rbx
971 add r10,rax
972 mov rax,QWORD[((-16))+rcx]
973 adc rdx,0
974 add r10,QWORD[16+r14]
975 lea r14,[32+r14]
976 adc rdx,0
977 mov r11,rdx
978
979 mul rbp
980 add r13,rax
981 mov rax,QWORD[((-8))+r15*1+rsi]
982 adc rdx,0
983 add r13,r10
984 adc rdx,0
985 mov QWORD[((-32))+r14],rdi
986 mov rdi,rdx
987
988 mul rbx
989 add r11,rax
990 mov rax,QWORD[((-8))+rcx]
991 adc rdx,0
992 add r11,QWORD[((-8))+r14]
993 adc rdx,0
994 mov r10,rdx
995
996 mul rbp
997 add rdi,rax
998 mov rax,QWORD[r15*1+rsi]
999 adc rdx,0
1000 add rdi,r11
1001 adc rdx,0
1002 mov QWORD[((-24))+r14],r13
1003 mov r13,rdx
1004
1005 mul rbx
1006 add r10,rax
1007 mov rax,QWORD[rcx]
1008 adc rdx,0
1009 add r10,QWORD[r14]
1010 adc rdx,0
1011 mov r11,rdx
1012
1013 mul rbp
1014 add r13,rax
1015 mov rax,QWORD[8+r15*1+rsi]
1016 adc rdx,0
1017 add r13,r10
1018 adc rdx,0
1019 mov QWORD[((-16))+r14],rdi
1020 mov rdi,rdx
1021
1022 mul rbx
1023 add r11,rax
1024 mov rax,QWORD[8+rcx]
1025 adc rdx,0
1026 add r11,QWORD[8+r14]
1027 adc rdx,0
1028 mov r10,rdx
1029
1030 mul rbp
1031 add rdi,rax
1032 mov rax,QWORD[16+r15*1+rsi]
1033 adc rdx,0
1034 add rdi,r11
1035 lea rcx,[32+rcx]
1036 adc rdx,0
1037 mov QWORD[((-8))+r14],r13
1038 mov r13,rdx
1039
1040 add r15,32
1041 jnz NEAR $L$inner4x
1042
; Tail of the row.
1043 mul rbx
1044 add r10,rax
1045 mov rax,QWORD[((-16))+rcx]
1046 adc rdx,0
1047 add r10,QWORD[16+r14]
1048 lea r14,[32+r14]
1049 adc rdx,0
1050 mov r11,rdx
1051
1052 mul rbp
1053 add r13,rax
1054 mov rax,QWORD[((-8))+rsi]
1055 adc rdx,0
1056 add r13,r10
1057 adc rdx,0
1058 mov QWORD[((-32))+r14],rdi
1059 mov rdi,rdx
1060
; Operand swap for the last product: rax takes the row factor (old rbp) and
; rbp takes the last rcx word, so the following "mul rbp" still computes
; factor * word while leaving that word in rbp for the check after the
; loop.
1061 mul rbx
1062 add r11,rax
1063 mov rax,rbp
1064 mov rbp,QWORD[((-8))+rcx]
1065 adc rdx,0
1066 add r11,QWORD[((-8))+r14]
1067 adc rdx,0
1068 mov r10,rdx
1069
1070 mul rbp
1071 add rdi,rax
1072 mov rax,QWORD[r9*1+rsi]
1073 adc rdx,0
1074 add rdi,r11
1075 adc rdx,0
1076 mov QWORD[((-24))+r14],r13
1077 mov r13,rdx
1078
1079 mov QWORD[((-16))+r14],rdi
1080 lea rcx,[r9*1+rcx]
1081
1082 xor rdi,rdi
1083 add r13,r10
1084 adc rdi,0
1085 add r13,QWORD[r14]
1086 adc rdi,0
1087 mov QWORD[((-8))+r14],r13
1088
; Loop until r12 reaches the end marker stored at [(16+8)+rsp].
1089 cmp r12,QWORD[((16+8))+rsp]
1090 jb NEAR $L$outer4x
; Hand-off: rax = -(rdi | borrow of (rbp - r13)), where rbp is the last rcx
; word and r13 the top result word (r15 is zero when "adc r15,r15" runs,
; so it becomes that borrow).  Then set up rbx = start of the result,
; rbp = rcx operand, rcx = r9 >> 5 (arithmetic, r9 is negative),
; rdi = destination, r12-r15 = first four rcx words (r12 decremented), and
; continue at $L$sqr4x_sub_entry (outside this view; presumably the
; conditional-subtraction tail - confirm).
1091 xor rax,rax
1092 sub rbp,r13
1093 adc r15,r15
1094 or rdi,r15
1095 sub rax,rdi
1096 lea rbx,[r9*1+r14]
1097 mov r12,QWORD[rcx]
1098 lea rbp,[rcx]
1099 mov rcx,r9
1100 sar rcx,3+2
1101 mov rdi,QWORD[((56+8))+rsp]
1102 dec r12
1103 xor r10,r10
1104 mov r13,QWORD[8+rbp]
1105 mov r14,QWORD[16+rbp]
1106 mov r15,QWORD[24+rbp]
1107 jmp NEAR $L$sqr4x_sub_entry
1108
1109
;-----------------------------------------------------------------------
; bn_power5 (exported)
;
; Runs the pair __bn_sqr8x_internal / __bn_post4x_internal five times and
; then calls mul4x_internal once (five squarings followed by one gathered
; multiplication, as the name suggests).
; ABI: Microsoft x64 on entry; arguments are remapped to rdi, rsi, rdx,
; rcx, r8, r9 exactly as in bn_mul_mont_gather5.  Returns rax = 1.
; rbx, rbp, r12-r15 are pushed and restored; rdi/rsi are kept in the
; caller's home slots.
;-----------------------------------------------------------------------
1110global bn_power5
1111
1112ALIGN 32
1113bn_power5:
1114 mov QWORD[8+rsp],rdi ;WIN64 prologue
1115 mov QWORD[16+rsp],rsi
1116 mov rax,rsp
1117$L$SEH_begin_bn_power5:
1118 mov rdi,rcx
1119 mov rsi,rdx
1120 mov rdx,r8
1121 mov rcx,r9
1122 mov r8,QWORD[40+rsp]
1123 mov r9,QWORD[48+rsp]
1124
1125
1126
; rax = entry rsp, kept for the epilogue.
1127 mov rax,rsp
1128
; If all bits of mask 0x80108 are set in the capability dword at
; OPENSSL_ia32cap_P+8, use $L$powerx5_enter (outside this view).
; NOTE(review): per the OpenSSL OPENSSL_ia32cap documentation these would
; be BMI1|BMI2|ADX - confirm.
1129 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
1130 and r11d,0x80108
1131 cmp r11d,0x80108
1132 je NEAR $L$powerx5_enter
1133 push rbx
1134
1135 push rbp
1136
1137 push r12
1138
1139 push r13
1140
1141 push r14
1142
1143 push r15
1144
1145$L$power5_prologue:
1146
; r9 = byte length (word count * 8), r10 = 3 * byte length (32-bit lea,
; upper half zeroed), r9 negated for the frame arithmetic; r8 = qword at
; [r8].
1147 shl r9d,3
1148 lea r10d,[r9*2+r9]
1149 neg r9
1150 mov r8,QWORD[r8]
1151
1152
1153
1154
1155
1156
1157
1158
; Frame placement: same computation as in bn_mul4x_mont_gather5 (distance
; to rdi modulo 4096 compared against 3*bytes).  NOTE(review): presumably
; avoids page-offset aliasing with the caller's buffers - confirm.
1159 lea r11,[((-320))+r9*2+rsp]
1160 mov rbp,rsp
1161 sub r11,rdi
1162 and r11,4095
1163 cmp r10,r11
1164 jb NEAR $L$pwr_sp_alt
1165 sub rbp,r11
1166 lea rbp,[((-320))+r9*2+rbp]
1167 jmp NEAR $L$pwr_sp_done
1168
1169ALIGN 32
1170$L$pwr_sp_alt:
1171 lea r10,[((4096-320))+r9*2]
1172 lea rbp,[((-320))+r9*2+rbp]
1173 sub r11,r10
1174 mov r10,0
1175 cmovc r11,r10
1176 sub rbp,r11
1177$L$pwr_sp_done:
; 64-byte align the frame and lower rsp to it in steps of at most 4096
; bytes, reading one qword per step.
1178 and rbp,-64
1179 mov r11,rsp
1180 sub r11,rbp
1181 and r11,-4096
1182 lea rsp,[rbp*1+r11]
1183 mov r10,QWORD[rsp]
1184 cmp rsp,rbp
1185 ja NEAR $L$pwr_page_walk
1186 jmp NEAR $L$pwr_page_walk_done
1187
1188$L$pwr_page_walk:
1189 lea rsp,[((-4096))+rsp]
1190 mov r10,QWORD[rsp]
1191 cmp rsp,rbp
1192 ja NEAR $L$pwr_page_walk
1193$L$pwr_page_walk_done:
1194
; r10 = negative byte length, r9 = positive byte length.
1195 mov r10,r9
1196 neg r9
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
; Frame slots: [32+rsp] = the qword loaded from arg5, [40+rsp] = entry rsp.
1207 mov QWORD[32+rsp],r8
1208 mov QWORD[40+rsp],rax
1209
1210$L$power5_body:
; Stash registers in xmm1-xmm4 across the calls.  The DB sequences encode:
;   movq xmm1,rdi / movq xmm2,rcx / movq xmm3,r10 / movq xmm4,rdx
1211DB 102,72,15,110,207
1212DB 102,72,15,110,209
1213DB 102,73,15,110,218
1214DB 102,72,15,110,226
1215
1216 call __bn_sqr8x_internal
1217 call __bn_post4x_internal
1218 call __bn_sqr8x_internal
1219 call __bn_post4x_internal
1220 call __bn_sqr8x_internal
1221 call __bn_post4x_internal
1222 call __bn_sqr8x_internal
1223 call __bn_post4x_internal
1224 call __bn_sqr8x_internal
1225 call __bn_post4x_internal
1226
; Restore rcx and rdx (DB sequences encode "movq rcx,xmm2" and
; "movq rdx,xmm4"), then set up mul4x_internal: rdi = rsi, rax = saved
; entry rsp, r8 = address of the qword copy at [32+rsp] (mul4x_internal
; dereferences r8).
1227DB 102,72,15,126,209
1228DB 102,72,15,126,226
1229 mov rdi,rsi
1230 mov rax,QWORD[40+rsp]
1231 lea r8,[32+rsp]
1232
1233 call mul4x_internal
1234
; Epilogue: reload the pushed registers relative to the saved entry rsp,
; return 1.
1235 mov rsi,QWORD[40+rsp]
1236
1237 mov rax,1
1238 mov r15,QWORD[((-48))+rsi]
1239
1240 mov r14,QWORD[((-40))+rsi]
1241
1242 mov r13,QWORD[((-32))+rsi]
1243
1244 mov r12,QWORD[((-24))+rsi]
1245
1246 mov rbp,QWORD[((-16))+rsi]
1247
1248 mov rbx,QWORD[((-8))+rsi]
1249
1250 lea rsp,[rsi]
1251
1252$L$power5_epilogue:
1253 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1254 mov rsi,QWORD[16+rsp]
1255 DB 0F3h,0C3h ;repret
1256
1257$L$SEH_end_bn_power5:
1258
1259global bn_sqr8x_internal
1260
1261
1262ALIGN 32
1263bn_sqr8x_internal:
1264__bn_sqr8x_internal:
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339 lea rbp,[32+r10]
1340 lea rsi,[r9*1+rsi]
1341
1342 mov rcx,r9
1343
1344
1345 mov r14,QWORD[((-32))+rbp*1+rsi]
1346 lea rdi,[((48+8))+r9*2+rsp]
1347 mov rax,QWORD[((-24))+rbp*1+rsi]
1348 lea rdi,[((-32))+rbp*1+rdi]
1349 mov rbx,QWORD[((-16))+rbp*1+rsi]
1350 mov r15,rax
1351
1352 mul r14
1353 mov r10,rax
1354 mov rax,rbx
1355 mov r11,rdx
1356 mov QWORD[((-24))+rbp*1+rdi],r10
1357
1358 mul r14
1359 add r11,rax
1360 mov rax,rbx
1361 adc rdx,0
1362 mov QWORD[((-16))+rbp*1+rdi],r11
1363 mov r10,rdx
1364
1365
1366 mov rbx,QWORD[((-8))+rbp*1+rsi]
1367 mul r15
1368 mov r12,rax
1369 mov rax,rbx
1370 mov r13,rdx
1371
1372 lea rcx,[rbp]
1373 mul r14
1374 add r10,rax
1375 mov rax,rbx
1376 mov r11,rdx
1377 adc r11,0
1378 add r10,r12
1379 adc r11,0
1380 mov QWORD[((-8))+rcx*1+rdi],r10
1381 jmp NEAR $L$sqr4x_1st
1382
1383ALIGN 32
1384$L$sqr4x_1st:
1385 mov rbx,QWORD[rcx*1+rsi]
1386 mul r15
1387 add r13,rax
1388 mov rax,rbx
1389 mov r12,rdx
1390 adc r12,0
1391
1392 mul r14
1393 add r11,rax
1394 mov rax,rbx
1395 mov rbx,QWORD[8+rcx*1+rsi]
1396 mov r10,rdx
1397 adc r10,0
1398 add r11,r13
1399 adc r10,0
1400
1401
1402 mul r15
1403 add r12,rax
1404 mov rax,rbx
1405 mov QWORD[rcx*1+rdi],r11
1406 mov r13,rdx
1407 adc r13,0
1408
1409 mul r14
1410 add r10,rax
1411 mov rax,rbx
1412 mov rbx,QWORD[16+rcx*1+rsi]
1413 mov r11,rdx
1414 adc r11,0
1415 add r10,r12
1416 adc r11,0
1417
1418 mul r15
1419 add r13,rax
1420 mov rax,rbx
1421 mov QWORD[8+rcx*1+rdi],r10
1422 mov r12,rdx
1423 adc r12,0
1424
1425 mul r14
1426 add r11,rax
1427 mov rax,rbx
1428 mov rbx,QWORD[24+rcx*1+rsi]
1429 mov r10,rdx
1430 adc r10,0
1431 add r11,r13
1432 adc r10,0
1433
1434
1435 mul r15
1436 add r12,rax
1437 mov rax,rbx
1438 mov QWORD[16+rcx*1+rdi],r11
1439 mov r13,rdx
1440 adc r13,0
1441 lea rcx,[32+rcx]
1442
1443 mul r14
1444 add r10,rax
1445 mov rax,rbx
1446 mov r11,rdx
1447 adc r11,0
1448 add r10,r12
1449 adc r11,0
1450 mov QWORD[((-8))+rcx*1+rdi],r10
1451
1452 cmp rcx,0
1453 jne NEAR $L$sqr4x_1st
1454
1455 mul r15
1456 add r13,rax
1457 lea rbp,[16+rbp]
1458 adc rdx,0
1459 add r13,r11
1460 adc rdx,0
1461
1462 mov QWORD[rdi],r13
1463 mov r12,rdx
1464 mov QWORD[8+rdi],rdx
1465 jmp NEAR $L$sqr4x_outer
1466
1467ALIGN 32
1468$L$sqr4x_outer:
1469 mov r14,QWORD[((-32))+rbp*1+rsi]
1470 lea rdi,[((48+8))+r9*2+rsp]
1471 mov rax,QWORD[((-24))+rbp*1+rsi]
1472 lea rdi,[((-32))+rbp*1+rdi]
1473 mov rbx,QWORD[((-16))+rbp*1+rsi]
1474 mov r15,rax
1475
1476 mul r14
1477 mov r10,QWORD[((-24))+rbp*1+rdi]
1478 add r10,rax
1479 mov rax,rbx
1480 adc rdx,0
1481 mov QWORD[((-24))+rbp*1+rdi],r10
1482 mov r11,rdx
1483
1484 mul r14
1485 add r11,rax
1486 mov rax,rbx
1487 adc rdx,0
1488 add r11,QWORD[((-16))+rbp*1+rdi]
1489 mov r10,rdx
1490 adc r10,0
1491 mov QWORD[((-16))+rbp*1+rdi],r11
1492
1493 xor r12,r12
1494
1495 mov rbx,QWORD[((-8))+rbp*1+rsi]
1496 mul r15
1497 add r12,rax
1498 mov rax,rbx
1499 adc rdx,0
1500 add r12,QWORD[((-8))+rbp*1+rdi]
1501 mov r13,rdx
1502 adc r13,0
1503
1504 mul r14
1505 add r10,rax
1506 mov rax,rbx
1507 adc rdx,0
1508 add r10,r12
1509 mov r11,rdx
1510 adc r11,0
1511 mov QWORD[((-8))+rbp*1+rdi],r10
1512
1513 lea rcx,[rbp]
1514 jmp NEAR $L$sqr4x_inner
1515
1516ALIGN 32
1517$L$sqr4x_inner:
1518 mov rbx,QWORD[rcx*1+rsi]
1519 mul r15
1520 add r13,rax
1521 mov rax,rbx
1522 mov r12,rdx
1523 adc r12,0
1524 add r13,QWORD[rcx*1+rdi]
1525 adc r12,0
1526
1527DB 0x67
1528 mul r14
1529 add r11,rax
1530 mov rax,rbx
1531 mov rbx,QWORD[8+rcx*1+rsi]
1532 mov r10,rdx
1533 adc r10,0
1534 add r11,r13
1535 adc r10,0
1536
1537 mul r15
1538 add r12,rax
1539 mov QWORD[rcx*1+rdi],r11
1540 mov rax,rbx
1541 mov r13,rdx
1542 adc r13,0
1543 add r12,QWORD[8+rcx*1+rdi]
1544 lea rcx,[16+rcx]
1545 adc r13,0
1546
1547 mul r14
1548 add r10,rax
1549 mov rax,rbx
1550 adc rdx,0
1551 add r10,r12
1552 mov r11,rdx
1553 adc r11,0
1554 mov QWORD[((-8))+rcx*1+rdi],r10
1555
1556 cmp rcx,0
1557 jne NEAR $L$sqr4x_inner
1558
1559DB 0x67
1560 mul r15
1561 add r13,rax
1562 adc rdx,0
1563 add r13,r11
1564 adc rdx,0
1565
1566 mov QWORD[rdi],r13
1567 mov r12,rdx
1568 mov QWORD[8+rdi],rdx
1569
1570 add rbp,16
1571 jnz NEAR $L$sqr4x_outer
1572
1573
1574 mov r14,QWORD[((-32))+rsi]
1575 lea rdi,[((48+8))+r9*2+rsp]
1576 mov rax,QWORD[((-24))+rsi]
1577 lea rdi,[((-32))+rbp*1+rdi]
1578 mov rbx,QWORD[((-16))+rsi]
1579 mov r15,rax
1580
1581 mul r14
1582 add r10,rax
1583 mov rax,rbx
1584 mov r11,rdx
1585 adc r11,0
1586
1587 mul r14
1588 add r11,rax
1589 mov rax,rbx
1590 mov QWORD[((-24))+rdi],r10
1591 mov r10,rdx
1592 adc r10,0
1593 add r11,r13
1594 mov rbx,QWORD[((-8))+rsi]
1595 adc r10,0
1596
1597 mul r15
1598 add r12,rax
1599 mov rax,rbx
1600 mov QWORD[((-16))+rdi],r11
1601 mov r13,rdx
1602 adc r13,0
1603
1604 mul r14
1605 add r10,rax
1606 mov rax,rbx
1607 mov r11,rdx
1608 adc r11,0
1609 add r10,r12
1610 adc r11,0
1611 mov QWORD[((-8))+rdi],r10
1612
1613 mul r15
1614 add r13,rax
1615 mov rax,QWORD[((-16))+rsi]
1616 adc rdx,0
1617 add r13,r11
1618 adc rdx,0
1619
1620 mov QWORD[rdi],r13
1621 mov r12,rdx
1622 mov QWORD[8+rdi],rdx
1623
1624 mul rbx
1625 add rbp,16
1626 xor r14,r14
1627 sub rbp,r9
1628 xor r15,r15
1629
1630 add rax,r12
1631 adc rdx,0
1632 mov QWORD[8+rdi],rax
1633 mov QWORD[16+rdi],rdx
1634 mov QWORD[24+rdi],r15
1635
1636 mov rax,QWORD[((-16))+rbp*1+rsi]
1637 lea rdi,[((48+8))+rsp]
1638 xor r10,r10
1639 mov r11,QWORD[8+rdi]
1640
1641 lea r12,[r10*2+r14]
1642 shr r10,63
1643 lea r13,[r11*2+rcx]
1644 shr r11,63
1645 or r13,r10
1646 mov r10,QWORD[16+rdi]
1647 mov r14,r11
1648 mul rax
1649 neg r15
1650 mov r11,QWORD[24+rdi]
1651 adc r12,rax
1652 mov rax,QWORD[((-8))+rbp*1+rsi]
1653 mov QWORD[rdi],r12
1654 adc r13,rdx
1655
1656 lea rbx,[r10*2+r14]
1657 mov QWORD[8+rdi],r13
1658 sbb r15,r15
1659 shr r10,63
1660 lea r8,[r11*2+rcx]
1661 shr r11,63
1662 or r8,r10
1663 mov r10,QWORD[32+rdi]
1664 mov r14,r11
1665 mul rax
1666 neg r15
1667 mov r11,QWORD[40+rdi]
1668 adc rbx,rax
1669 mov rax,QWORD[rbp*1+rsi]
1670 mov QWORD[16+rdi],rbx
1671 adc r8,rdx
1672 lea rbp,[16+rbp]
1673 mov QWORD[24+rdi],r8
1674 sbb r15,r15
1675 lea rdi,[64+rdi]
1676 jmp NEAR $L$sqr4x_shift_n_add
1677
1678ALIGN 32
1679$L$sqr4x_shift_n_add:
1680 lea r12,[r10*2+r14]
1681 shr r10,63
1682 lea r13,[r11*2+rcx]
1683 shr r11,63
1684 or r13,r10
1685 mov r10,QWORD[((-16))+rdi]
1686 mov r14,r11
1687 mul rax
1688 neg r15
1689 mov r11,QWORD[((-8))+rdi]
1690 adc r12,rax
1691 mov rax,QWORD[((-8))+rbp*1+rsi]
1692 mov QWORD[((-32))+rdi],r12
1693 adc r13,rdx
1694
1695 lea rbx,[r10*2+r14]
1696 mov QWORD[((-24))+rdi],r13
1697 sbb r15,r15
1698 shr r10,63
1699 lea r8,[r11*2+rcx]
1700 shr r11,63
1701 or r8,r10
1702 mov r10,QWORD[rdi]
1703 mov r14,r11
1704 mul rax
1705 neg r15
1706 mov r11,QWORD[8+rdi]
1707 adc rbx,rax
1708 mov rax,QWORD[rbp*1+rsi]
1709 mov QWORD[((-16))+rdi],rbx
1710 adc r8,rdx
1711
1712 lea r12,[r10*2+r14]
1713 mov QWORD[((-8))+rdi],r8
1714 sbb r15,r15
1715 shr r10,63
1716 lea r13,[r11*2+rcx]
1717 shr r11,63
1718 or r13,r10
1719 mov r10,QWORD[16+rdi]
1720 mov r14,r11
1721 mul rax
1722 neg r15
1723 mov r11,QWORD[24+rdi]
1724 adc r12,rax
1725 mov rax,QWORD[8+rbp*1+rsi]
1726 mov QWORD[rdi],r12
1727 adc r13,rdx
1728
1729 lea rbx,[r10*2+r14]
1730 mov QWORD[8+rdi],r13
1731 sbb r15,r15
1732 shr r10,63
1733 lea r8,[r11*2+rcx]
1734 shr r11,63
1735 or r8,r10
1736 mov r10,QWORD[32+rdi]
1737 mov r14,r11
1738 mul rax
1739 neg r15
1740 mov r11,QWORD[40+rdi]
1741 adc rbx,rax
1742 mov rax,QWORD[16+rbp*1+rsi]
1743 mov QWORD[16+rdi],rbx
1744 adc r8,rdx
1745 mov QWORD[24+rdi],r8
1746 sbb r15,r15
1747 lea rdi,[64+rdi]
1748 add rbp,32
1749 jnz NEAR $L$sqr4x_shift_n_add
1750
1751 lea r12,[r10*2+r14]
1752DB 0x67
1753 shr r10,63
1754 lea r13,[r11*2+rcx]
1755 shr r11,63
1756 or r13,r10
1757 mov r10,QWORD[((-16))+rdi]
1758 mov r14,r11
1759 mul rax
1760 neg r15
1761 mov r11,QWORD[((-8))+rdi]
1762 adc r12,rax
1763 mov rax,QWORD[((-8))+rsi]
1764 mov QWORD[((-32))+rdi],r12
1765 adc r13,rdx
1766
1767 lea rbx,[r10*2+r14]
1768 mov QWORD[((-24))+rdi],r13
1769 sbb r15,r15
1770 shr r10,63
1771 lea r8,[r11*2+rcx]
1772 shr r11,63
1773 or r8,r10
1774 mul rax
1775 neg r15
1776 adc rbx,rax
1777 adc r8,rdx
1778 mov QWORD[((-16))+rdi],rbx
1779 mov QWORD[((-8))+rdi],r8
1780DB 102,72,15,126,213
; __bn_sqr8x_reduction
; Reduces the quadword buffer that starts at [56+rsp], 64 bytes per outer
; iteration, using the quadword table at rbp and the multiplier kept at
; [40+rsp] (presumably the Montgomery constant n0 - confirm against caller).
; In:  rbp = table pointer, r9 = byte length, xmm2/xmm3 = values restored
;      into rbp/r9 at the end of every outer iteration.
; Stack slots written here: [8+rsp] = rbp+r9 (table end), [16+rsp] = end of
; the buffer being reduced.  Clobbers rax,rbx,rcx,rdx,rsi,rdi,r8-r15,flags.
1781__bn_sqr8x_reduction:
1782 xor rax,rax
1783 lea rcx,[rbp*1+r9]
1784 lea rdx,[((48+8))+r9*2+rsp]
1785 mov QWORD[((0+8))+rsp],rcx
1786 lea rdi,[((48+8))+r9*1+rsp]
1787 mov QWORD[((8+8))+rsp],rdx
1788 neg r9
1789 jmp NEAR $L$8x_reduction_loop
1790
1791ALIGN 32
; Outer loop: rdi is rewound by r9 (negative here), then the next eight
; quadwords of the buffer are loaded into rbx,r9..r15.
1792$L$8x_reduction_loop:
1793 lea rdi,[r9*1+rdi]
; DB 0x66 / DB 0x67 below are prefix bytes emitted in front of the next
; instruction (presumably code-alignment padding).
1794DB 0x66
1795 mov rbx,QWORD[rdi]
1796 mov r9,QWORD[8+rdi]
1797 mov r10,QWORD[16+rdi]
1798 mov r11,QWORD[24+rdi]
1799 mov r12,QWORD[32+rdi]
1800 mov r13,QWORD[40+rdi]
1801 mov r14,QWORD[48+rdi]
1802 mov r15,QWORD[56+rdi]
; rax (carry out of the previous iteration, 0 on the first) is parked at
; the end-of-buffer slot and picked up again in $L$8x_tail_done.
1803 mov QWORD[rdx],rax
1804 lea rdi,[64+rdi]
1805
1806DB 0x67
1807 mov r8,rbx
; rbx = low word * [40+rsp]: the multiplier for the first reduce pass.
1808 imul rbx,QWORD[((32+8))+rsp]
1809 mov rax,QWORD[rbp]
1810 mov ecx,8
1811 jmp NEAR $L$8x_reduce
1812
1813ALIGN 32
; Eight passes (ecx = 8..1).  Each pass multiplies the eight table words at
; rbp by rbx and accumulates into r8..r15, shifting the window by one word.
1814$L$8x_reduce:
1815 mul rbx
1816 mov rax,QWORD[8+rbp]
; neg sets CF = (r8 != 0); the low product word itself is discarded and
; only that carry is folded into the next word.
1817 neg r8
1818 mov r8,rdx
1819 adc r8,0
1820
1821 mul rbx
1822 add r9,rax
1823 mov rax,QWORD[16+rbp]
1824 adc rdx,0
1825 add r8,r9
; Save this pass's multiplier; $L$8x_tail reads these slots back.
1826 mov QWORD[((48-8+8))+rcx*8+rsp],rbx
1827 mov r9,rdx
1828 adc r9,0
1829
1830 mul rbx
1831 add r10,rax
1832 mov rax,QWORD[24+rbp]
1833 adc rdx,0
1834 add r9,r10
1835 mov rsi,QWORD[((32+8))+rsp]
1836 mov r10,rdx
1837 adc r10,0
1838
1839 mul rbx
1840 add r11,rax
1841 mov rax,QWORD[32+rbp]
1842 adc rdx,0
; rsi = new low word (r8) * [40+rsp]: multiplier for the next pass.
1843 imul rsi,r8
1844 add r10,r11
1845 mov r11,rdx
1846 adc r11,0
1847
1848 mul rbx
1849 add r12,rax
1850 mov rax,QWORD[40+rbp]
1851 adc rdx,0
1852 add r11,r12
1853 mov r12,rdx
1854 adc r12,0
1855
1856 mul rbx
1857 add r13,rax
1858 mov rax,QWORD[48+rbp]
1859 adc rdx,0
1860 add r12,r13
1861 mov r13,rdx
1862 adc r13,0
1863
1864 mul rbx
1865 add r14,rax
1866 mov rax,QWORD[56+rbp]
1867 adc rdx,0
1868 add r13,r14
1869 mov r14,rdx
1870 adc r14,0
1871
1872 mul rbx
1873 mov rbx,rsi
1874 add r15,rax
1875 mov rax,QWORD[rbp]
1876 adc rdx,0
1877 add r14,r15
1878 mov r15,rdx
1879 adc r15,0
1880
1881 dec ecx
1882 jnz NEAR $L$8x_reduce
1883
; Advance to the next 64 bytes of the table; if the end ([8+rsp]) has been
; reached there is no tail to process.
1884 lea rbp,[64+rbp]
1885 xor rax,rax
1886 mov rdx,QWORD[((8+8))+rsp]
1887 cmp rbp,QWORD[((0+8))+rsp]
1888 jae NEAR $L$8x_no_tail
1889
1890DB 0x66
1891 add r8,QWORD[rdi]
1892 adc r9,QWORD[8+rdi]
1893 adc r10,QWORD[16+rdi]
1894 adc r11,QWORD[24+rdi]
1895 adc r12,QWORD[32+rdi]
1896 adc r13,QWORD[40+rdi]
1897 adc r14,QWORD[48+rdi]
1898 adc r15,QWORD[56+rdi]
; rsi = 0 or -1: the carry out of the chain, kept across the tail loop.
1899 sbb rsi,rsi
1900
; Reload the first multiplier saved by the reduce loop.
1901 mov rbx,QWORD[((48+56+8))+rsp]
1902 mov ecx,8
1903 mov rax,QWORD[rbp]
1904 jmp NEAR $L$8x_tail
1905
1906ALIGN 32
; Tail: applies the eight saved multipliers to the next 64-byte table chunk,
; storing one finished word to [rdi] per pass.
1907$L$8x_tail:
1908 mul rbx
1909 add r8,rax
1910 mov rax,QWORD[8+rbp]
1911 mov QWORD[rdi],r8
1912 mov r8,rdx
1913 adc r8,0
1914
1915 mul rbx
1916 add r9,rax
1917 mov rax,QWORD[16+rbp]
1918 adc rdx,0
1919 add r8,r9
1920 lea rdi,[8+rdi]
1921 mov r9,rdx
1922 adc r9,0
1923
1924 mul rbx
1925 add r10,rax
1926 mov rax,QWORD[24+rbp]
1927 adc rdx,0
1928 add r9,r10
1929 mov r10,rdx
1930 adc r10,0
1931
1932 mul rbx
1933 add r11,rax
1934 mov rax,QWORD[32+rbp]
1935 adc rdx,0
1936 add r10,r11
1937 mov r11,rdx
1938 adc r11,0
1939
1940 mul rbx
1941 add r12,rax
1942 mov rax,QWORD[40+rbp]
1943 adc rdx,0
1944 add r11,r12
1945 mov r12,rdx
1946 adc r12,0
1947
1948 mul rbx
1949 add r13,rax
1950 mov rax,QWORD[48+rbp]
1951 adc rdx,0
1952 add r12,r13
1953 mov r13,rdx
1954 adc r13,0
1955
1956 mul rbx
1957 add r14,rax
1958 mov rax,QWORD[56+rbp]
1959 adc rdx,0
1960 add r13,r14
1961 mov r14,rdx
1962 adc r14,0
1963
1964 mul rbx
; Fetch the multiplier for the next pass from the save area.
1965 mov rbx,QWORD[((48-16+8))+rcx*8+rsp]
1966 add r15,rax
1967 adc rdx,0
1968 add r14,r15
1969 mov rax,QWORD[rbp]
1970 mov r15,rdx
1971 adc r15,0
1972
1973 dec ecx
1974 jnz NEAR $L$8x_tail
1975
1976 lea rbp,[64+rbp]
1977 mov rdx,QWORD[((8+8))+rsp]
1978 cmp rbp,QWORD[((0+8))+rsp]
1979 jae NEAR $L$8x_tail_done
1980
1981 mov rbx,QWORD[((48+56+8))+rsp]
; neg rsi turns the saved 0/-1 back into CF for the adc chain below.
1982 neg rsi
1983 mov rax,QWORD[rbp]
1984 adc r8,QWORD[rdi]
1985 adc r9,QWORD[8+rdi]
1986 adc r10,QWORD[16+rdi]
1987 adc r11,QWORD[24+rdi]
1988 adc r12,QWORD[32+rdi]
1989 adc r13,QWORD[40+rdi]
1990 adc r14,QWORD[48+rdi]
1991 adc r15,QWORD[56+rdi]
1992 sbb rsi,rsi
1993
1994 mov ecx,8
1995 jmp NEAR $L$8x_tail
1996
1997ALIGN 32
; Table exhausted: add the carry word parked at [rdx] and collect the
; overflow in rax, then restore CF from rsi for the final accumulation.
1998$L$8x_tail_done:
1999 xor rax,rax
2000 add r8,QWORD[rdx]
2001 adc r9,0
2002 adc r10,0
2003 adc r11,0
2004 adc r12,0
2005 adc r13,0
2006 adc r14,0
2007 adc r15,0
2008 adc rax,0
2009
2010 neg rsi
; When entered via the jae above, CF is clear and rax is zero.
2011$L$8x_no_tail:
2012 adc r8,QWORD[rdi]
2013 adc r9,QWORD[8+rdi]
2014 adc r10,QWORD[16+rdi]
2015 adc r11,QWORD[24+rdi]
2016 adc r12,QWORD[32+rdi]
2017 adc r13,QWORD[40+rdi]
2018 adc r14,QWORD[48+rdi]
2019 adc r15,QWORD[56+rdi]
2020 adc rax,0
2021 mov rcx,QWORD[((-8))+rbp]
2022 xor rsi,rsi
2023
; DB below encodes: movq rbp,xmm2  (rewind the table pointer)
2024DB 102,72,15,126,213
2025
2026 mov QWORD[rdi],r8
2027 mov QWORD[8+rdi],r9
; DB below encodes: movq r9,xmm3
2028DB 102,73,15,126,217
2029 mov QWORD[16+rdi],r10
2030 mov QWORD[24+rdi],r11
2031 mov QWORD[32+rdi],r12
2032 mov QWORD[40+rdi],r13
2033 mov QWORD[48+rdi],r14
2034 mov QWORD[56+rdi],r15
2035 lea rdi,[64+rdi]
2036
; Continue until rdi reaches the end-of-buffer pointer in rdx.
2037 cmp rdi,rdx
2038 jb NEAR $L$8x_reduction_loop
2039 DB 0F3h,0C3h ;repret
2040
2041
2042
2043ALIGN 32
; __bn_post4x_internal
; Masked add pass, four quadwords per iteration: for each word w read from
; rbp it computes (~w & mask) + [rbx] + carry and stores the result at rdi.
; The first word is decremented before the NOT, so the pass as a whole adds
; the two's complement of the rbp vector when the mask is all-ones, i.e. a
; conditional subtraction.
; In:  rbp = source vector, rdi = input buffer (rbx = rdi + r9 is read),
;      r9 = length value (rcx = r9 >> 5 is counted up to zero, so r9 is
;      presumably the negated byte length - confirm), rax = mask seed
;      (negated below; presumably 0 or 1 on entry), xmm1 = output pointer.
; Out: r10 = r9 as on entry, r9 negated; rsi = rdi = xmm1-derived pointers.
2044__bn_post4x_internal:
2045
2046 mov r12,QWORD[rbp]
2047 lea rbx,[r9*1+rdi]
2048 mov rcx,r9
; DB below encodes: movq rdi,xmm1
2049DB 102,72,15,126,207
2050 neg rax
; DB below encodes: movq rsi,xmm1
2051DB 102,72,15,126,206
2052 sar rcx,3+2
; ~(w-1) == -w, which supplies the "+1" of the two's complement.
2053 dec r12
; r10 carries the borrow/carry between iterations as 0 or -1.
2054 xor r10,r10
2055 mov r13,QWORD[8+rbp]
2056 mov r14,QWORD[16+rbp]
2057 mov r15,QWORD[24+rbp]
2058 jmp NEAR $L$sqr4x_sub_entry
2059
2060ALIGN 16
2061$L$sqr4x_sub:
2062 mov r12,QWORD[rbp]
2063 mov r13,QWORD[8+rbp]
2064 mov r14,QWORD[16+rbp]
2065 mov r15,QWORD[24+rbp]
2066$L$sqr4x_sub_entry:
2067 lea rbp,[32+rbp]
2068 not r12
2069 not r13
2070 not r14
2071 not r15
2072 and r12,rax
2073 and r13,rax
2074 and r14,rax
2075 and r15,rax
2076
; neg r10 restores CF from the 0/-1 saved by the sbb below.
2077 neg r10
2078 adc r12,QWORD[rbx]
2079 adc r13,QWORD[8+rbx]
2080 adc r14,QWORD[16+rbx]
2081 adc r15,QWORD[24+rbx]
2082 mov QWORD[rdi],r12
2083 lea rbx,[32+rbx]
2084 mov QWORD[8+rdi],r13
2085 sbb r10,r10
2086 mov QWORD[16+rdi],r14
2087 mov QWORD[24+rdi],r15
2088 lea rdi,[32+rdi]
2089
2090 inc rcx
2091 jnz NEAR $L$sqr4x_sub
2092
2093 mov r10,r9
2094 neg r9
2095 DB 0F3h,0C3h ;repret
2096
2097
2098
2099ALIGN 32
; bn_mulx4x_mont_gather5
; Win64 entry wrapper around mulx4x_internal.  It moves the Microsoft x64
; arguments (rcx,rdx,r8,r9 and stack slots 5/6) into rdi,rsi,rdx,rcx,r8,r9,
; saves the callee-saved registers, carves out a 64-byte aligned stack frame,
; calls mulx4x_internal and returns 1 in rax.
; The seventh argument is not loaded here; mulx4x_internal reads it as
; DWORD[56+rax], where rax is the stack pointer on entry.
2100bn_mulx4x_mont_gather5:
2101 mov QWORD[8+rsp],rdi ;WIN64 prologue
2102 mov QWORD[16+rsp],rsi
2103 mov rax,rsp
2104$L$SEH_begin_bn_mulx4x_mont_gather5:
2105 mov rdi,rcx
2106 mov rsi,rdx
2107 mov rdx,r8
2108 mov rcx,r9
2109 mov r8,QWORD[40+rsp]
2110 mov r9,QWORD[48+rsp]
2111
2112
2113
2114 mov rax,rsp
2115
2116$L$mulx4x_enter:
2117 push rbx
2118
2119 push rbp
2120
2121 push r12
2122
2123 push r13
2124
2125 push r14
2126
2127 push r15
2128
2129$L$mulx4x_prologue:
2130
; r9 = -(8*num), r10 = 3*8*num, r8 = quadword pointed to by the 5th argument.
2131 shl r9d,3
2132 lea r10,[r9*2+r9]
2133 neg r9
2134 mov r8,QWORD[r8]
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
; Choose the frame base in rbp from the distance between the tentative frame
; and rdi modulo 4096 (presumably so the frame does not alias the output
; buffer within a page - confirm against the generator's notes).
2145 lea r11,[((-320))+r9*2+rsp]
2146 mov rbp,rsp
2147 sub r11,rdi
2148 and r11,4095
2149 cmp r10,r11
2150 jb NEAR $L$mulx4xsp_alt
2151 sub rbp,r11
2152 lea rbp,[((-320))+r9*2+rbp]
2153 jmp NEAR $L$mulx4xsp_done
2154
2155$L$mulx4xsp_alt:
2156 lea r10,[((4096-320))+r9*2]
2157 lea rbp,[((-320))+r9*2+rbp]
2158 sub r11,r10
2159 mov r10,0
; mov (not xor) keeps the CF produced by the sub for the cmovc.
2160 cmovc r11,r10
2161 sub rbp,r11
2162$L$mulx4xsp_done:
2163 and rbp,-64
; Move rsp down to rbp in 4096-byte steps, reading one quadword at each
; step (presumably so stack guard pages are touched in order).
2164 mov r11,rsp
2165 sub r11,rbp
2166 and r11,-4096
2167 lea rsp,[rbp*1+r11]
2168 mov r10,QWORD[rsp]
2169 cmp rsp,rbp
2170 ja NEAR $L$mulx4x_page_walk
2171 jmp NEAR $L$mulx4x_page_walk_done
2172
2173$L$mulx4x_page_walk:
2174 lea rsp,[((-4096))+rsp]
2175 mov r10,QWORD[rsp]
2176 cmp rsp,rbp
2177 ja NEAR $L$mulx4x_page_walk
2178$L$mulx4x_page_walk_done:
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
; Frame slots: [32+rsp] = loaded r8 value, [40+rsp] = entry stack pointer.
2192 mov QWORD[32+rsp],r8
2193 mov QWORD[40+rsp],rax
2194
2195$L$mulx4x_body:
2196 call mulx4x_internal
2197
; Epilogue: rsi = entry stack pointer; the six pushed registers sit just
; below it.
2198 mov rsi,QWORD[40+rsp]
2199
2200 mov rax,1
2201
2202 mov r15,QWORD[((-48))+rsi]
2203
2204 mov r14,QWORD[((-40))+rsi]
2205
2206 mov r13,QWORD[((-32))+rsi]
2207
2208 mov r12,QWORD[((-24))+rsi]
2209
2210 mov rbp,QWORD[((-16))+rsi]
2211
2212 mov rbx,QWORD[((-8))+rsi]
2213
2214 lea rsp,[rsi]
2215
2216$L$mulx4x_epilogue:
2217 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2218 mov rsi,QWORD[16+rsp]
2219 DB 0F3h,0C3h ;repret
2220
2221$L$SEH_end_bn_mulx4x_mont_gather5:
2222
2223
2224ALIGN 32
; mulx4x_internal
; MULX/ADCX/ADOX multiply-and-reduce core shared by bn_mulx4x_mont_gather5
; and bn_powerx5.  It processes four quadwords per inner step and picks each
; multiplier word out of a table with SSE2 masks instead of an indexed load.
; In (as used below): rsi = first operand, rcx = second mulx operand vector
;      (presumably the modulus), rdx = table base, rdi = value kept at
;      [64+rsp] and reloaded into rdx before the final jump, r9 = length
;      value (negated below; presumably -8*num on entry - confirm),
;      rax = frame pointer whose DWORD[56+rax] is the table index,
;      [40+rsp] = multiplier used by imul (presumably n0).
; All [..+8+rsp] offsets account for the return address pushed by the call.
; Does not return itself: it ends with a jump to $L$sqrx4x_sub_entry.
2225mulx4x_internal:
2226
2227 mov QWORD[8+rsp],r9
2228 mov r10,r9
2229 neg r9
2230 shl r9,5
2231 neg r10
; r13 = table end bound (compared against rdi after every outer pass).
2232 lea r13,[128+r9*1+rdx]
2233 shr r9,5+5
2234 movd xmm5,DWORD[56+rax]
; r9 = inner-loop trip count (one 4-word step is done before the loop).
2235 sub r9,1
2236 lea rax,[$L$inc]
2237 mov QWORD[((16+8))+rsp],r13
2238 mov QWORD[((24+8))+rsp],r9
2239 mov QWORD[((56+8))+rsp],rdi
2240 movdqa xmm0,XMMWORD[rax]
2241 movdqa xmm1,XMMWORD[16+rax]
2242 lea r10,[((88-112))+r10*1+rsp]
2243 lea rdi,[128+rdx]
2244
; Build 16 compare masks: the index is broadcast into xmm5, counter vectors
; derived from $L$inc are compared against it with pcmpeqd, and the results
; are stored at [112+r10]..[352+r10].  The DB 0x67 bytes are prefixes on the
; following instruction (presumably padding).
2245 pshufd xmm5,xmm5,0
2246 movdqa xmm4,xmm1
2247DB 0x67
2248 movdqa xmm2,xmm1
2249DB 0x67
2250 paddd xmm1,xmm0
2251 pcmpeqd xmm0,xmm5
2252 movdqa xmm3,xmm4
2253 paddd xmm2,xmm1
2254 pcmpeqd xmm1,xmm5
2255 movdqa XMMWORD[112+r10],xmm0
2256 movdqa xmm0,xmm4
2257
2258 paddd xmm3,xmm2
2259 pcmpeqd xmm2,xmm5
2260 movdqa XMMWORD[128+r10],xmm1
2261 movdqa xmm1,xmm4
2262
2263 paddd xmm0,xmm3
2264 pcmpeqd xmm3,xmm5
2265 movdqa XMMWORD[144+r10],xmm2
2266 movdqa xmm2,xmm4
2267
2268 paddd xmm1,xmm0
2269 pcmpeqd xmm0,xmm5
2270 movdqa XMMWORD[160+r10],xmm3
2271 movdqa xmm3,xmm4
2272 paddd xmm2,xmm1
2273 pcmpeqd xmm1,xmm5
2274 movdqa XMMWORD[176+r10],xmm0
2275 movdqa xmm0,xmm4
2276
2277 paddd xmm3,xmm2
2278 pcmpeqd xmm2,xmm5
2279 movdqa XMMWORD[192+r10],xmm1
2280 movdqa xmm1,xmm4
2281
2282 paddd xmm0,xmm3
2283 pcmpeqd xmm3,xmm5
2284 movdqa XMMWORD[208+r10],xmm2
2285 movdqa xmm2,xmm4
2286
2287 paddd xmm1,xmm0
2288 pcmpeqd xmm0,xmm5
2289 movdqa XMMWORD[224+r10],xmm3
2290 movdqa xmm3,xmm4
2291 paddd xmm2,xmm1
2292 pcmpeqd xmm1,xmm5
2293 movdqa XMMWORD[240+r10],xmm0
2294 movdqa xmm0,xmm4
2295
2296 paddd xmm3,xmm2
2297 pcmpeqd xmm2,xmm5
2298 movdqa XMMWORD[256+r10],xmm1
2299 movdqa xmm1,xmm4
2300
2301 paddd xmm0,xmm3
2302 pcmpeqd xmm3,xmm5
2303 movdqa XMMWORD[272+r10],xmm2
2304 movdqa xmm2,xmm4
2305
2306 paddd xmm1,xmm0
2307 pcmpeqd xmm0,xmm5
2308 movdqa XMMWORD[288+r10],xmm3
2309 movdqa xmm3,xmm4
2310DB 0x67
2311 paddd xmm2,xmm1
2312 pcmpeqd xmm1,xmm5
2313 movdqa XMMWORD[304+r10],xmm0
2314
2315 paddd xmm3,xmm2
2316 pcmpeqd xmm2,xmm5
2317 movdqa XMMWORD[320+r10],xmm1
2318
2319 pcmpeqd xmm3,xmm5
2320 movdqa XMMWORD[336+r10],xmm2
2321
; Gather the first multiplier word: every 16-byte table entry in the 256-byte
; row around rdi is read and ANDed with its mask, then the results are merged.
2322 pand xmm0,XMMWORD[64+rdi]
2323 pand xmm1,XMMWORD[80+rdi]
2324 pand xmm2,XMMWORD[96+rdi]
2325 movdqa XMMWORD[352+r10],xmm3
2326 pand xmm3,XMMWORD[112+rdi]
2327 por xmm0,xmm2
2328 por xmm1,xmm3
2329 movdqa xmm4,XMMWORD[((-128))+rdi]
2330 movdqa xmm5,XMMWORD[((-112))+rdi]
2331 movdqa xmm2,XMMWORD[((-96))+rdi]
2332 pand xmm4,XMMWORD[112+r10]
2333 movdqa xmm3,XMMWORD[((-80))+rdi]
2334 pand xmm5,XMMWORD[128+r10]
2335 por xmm0,xmm4
2336 pand xmm2,XMMWORD[144+r10]
2337 por xmm1,xmm5
2338 pand xmm3,XMMWORD[160+r10]
2339 por xmm0,xmm2
2340 por xmm1,xmm3
2341 movdqa xmm4,XMMWORD[((-64))+rdi]
2342 movdqa xmm5,XMMWORD[((-48))+rdi]
2343 movdqa xmm2,XMMWORD[((-32))+rdi]
2344 pand xmm4,XMMWORD[176+r10]
2345 movdqa xmm3,XMMWORD[((-16))+rdi]
2346 pand xmm5,XMMWORD[192+r10]
2347 por xmm0,xmm4
2348 pand xmm2,XMMWORD[208+r10]
2349 por xmm1,xmm5
2350 pand xmm3,XMMWORD[224+r10]
2351 por xmm0,xmm2
2352 por xmm1,xmm3
2353 movdqa xmm4,XMMWORD[rdi]
2354 movdqa xmm5,XMMWORD[16+rdi]
2355 movdqa xmm2,XMMWORD[32+rdi]
2356 pand xmm4,XMMWORD[240+r10]
2357 movdqa xmm3,XMMWORD[48+rdi]
2358 pand xmm5,XMMWORD[256+r10]
2359 por xmm0,xmm4
2360 pand xmm2,XMMWORD[272+r10]
2361 por xmm1,xmm5
2362 pand xmm3,XMMWORD[288+r10]
2363 por xmm0,xmm2
2364 por xmm1,xmm3
2365 pxor xmm0,xmm1
; Fold the high qword onto the low one.
2366 pshufd xmm1,xmm0,0x4e
2367 por xmm0,xmm1
2368 lea rdi,[256+rdi]
; DB below encodes: movq rdx,xmm0  (the gathered multiplier word)
2369DB 102,72,15,126,194
2370 lea rbx,[((64+32+8))+rsp]
2371
; First pass: rdx (copy in r9) times the first four words at rsi, then
; r8 = low word * [40+rsp] times the first four words at rcx.
2372 mov r9,rdx
2373 mulx rax,r8,QWORD[rsi]
2374 mulx r12,r11,QWORD[8+rsi]
2375 add r11,rax
2376 mulx r13,rax,QWORD[16+rsi]
2377 adc r12,rax
2378 adc r13,0
2379 mulx r14,rax,QWORD[24+rsi]
2380
2381 mov r15,r8
2382 imul r8,QWORD[((32+8))+rsp]
; Clears rbp and both CF and OF for the adcx/adox chains.
2383 xor rbp,rbp
2384 mov rdx,r8
2385
2386 mov QWORD[((8+8))+rsp],rdi
2387
2388 lea rsi,[32+rsi]
2389 adcx r13,rax
2390 adcx r14,rbp
2391
2392 mulx r10,rax,QWORD[rcx]
2393 adcx r15,rax
2394 adox r10,r11
2395 mulx r11,rax,QWORD[8+rcx]
2396 adcx r10,rax
2397 adox r11,r12
2398 mulx r12,rax,QWORD[16+rcx]
2399 mov rdi,QWORD[((24+8))+rsp]
2400 mov QWORD[((-32))+rbx],r10
2401 adcx r11,rax
2402 adox r12,r13
2403 mulx r15,rax,QWORD[24+rcx]
2404 mov rdx,r9
2405 mov QWORD[((-24))+rbx],r11
2406 adcx r12,rax
2407 adox r15,rbp
2408 lea rcx,[32+rcx]
2409 mov QWORD[((-16))+rbx],r12
2410 jmp NEAR $L$mulx4x_1st
2411
2412ALIGN 32
; Remaining 4-word steps of the first pass; rdi counts the steps down.
; rdx alternates between r9 (table word) and r8 (reduction multiplier).
2413$L$mulx4x_1st:
2414 adcx r15,rbp
2415 mulx rax,r10,QWORD[rsi]
2416 adcx r10,r14
2417 mulx r14,r11,QWORD[8+rsi]
2418 adcx r11,rax
2419 mulx rax,r12,QWORD[16+rsi]
2420 adcx r12,r14
2421 mulx r14,r13,QWORD[24+rsi]
2422DB 0x67,0x67
2423 mov rdx,r8
2424 adcx r13,rax
2425 adcx r14,rbp
2426 lea rsi,[32+rsi]
2427 lea rbx,[32+rbx]
2428
2429 adox r10,r15
2430 mulx r15,rax,QWORD[rcx]
2431 adcx r10,rax
2432 adox r11,r15
2433 mulx r15,rax,QWORD[8+rcx]
2434 adcx r11,rax
2435 adox r12,r15
2436 mulx r15,rax,QWORD[16+rcx]
2437 mov QWORD[((-40))+rbx],r10
2438 adcx r12,rax
2439 mov QWORD[((-32))+rbx],r11
2440 adox r13,r15
2441 mulx r15,rax,QWORD[24+rcx]
2442 mov rdx,r9
2443 mov QWORD[((-24))+rbx],r12
2444 adcx r13,rax
2445 adox r15,rbp
2446 lea rcx,[32+rcx]
2447 mov QWORD[((-16))+rbx],r13
2448
2449 dec rdi
2450 jnz NEAR $L$mulx4x_1st
2451
; rax = the r9 value saved on entry; adding it rewinds rsi to its start.
2452 mov rax,QWORD[8+rsp]
2453 adc r15,rbp
2454 lea rsi,[rax*1+rsi]
2455 add r14,r15
2456 mov rdi,QWORD[((8+8))+rsp]
; rbp = top carry of this pass (0 or 1).
2457 adc rbp,rbp
2458 mov QWORD[((-8))+rbx],r14
2459 jmp NEAR $L$mulx4x_outer
2460
2461ALIGN 32
; Outer loop: gather the next multiplier word from the next 256-byte table
; row (masks are read relative to rbx via r10), then run another pass that
; also adds the partial result already stored at rbx.
2462$L$mulx4x_outer:
2463 lea r10,[((16-256))+rbx]
2464 pxor xmm4,xmm4
2465DB 0x67,0x67
2466 pxor xmm5,xmm5
2467 movdqa xmm0,XMMWORD[((-128))+rdi]
2468 movdqa xmm1,XMMWORD[((-112))+rdi]
2469 movdqa xmm2,XMMWORD[((-96))+rdi]
2470 pand xmm0,XMMWORD[256+r10]
2471 movdqa xmm3,XMMWORD[((-80))+rdi]
2472 pand xmm1,XMMWORD[272+r10]
2473 por xmm4,xmm0
2474 pand xmm2,XMMWORD[288+r10]
2475 por xmm5,xmm1
2476 pand xmm3,XMMWORD[304+r10]
2477 por xmm4,xmm2
2478 por xmm5,xmm3
2479 movdqa xmm0,XMMWORD[((-64))+rdi]
2480 movdqa xmm1,XMMWORD[((-48))+rdi]
2481 movdqa xmm2,XMMWORD[((-32))+rdi]
2482 pand xmm0,XMMWORD[320+r10]
2483 movdqa xmm3,XMMWORD[((-16))+rdi]
2484 pand xmm1,XMMWORD[336+r10]
2485 por xmm4,xmm0
2486 pand xmm2,XMMWORD[352+r10]
2487 por xmm5,xmm1
2488 pand xmm3,XMMWORD[368+r10]
2489 por xmm4,xmm2
2490 por xmm5,xmm3
2491 movdqa xmm0,XMMWORD[rdi]
2492 movdqa xmm1,XMMWORD[16+rdi]
2493 movdqa xmm2,XMMWORD[32+rdi]
2494 pand xmm0,XMMWORD[384+r10]
2495 movdqa xmm3,XMMWORD[48+rdi]
2496 pand xmm1,XMMWORD[400+r10]
2497 por xmm4,xmm0
2498 pand xmm2,XMMWORD[416+r10]
2499 por xmm5,xmm1
2500 pand xmm3,XMMWORD[432+r10]
2501 por xmm4,xmm2
2502 por xmm5,xmm3
2503 movdqa xmm0,XMMWORD[64+rdi]
2504 movdqa xmm1,XMMWORD[80+rdi]
2505 movdqa xmm2,XMMWORD[96+rdi]
2506 pand xmm0,XMMWORD[448+r10]
2507 movdqa xmm3,XMMWORD[112+rdi]
2508 pand xmm1,XMMWORD[464+r10]
2509 por xmm4,xmm0
2510 pand xmm2,XMMWORD[480+r10]
2511 por xmm5,xmm1
2512 pand xmm3,XMMWORD[496+r10]
2513 por xmm4,xmm2
2514 por xmm5,xmm3
2515 por xmm4,xmm5
2516 pshufd xmm0,xmm4,0x4e
2517 por xmm0,xmm4
2518 lea rdi,[256+rdi]
; DB below encodes: movq rdx,xmm0
2519DB 102,72,15,126,194
2520
; Store the previous pass's top carry, then rewind rbx by rax (+32).
2521 mov QWORD[rbx],rbp
2522 lea rbx,[32+rax*1+rbx]
2523 mulx r11,r8,QWORD[rsi]
2524 xor rbp,rbp
2525 mov r9,rdx
2526 mulx r12,r14,QWORD[8+rsi]
2527 adox r8,QWORD[((-32))+rbx]
2528 adcx r11,r14
2529 mulx r13,r15,QWORD[16+rsi]
2530 adox r11,QWORD[((-24))+rbx]
2531 adcx r12,r15
2532 mulx r14,rdx,QWORD[24+rsi]
2533 adox r12,QWORD[((-16))+rbx]
2534 adcx r13,rdx
; Rewind rcx by rax as well.
2535 lea rcx,[rax*1+rcx]
2536 lea rsi,[32+rsi]
2537 adox r13,QWORD[((-8))+rbx]
2538 adcx r14,rbp
2539 adox r14,rbp
2540
2541 mov r15,r8
2542 imul r8,QWORD[((32+8))+rsp]
2543
2544 mov rdx,r8
2545 xor rbp,rbp
2546 mov QWORD[((8+8))+rsp],rdi
2547
2548 mulx r10,rax,QWORD[rcx]
2549 adcx r15,rax
2550 adox r10,r11
2551 mulx r11,rax,QWORD[8+rcx]
2552 adcx r10,rax
2553 adox r11,r12
2554 mulx r12,rax,QWORD[16+rcx]
2555 adcx r11,rax
2556 adox r12,r13
2557 mulx r15,rax,QWORD[24+rcx]
2558 mov rdx,r9
2559 mov rdi,QWORD[((24+8))+rsp]
2560 mov QWORD[((-32))+rbx],r10
2561 adcx r12,rax
2562 mov QWORD[((-24))+rbx],r11
2563 adox r15,rbp
2564 mov QWORD[((-16))+rbx],r12
2565 lea rcx,[32+rcx]
2566 jmp NEAR $L$mulx4x_inner
2567
2568ALIGN 32
; Inner loop: like $L$mulx4x_1st but also accumulates the words at [rbx].
2569$L$mulx4x_inner:
2570 mulx rax,r10,QWORD[rsi]
2571 adcx r15,rbp
2572 adox r10,r14
2573 mulx r14,r11,QWORD[8+rsi]
2574 adcx r10,QWORD[rbx]
2575 adox r11,rax
2576 mulx rax,r12,QWORD[16+rsi]
2577 adcx r11,QWORD[8+rbx]
2578 adox r12,r14
2579 mulx r14,r13,QWORD[24+rsi]
2580 mov rdx,r8
2581 adcx r12,QWORD[16+rbx]
2582 adox r13,rax
2583 adcx r13,QWORD[24+rbx]
2584 adox r14,rbp
2585 lea rsi,[32+rsi]
2586 lea rbx,[32+rbx]
2587 adcx r14,rbp
2588
2589 adox r10,r15
2590 mulx r15,rax,QWORD[rcx]
2591 adcx r10,rax
2592 adox r11,r15
2593 mulx r15,rax,QWORD[8+rcx]
2594 adcx r11,rax
2595 adox r12,r15
2596 mulx r15,rax,QWORD[16+rcx]
2597 mov QWORD[((-40))+rbx],r10
2598 adcx r12,rax
2599 adox r13,r15
2600 mov QWORD[((-32))+rbx],r11
2601 mulx r15,rax,QWORD[24+rcx]
2602 mov rdx,r9
2603 lea rcx,[32+rcx]
2604 mov QWORD[((-24))+rbx],r12
2605 adcx r13,rax
2606 adox r15,rbp
2607 mov QWORD[((-16))+rbx],r13
2608
2609 dec rdi
2610 jnz NEAR $L$mulx4x_inner
2611
2612 mov rax,QWORD[((0+8))+rsp]
2613 adc r15,rbp
; rdi is 0 here, so this sub sets CF iff [rbx] (the stored top carry) is
; non-zero; rdi itself is overwritten on the next line.
2614 sub rdi,QWORD[rbx]
2615 mov rdi,QWORD[((8+8))+rsp]
2616 mov r10,QWORD[((16+8))+rsp]
2617 adc r14,r15
2618 lea rsi,[rax*1+rsi]
2619 adc rbp,rbp
2620 mov QWORD[((-8))+rbx],r14
2621
; Loop until the table pointer reaches the bound saved in [24+rsp].
2622 cmp rdi,r10
2623 jb NEAR $L$mulx4x_outer
2624
; Set up registers for the shared masked-add loop: rbp = rewound rcx vector,
; rdi = rewound result buffer, rcx = rax>>5 as counter, rdx = pointer saved
; on entry, r12..r15 = first four words at rbp (r12 decremented), and
; rax = 0 - (top carry | borrow of [rcx-8] - r14).
2625 mov r10,QWORD[((-8))+rcx]
2626 mov r8,rbp
2627 mov r12,QWORD[rax*1+rcx]
2628 lea rbp,[rax*1+rcx]
2629 mov rcx,rax
2630 lea rdi,[rax*1+rbx]
2631 xor eax,eax
2632 xor r15,r15
2633 sub r10,r14
2634 adc r15,r15
2635 or r8,r15
2636 sar rcx,3+2
2637 sub rax,r8
2638 mov rdx,QWORD[((56+8))+rsp]
2639 dec r12
2640 mov r13,QWORD[8+rbp]
2641 xor r8,r8
2642 mov r14,QWORD[16+rbp]
2643 mov r15,QWORD[24+rbp]
; Tail jump: the ret at the end of that loop returns to our caller.
2644 jmp NEAR $L$sqrx4x_sub_entry
2645
2646
2647
2648ALIGN 32
; bn_powerx5
; Win64 entry point that runs five __bn_sqrx8x_internal/__bn_postx4x_internal
; pairs followed by one mulx4x_internal call, then returns 1 in rax.
; The Microsoft x64 arguments (rcx,rdx,r8,r9 and stack slots 5/6) are moved
; into rdi,rsi,rdx,rcx,r8,r9; stack slot 7 is read later by mulx4x_internal
; through rax as DWORD[56+rax].
2649bn_powerx5:
2650 mov QWORD[8+rsp],rdi ;WIN64 prologue
2651 mov QWORD[16+rsp],rsi
2652 mov rax,rsp
2653$L$SEH_begin_bn_powerx5:
2654 mov rdi,rcx
2655 mov rsi,rdx
2656 mov rdx,r8
2657 mov rcx,r9
2658 mov r8,QWORD[40+rsp]
2659 mov r9,QWORD[48+rsp]
2660
2661
2662
2663 mov rax,rsp
2664
2665$L$powerx5_enter:
2666 push rbx
2667
2668 push rbp
2669
2670 push r12
2671
2672 push r13
2673
2674 push r14
2675
2676 push r15
2677
2678$L$powerx5_prologue:
2679
; r9 = -(8*num), r10 = 3*8*num, r8 = quadword pointed to by the 5th argument.
2680 shl r9d,3
2681 lea r10,[r9*2+r9]
2682 neg r9
2683 mov r8,QWORD[r8]
2684
2685
2686
2687
2688
2689
2690
2691
; Choose the frame base in rbp from the distance between the tentative frame
; and rdi modulo 4096 (presumably to avoid aliasing the output buffer within
; a page - confirm against the generator's notes).
2692 lea r11,[((-320))+r9*2+rsp]
2693 mov rbp,rsp
2694 sub r11,rdi
2695 and r11,4095
2696 cmp r10,r11
2697 jb NEAR $L$pwrx_sp_alt
2698 sub rbp,r11
2699 lea rbp,[((-320))+r9*2+rbp]
2700 jmp NEAR $L$pwrx_sp_done
2701
2702ALIGN 32
2703$L$pwrx_sp_alt:
2704 lea r10,[((4096-320))+r9*2]
2705 lea rbp,[((-320))+r9*2+rbp]
2706 sub r11,r10
2707 mov r10,0
; mov (not xor) keeps the CF produced by the sub for the cmovc.
2708 cmovc r11,r10
2709 sub rbp,r11
2710$L$pwrx_sp_done:
2711 and rbp,-64
; Move rsp down to rbp in 4096-byte steps, reading one quadword at each
; step (presumably so stack guard pages are touched in order).
2712 mov r11,rsp
2713 sub r11,rbp
2714 and r11,-4096
2715 lea rsp,[rbp*1+r11]
2716 mov r10,QWORD[rsp]
2717 cmp rsp,rbp
2718 ja NEAR $L$pwrx_page_walk
2719 jmp NEAR $L$pwrx_page_walk_done
2720
2721$L$pwrx_page_walk:
2722 lea rsp,[((-4096))+rsp]
2723 mov r10,QWORD[rsp]
2724 cmp rsp,rbp
2725 ja NEAR $L$pwrx_page_walk
2726$L$pwrx_page_walk_done:
2727
; r10 = -(8*num), r9 = 8*num.
2728 mov r10,r9
2729 neg r9
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
; Park values in xmm registers so they survive the internal calls:
; xmm0 = 0, and the four DB lines encode
;   movq xmm1,rdi / movq xmm2,rcx / movq xmm3,r10 / movq xmm4,rdx.
2742 pxor xmm0,xmm0
2743DB 102,72,15,110,207
2744DB 102,72,15,110,209
2745DB 102,73,15,110,218
2746DB 102,72,15,110,226
; Frame slots: [32+rsp] = loaded r8 value, [40+rsp] = entry stack pointer.
2747 mov QWORD[32+rsp],r8
2748 mov QWORD[40+rsp],rax
2749
2750$L$powerx5_body:
2751
2752 call __bn_sqrx8x_internal
2753 call __bn_postx4x_internal
2754 call __bn_sqrx8x_internal
2755 call __bn_postx4x_internal
2756 call __bn_sqrx8x_internal
2757 call __bn_postx4x_internal
2758 call __bn_sqrx8x_internal
2759 call __bn_postx4x_internal
2760 call __bn_sqrx8x_internal
2761 call __bn_postx4x_internal
2762
; Set up mulx4x_internal: r9 = r10, rdi = rsi, and the two DB lines encode
;   movq rcx,xmm2 / movq rdx,xmm4.  rax = entry stack pointer.
2763 mov r9,r10
2764 mov rdi,rsi
2765DB 102,72,15,126,209
2766DB 102,72,15,126,226
2767 mov rax,QWORD[40+rsp]
2768
2769 call mulx4x_internal
2770
; Epilogue: rsi = entry stack pointer; the six pushed registers sit just
; below it.
2771 mov rsi,QWORD[40+rsp]
2772
2773 mov rax,1
2774
2775 mov r15,QWORD[((-48))+rsi]
2776
2777 mov r14,QWORD[((-40))+rsi]
2778
2779 mov r13,QWORD[((-32))+rsi]
2780
2781 mov r12,QWORD[((-24))+rsi]
2782
2783 mov rbp,QWORD[((-16))+rsi]
2784
2785 mov rbx,QWORD[((-8))+rsi]
2786
2787 lea rsp,[rsi]
2788
2789$L$powerx5_epilogue:
2790 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2791 mov rsi,QWORD[16+rsp]
2792 DB 0F3h,0C3h ;repret
2793
2794$L$SEH_end_bn_powerx5:
2795
2796global bn_sqrx8x_internal
2797
2798
2799ALIGN 32
; bn_sqrx8x_internal / __bn_sqrx8x_internal
; MULX/ADCX/ADOX squaring of the vector at rsi (r9 bytes) into the buffer at
; [56+rsp].  Phase 1 accumulates the off-diagonal products a[i]*a[j] (i<j),
; eight words at a time; phase 2 ($L$sqrx4x_shift_n_add) doubles them and
; adds the squares a[i]*a[i].  Execution then falls through into
; __bn_sqrx8x_reduction.
; In:  rsi = input vector, r9 = byte length, xmm0 = zero (used to clear the
;      buffer; not set here), xmm2/xmm3 = values moved into rbp/rcx below.
; Stack slots: [8+rsp] = r9, [16+rsp] = rsi+r9 (input end), [24+rsp] =
;      saved carry (0/-1), [32+rsp] = saved buffer position.
2800bn_sqrx8x_internal:
2801__bn_sqrx8x_internal:
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843 lea rdi,[((48+8))+rsp]
2844 lea rbp,[r9*1+rsi]
2845 mov QWORD[((0+8))+rsp],r9
2846 mov QWORD[((8+8))+rsp],rbp
2847 jmp NEAR $L$sqr8x_zero_start
2848
2849ALIGN 32
; The DB line is a 12-byte multi-prefix NOP used as padding; DB 0x3e is a
; segment-override prefix on the following movdqa.
2850DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
; Clear the buffer with xmm0: 128 bytes are written for every 64 subtracted
; from r9; the first pass enters at the midpoint and writes only 64.
2851$L$sqrx8x_zero:
2852DB 0x3e
2853 movdqa XMMWORD[rdi],xmm0
2854 movdqa XMMWORD[16+rdi],xmm0
2855 movdqa XMMWORD[32+rdi],xmm0
2856 movdqa XMMWORD[48+rdi],xmm0
2857$L$sqr8x_zero_start:
2858 movdqa XMMWORD[64+rdi],xmm0
2859 movdqa XMMWORD[80+rdi],xmm0
2860 movdqa XMMWORD[96+rdi],xmm0
2861 movdqa XMMWORD[112+rdi],xmm0
2862 lea rdi,[128+rdi]
2863 sub r9,64
2864 jnz NEAR $L$sqrx8x_zero
2865
2866 mov rdx,QWORD[rsi]
2867
; r9 is 0 after the loop above; clear the other accumulators.  The final
; xor also clears CF and OF for the adcx/adox chains.
2868 xor r10,r10
2869 xor r11,r11
2870 xor r12,r12
2871 xor r13,r13
2872 xor r14,r14
2873 xor r15,r15
2874 lea rdi,[((48+8))+rsp]
2875 xor rbp,rbp
2876 jmp NEAR $L$sqrx8x_outer_loop
2877
2878ALIGN 32
; Products within the current 8-word window: rdx = a[0] times a[1..7],
; then a[1] times a[2..7], and so on.  Raw DB lines are VEX-encoded mulx
; instructions with a forced 32-bit displacement.
2879$L$sqrx8x_outer_loop:
2880 mulx rax,r8,QWORD[8+rsi]
2881 adcx r8,r9
2882 adox r10,rax
2883 mulx rax,r9,QWORD[16+rsi]
2884 adcx r9,r10
2885 adox r11,rax
; DB below encodes: mulx rax,r10,QWORD[24+rsi]
2886DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
2887 adcx r10,r11
2888 adox r12,rax
; DB below encodes: mulx rax,r11,QWORD[32+rsi]
2889DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
2890 adcx r11,r12
2891 adox r13,rax
2892 mulx rax,r12,QWORD[40+rsi]
2893 adcx r12,r13
2894 adox r14,rax
2895 mulx rax,r13,QWORD[48+rsi]
2896 adcx r13,r14
2897 adox rax,r15
2898 mulx r15,r14,QWORD[56+rsi]
2899 mov rdx,QWORD[8+rsi]
2900 adcx r14,rax
2901 adox r15,rbp
2902 adc r15,QWORD[64+rdi]
2903 mov QWORD[8+rdi],r8
2904 mov QWORD[16+rdi],r9
; rcx = 0/-1: carry saved across the rest of this window.
2905 sbb rcx,rcx
2906 xor rbp,rbp
2907
2908
2909 mulx rbx,r8,QWORD[16+rsi]
2910 mulx rax,r9,QWORD[24+rsi]
2911 adcx r8,r10
2912 adox r9,rbx
2913 mulx rbx,r10,QWORD[32+rsi]
2914 adcx r9,r11
2915 adox r10,rax
; DB below encodes: mulx rax,r11,QWORD[40+rsi]
2916DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
2917 adcx r10,r12
2918 adox r11,rbx
; DB below encodes: mulx rbx,r12,QWORD[48+rsi]
2919DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
2920 adcx r11,r13
2921 adox r12,r14
; DB below encodes: mulx r14,r13,QWORD[56+rsi]
2922DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
2923 mov rdx,QWORD[16+rsi]
2924 adcx r12,rax
2925 adox r13,rbx
2926 adcx r13,r15
2927 adox r14,rbp
2928 adcx r14,rbp
2929
2930 mov QWORD[24+rdi],r8
2931 mov QWORD[32+rdi],r9
2932
2933 mulx rbx,r8,QWORD[24+rsi]
2934 mulx rax,r9,QWORD[32+rsi]
2935 adcx r8,r10
2936 adox r9,rbx
2937 mulx rbx,r10,QWORD[40+rsi]
2938 adcx r9,r11
2939 adox r10,rax
; DB below encodes: mulx rax,r11,QWORD[48+rsi]
2940DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
2941 adcx r10,r12
2942 adox r11,r13
; DB below encodes: mulx r13,r12,QWORD[56+rsi]
2943DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
2944DB 0x3e
2945 mov rdx,QWORD[24+rsi]
2946 adcx r11,rbx
2947 adox r12,rax
2948 adcx r12,r14
2949 mov QWORD[40+rdi],r8
2950 mov QWORD[48+rdi],r9
2951 mulx rax,r8,QWORD[32+rsi]
2952 adox r13,rbp
2953 adcx r13,rbp
2954
2955 mulx rbx,r9,QWORD[40+rsi]
2956 adcx r8,r10
2957 adox r9,rax
2958 mulx rax,r10,QWORD[48+rsi]
2959 adcx r9,r11
2960 adox r10,r12
2961 mulx r12,r11,QWORD[56+rsi]
2962 mov rdx,QWORD[32+rsi]
2963 mov r14,QWORD[40+rsi]
2964 adcx r10,rbx
2965 adox r11,rax
2966 mov r15,QWORD[48+rsi]
2967 adcx r11,r13
2968 adox r12,rbp
2969 adcx r12,rbp
2970
2971 mov QWORD[56+rdi],r8
2972 mov QWORD[64+rdi],r9
2973
2974 mulx rax,r9,r14
2975 mov r8,QWORD[56+rsi]
2976 adcx r9,r10
2977 mulx rbx,r10,r15
2978 adox r10,rax
2979 adcx r10,r11
2980 mulx rax,r11,r8
2981 mov rdx,r14
2982 adox r11,rbx
2983 adcx r11,r12
2984
2985 adcx rax,rbp
2986
2987 mulx rbx,r14,r15
2988 mulx r13,r12,r8
2989 mov rdx,r15
2990 lea rsi,[64+rsi]
2991 adcx r11,r14
2992 adox r12,rbx
2993 adcx r12,rax
2994 adox r13,rbp
2995
2996DB 0x67,0x67
2997 mulx r14,r8,r8
2998 adcx r13,r8
2999 adcx r14,rbp
3000
; Last window done once rsi reaches the input end saved in [16+rsp].
3001 cmp rsi,QWORD[((8+8))+rsp]
3002 je NEAR $L$sqrx8x_outer_break
3003
; neg restores CF from the 0/-1 saved in rcx; the mov that follows does not
; touch flags.
3004 neg rcx
3005 mov rcx,-8
3006 mov r15,rbp
3007 mov r8,QWORD[64+rdi]
3008 adcx r9,QWORD[72+rdi]
3009 adcx r10,QWORD[80+rdi]
3010 adcx r11,QWORD[88+rdi]
3011 adc r12,QWORD[96+rdi]
3012 adc r13,QWORD[104+rdi]
3013 adc r14,QWORD[112+rdi]
3014 adc r15,QWORD[120+rdi]
3015 lea rbp,[rsi]
3016 lea rdi,[128+rdi]
3017 sbb rax,rax
3018
3019 mov rdx,QWORD[((-64))+rsi]
3020 mov QWORD[((16+8))+rsp],rax
3021 mov QWORD[((24+8))+rsp],rdi
3022
3023
3024 xor eax,eax
3025 jmp NEAR $L$sqrx8x_loop
3026
3027ALIGN 32
; Cross products between the window just finished (multiplier words read via
; [8+rcx*8+rsi]) and the following 8-word chunks at rbp.  rcx runs -8..-1.
3028$L$sqrx8x_loop:
3029 mov rbx,r8
3030 mulx r8,rax,QWORD[rbp]
3031 adcx rbx,rax
3032 adox r8,r9
3033
3034 mulx r9,rax,QWORD[8+rbp]
3035 adcx r8,rax
3036 adox r9,r10
3037
3038 mulx r10,rax,QWORD[16+rbp]
3039 adcx r9,rax
3040 adox r10,r11
3041
3042 mulx r11,rax,QWORD[24+rbp]
3043 adcx r10,rax
3044 adox r11,r12
3045
; DB below encodes: mulx r12,rax,QWORD[32+rbp]
3046DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3047 adcx r11,rax
3048 adox r12,r13
3049
3050 mulx r13,rax,QWORD[40+rbp]
3051 adcx r12,rax
3052 adox r13,r14
3053
3054 mulx r14,rax,QWORD[48+rbp]
3055 mov QWORD[rcx*8+rdi],rbx
; mov instead of xor so that CF/OF stay intact in mid-chain.
3056 mov ebx,0
3057 adcx r13,rax
3058 adox r14,r15
3059
; DB below encodes: mulx r15,rax,QWORD[56+rbp]
3060DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
3061 mov rdx,QWORD[8+rcx*8+rsi]
3062 adcx r14,rax
3063 adox r15,rbx
3064 adcx r15,rbx
3065
3066DB 0x67
3067 inc rcx
3068 jnz NEAR $L$sqrx8x_loop
3069
3070 lea rbp,[64+rbp]
3071 mov rcx,-8
3072 cmp rbp,QWORD[((8+8))+rsp]
3073 je NEAR $L$sqrx8x_break
3074
; rbx is 0 here: subtracting the saved 0/-1 recreates the carry in CF.
3075 sub rbx,QWORD[((16+8))+rsp]
3076DB 0x66
3077 mov rdx,QWORD[((-64))+rsi]
3078 adcx r8,QWORD[rdi]
3079 adcx r9,QWORD[8+rdi]
3080 adc r10,QWORD[16+rdi]
3081 adc r11,QWORD[24+rdi]
3082 adc r12,QWORD[32+rdi]
3083 adc r13,QWORD[40+rdi]
3084 adc r14,QWORD[48+rdi]
3085 adc r15,QWORD[56+rdi]
3086 lea rdi,[64+rdi]
3087DB 0x67
3088 sbb rax,rax
3089 xor ebx,ebx
3090 mov QWORD[((16+8))+rsp],rax
3091 jmp NEAR $L$sqrx8x_loop
3092
3093ALIGN 32
; All chunks of this row done: propagate the saved carry, then either go
; straight to the next window (rdi already at the saved position) or store
; the accumulators and reload them from the saved position in rcx.
3094$L$sqrx8x_break:
3095 xor rbp,rbp
3096 sub rbx,QWORD[((16+8))+rsp]
3097 adcx r8,rbp
3098 mov rcx,QWORD[((24+8))+rsp]
3099 adcx r9,rbp
3100 mov rdx,QWORD[rsi]
3101 adc r10,0
3102 mov QWORD[rdi],r8
3103 adc r11,0
3104 adc r12,0
3105 adc r13,0
3106 adc r14,0
3107 adc r15,0
3108 cmp rdi,rcx
3109 je NEAR $L$sqrx8x_outer_loop
3110
3111 mov QWORD[8+rdi],r9
3112 mov r9,QWORD[8+rcx]
3113 mov QWORD[16+rdi],r10
3114 mov r10,QWORD[16+rcx]
3115 mov QWORD[24+rdi],r11
3116 mov r11,QWORD[24+rcx]
3117 mov QWORD[32+rdi],r12
3118 mov r12,QWORD[32+rcx]
3119 mov QWORD[40+rdi],r13
3120 mov r13,QWORD[40+rcx]
3121 mov QWORD[48+rdi],r14
3122 mov r14,QWORD[48+rcx]
3123 mov QWORD[56+rdi],r15
3124 mov r15,QWORD[56+rcx]
3125 mov rdi,rcx
3126 jmp NEAR $L$sqrx8x_outer_loop
3127
3128ALIGN 32
; Phase 1 finished: flush the remaining accumulators and prepare phase 2.
3129$L$sqrx8x_outer_break:
3130 mov QWORD[72+rdi],r9
; DB below encodes: movq rcx,xmm3
3131DB 102,72,15,126,217
3132 mov QWORD[80+rdi],r10
3133 mov QWORD[88+rdi],r11
3134 mov QWORD[96+rdi],r12
3135 mov QWORD[104+rdi],r13
3136 mov QWORD[112+rdi],r14
3137 lea rdi,[((48+8))+rsp]
3138 mov rdx,QWORD[rcx*1+rsi]
3139
3140 mov r11,QWORD[8+rdi]
; Clears r10 plus CF and OF before the doubling (adox) / adding (adcx) chains.
3141 xor r10,r10
3142 mov r9,QWORD[((0+8))+rsp]
3143 adox r11,r11
3144 mov r12,QWORD[16+rdi]
3145 mov r13,QWORD[24+rdi]
3146
3147
3148ALIGN 32
; Phase 2: "adox x,x" doubles each stored word (OF chain) while
; "mulx rbx,rax,rdx" squares one input word and adcx adds it (CF chain).
; rcx advances by 32 per iteration; jrcxz leaves the loop when it hits zero
; without disturbing the flags.
3149$L$sqrx4x_shift_n_add:
3150 mulx rbx,rax,rdx
3151 adox r12,r12
3152 adcx rax,r10
; DB lines below encode: mov rdx,QWORD[8+rcx*1+rsi] / mov r10,QWORD[32+rdi]
3153DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
3154DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
3155 adox r13,r13
3156 adcx rbx,r11
3157 mov r11,QWORD[40+rdi]
3158 mov QWORD[rdi],rax
3159 mov QWORD[8+rdi],rbx
3160
3161 mulx rbx,rax,rdx
3162 adox r10,r10
3163 adcx rax,r12
3164 mov rdx,QWORD[16+rcx*1+rsi]
3165 mov r12,QWORD[48+rdi]
3166 adox r11,r11
3167 adcx rbx,r13
3168 mov r13,QWORD[56+rdi]
3169 mov QWORD[16+rdi],rax
3170 mov QWORD[24+rdi],rbx
3171
3172 mulx rbx,rax,rdx
3173 adox r12,r12
3174 adcx rax,r10
3175 mov rdx,QWORD[24+rcx*1+rsi]
3176 lea rcx,[32+rcx]
3177 mov r10,QWORD[64+rdi]
3178 adox r13,r13
3179 adcx rbx,r11
3180 mov r11,QWORD[72+rdi]
3181 mov QWORD[32+rdi],rax
3182 mov QWORD[40+rdi],rbx
3183
3184 mulx rbx,rax,rdx
3185 adox r10,r10
3186 adcx rax,r12
3187 jrcxz $L$sqrx4x_shift_n_add_break
; DB below encodes: mov rdx,QWORD[0+rcx*1+rsi]
3188DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
3189 adox r11,r11
3190 adcx rbx,r13
3191 mov r12,QWORD[80+rdi]
3192 mov r13,QWORD[88+rdi]
3193 mov QWORD[48+rdi],rax
3194 mov QWORD[56+rdi],rbx
3195 lea rdi,[64+rdi]
3196 nop
3197 jmp NEAR $L$sqrx4x_shift_n_add
3198
3199ALIGN 32
3200$L$sqrx4x_shift_n_add_break:
3201 adcx rbx,r13
3202 mov QWORD[48+rdi],rax
3203 mov QWORD[56+rdi],rbx
3204 lea rdi,[64+rdi]
; DB below encodes: movq rbp,xmm2; execution continues into the reduction.
3205DB 102,72,15,126,213
; __bn_sqrx8x_reduction
; MULX/ADCX/ADOX counterpart of __bn_sqr8x_reduction: reduces the buffer at
; [56+rsp] 64 bytes per outer iteration using the table at rbp and the
; multiplier at [40+rsp] (presumably the Montgomery constant n0 - confirm).
; In:  rbp = table pointer, r9 = byte length, rdi = end of the buffer,
;      xmm2/xmm3 = values moved into rbp/rcx at the end of each iteration.
; Stack slots written: [8+rsp] = rbp+r9-64 (start of last table chunk),
;      [16+rsp] = buffer end, [24+rsp] = saved carry (0/-1),
;      [32+rsp] = carry word from the previous outer iteration.
3206__bn_sqrx8x_reduction:
3207 xor eax,eax
3208 mov rbx,QWORD[((32+8))+rsp]
3209 mov rdx,QWORD[((48+8))+rsp]
3210 lea rcx,[((-64))+r9*1+rbp]
3211
3212 mov QWORD[((0+8))+rsp],rcx
3213 mov QWORD[((8+8))+rsp],rdi
3214
3215 lea rdi,[((48+8))+rsp]
3216 jmp NEAR $L$sqrx8x_reduction_loop
3217
3218ALIGN 32
; Outer loop: rdx holds the lowest buffer word on entry; load the other
; seven and compute the first multiplier rdx = word * rbx.
3219$L$sqrx8x_reduction_loop:
3220 mov r9,QWORD[8+rdi]
3221 mov r10,QWORD[16+rdi]
3222 mov r11,QWORD[24+rdi]
3223 mov r12,QWORD[32+rdi]
3224 mov r8,rdx
3225 imul rdx,rbx
3226 mov r13,QWORD[40+rdi]
3227 mov r14,QWORD[48+rdi]
3228 mov r15,QWORD[56+rdi]
3229 mov QWORD[((24+8))+rsp],rax
3230
3231 lea rdi,[64+rdi]
; Clears rsi plus CF and OF for the chains below.
3232 xor rsi,rsi
3233 mov rcx,-8
3234 jmp NEAR $L$sqrx8x_reduce
3235
3236ALIGN 32
; Eight passes (rcx = -8..-1): multiply the eight table words at rbp by rdx
; and accumulate, shifting the window by one word per pass.
3237$L$sqrx8x_reduce:
3238 mov rbx,r8
3239 mulx r8,rax,QWORD[rbp]
3240 adcx rax,rbx
3241 adox r8,r9
3242
3243 mulx r9,rbx,QWORD[8+rbp]
3244 adcx r8,rbx
3245 adox r9,r10
3246
3247 mulx r10,rbx,QWORD[16+rbp]
3248 adcx r9,rbx
3249 adox r10,r11
3250
3251 mulx r11,rbx,QWORD[24+rbp]
3252 adcx r10,rbx
3253 adox r11,r12
3254
; DB below encodes: mulx r12,rbx,QWORD[32+rbp]
3255DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
3256 mov rax,rdx
3257 mov rdx,r8
3258 adcx r11,rbx
3259 adox r12,r13
3260
; rbx = low 64 bits of r8 * [40+rsp]: the multiplier for the next pass,
; computed with mulx so the flag chains are not disturbed.
3261 mulx rdx,rbx,QWORD[((32+8))+rsp]
3262 mov rdx,rax
; Save the current multiplier for the tail loop.
3263 mov QWORD[((64+48+8))+rcx*8+rsp],rax
3264
3265 mulx r13,rax,QWORD[40+rbp]
3266 adcx r12,rax
3267 adox r13,r14
3268
3269 mulx r14,rax,QWORD[48+rbp]
3270 adcx r13,rax
3271 adox r14,r15
3272
3273 mulx r15,rax,QWORD[56+rbp]
3274 mov rdx,rbx
3275 adcx r14,rax
3276 adox r15,rsi
3277 adcx r15,rsi
3278
3279DB 0x67,0x67,0x67
3280 inc rcx
3281 jnz NEAR $L$sqrx8x_reduce
3282
3283 mov rax,rsi
; No tail when rbp is already at the last table chunk.
3284 cmp rbp,QWORD[((0+8))+rsp]
3285 jae NEAR $L$sqrx8x_no_tail
3286
; rdx = first saved multiplier.
3287 mov rdx,QWORD[((48+8))+rsp]
3288 add r8,QWORD[rdi]
3289 lea rbp,[64+rbp]
3290 mov rcx,-8
3291 adcx r9,QWORD[8+rdi]
3292 adcx r10,QWORD[16+rdi]
3293 adc r11,QWORD[24+rdi]
3294 adc r12,QWORD[32+rdi]
3295 adc r13,QWORD[40+rdi]
3296 adc r14,QWORD[48+rdi]
3297 adc r15,QWORD[56+rdi]
3298 lea rdi,[64+rdi]
3299 sbb rax,rax
3300
3301 xor rsi,rsi
3302 mov QWORD[((16+8))+rsp],rax
3303 jmp NEAR $L$sqrx8x_tail
3304
3305ALIGN 32
; Tail: apply the eight saved multipliers to the next table chunk, storing
; one finished word per pass at [rcx*8+rdi].
3306$L$sqrx8x_tail:
3307 mov rbx,r8
3308 mulx r8,rax,QWORD[rbp]
3309 adcx rbx,rax
3310 adox r8,r9
3311
3312 mulx r9,rax,QWORD[8+rbp]
3313 adcx r8,rax
3314 adox r9,r10
3315
3316 mulx r10,rax,QWORD[16+rbp]
3317 adcx r9,rax
3318 adox r10,r11
3319
3320 mulx r11,rax,QWORD[24+rbp]
3321 adcx r10,rax
3322 adox r11,r12
3323
; DB below encodes: mulx r12,rax,QWORD[32+rbp]
3324DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3325 adcx r11,rax
3326 adox r12,r13
3327
3328 mulx r13,rax,QWORD[40+rbp]
3329 adcx r12,rax
3330 adox r13,r14
3331
3332 mulx r14,rax,QWORD[48+rbp]
3333 adcx r13,rax
3334 adox r14,r15
3335
3336 mulx r15,rax,QWORD[56+rbp]
; Next saved multiplier.
3337 mov rdx,QWORD[((72+48+8))+rcx*8+rsp]
3338 adcx r14,rax
3339 adox r15,rsi
3340 mov QWORD[rcx*8+rdi],rbx
3341 mov rbx,r8
3342 adcx r15,rsi
3343
3344 inc rcx
3345 jnz NEAR $L$sqrx8x_tail
3346
3347 cmp rbp,QWORD[((0+8))+rsp]
3348 jae NEAR $L$sqrx8x_tail_done
3349
; rsi is 0 here: subtracting the saved 0/-1 recreates the carry in CF.
3350 sub rsi,QWORD[((16+8))+rsp]
3351 mov rdx,QWORD[((48+8))+rsp]
3352 lea rbp,[64+rbp]
3353 adc r8,QWORD[rdi]
3354 adc r9,QWORD[8+rdi]
3355 adc r10,QWORD[16+rdi]
3356 adc r11,QWORD[24+rdi]
3357 adc r12,QWORD[32+rdi]
3358 adc r13,QWORD[40+rdi]
3359 adc r14,QWORD[48+rdi]
3360 adc r15,QWORD[56+rdi]
3361 lea rdi,[64+rdi]
3362 sbb rax,rax
; rcx is 0 after the loop, so this resets it to -8.
3363 sub rcx,8
3364
3365 xor rsi,rsi
3366 mov QWORD[((16+8))+rsp],rax
3367 jmp NEAR $L$sqrx8x_tail
3368
3369ALIGN 32
; Table exhausted: add the carry word kept at [32+rsp], collect the overflow
; in rax, then restore CF from the saved 0/-1 for the final accumulation.
3370$L$sqrx8x_tail_done:
3371 xor rax,rax
3372 add r8,QWORD[((24+8))+rsp]
3373 adc r9,0
3374 adc r10,0
3375 adc r11,0
3376 adc r12,0
3377 adc r13,0
3378 adc r14,0
3379 adc r15,0
3380 adc rax,0
3381
3382 sub rsi,QWORD[((16+8))+rsp]
; When entered via the jae above, CF is clear and rax is zero.  The two DB
; lines inside the chain encode movq rcx,xmm3 and movq rbp,xmm2; like the
; mov between them they leave the flags untouched.
3383$L$sqrx8x_no_tail:
3384 adc r8,QWORD[rdi]
3385DB 102,72,15,126,217
3386 adc r9,QWORD[8+rdi]
3387 mov rsi,QWORD[56+rbp]
3388DB 102,72,15,126,213
3389 adc r10,QWORD[16+rdi]
3390 adc r11,QWORD[24+rdi]
3391 adc r12,QWORD[32+rdi]
3392 adc r13,QWORD[40+rdi]
3393 adc r14,QWORD[48+rdi]
3394 adc r15,QWORD[56+rdi]
3395 adc rax,0
3396
; Reload the multiplier and the lowest word of the next window.
3397 mov rbx,QWORD[((32+8))+rsp]
3398 mov rdx,QWORD[64+rcx*1+rdi]
3399
3400 mov QWORD[rdi],r8
3401 lea r8,[64+rdi]
3402 mov QWORD[8+rdi],r9
3403 mov QWORD[16+rdi],r10
3404 mov QWORD[24+rdi],r11
3405 mov QWORD[32+rdi],r12
3406 mov QWORD[40+rdi],r13
3407 mov QWORD[48+rdi],r14
3408 mov QWORD[56+rdi],r15
3409
3410 lea rdi,[64+rcx*1+rdi]
; Continue until the end of the stored block reaches the buffer end.
3411 cmp r8,QWORD[((8+8))+rsp]
3412 jb NEAR $L$sqrx8x_reduction_loop
3413 DB 0F3h,0C3h ;repret
3414
3415
3416ALIGN 32
;-----------------------------------------------------------------------
; __bn_postx4x_internal -- file-internal helper (no `global` declaration).
;
; Walks two word arrays four 64-bit words (32 bytes) at a time and writes
;       out[i] = (~a[i] AND mask) + b[i] + carry
; where a is read through rbp, b through rdi and out through rdx.
;
; Inputs read by this code:
;   rbp   pointer to array a (advanced by 32 per iteration)
;   rdi   pointer to array b (advanced by 32 per iteration)
;   rcx   signed counter; it is arithmetically shifted right by 5 and the
;         loop runs until `inc rcx` reaches zero, so it has to be negative
;         on entry -- presumably a negated byte length (TODO confirm at
;         the call sites, which are outside this chunk)
;   rax   negated on entry to form the AND mask -- presumably 0 or 1 so
;         that the mask is all-zeros or all-ones (TODO confirm)
;   xmm1  output pointer; copied into both rdx and rsi
; Values left behind: r10 = entry rcx, r9 = -(entry rcx), rsi = xmm1.
; Clobbers r8, r12-r15 and flags.
;
; NOTE(review): the very first word of a is decremented before being
; complemented, and ~(x-1) == -x, so with an all-ones mask the loop looks
; like a multi-word "b - a" (conditional subtraction); with a zero mask it
; copies b. This reading relies on a[0] being non-zero -- verify.
;-----------------------------------------------------------------------
3417__bn_postx4x_internal:
3418
3419 mov r12,QWORD[rbp]		; a[0]
3420 mov r10,rcx			; keep the entry counter in r10 ...
3421 mov r9,rcx			; ... and in r9 (negated just before return)
3422 neg rax			; rax = -rax, used as the andn mask below
3423 sar rcx,3+2			; counter /= 32: one loop pass per 32 bytes
3424
3425DB 102,72,15,126,202		; 66 48 0F 7E CA = movq rdx,xmm1
3426DB 102,72,15,126,206		; 66 48 0F 7E CE = movq rsi,xmm1
3427 dec r12			; a[0]-1, so that ~(a[0]-1) == -a[0]
3428 mov r13,QWORD[8+rbp]
3429 xor r8,r8			; saved-carry word = 0 (no carry into pass 1)
3430 mov r14,QWORD[16+rbp]
3431 mov r15,QWORD[24+rbp]
3432 jmp NEAR $L$sqrx4x_sub_entry	; first pass skips the reload below
3433
3434ALIGN 16
3435$L$sqrx4x_sub:
3436 mov r12,QWORD[rbp]		; load the next four words of a
3437 mov r13,QWORD[8+rbp]
3438 mov r14,QWORD[16+rbp]
3439 mov r15,QWORD[24+rbp]
3440$L$sqrx4x_sub_entry:
3441 andn r12,r12,rax		; r12 = ~r12 & mask
3442 lea rbp,[32+rbp]		; advance a (lea leaves flags untouched)
3443 andn r13,r13,rax
3444 andn r14,r14,rax
3445 andn r15,r15,rax
3446
3447 neg r8				; CF = (r8 != 0): restore the saved carry
3448 adc r12,QWORD[rdi]		; add b[0..3] with carry propagation
3449 adc r13,QWORD[8+rdi]
3450 adc r14,QWORD[16+rdi]
3451 adc r15,QWORD[24+rdi]
3452 mov QWORD[rdx],r12		; store the four result words
3453 lea rdi,[32+rdi]		; advance b
3454 mov QWORD[8+rdx],r13
3455 sbb r8,r8			; r8 = CF ? -1 : 0, carry saved for next pass
3456 mov QWORD[16+rdx],r14
3457 mov QWORD[24+rdx],r15
3458 lea rdx,[32+rdx]		; advance the output pointer
3459
3460 inc rcx			; count up towards zero
3461 jnz NEAR $L$sqrx4x_sub
3462
3463 neg r9				; r9 = -(entry rcx)
3464
3465 DB 0F3h,0C3h ;repret
3466
3467
3468global bn_get_bits5
3469
3470ALIGN 16
;-----------------------------------------------------------------------
; bn_get_bits5 -- extract a 5-bit field from a bit string.
; In:    rcx = base address of the data (read as little-endian 16-bit
;              words), edx = bit offset of the field
; Out:   eax = the 5 bits starting at that offset (value 0..31)
; Clobb: rcx, rdx, r10, r11, flags
; Leaf routine: no stack usage, no callee-saved registers touched.
;-----------------------------------------------------------------------
3471bn_get_bits5:
3472
3473 lea r10,[rcx]			; r10 = base
3474 lea r11,[1+rcx]		; r11 = base + 1 byte
3475 mov ecx,edx
3476 shr edx,4			; edx = index of the 16-bit word (offset / 16)
3477 and ecx,15			; ecx = bit position inside that word
3478 lea eax,[((-8))+rcx]		; eax = bit position - 8, for the shifted load
3479 cmp ecx,11
3480 cmova r10,r11			; position > 11: 5 bits would run past bit 15,
3481 cmova ecx,eax			;   so load from one byte later and shift 8 less
3482 movzx eax,WORD[rdx*2+r10]	; zero-extended 16-bit load
3483 shr eax,cl			; move the field down to bit 0
3484 and eax,31			; keep 5 bits
3485 DB 0F3h,0C3h ;repret
3486
3487
3488
3489global bn_scatter5
3490
3491ALIGN 16
;-----------------------------------------------------------------------
; bn_scatter5 -- store a vector of 64-bit words into one column of a table.
; In:    rcx = source words, edx = number of words to store,
;        r8  = table base, r9 = column index
; Word i of the source is written to r8 + r9*8 + i*256, i.e. consecutive
; source words land 256 bytes apart. This is the layout bn_gather5 reads
; back (it advances through the table in 256-byte rows).
; Returns immediately when edx is zero.
; Clobb: rax, rcx, edx, r8, flags. Leaf routine, no stack usage.
;-----------------------------------------------------------------------
3492bn_scatter5:
3493
3494 cmp edx,0
3495 jz NEAR $L$scatter_epilogue	; nothing to store
3496 lea r8,[r9*8+r8]		; r8 = address of the selected column in row 0
3497$L$scatter:
3498 mov rax,QWORD[rcx]		; next source word
3499 lea rcx,[8+rcx]
3500 mov QWORD[r8],rax		; store into the current row
3501 lea r8,[256+r8]		; step to the same column of the next row
3502 sub edx,1
3503 jnz NEAR $L$scatter
3504$L$scatter_epilogue:
3505 DB 0F3h,0C3h ;repret
3506
3507
3508
3509global bn_gather5
3510
3511ALIGN 32
;-----------------------------------------------------------------------
; bn_gather5 -- read one column out of a table laid out in 256-byte rows
; (32 qword slots per row), selecting the column with SSE2 masks.
; In:    rcx = output buffer, edx = number of 64-bit words to produce,
;        r8  = table base, r9d = column index
; Out:   edx words written to [rcx]
; Clobb: rax, rcx, edx, r10, r11, xmm0-xmm5, flags
; Stack: 0x108 bytes reserved, then rsp rounded down to 16; the original
;        rsp is kept in r10 and restored before returning.
;
; Phase 1 builds sixteen 128-bit masks on the stack. Mask k has its low
; qword all-ones iff index == 2k and its high qword all-ones iff
; index == 2k+1 (counters start at $L$inc = {0,0,1,1} and step by
; {2,2,2,2}). Phase 2 ANDs every 16-byte slice of a row with its mask and
; ORs the results, so every slot of the row is loaded whatever the index
; is -- presumably to keep the memory access pattern independent of the
; index (TODO confirm intent against upstream).
;
; Notes:
;  * movdqa is used on the table, so r8 must be 16-byte aligned.
;  * The gather loop tests edx only after the first pass, so the caller
;    must pass edx >= 1.
;  * The two DB-encoded prologue instructions are described byte-for-byte
;    by the unwind codes at $L$SEH_info_bn_gather5; keep them in sync.
;-----------------------------------------------------------------------
3512bn_gather5:
3513$L$SEH_begin_bn_gather5:
3514
3515
3516DB 0x4c,0x8d,0x14,0x24		; lea r10,[rsp]  (remember caller's rsp)
3517DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00	; sub rsp,0x108
3518 lea rax,[$L$inc]
3519 and rsp,-16			; movdqa stores below need 16-byte alignment
3520
3521 movd xmm5,r9d			; xmm5 = index in dword 0
3522 movdqa xmm0,XMMWORD[rax]	; xmm0 = {0,0,1,1}  counters for mask 0
3523 movdqa xmm1,XMMWORD[16+rax]	; xmm1 = {2,2,2,2}  counter step
3524 lea r11,[128+r8]		; r11 = table + 128 (rows addressed -128..+112)
3525 lea rax,[128+rsp]		; rax = mask area + 128 (same biasing)
3526
3527 pshufd xmm5,xmm5,0		; broadcast index to all four dwords
3528 movdqa xmm4,xmm1		; xmm4 keeps the {2,2,2,2} step throughout
3529 movdqa xmm2,xmm1
3530 paddd xmm1,xmm0		; counters for mask 1
3531 pcmpeqd xmm0,xmm5		; mask 0
3532 movdqa xmm3,xmm4
3533
; The pattern below repeats: add the step to get the next counters, compare
; the previous counters with the index to form a mask, store the mask
; before that, and reload the freed register with the step.
3534 paddd xmm2,xmm1
3535 pcmpeqd xmm1,xmm5
3536 movdqa XMMWORD[(-128)+rax],xmm0
3537 movdqa xmm0,xmm4
3538
3539 paddd xmm3,xmm2
3540 pcmpeqd xmm2,xmm5
3541 movdqa XMMWORD[(-112)+rax],xmm1
3542 movdqa xmm1,xmm4
3543
3544 paddd xmm0,xmm3
3545 pcmpeqd xmm3,xmm5
3546 movdqa XMMWORD[(-96)+rax],xmm2
3547 movdqa xmm2,xmm4
3548 paddd xmm1,xmm0
3549 pcmpeqd xmm0,xmm5
3550 movdqa XMMWORD[(-80)+rax],xmm3
3551 movdqa xmm3,xmm4
3552
3553 paddd xmm2,xmm1
3554 pcmpeqd xmm1,xmm5
3555 movdqa XMMWORD[(-64)+rax],xmm0
3556 movdqa xmm0,xmm4
3557
3558 paddd xmm3,xmm2
3559 pcmpeqd xmm2,xmm5
3560 movdqa XMMWORD[(-48)+rax],xmm1
3561 movdqa xmm1,xmm4
3562
3563 paddd xmm0,xmm3
3564 pcmpeqd xmm3,xmm5
3565 movdqa XMMWORD[(-32)+rax],xmm2
3566 movdqa xmm2,xmm4
3567 paddd xmm1,xmm0
3568 pcmpeqd xmm0,xmm5
3569 movdqa XMMWORD[(-16)+rax],xmm3
3570 movdqa xmm3,xmm4
3571
3572 paddd xmm2,xmm1
3573 pcmpeqd xmm1,xmm5
3574 movdqa XMMWORD[rax],xmm0
3575 movdqa xmm0,xmm4
3576
3577 paddd xmm3,xmm2
3578 pcmpeqd xmm2,xmm5
3579 movdqa XMMWORD[16+rax],xmm1
3580 movdqa xmm1,xmm4
3581
3582 paddd xmm0,xmm3
3583 pcmpeqd xmm3,xmm5
3584 movdqa XMMWORD[32+rax],xmm2
3585 movdqa xmm2,xmm4
3586 paddd xmm1,xmm0
3587 pcmpeqd xmm0,xmm5
3588 movdqa XMMWORD[48+rax],xmm3
3589 movdqa xmm3,xmm4
3590
3591 paddd xmm2,xmm1
3592 pcmpeqd xmm1,xmm5
3593 movdqa XMMWORD[64+rax],xmm0
3594 movdqa xmm0,xmm4
3595
3596 paddd xmm3,xmm2
3597 pcmpeqd xmm2,xmm5
3598 movdqa XMMWORD[80+rax],xmm1
3599 movdqa xmm1,xmm4
3600
3601 paddd xmm0,xmm3
3602 pcmpeqd xmm3,xmm5
3603 movdqa XMMWORD[96+rax],xmm2
3604 movdqa xmm2,xmm4
3605 movdqa XMMWORD[112+rax],xmm3	; sixteenth and last mask
3606 jmp NEAR $L$gather
3607
3608ALIGN 32
; One pass per output word: reduce a whole 256-byte row to a single qword.
3609$L$gather:
3610 pxor xmm4,xmm4			; two independent OR accumulators
3611 pxor xmm5,xmm5
3612 movdqa xmm0,XMMWORD[((-128))+r11]
3613 movdqa xmm1,XMMWORD[((-112))+r11]
3614 movdqa xmm2,XMMWORD[((-96))+r11]
3615 pand xmm0,XMMWORD[((-128))+rax]
3616 movdqa xmm3,XMMWORD[((-80))+r11]
3617 pand xmm1,XMMWORD[((-112))+rax]
3618 por xmm4,xmm0
3619 pand xmm2,XMMWORD[((-96))+rax]
3620 por xmm5,xmm1
3621 pand xmm3,XMMWORD[((-80))+rax]
3622 por xmm4,xmm2
3623 por xmm5,xmm3
3624 movdqa xmm0,XMMWORD[((-64))+r11]
3625 movdqa xmm1,XMMWORD[((-48))+r11]
3626 movdqa xmm2,XMMWORD[((-32))+r11]
3627 pand xmm0,XMMWORD[((-64))+rax]
3628 movdqa xmm3,XMMWORD[((-16))+r11]
3629 pand xmm1,XMMWORD[((-48))+rax]
3630 por xmm4,xmm0
3631 pand xmm2,XMMWORD[((-32))+rax]
3632 por xmm5,xmm1
3633 pand xmm3,XMMWORD[((-16))+rax]
3634 por xmm4,xmm2
3635 por xmm5,xmm3
3636 movdqa xmm0,XMMWORD[r11]
3637 movdqa xmm1,XMMWORD[16+r11]
3638 movdqa xmm2,XMMWORD[32+r11]
3639 pand xmm0,XMMWORD[rax]
3640 movdqa xmm3,XMMWORD[48+r11]
3641 pand xmm1,XMMWORD[16+rax]
3642 por xmm4,xmm0
3643 pand xmm2,XMMWORD[32+rax]
3644 por xmm5,xmm1
3645 pand xmm3,XMMWORD[48+rax]
3646 por xmm4,xmm2
3647 por xmm5,xmm3
3648 movdqa xmm0,XMMWORD[64+r11]
3649 movdqa xmm1,XMMWORD[80+r11]
3650 movdqa xmm2,XMMWORD[96+r11]
3651 pand xmm0,XMMWORD[64+rax]
3652 movdqa xmm3,XMMWORD[112+r11]
3653 pand xmm1,XMMWORD[80+rax]
3654 por xmm4,xmm0
3655 pand xmm2,XMMWORD[96+rax]
3656 por xmm5,xmm1
3657 pand xmm3,XMMWORD[112+rax]
3658 por xmm4,xmm2
3659 por xmm5,xmm3
3660 por xmm4,xmm5			; merge the two accumulators
3661 lea r11,[256+r11]		; next table row
3662 pshufd xmm0,xmm4,0x4e		; swap the two qword halves
3663 por xmm0,xmm4			; low qword = selected slot (other half is 0)
3664 movq QWORD[rcx],xmm0		; emit one output word
3665 lea rcx,[8+rcx]
3666 sub edx,1
3667 jnz NEAR $L$gather
3668
3669 lea rsp,[r10]			; restore the caller's rsp saved at entry
3670 DB 0F3h,0C3h ;repret
3671$L$SEH_end_bn_gather5:
3672
3673
3674ALIGN 64
; Constants for the mask generators. Code loads them with movdqa from
; [$L$inc] and [$L$inc+16], so the label has to stay 16-byte aligned.
3675$L$inc:
3676 DD 0,0,1,1			; initial counters: dwords 0-1 = 0, dwords 2-3 = 1
3677 DD 2,2,2,2			; per-step increment added to the counters
; NUL-terminated ASCII banner, not referenced by any code in this chunk:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
3678DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
3679DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
3680DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
3681DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
3682DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
3683DB 112,101,110,115,115,108,46,111,114,103,62,0
3684EXTERN __imp_RtlVirtualUnwind
3685
3686ALIGN 16
;-----------------------------------------------------------------------
; mul_handler -- Win64 language-specific exception handler named by the
; $L$SEH_info_* records (all except bn_gather5) in .xdata.
;
; In (Win64 handler convention): r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
;        (rcx and rdx are not read before being overwritten).
; Out:   eax = 1.
;
; It works out how far the interrupted routine had progressed by comparing
; the faulting Rip with the three RVAs stored as handler data (prologue
; label, body label, epilogue label), rebuilds the caller's Rsp and the
; callee-saved registers inside *r8, copies that CONTEXT over
; disp->ContextRecord and finally calls RtlVirtualUnwind.
;
; The numeric offsets are field offsets of the Win64 CONTEXT and
; DISPATCHER_CONTEXT structures (see the Windows SDK); they are annotated
; inline below.
;-----------------------------------------------------------------------
3687mul_handler:
3688 push rsi
3689 push rdi
3690 push rbx
3691 push rbp
3692 push r12
3693 push r13
3694 push r14
3695 push r15
3696 pushfq
3697 sub rsp,64			; 32-byte shadow space + 4 stack args for the call
3698
3699 mov rax,QWORD[120+r8]		; context->Rax
3700 mov rbx,QWORD[248+r8]		; context->Rip
3701
3702 mov rsi,QWORD[8+r9]		; disp->ImageBase
3703 mov r11,QWORD[56+r9]		; disp->HandlerData (the three RVAs)
3704
3705 mov r10d,DWORD[r11]		; HandlerData[0]
3706 lea r10,[r10*1+rsi]		; -> absolute address of the prologue label
3707 cmp rbx,r10
3708 jb NEAR $L$common_seh_tail	; Rip before it: nothing pushed yet, and
				;   context->Rax is used as the frame address
3709
3710 mov r10d,DWORD[4+r11]		; HandlerData[1]
3711 lea r10,[r10*1+rsi]		; -> absolute address of the body label
3712 cmp rbx,r10
3713 jb NEAR $L$common_pop_regs	; in the prologue: pushes sit just below rax
3714
3715 mov rax,QWORD[152+r8]		; context->Rsp
3716
3717 mov r10d,DWORD[8+r11]		; HandlerData[2]
3718 lea r10,[r10*1+rsi]		; -> absolute address of the epilogue label
3719 cmp rbx,r10
3720 jae NEAR $L$common_seh_tail	; at/after the epilogue label: registers
				;   are not reloaded, Rsp is taken as-is
3721
3722 lea r10,[$L$mul_epilogue]
3723 cmp rbx,r10
3724 ja NEAR $L$body_40		; Rip past $L$mul_epilogue: other frame layout
3725
3726 mov r10,QWORD[192+r8]		; context->R9 (word count in bn_mul_mont_gather5)
3727 mov rax,QWORD[8+r10*8+rax]	; saved stack pointer stored at [8+r9*8+rsp]
3728
3729 jmp NEAR $L$common_pop_regs
3730
3731$L$body_40:
3732 mov rax,QWORD[40+rax]		; saved stack pointer kept at [40+rsp] --
				;   presumably by the routines located after
				;   $L$mul_epilogue (not visible here; TODO confirm)
3733$L$common_pop_regs:
; rax = the routine's rsp at entry; its six pushes lie directly below it.
3734 mov rbx,QWORD[((-8))+rax]
3735 mov rbp,QWORD[((-16))+rax]
3736 mov r12,QWORD[((-24))+rax]
3737 mov r13,QWORD[((-32))+rax]
3738 mov r14,QWORD[((-40))+rax]
3739 mov r15,QWORD[((-48))+rax]
3740 mov QWORD[144+r8],rbx		; context->Rbx
3741 mov QWORD[160+r8],rbp		; context->Rbp
3742 mov QWORD[216+r8],r12		; context->R12
3743 mov QWORD[224+r8],r13		; context->R13
3744 mov QWORD[232+r8],r14		; context->R14
3745 mov QWORD[240+r8],r15		; context->R15
3746
3747$L$common_seh_tail:
3748 mov rdi,QWORD[8+rax]		; rdi/rsi were spilled to the caller-provided
3749 mov rsi,QWORD[16+rax]		;   home slots by the "WIN64 prologue"
3750 mov QWORD[152+r8],rax		; context->Rsp
3751 mov QWORD[168+r8],rsi		; context->Rsi
3752 mov QWORD[176+r8],rdi		; context->Rdi
3753
3754 mov rdi,QWORD[40+r9]		; destination: disp->ContextRecord
3755 mov rsi,r8			; source: the CONTEXT patched above
3756 mov ecx,154			; 154 qwords = 1232 bytes
3757 DD 0xa548f3fc			; bytes FC F3 48 A5 = cld ; rep movsq
3758
; RtlVirtualUnwind: args 1-4 in rcx/rdx/r8/r9, args 5-8 on the stack.
3759 mov rsi,r9
3760 xor rcx,rcx			; arg1 = 0 (handler type)
3761 mov rdx,QWORD[8+rsi]		; arg2 = disp->ImageBase
3762 mov r8,QWORD[rsi]		; arg3 = disp->ControlPc
3763 mov r9,QWORD[16+rsi]		; arg4 = disp->FunctionEntry
3764 mov r10,QWORD[40+rsi]		; disp->ContextRecord
3765 lea r11,[56+rsi]		; &disp->HandlerData
3766 lea r12,[24+rsi]		; &disp->EstablisherFrame
3767 mov QWORD[32+rsp],r10		; arg5
3768 mov QWORD[40+rsp],r11		; arg6
3769 mov QWORD[48+rsp],r12		; arg7
3770 mov QWORD[56+rsp],rcx		; arg8 = 0
3771 call QWORD[__imp_RtlVirtualUnwind]
3772
3773 mov eax,1			; return value 1 (ExceptionContinueSearch)
3774 add rsp,64
3775 popfq
3776 pop r15
3777 pop r14
3778 pop r13
3779 pop r12
3780 pop rbp
3781 pop rbx
3782 pop rdi
3783 pop rsi
3784 DB 0F3h,0C3h ;repret
3785
3786
3787section .pdata rdata align=4
3788ALIGN 4
; Win64 function table: one record of three image-relative addresses per
; routine -- start label, end label, and the matching unwind-info record
; defined in the .xdata section below.
3789 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
3790 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
3791 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
3792
3793 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
3794 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
3795 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
3796
3797 DD $L$SEH_begin_bn_power5 wrt ..imagebase
3798 DD $L$SEH_end_bn_power5 wrt ..imagebase
3799 DD $L$SEH_info_bn_power5 wrt ..imagebase
3800 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase
3801 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase
3802 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase
3803
3804 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase
3805 DD $L$SEH_end_bn_powerx5 wrt ..imagebase
3806 DD $L$SEH_info_bn_powerx5 wrt ..imagebase
3807 DD $L$SEH_begin_bn_gather5 wrt ..imagebase
3808 DD $L$SEH_end_bn_gather5 wrt ..imagebase
3809 DD $L$SEH_info_bn_gather5 wrt ..imagebase
3810
3810
3811section .xdata rdata align=8
3812ALIGN 8
; Unwind-info records referenced from .pdata.
;
; The first five records share one shape:
;   DB 9,0,0,0  header: first byte 9 = version 1 with flag bit 1 set
;               (exception handler present); prologue size 0, no unwind
;               codes, no frame register
;   DD handler  image-relative address of mul_handler
;   DD a,b,c    handler data read by mul_handler as HandlerData[0..2]:
;               prologue label, body label, epilogue label
3813$L$SEH_info_bn_mul_mont_gather5:
3814DB 9,0,0,0
3815 DD mul_handler wrt ..imagebase
; Prologue and body entries both name $L$mul_body for this routine.
3816 DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
3817ALIGN 8
3818$L$SEH_info_bn_mul4x_mont_gather5:
3819DB 9,0,0,0
3820 DD mul_handler wrt ..imagebase
3821 DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
3822ALIGN 8
3823$L$SEH_info_bn_power5:
3824DB 9,0,0,0
3825 DD mul_handler wrt ..imagebase
3826 DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
3827ALIGN 8
3828$L$SEH_info_bn_mulx4x_mont_gather5:
3829DB 9,0,0,0
3830 DD mul_handler wrt ..imagebase
3831 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
3832ALIGN 8
3833$L$SEH_info_bn_powerx5:
3834DB 9,0,0,0
3835 DD mul_handler wrt ..imagebase
3836 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase
3837ALIGN 8
; bn_gather5 has no handler; its prologue is described with plain unwind
; codes that must match the DB-encoded instructions at its entry
; (lea r10,[rsp] = 4 bytes, sub rsp,0x108 = 7 bytes).
3838$L$SEH_info_bn_gather5:
3839DB 0x01,0x0b,0x03,0x0a		; version 1, no flags; prologue = 0x0b bytes;
				;   3 code slots; frame register = r10
3840DB 0x0b,0x01,0x21,0x00		; at offset 0x0b: large alloc, 0x21*8 = 0x108
				;   (sub rsp,0x108)
3841DB 0x04,0xa3,0x00,0x00		; at offset 0x04: set frame register
				;   (lea r10,[rsp]); last two bytes are padding
3842ALIGN 8
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette