VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.4/crypto/genasm-nasm/x86_64-mont.S@ 103112

此檔案在 r103112 之前的最後變更為 r99371,由 vboxsync 於 23 個月前提交

openssl-3.1.0: After generating headers and asm (kmk recreate-headers recreate-headers)

檔案大小: 22.5 KB
 
; NASM (Intel operand order) source for x86-64. The `wrt ..imagebase`
; references and the .pdata/.xdata sections further down indicate a Win64
; object target.
; `default rel` makes plain memory references RIP-relative.
1default rel
; XMMWORD/YMMWORD/ZMMWORD are defined as empty macros, so the size keywords
; written in front of SSE memory operands below expand to nothing.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
; Defined outside this file. The dword at byte offset 8 is read below to
; choose between the mul4x/sqr8x paths and the mulx-based paths.
8EXTERN OPENSSL_ia32cap_P
9
; The only `global` declaration in the visible file; the other routines are
; reached by jumping to their $L$..._enter labels from bn_mul_mont.
10global bn_mul_mont
11
; ---------------------------------------------------------------------------
; bn_mul_mont -- exported entry point and generic (one word per step) path.
;
; Entry (Win64): arguments in rcx, rdx, r8, r9, [40+rsp], [48+rsp]. The
; prologue saves rdi/rsi at [8+rsp]/[16+rsp] and moves the arguments into
; rdi, rsi, rdx, rcx, r8, r9.
; NOTE(review): presumably these are (result, a, b, modulus, pointer to n0,
; word count) as in OpenSSL's C prototype -- the prototype is not visible in
; this file, confirm against the header.
;
; Names used in the comments of this block only:
;   r[] = words at rdi (written), a[] = words at rsi, b[] = words at rdx/r12,
;   n[] = words at rcx, n0 = qword loaded from [r8], num = r9,
;   t[] = scratch vector built on the stack at rsp.
;
; Returns rax = 1. rbx, rbp, r12-r15, rdi, rsi are restored before return.
; ---------------------------------------------------------------------------
12ALIGN 16
13bn_mul_mont:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_bn_mul_mont:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
26
27 mov r9d,r9d ; zero-extend the 32-bit count into r9
28 mov rax,rsp ; rax = stack pointer at entry, saved in the frame later
29
; Dispatch on the word count and on whether both inputs are the same array.
30 test r9d,3
31 jnz NEAR $L$mul_enter ; num not a multiple of 4 -> generic loop below
32 cmp r9d,8
33 jb NEAR $L$mul_enter ; num < 8 -> generic loop below
34 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))] ; capability word, tested at $L$mul4x_enter
35 cmp rdx,rsi
36 jne NEAR $L$mul4x_enter ; different input pointers -> 4x multiply
37 test r9d,7
38 jz NEAR $L$sqr8x_enter ; same pointer and num a multiple of 8 -> squaring path
39 jmp NEAR $L$mul4x_enter
40
41ALIGN 16
42$L$mul_enter:
; Save the callee-saved registers; the handler and epilogue expect them at
; [-8..-48] relative to the entry rsp, in this push order.
43 push rbx
44
45 push rbp
46
47 push r12
48
49 push r13
50
51 push r14
52
53 push r15
54
55
; Reserve the scratch frame: r10 = (rsp - 8*num - 16) rounded down to a
; multiple of 1024.
56 neg r9
57 mov r11,rsp
58 lea r10,[((-16))+r9*8+rsp]
59 neg r9 ; r9 = num again
60 and r10,-1024
61
62
63
64
65
66
67
68
69
; Move rsp down to r10 one 4096-byte page at a time, reading a qword from
; each page on the way so that no page is skipped.
70 sub r11,r10
71 and r11,-4096
72 lea rsp,[r11*1+r10]
73 mov r11,QWORD[rsp]
74 cmp rsp,r10
75 ja NEAR $L$mul_page_walk
76 jmp NEAR $L$mul_page_walk_done
77
78ALIGN 16
79$L$mul_page_walk:
80 lea rsp,[((-4096))+rsp]
81 mov r11,QWORD[rsp]
82 cmp rsp,r10
83 ja NEAR $L$mul_page_walk
84$L$mul_page_walk_done:
85
86 mov QWORD[8+r9*8+rsp],rax ; entry rsp stored just above t[0..num]; mul_handler reads this slot
87
88$L$mul_body:
; First pass (outer index r14 = 0): t[] = a[]*b[0] + n[]*m, where
; m = (low word of a[0]*b[0]) * n0, kept in rbp.
89 mov r12,rdx ; r12 = b pointer (rdx is clobbered by mul)
90 mov r8,QWORD[r8] ; r8 = n0 value
91 mov rbx,QWORD[r12] ; rbx = b[0]
92 mov rax,QWORD[rsi] ; rax = a[0]
93
94 xor r14,r14 ; r14 = outer index
95 xor r15,r15 ; r15 = inner index
96
97 mov rbp,r8
98 mul rbx ; rdx:rax = a[0]*b[0]
99 mov r10,rax
100 mov rax,QWORD[rcx] ; rax = n[0]
101
102 imul rbp,r10 ; rbp = m for this pass
103 mov r11,rdx
104
105 mul rbp ; rdx:rax = n[0]*m
106 add r10,rax ; the low-word sum is not stored, only its carry is kept
107 mov rax,QWORD[8+rsi]
108 adc rdx,0
109 mov r13,rdx
110
111 lea r15,[1+r15]
112 jmp NEAR $L$1st_enter
113
114ALIGN 16
115$L$1st:
116 add r13,rax ; add low half of the n[]*m product
117 mov rax,QWORD[r15*8+rsi] ; next a[] word
118 adc rdx,0
119 add r13,r11 ; add the a[]*b[0] column
120 mov r11,r10
121 adc rdx,0
122 mov QWORD[((-16))+r15*8+rsp],r13 ; store one finished word of t[]
123 mov r13,rdx
124
125$L$1st_enter:
126 mul rbx ; a[j]*b[0]
127 add r11,rax
128 mov rax,QWORD[r15*8+rcx] ; n[j]
129 adc rdx,0
130 lea r15,[1+r15] ; lea keeps the flags untouched
131 mov r10,rdx
132
133 mul rbp ; n[j]*m
134 cmp r15,r9
135 jne NEAR $L$1st
136
; Tail of the first pass: flush the last column and the two top words.
137 add r13,rax
138 mov rax,QWORD[rsi] ; preload a[0] for the next pass
139 adc rdx,0
140 add r13,r11
141 adc rdx,0
142 mov QWORD[((-16))+r15*8+rsp],r13
143 mov r13,rdx
144 mov r11,r10
145
146 xor rdx,rdx
147 add r13,r11
148 adc rdx,0
149 mov QWORD[((-8))+r9*8+rsp],r13 ; t[num-1]
150 mov QWORD[r9*8+rsp],rdx ; t[num] = top carry
151
152 lea r14,[1+r14]
153 jmp NEAR $L$outer
154ALIGN 16
155$L$outer:
; Passes 1..num-1: t[] = (t[] + a[]*b[i] + n[]*m) shifted down one word,
; with m recomputed from the new low word on every pass.
156 mov rbx,QWORD[r14*8+r12] ; rbx = b[i]
157 xor r15,r15
158 mov rbp,r8
159 mov r10,QWORD[rsp] ; t[0]
160 mul rbx ; a[0]*b[i] (rax was preloaded with a[0])
161 add r10,rax
162 mov rax,QWORD[rcx]
163 adc rdx,0
164
165 imul rbp,r10 ; rbp = m for this pass
166 mov r11,rdx
167
168 mul rbp
169 add r10,rax ; low-word sum is not stored, only its carry is kept
170 mov rax,QWORD[8+rsi]
171 adc rdx,0
172 mov r10,QWORD[8+rsp] ; t[1]
173 mov r13,rdx
174
175 lea r15,[1+r15]
176 jmp NEAR $L$inner_enter
177
178ALIGN 16
179$L$inner:
180 add r13,rax
181 mov rax,QWORD[r15*8+rsi]
182 adc rdx,0
183 add r13,r10
184 mov r10,QWORD[r15*8+rsp] ; next word of the previous t[]
185 adc rdx,0
186 mov QWORD[((-16))+r15*8+rsp],r13 ; store one finished word of the new t[]
187 mov r13,rdx
188
189$L$inner_enter:
190 mul rbx
191 add r11,rax
192 mov rax,QWORD[r15*8+rcx]
193 adc rdx,0
194 add r10,r11
195 mov r11,rdx
196 adc r11,0
197 lea r15,[1+r15]
198
199 mul rbp
200 cmp r15,r9
201 jne NEAR $L$inner
202
203 add r13,rax
204 mov rax,QWORD[rsi] ; preload a[0] for the next pass
205 adc rdx,0
206 add r13,r10
207 mov r10,QWORD[r15*8+rsp] ; previous t[num]
208 adc rdx,0
209 mov QWORD[((-16))+r15*8+rsp],r13
210 mov r13,rdx
211
212 xor rdx,rdx
213 add r13,r11
214 adc rdx,0
215 add r13,r10
216 adc rdx,0
217 mov QWORD[((-8))+r9*8+rsp],r13
218 mov QWORD[r9*8+rsp],rdx
219
220 lea r14,[1+r14]
221 cmp r14,r9
222 jb NEAR $L$outer
223
; Final reduction: r[] = t[] - n[] with borrow propagation. The xor below
; clears CF for the first sbb; mov/lea/dec inside the loop leave CF alone.
224 xor r14,r14
225 mov rax,QWORD[rsp]
226 mov r15,r9
227
228ALIGN 16
229$L$sub: sbb rax,QWORD[r14*8+rcx]
230 mov QWORD[r14*8+rdi],rax
231 mov rax,QWORD[8+r14*8+rsp]
232 lea r14,[1+r14]
233 dec r15
234 jnz NEAR $L$sub
235
; rax holds t[num] here. After the sbb it is 0 or all-ones and serves as a
; select mask; rbx is its complement.
236 sbb rax,0
237 mov rbx,-1
238 xor rbx,rax
239 xor r14,r14
240 mov r15,r9
241
242$L$copy:
; Branch-free select per word: r[i] = (r[i] & rbx) | (t[i] & rax).
; Each t[i] is overwritten with the value of r9 after it has been read.
243 mov rcx,QWORD[r14*8+rdi]
244 mov rdx,QWORD[r14*8+rsp]
245 and rcx,rbx
246 and rdx,rax
247 mov QWORD[r14*8+rsp],r9
248 or rdx,rcx
249 mov QWORD[r14*8+rdi],rdx
250 lea r14,[1+r14]
251 sub r15,1
252 jnz NEAR $L$copy
253
254 mov rsi,QWORD[8+r9*8+rsp] ; rsi = entry rsp saved after the page walk
255
256 mov rax,1 ; return value
257 mov r15,QWORD[((-48))+rsi]
258
259 mov r14,QWORD[((-40))+rsi]
260
261 mov r13,QWORD[((-32))+rsi]
262
263 mov r12,QWORD[((-24))+rsi]
264
265 mov rbp,QWORD[((-16))+rsi]
266
267 mov rbx,QWORD[((-8))+rsi]
268
269 lea rsp,[rsi]
270
271$L$mul_epilogue:
272 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
273 mov rsi,QWORD[16+rsp]
274 DB 0F3h,0C3h ;repret
275
276$L$SEH_end_bn_mul_mont:
277
; ---------------------------------------------------------------------------
; bn_mul4x_mont -- same computation as the generic loop in bn_mul_mont, with
; the inner loops unrolled to four words per iteration.
;
; Not declared `global` in the visible file; bn_mul_mont jumps to
; $L$mul4x_enter with the arguments already in rdi, rsi, rdx, rcx, r8, r9,
; rax = entry rsp and r11d = dword at OPENSSL_ia32cap_P+8.
; The Win64 prologue below performs the same register shuffle for a direct
; call. NOTE(review): on that path r11d is not loaded before the test at
; $L$mul4x_enter -- confirm nothing calls this label directly.
;
; Comment names: r[] = output (rdi), a[] = rsi, b[] = rdx/r12, n[] = rcx,
; n0 = qword at [r8], num = r9, t[] = stack scratch at rsp.
; Returns rax = 1.
; ---------------------------------------------------------------------------
278ALIGN 16
279bn_mul4x_mont:
280 mov QWORD[8+rsp],rdi ;WIN64 prologue
281 mov QWORD[16+rsp],rsi
282 mov rax,rsp
283$L$SEH_begin_bn_mul4x_mont:
284 mov rdi,rcx
285 mov rsi,rdx
286 mov rdx,r8
287 mov rcx,r9
288 mov r8,QWORD[40+rsp]
289 mov r9,QWORD[48+rsp]
290
291
292
293 mov r9d,r9d ; zero-extend the 32-bit count
294 mov rax,rsp
295
296$L$mul4x_enter:
; If bits 8 and 19 (mask 0x80100) of the capability word are both set, use
; the mulx/adcx/adox implementation instead.
; NOTE(review): presumably the BMI2 and ADX feature bits -- confirm against
; the OPENSSL_ia32cap_P documentation.
297 and r11d,0x80100
298 cmp r11d,0x80100
299 je NEAR $L$mulx4x_enter
300 push rbx
301
302 push rbp
303
304 push r12
305
306 push r13
307
308 push r14
309
310 push r15
311
312
; Frame: r10 = (rsp - 8*num - 32) rounded down to a multiple of 1024, then
; rsp is walked down to it page by page (each page is read once).
313 neg r9
314 mov r11,rsp
315 lea r10,[((-32))+r9*8+rsp]
316 neg r9
317 and r10,-1024
318
319 sub r11,r10
320 and r11,-4096
321 lea rsp,[r11*1+r10]
322 mov r11,QWORD[rsp]
323 cmp rsp,r10
324 ja NEAR $L$mul4x_page_walk
325 jmp NEAR $L$mul4x_page_walk_done
326
327$L$mul4x_page_walk:
328 lea rsp,[((-4096))+rsp]
329 mov r11,QWORD[rsp]
330 cmp rsp,r10
331 ja NEAR $L$mul4x_page_walk
332$L$mul4x_page_walk_done:
333
334 mov QWORD[8+r9*8+rsp],rax ; entry rsp; read back by the epilogue and by mul_handler
335
336$L$mul4x_body:
337 mov QWORD[16+r9*8+rsp],rdi ; park the output pointer: rdi is used as an accumulator below
338 mov r12,rdx ; r12 = b pointer
339 mov r8,QWORD[r8] ; r8 = n0 value
340 mov rbx,QWORD[r12] ; rbx = b[0]
341 mov rax,QWORD[rsi] ; rax = a[0]
342
343 xor r14,r14 ; outer index
344 xor r15,r15 ; inner index
345
; First pass, columns 0 and 1. rbp = m = (low word of a[0]*b[0]) * n0.
346 mov rbp,r8
347 mul rbx
348 mov r10,rax
349 mov rax,QWORD[rcx]
350
351 imul rbp,r10
352 mov r11,rdx
353
354 mul rbp
355 add r10,rax ; low-word sum is not stored, only its carry is kept
356 mov rax,QWORD[8+rsi]
357 adc rdx,0
358 mov rdi,rdx
359
360 mul rbx
361 add r11,rax
362 mov rax,QWORD[8+rcx]
363 adc rdx,0
364 mov r10,rdx
365
366 mul rbp
367 add rdi,rax
368 mov rax,QWORD[16+rsi]
369 adc rdx,0
370 add rdi,r11
371 lea r15,[4+r15]
372 adc rdx,0
373 mov QWORD[rsp],rdi ; t[0]
374 mov r13,rdx
375 jmp NEAR $L$1st4x
376ALIGN 16
377$L$1st4x:
; First-pass inner loop, four columns per iteration. Each column is a pair:
; `mul rbx` (a[]*b[0]) followed by `mul rbp` (n[]*m). r10/r11 alternate as
; the a-side carry and r13/rdi alternate as the n-side accumulator.
378 mul rbx
379 add r10,rax
380 mov rax,QWORD[((-16))+r15*8+rcx]
381 adc rdx,0
382 mov r11,rdx
383
384 mul rbp
385 add r13,rax
386 mov rax,QWORD[((-8))+r15*8+rsi]
387 adc rdx,0
388 add r13,r10
389 adc rdx,0
390 mov QWORD[((-24))+r15*8+rsp],r13
391 mov rdi,rdx
392
393 mul rbx
394 add r11,rax
395 mov rax,QWORD[((-8))+r15*8+rcx]
396 adc rdx,0
397 mov r10,rdx
398
399 mul rbp
400 add rdi,rax
401 mov rax,QWORD[r15*8+rsi]
402 adc rdx,0
403 add rdi,r11
404 adc rdx,0
405 mov QWORD[((-16))+r15*8+rsp],rdi
406 mov r13,rdx
407
408 mul rbx
409 add r10,rax
410 mov rax,QWORD[r15*8+rcx]
411 adc rdx,0
412 mov r11,rdx
413
414 mul rbp
415 add r13,rax
416 mov rax,QWORD[8+r15*8+rsi]
417 adc rdx,0
418 add r13,r10
419 adc rdx,0
420 mov QWORD[((-8))+r15*8+rsp],r13
421 mov rdi,rdx
422
423 mul rbx
424 add r11,rax
425 mov rax,QWORD[8+r15*8+rcx]
426 adc rdx,0
427 lea r15,[4+r15] ; advance by four words; lea keeps the flags
428 mov r10,rdx
429
430 mul rbp
431 add rdi,rax
432 mov rax,QWORD[((-16))+r15*8+rsi]
433 adc rdx,0
434 add rdi,r11
435 adc rdx,0
436 mov QWORD[((-32))+r15*8+rsp],rdi
437 mov r13,rdx
438 cmp r15,r9
439 jb NEAR $L$1st4x
440
; First-pass tail: the last two columns, then the two top words.
441 mul rbx
442 add r10,rax
443 mov rax,QWORD[((-16))+r15*8+rcx]
444 adc rdx,0
445 mov r11,rdx
446
447 mul rbp
448 add r13,rax
449 mov rax,QWORD[((-8))+r15*8+rsi]
450 adc rdx,0
451 add r13,r10
452 adc rdx,0
453 mov QWORD[((-24))+r15*8+rsp],r13
454 mov rdi,rdx
455
456 mul rbx
457 add r11,rax
458 mov rax,QWORD[((-8))+r15*8+rcx]
459 adc rdx,0
460 mov r10,rdx
461
462 mul rbp
463 add rdi,rax
464 mov rax,QWORD[rsi] ; preload a[0] for the next pass
465 adc rdx,0
466 add rdi,r11
467 adc rdx,0
468 mov QWORD[((-16))+r15*8+rsp],rdi
469 mov r13,rdx
470
471 xor rdi,rdi
472 add r13,r10
473 adc rdi,0
474 mov QWORD[((-8))+r15*8+rsp],r13
475 mov QWORD[r15*8+rsp],rdi ; top carry word
476
477 lea r14,[1+r14]
478ALIGN 4
479$L$outer4x:
; Passes 1..num-1: same column structure, with the previous t[] added in.
480 mov rbx,QWORD[r14*8+r12] ; rbx = b[i]
481 xor r15,r15
482 mov r10,QWORD[rsp] ; t[0]
483 mov rbp,r8
484 mul rbx
485 add r10,rax
486 mov rax,QWORD[rcx]
487 adc rdx,0
488
489 imul rbp,r10 ; rbp = m for this pass
490 mov r11,rdx
491
492 mul rbp
493 add r10,rax ; low-word sum is not stored, only its carry is kept
494 mov rax,QWORD[8+rsi]
495 adc rdx,0
496 mov rdi,rdx
497
498 mul rbx
499 add r11,rax
500 mov rax,QWORD[8+rcx]
501 adc rdx,0
502 add r11,QWORD[8+rsp] ; + t[1]
503 adc rdx,0
504 mov r10,rdx
505
506 mul rbp
507 add rdi,rax
508 mov rax,QWORD[16+rsi]
509 adc rdx,0
510 add rdi,r11
511 lea r15,[4+r15]
512 adc rdx,0
513 mov QWORD[rsp],rdi
514 mov r13,rdx
515 jmp NEAR $L$inner4x
516ALIGN 16
517$L$inner4x:
; Four columns per iteration; each a-side product also adds the matching
; word of the previous t[] before the n-side product is folded in.
518 mul rbx
519 add r10,rax
520 mov rax,QWORD[((-16))+r15*8+rcx]
521 adc rdx,0
522 add r10,QWORD[((-16))+r15*8+rsp]
523 adc rdx,0
524 mov r11,rdx
525
526 mul rbp
527 add r13,rax
528 mov rax,QWORD[((-8))+r15*8+rsi]
529 adc rdx,0
530 add r13,r10
531 adc rdx,0
532 mov QWORD[((-24))+r15*8+rsp],r13
533 mov rdi,rdx
534
535 mul rbx
536 add r11,rax
537 mov rax,QWORD[((-8))+r15*8+rcx]
538 adc rdx,0
539 add r11,QWORD[((-8))+r15*8+rsp]
540 adc rdx,0
541 mov r10,rdx
542
543 mul rbp
544 add rdi,rax
545 mov rax,QWORD[r15*8+rsi]
546 adc rdx,0
547 add rdi,r11
548 adc rdx,0
549 mov QWORD[((-16))+r15*8+rsp],rdi
550 mov r13,rdx
551
552 mul rbx
553 add r10,rax
554 mov rax,QWORD[r15*8+rcx]
555 adc rdx,0
556 add r10,QWORD[r15*8+rsp]
557 adc rdx,0
558 mov r11,rdx
559
560 mul rbp
561 add r13,rax
562 mov rax,QWORD[8+r15*8+rsi]
563 adc rdx,0
564 add r13,r10
565 adc rdx,0
566 mov QWORD[((-8))+r15*8+rsp],r13
567 mov rdi,rdx
568
569 mul rbx
570 add r11,rax
571 mov rax,QWORD[8+r15*8+rcx]
572 adc rdx,0
573 add r11,QWORD[8+r15*8+rsp]
574 adc rdx,0
575 lea r15,[4+r15]
576 mov r10,rdx
577
578 mul rbp
579 add rdi,rax
580 mov rax,QWORD[((-16))+r15*8+rsi]
581 adc rdx,0
582 add rdi,r11
583 adc rdx,0
584 mov QWORD[((-32))+r15*8+rsp],rdi
585 mov r13,rdx
586 cmp r15,r9
587 jb NEAR $L$inner4x
588
; Pass tail: last two columns, then fold in the previous top word t[num].
589 mul rbx
590 add r10,rax
591 mov rax,QWORD[((-16))+r15*8+rcx]
592 adc rdx,0
593 add r10,QWORD[((-16))+r15*8+rsp]
594 adc rdx,0
595 mov r11,rdx
596
597 mul rbp
598 add r13,rax
599 mov rax,QWORD[((-8))+r15*8+rsi]
600 adc rdx,0
601 add r13,r10
602 adc rdx,0
603 mov QWORD[((-24))+r15*8+rsp],r13
604 mov rdi,rdx
605
606 mul rbx
607 add r11,rax
608 mov rax,QWORD[((-8))+r15*8+rcx]
609 adc rdx,0
610 add r11,QWORD[((-8))+r15*8+rsp]
611 adc rdx,0
612 lea r14,[1+r14] ; next outer index (flags untouched)
613 mov r10,rdx
614
615 mul rbp
616 add rdi,rax
617 mov rax,QWORD[rsi] ; preload a[0] for the next pass
618 adc rdx,0
619 add rdi,r11
620 adc rdx,0
621 mov QWORD[((-16))+r15*8+rsp],rdi
622 mov r13,rdx
623
624 xor rdi,rdi
625 add r13,r10
626 adc rdi,0
627 add r13,QWORD[r9*8+rsp] ; + previous t[num]
628 adc rdi,0
629 mov QWORD[((-8))+r15*8+rsp],r13
630 mov QWORD[r15*8+rsp],rdi
631
632 cmp r14,r9
633 jb NEAR $L$outer4x
; Final reduction: r[] = t[] - n[], four words per iteration. The borrow
; lives in CF from the `sub` below to the last sbb; the mov/lea/dec
; instructions in between do not modify CF.
634 mov rdi,QWORD[16+r9*8+rsp] ; reload the parked output pointer
635 lea r15,[((-4))+r9]
636 mov rax,QWORD[rsp]
637 mov rdx,QWORD[8+rsp]
638 shr r15,2 ; r15 = (num-4)/4 loop iterations
639 lea rsi,[rsp] ; rsi = t[]
640 xor r14,r14
641
642 sub rax,QWORD[rcx]
643 mov rbx,QWORD[16+rsi]
644 mov rbp,QWORD[24+rsi]
645 sbb rdx,QWORD[8+rcx]
646
647$L$sub4x:
648 mov QWORD[r14*8+rdi],rax
649 mov QWORD[8+r14*8+rdi],rdx
650 sbb rbx,QWORD[16+r14*8+rcx]
651 mov rax,QWORD[32+r14*8+rsi]
652 mov rdx,QWORD[40+r14*8+rsi]
653 sbb rbp,QWORD[24+r14*8+rcx]
654 mov QWORD[16+r14*8+rdi],rbx
655 mov QWORD[24+r14*8+rdi],rbp
656 sbb rax,QWORD[32+r14*8+rcx]
657 mov rbx,QWORD[48+r14*8+rsi]
658 mov rbp,QWORD[56+r14*8+rsi]
659 sbb rdx,QWORD[40+r14*8+rcx]
660 lea r14,[4+r14]
661 dec r15
662 jnz NEAR $L$sub4x
663
664 mov QWORD[r14*8+rdi],rax
665 mov rax,QWORD[32+r14*8+rsi] ; top word t[num]
666 sbb rbx,QWORD[16+r14*8+rcx]
667 mov QWORD[8+r14*8+rdi],rdx
668 sbb rbp,QWORD[24+r14*8+rcx]
669 mov QWORD[16+r14*8+rdi],rbx
670
; rax becomes the select mask, broadcast into xmm4; xmm5 is its complement.
671 sbb rax,0
672 mov QWORD[24+r14*8+rdi],rbp
673 pxor xmm0,xmm0 ; xmm0 = 0, used to wipe t[]
674DB 102,72,15,110,224 ; hand-encoded: movq xmm4,rax
675 pcmpeqd xmm5,xmm5 ; xmm5 = all ones
676 pshufd xmm4,xmm4,0 ; replicate the low dword of the mask
677 mov r15,r9
678 pxor xmm5,xmm4 ; xmm5 = NOT xmm4
679 shr r15,2 ; num/4 iterations of 32 bytes each
680 xor eax,eax ; rax = byte offset
681
682 jmp NEAR $L$copy4x
683ALIGN 16
684$L$copy4x:
; Branch-free select, 32 bytes per iteration:
; r[] = (t[] & xmm4) | (r[] & xmm5); t[] is zeroed after it has been read.
; t[] is accessed with aligned loads/stores, r[] with unaligned ones.
685 movdqa xmm1,XMMWORD[rax*1+rsp]
686 movdqu xmm2,XMMWORD[rax*1+rdi]
687 pand xmm1,xmm4
688 pand xmm2,xmm5
689 movdqa xmm3,XMMWORD[16+rax*1+rsp]
690 movdqa XMMWORD[rax*1+rsp],xmm0
691 por xmm1,xmm2
692 movdqu xmm2,XMMWORD[16+rax*1+rdi]
693 movdqu XMMWORD[rax*1+rdi],xmm1
694 pand xmm3,xmm4
695 pand xmm2,xmm5
696 movdqa XMMWORD[16+rax*1+rsp],xmm0
697 por xmm3,xmm2
698 movdqu XMMWORD[16+rax*1+rdi],xmm3
699 lea rax,[32+rax]
700 dec r15
701 jnz NEAR $L$copy4x
702 mov rsi,QWORD[8+r9*8+rsp] ; rsi = entry rsp saved after the page walk
703
704 mov rax,1 ; return value
705 mov r15,QWORD[((-48))+rsi]
706
707 mov r14,QWORD[((-40))+rsi]
708
709 mov r13,QWORD[((-32))+rsi]
710
711 mov r12,QWORD[((-24))+rsi]
712
713 mov rbp,QWORD[((-16))+rsi]
714
715 mov rbx,QWORD[((-8))+rsi]
716
717 lea rsp,[rsi]
718
719$L$mul4x_epilogue:
720 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
721 mov rsi,QWORD[16+rsp]
722 DB 0F3h,0C3h ;repret
723
724$L$SEH_end_bn_mul4x_mont:
; The two helpers below are defined outside this file; their register
; contracts are not visible here.
725EXTERN bn_sqrx8x_internal
726EXTERN bn_sqr8x_internal
727
728
; ---------------------------------------------------------------------------
; bn_sqr8x_mont -- squaring path. bn_mul_mont jumps to $L$sqr8x_enter when
; both input pointers are equal and the word count is a multiple of 8.
; It builds a stack frame, calls one of the external helpers to do the
; arithmetic, then performs the final subtraction and masked copy here.
;
; On arrival at $L$sqr8x_enter: rdi = output, rsi = input, rcx = n[] pointer,
; r8 = pointer to n0, r9 = word count, rax = entry rsp.
; Frame slots written here: [32+rsp] = n0 value, [40+rsp] = entry rsp (the
; slot sqr_handler reads). Returns rax = 1.
; ---------------------------------------------------------------------------
729ALIGN 32
730bn_sqr8x_mont:
731 mov QWORD[8+rsp],rdi ;WIN64 prologue
732 mov QWORD[16+rsp],rsi
733 mov rax,rsp
734$L$SEH_begin_bn_sqr8x_mont:
735 mov rdi,rcx
736 mov rsi,rdx
737 mov rdx,r8
738 mov rcx,r9
739 mov r8,QWORD[40+rsp]
740 mov r9,QWORD[48+rsp]
741
742
743
744 mov rax,rsp
745
746$L$sqr8x_enter:
747 push rbx
748
749 push rbp
750
751 push r12
752
753 push r13
754
755 push r14
756
757 push r15
758
759$L$sqr8x_prologue:
760
761 mov r10d,r9d
762 shl r9d,3 ; r9 = num*8 (bytes); the 32-bit shift zero-extends
763 shl r10,3+2 ; r10 = num*32
764 neg r9 ; r9 = -num*8
765
766
767
768
769
770
; Choose the frame base rbp. r11 = (rsp - 64 - 2*num*8 - rsi) mod 4096 is the
; page offset of the candidate frame relative to the input at rsi.
; NOTE(review): presumably this keeps the frame at a controlled distance
; (mod 4096) from the input -- the rationale is not stated in this file.
771 lea r11,[((-64))+r9*2+rsp]
772 mov rbp,rsp
773 mov r8,QWORD[r8] ; r8 = n0 value
774 sub r11,rsi
775 and r11,4095
776 cmp r10,r11
777 jb NEAR $L$sqr8x_sp_alt
778 sub rbp,r11
779 lea rbp,[((-64))+r9*2+rbp]
780 jmp NEAR $L$sqr8x_sp_done
781
782ALIGN 32
783$L$sqr8x_sp_alt:
784 lea r10,[((4096-64))+r9*2]
785 lea rbp,[((-64))+r9*2+rbp]
786 sub r11,r10
787 mov r10,0 ; mov (not xor) so the flags from the sub survive for cmovc
788 cmovc r11,r10 ; clamp to 0 if the subtraction borrowed
789 sub rbp,r11
790$L$sqr8x_sp_done:
791 and rbp,-64 ; 64-byte align the frame base
; Walk rsp down to rbp one 4096-byte page at a time, reading each page.
792 mov r11,rsp
793 sub r11,rbp
794 and r11,-4096
795 lea rsp,[rbp*1+r11]
796 mov r10,QWORD[rsp]
797 cmp rsp,rbp
798 ja NEAR $L$sqr8x_page_walk
799 jmp NEAR $L$sqr8x_page_walk_done
800
801ALIGN 16
802$L$sqr8x_page_walk:
803 lea rsp,[((-4096))+rsp]
804 mov r10,QWORD[rsp]
805 cmp rsp,rbp
806 ja NEAR $L$sqr8x_page_walk
807$L$sqr8x_page_walk_done:
808
809 mov r10,r9 ; r10 = -num*8
810 neg r9 ; r9 = num*8
811
812 mov QWORD[32+rsp],r8 ; n0 value
813 mov QWORD[40+rsp],rax ; entry rsp
814
815$L$sqr8x_body:
816
; Stash values in xmm registers across the helper call (hand-encoded movq).
817DB 102,72,15,110,209 ; movq xmm2,rcx
818 pxor xmm0,xmm0
819DB 102,72,15,110,207 ; movq xmm1,rdi (output pointer)
820DB 102,73,15,110,218 ; movq xmm3,r10
; Same capability test as at $L$mul4x_enter: both bits of mask 0x80100 set
; selects the bn_sqrx8x_internal helper, otherwise bn_sqr8x_internal.
821 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
822 and eax,0x80100
823 cmp eax,0x80100
824 jne NEAR $L$sqr8x_nox
825
826 call bn_sqrx8x_internal
827
828
829
830
; NOTE(review): the values of rcx, r8, rbp and rax used below are whatever
; the external helper leaves in them -- confirm against its definition.
831 lea rbx,[rcx*1+r8]
832 mov r9,rcx
833 mov rdx,rcx
834DB 102,72,15,126,207 ; movq rdi,xmm1 (restore the output pointer)
835 sar rcx,3+2 ; loop counter; the loop below counts it up to zero
836 jmp NEAR $L$sqr8x_sub
837
838ALIGN 32
839$L$sqr8x_nox:
840 call bn_sqr8x_internal
841
842
843
844
; NOTE(review): r9, rdi, rbp and rax here are outputs of the external helper
; -- confirm against its definition.
845 lea rbx,[r9*1+rdi]
846 mov rcx,r9
847 mov rdx,r9
848DB 102,72,15,126,207 ; movq rdi,xmm1 (restore the output pointer)
849 sar rcx,3+2 ; loop counter; the loop below counts it up to zero
850 jmp NEAR $L$sqr8x_sub
851
852ALIGN 32
853$L$sqr8x_sub:
; Subtract the words at rbp from the words at rbx, four per iteration,
; storing to the output at rdi. CF on entry is the one left by the `sar`
; above; lea/mov/inc inside the loop do not modify CF.
854 mov r12,QWORD[rbx]
855 mov r13,QWORD[8+rbx]
856 mov r14,QWORD[16+rbx]
857 mov r15,QWORD[24+rbx]
858 lea rbx,[32+rbx]
859 sbb r12,QWORD[rbp]
860 sbb r13,QWORD[8+rbp]
861 sbb r14,QWORD[16+rbp]
862 sbb r15,QWORD[24+rbp]
863 lea rbp,[32+rbp]
864 mov QWORD[rdi],r12
865 mov QWORD[8+rdi],r13
866 mov QWORD[16+rdi],r14
867 mov QWORD[24+rdi],r15
868 lea rdi,[32+rdi]
869 inc rcx
870 jnz NEAR $L$sqr8x_sub
871
872 sbb rax,0 ; fold the final borrow into rax to form the select mask
873 lea rbx,[r9*1+rbx] ; rewind both pointers by adding r9 (the loops run r9 up to zero)
874 lea rdi,[r9*1+rdi]
875
876DB 102,72,15,110,200 ; movq xmm1,rax
877 pxor xmm0,xmm0
878 pshufd xmm1,xmm1,0 ; replicate the low dword of the mask
879 mov rsi,QWORD[40+rsp] ; rsi = entry rsp
880
881 jmp NEAR $L$sqr8x_cond_copy
882
883ALIGN 32
884$L$sqr8x_cond_copy:
; Branch-free select, 32 bytes per iteration:
; output = (scratch & xmm1) | (output & mask), mask = (xmm1 == 0) per dword.
; The scratch words just read, and a second region rdx bytes away from
; them, are overwritten with zero.
885 movdqa xmm2,XMMWORD[rbx]
886 movdqa xmm3,XMMWORD[16+rbx]
887 lea rbx,[32+rbx]
888 movdqu xmm4,XMMWORD[rdi]
889 movdqu xmm5,XMMWORD[16+rdi]
890 lea rdi,[32+rdi]
891 movdqa XMMWORD[(-32)+rbx],xmm0
892 movdqa XMMWORD[(-16)+rbx],xmm0
893 movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0
894 movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
895 pcmpeqd xmm0,xmm1 ; xmm0 = all ones in each dword where xmm1 is zero
896 pand xmm2,xmm1
897 pand xmm3,xmm1
898 pand xmm4,xmm0
899 pand xmm5,xmm0
900 pxor xmm0,xmm0 ; back to zero for the next iteration's wipe
901 por xmm4,xmm2
902 por xmm5,xmm3
903 movdqu XMMWORD[(-32)+rdi],xmm4
904 movdqu XMMWORD[(-16)+rdi],xmm5
905 add r9,32
906 jnz NEAR $L$sqr8x_cond_copy
907
908 mov rax,1 ; return value
909 mov r15,QWORD[((-48))+rsi]
910
911 mov r14,QWORD[((-40))+rsi]
912
913 mov r13,QWORD[((-32))+rsi]
914
915 mov r12,QWORD[((-24))+rsi]
916
917 mov rbp,QWORD[((-16))+rsi]
918
919 mov rbx,QWORD[((-8))+rsi]
920
921 lea rsp,[rsi]
922
923$L$sqr8x_epilogue:
924 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
925 mov rsi,QWORD[16+rsp]
926 DB 0F3h,0C3h ;repret
927
928$L$SEH_end_bn_sqr8x_mont:
929
; ---------------------------------------------------------------------------
; bn_mulx4x_mont -- 4-way multiply path built on mulx/adcx/adox. It is entered
; at $L$mulx4x_enter from $L$mul4x_enter when both bits of mask 0x80100 are
; set in the capability word.
;
; On arrival: rdi = output, rsi = a[], rdx = b[], rcx = n[], r8 = pointer to
; n0, r9 = word count, rax = entry rsp.
;
; Frame slots written below:
;   [rsp]     num*8 (bytes)
;   [8+rsp]   pointer to the next b[] word
;   [16+rsp]  b + num*8 (end of b[])
;   [24+rsp]  n0 value
;   [32+rsp]  output pointer
;   [40+rsp]  entry rsp (the slot sqr_handler reads)
;   [48+rsp]  inner loop count, (num*8 >> 5) - 1
;   [64+rsp]  start of the scratch vector t[]
;
; Carry handling: adcx uses only CF and adox only OF, so two independent carry
; chains run interleaved; rbp is kept at zero to terminate them.
; Returns rax = 1.
; ---------------------------------------------------------------------------
930ALIGN 32
931bn_mulx4x_mont:
932 mov QWORD[8+rsp],rdi ;WIN64 prologue
933 mov QWORD[16+rsp],rsi
934 mov rax,rsp
935$L$SEH_begin_bn_mulx4x_mont:
936 mov rdi,rcx
937 mov rsi,rdx
938 mov rdx,r8
939 mov rcx,r9
940 mov r8,QWORD[40+rsp]
941 mov r9,QWORD[48+rsp]
942
943
944
945 mov rax,rsp
946
947$L$mulx4x_enter:
948 push rbx
949
950 push rbp
951
952 push r12
953
954 push r13
955
956 push r14
957
958 push r15
959
960$L$mulx4x_prologue:
961
962 shl r9d,3 ; r9 = num*8; the 32-bit shift zero-extends
963 xor r10,r10
964 sub r10,r9 ; r10 = -num*8
965 mov r8,QWORD[r8] ; r8 = n0 value
966 lea rbp,[((-72))+r10*1+rsp]
967 and rbp,-128 ; frame base aligned to 128 bytes
; Walk rsp down to rbp one 4096-byte page at a time, reading each page.
968 mov r11,rsp
969 sub r11,rbp
970 and r11,-4096
971 lea rsp,[rbp*1+r11]
972 mov r10,QWORD[rsp]
973 cmp rsp,rbp
974 ja NEAR $L$mulx4x_page_walk
975 jmp NEAR $L$mulx4x_page_walk_done
976
977ALIGN 16
978$L$mulx4x_page_walk:
979 lea rsp,[((-4096))+rsp]
980 mov r10,QWORD[rsp]
981 cmp rsp,rbp
982 ja NEAR $L$mulx4x_page_walk
983$L$mulx4x_page_walk_done:
984
985 lea r10,[r9*1+rdx] ; r10 = end of b[]
986
987
988
989
990
991
992
993
994
995
996
997
998 mov QWORD[rsp],r9
999 shr r9,5
1000 mov QWORD[16+rsp],r10
1001 sub r9,1
1002 mov QWORD[24+rsp],r8
1003 mov QWORD[32+rsp],rdi
1004 mov QWORD[40+rsp],rax
1005
1006 mov QWORD[48+rsp],r9
1007 jmp NEAR $L$mulx4x_body
1008
1009ALIGN 32
1010$L$mulx4x_body:
; First pass: a[0..3]*b[0], then m = low word * n0 (kept in r8), then
; n[0..3]*m. rdx is the implicit mulx multiplicand, so it alternates between
; b[i] (copy in r9) and m (copy in r8).
1011 lea rdi,[8+rdx] ; rdi = &b[1]
1012 mov rdx,QWORD[rdx] ; rdx = b[0]
1013 lea rbx,[((64+32))+rsp] ; rbx = write cursor into t[]
1014 mov r9,rdx
1015
1016 mulx rax,r8,QWORD[rsi]
1017 mulx r14,r11,QWORD[8+rsi]
1018 add r11,rax
1019 mov QWORD[8+rsp],rdi
1020 mulx r13,r12,QWORD[16+rsi]
1021 adc r12,r14
1022 adc r13,0
1023
1024 mov rdi,r8
1025 imul r8,QWORD[24+rsp] ; r8 = m
1026 xor rbp,rbp ; rbp = 0; also clears CF and OF for the two chains
1027
1028 mulx r14,rax,QWORD[24+rsi]
1029 mov rdx,r8
1030 lea rsi,[32+rsi]
1031 adcx r13,rax
1032 adcx r14,rbp
1033
1034 mulx r10,rax,QWORD[rcx]
1035 adcx rdi,rax ; low-word sum is not stored, only its carry is kept
1036 adox r10,r11
1037 mulx r11,rax,QWORD[8+rcx]
1038 adcx r10,rax
1039 adox r11,r12
1040DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 ; hand-encoded: mulx r12,rax,QWORD[16+rcx] with a 32-bit displacement
1041 mov rdi,QWORD[48+rsp] ; rdi = inner loop count
1042 mov QWORD[((-32))+rbx],r10
1043 adcx r11,rax
1044 adox r12,r13
1045 mulx r15,rax,QWORD[24+rcx]
1046 mov rdx,r9 ; back to b[0]
1047 mov QWORD[((-24))+rbx],r11
1048 adcx r12,rax
1049 adox r15,rbp
1050 lea rcx,[32+rcx]
1051 mov QWORD[((-16))+rbx],r12
1052
1053 jmp NEAR $L$mulx4x_1st
1054
1055ALIGN 32
1056$L$mulx4x_1st:
; First-pass inner loop: four more a[] words times b[0] (CF chain), then
; four n[] words times m folded in (CF and OF chains).
1057 adcx r15,rbp
1058 mulx rax,r10,QWORD[rsi]
1059 adcx r10,r14
1060 mulx r14,r11,QWORD[8+rsi]
1061 adcx r11,rax
1062 mulx rax,r12,QWORD[16+rsi]
1063 adcx r12,r14
1064 mulx r14,r13,QWORD[24+rsi]
1065DB 0x67,0x67 ; two address-size prefix bytes emitted in front of the next instruction
1066 mov rdx,r8
1067 adcx r13,rax
1068 adcx r14,rbp
1069 lea rsi,[32+rsi]
1070 lea rbx,[32+rbx]
1071
1072 adox r10,r15
1073 mulx r15,rax,QWORD[rcx]
1074 adcx r10,rax
1075 adox r11,r15
1076 mulx r15,rax,QWORD[8+rcx]
1077 adcx r11,rax
1078 adox r12,r15
1079 mulx r15,rax,QWORD[16+rcx]
1080 mov QWORD[((-40))+rbx],r10
1081 adcx r12,rax
1082 mov QWORD[((-32))+rbx],r11
1083 adox r13,r15
1084 mulx r15,rax,QWORD[24+rcx]
1085 mov rdx,r9
1086 mov QWORD[((-24))+rbx],r12
1087 adcx r13,rax
1088 adox r15,rbp
1089 lea rcx,[32+rcx]
1090 mov QWORD[((-16))+rbx],r13
1091
1092 dec rdi ; dec leaves CF intact, so the adcx chain survives the loop test
1093 jnz NEAR $L$mulx4x_1st
1094
1095 mov rax,QWORD[rsp] ; rax = num*8
1096 mov rdi,QWORD[8+rsp] ; rdi = pointer to the next b[] word
1097 adc r15,rbp
1098 add r14,r15
1099 sbb r15,r15 ; r15 = 0 or -1: the top-most carry of this pass
1100 mov QWORD[((-8))+rbx],r14
1101 jmp NEAR $L$mulx4x_outer
1102
1103ALIGN 32
1104$L$mulx4x_outer:
; One pass per remaining b[] word, with the previous t[] added in.
1105 mov rdx,QWORD[rdi] ; rdx = b[i]
1106 lea rdi,[8+rdi]
1107 sub rsi,rax ; rewind a[] by num*8 bytes
1108 mov QWORD[rbx],r15 ; store the top-most carry word above t[]
1109 lea rbx,[((64+32))+rsp]
1110 sub rcx,rax ; rewind n[] by num*8 bytes
1111
1112 mulx r11,r8,QWORD[rsi]
1113 xor ebp,ebp ; rbp = 0, CF = OF = 0
1114 mov r9,rdx
1115 mulx r12,r14,QWORD[8+rsi]
1116 adox r8,QWORD[((-32))+rbx]
1117 adcx r11,r14
1118 mulx r13,r15,QWORD[16+rsi]
1119 adox r11,QWORD[((-24))+rbx]
1120 adcx r12,r15
1121 adox r12,QWORD[((-16))+rbx]
1122 adcx r13,rbp
1123 adox r13,rbp
1124
1125 mov QWORD[8+rsp],rdi
1126 mov r15,r8
1127 imul r8,QWORD[24+rsp] ; r8 = m for this pass
1128 xor ebp,ebp ; restart both carry chains
1129
1130 mulx r14,rax,QWORD[24+rsi]
1131 mov rdx,r8
1132 adcx r13,rax
1133 adox r13,QWORD[((-8))+rbx]
1134 adcx r14,rbp
1135 lea rsi,[32+rsi]
1136 adox r14,rbp
1137
1138 mulx r10,rax,QWORD[rcx]
1139 adcx r15,rax ; low-word sum is not stored, only its carry is kept
1140 adox r10,r11
1141 mulx r11,rax,QWORD[8+rcx]
1142 adcx r10,rax
1143 adox r11,r12
1144 mulx r12,rax,QWORD[16+rcx]
1145 mov QWORD[((-32))+rbx],r10
1146 adcx r11,rax
1147 adox r12,r13
1148 mulx r15,rax,QWORD[24+rcx]
1149 mov rdx,r9
1150 mov QWORD[((-24))+rbx],r11
1151 lea rcx,[32+rcx]
1152 adcx r12,rax
1153 adox r15,rbp
1154 mov rdi,QWORD[48+rsp] ; rdi = inner loop count
1155 mov QWORD[((-16))+rbx],r12
1156
1157 jmp NEAR $L$mulx4x_inner
1158
1159ALIGN 32
1160$L$mulx4x_inner:
; Inner loop: a[]*b[i] on the OF chain, previous t[] added on the CF chain,
; then n[]*m folded in.
1161 mulx rax,r10,QWORD[rsi]
1162 adcx r15,rbp
1163 adox r10,r14
1164 mulx r14,r11,QWORD[8+rsi]
1165 adcx r10,QWORD[rbx]
1166 adox r11,rax
1167 mulx rax,r12,QWORD[16+rsi]
1168 adcx r11,QWORD[8+rbx]
1169 adox r12,r14
1170 mulx r14,r13,QWORD[24+rsi]
1171 mov rdx,r8
1172 adcx r12,QWORD[16+rbx]
1173 adox r13,rax
1174 adcx r13,QWORD[24+rbx]
1175 adox r14,rbp
1176 lea rsi,[32+rsi]
1177 lea rbx,[32+rbx]
1178 adcx r14,rbp
1179
1180 adox r10,r15
1181 mulx r15,rax,QWORD[rcx]
1182 adcx r10,rax
1183 adox r11,r15
1184 mulx r15,rax,QWORD[8+rcx]
1185 adcx r11,rax
1186 adox r12,r15
1187 mulx r15,rax,QWORD[16+rcx]
1188 mov QWORD[((-40))+rbx],r10
1189 adcx r12,rax
1190 adox r13,r15
1191 mulx r15,rax,QWORD[24+rcx]
1192 mov rdx,r9
1193 mov QWORD[((-32))+rbx],r11
1194 mov QWORD[((-24))+rbx],r12
1195 adcx r13,rax
1196 adox r15,rbp
1197 lea rcx,[32+rcx]
1198 mov QWORD[((-16))+rbx],r13
1199
1200 dec rdi ; CF is preserved across the loop test
1201 jnz NEAR $L$mulx4x_inner
1202
1203 mov rax,QWORD[rsp] ; rax = num*8
1204 mov rdi,QWORD[8+rsp]
1205 adc r15,rbp
1206 sub rbp,QWORD[rbx] ; 0 - stored carry word: sets CF iff that word is non-zero
1207 adc r14,r15
1208 sbb r15,r15 ; r15 = 0 or -1: new top-most carry
1209 mov QWORD[((-8))+rbx],r14
1210
1211 cmp rdi,QWORD[16+rsp] ; all b[] words consumed?
1212 jne NEAR $L$mulx4x_outer
1213
; Final reduction: output = t[] - n[], four words per iteration.
1214 lea rbx,[64+rsp] ; rbx = t[]
1215 sub rcx,rax ; rewind n[]
1216 neg r15 ; top-most carry as 0 or 1
1217 mov rdx,rax ; rdx = num*8, byte counter for the copy loop
1218 shr rax,3+2 ; rax = num/4 iterations; CF = last bit shifted out
1219 mov rdi,QWORD[32+rsp] ; rdi = output pointer
1220 jmp NEAR $L$mulx4x_sub
1221
1222ALIGN 32
1223$L$mulx4x_sub:
; The borrow is carried in CF across iterations; mov/lea/dec leave CF alone.
1224 mov r11,QWORD[rbx]
1225 mov r12,QWORD[8+rbx]
1226 mov r13,QWORD[16+rbx]
1227 mov r14,QWORD[24+rbx]
1228 lea rbx,[32+rbx]
1229 sbb r11,QWORD[rcx]
1230 sbb r12,QWORD[8+rcx]
1231 sbb r13,QWORD[16+rcx]
1232 sbb r14,QWORD[24+rcx]
1233 lea rcx,[32+rcx]
1234 mov QWORD[rdi],r11
1235 mov QWORD[8+rdi],r12
1236 mov QWORD[16+rdi],r13
1237 mov QWORD[24+rdi],r14
1238 lea rdi,[32+rdi]
1239 dec rax
1240 jnz NEAR $L$mulx4x_sub
1241
1242 sbb r15,0 ; fold the final borrow into the top-most carry: the select mask
1243 lea rbx,[64+rsp] ; rewind to t[0]
1244 sub rdi,rdx ; rewind the output pointer
1245
1246DB 102,73,15,110,207 ; hand-encoded: movq xmm1,r15
1247 pxor xmm0,xmm0
1248 pshufd xmm1,xmm1,0 ; replicate the low dword of the mask
1249 mov rsi,QWORD[40+rsp] ; rsi = entry rsp
1250
1251 jmp NEAR $L$mulx4x_cond_copy
1252
1253ALIGN 32
1254$L$mulx4x_cond_copy:
; Branch-free select, 32 bytes per iteration:
; output = (t[] & xmm1) | (output & mask), mask = (xmm1 == 0) per dword.
; The t[] words are zeroed after they have been read.
1255 movdqa xmm2,XMMWORD[rbx]
1256 movdqa xmm3,XMMWORD[16+rbx]
1257 lea rbx,[32+rbx]
1258 movdqu xmm4,XMMWORD[rdi]
1259 movdqu xmm5,XMMWORD[16+rdi]
1260 lea rdi,[32+rdi]
1261 movdqa XMMWORD[(-32)+rbx],xmm0
1262 movdqa XMMWORD[(-16)+rbx],xmm0
1263 pcmpeqd xmm0,xmm1
1264 pand xmm2,xmm1
1265 pand xmm3,xmm1
1266 pand xmm4,xmm0
1267 pand xmm5,xmm0
1268 pxor xmm0,xmm0
1269 por xmm4,xmm2
1270 por xmm5,xmm3
1271 movdqu XMMWORD[(-32)+rdi],xmm4
1272 movdqu XMMWORD[(-16)+rdi],xmm5
1273 sub rdx,32
1274 jnz NEAR $L$mulx4x_cond_copy
1275
1276 mov QWORD[rbx],rdx ; rdx is 0 here: zero the word just past t[]
1277
1278 mov rax,1 ; return value
1279 mov r15,QWORD[((-48))+rsi]
1280
1281 mov r14,QWORD[((-40))+rsi]
1282
1283 mov r13,QWORD[((-32))+rsi]
1284
1285 mov r12,QWORD[((-24))+rsi]
1286
1287 mov rbp,QWORD[((-16))+rsi]
1288
1289 mov rbx,QWORD[((-8))+rsi]
1290
1291 lea rsp,[rsi]
1292
1293$L$mulx4x_epilogue:
1294 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1295 mov rsi,QWORD[16+rsp]
1296 DB 0F3h,0C3h ;repret
1297
1298$L$SEH_end_bn_mulx4x_mont:
; NUL-terminated ASCII banner. The bytes decode to
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by" followed by the
; author's e-mail address in angle brackets (appro at openssl.org).
1299DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
1300DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
1301DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
1302DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
1303DB 115,108,46,111,114,103,62,0
1304ALIGN 16
1305EXTERN __imp_RtlVirtualUnwind
1306
; ---------------------------------------------------------------------------
; mul_handler / sqr_handler -- Win64 language-specific exception handlers
; referenced from the .xdata records at the end of this file.
;
; Both take r8 = pointer to the interrupted context record and r9 = pointer
; to the dispatcher context. They work out what the interrupted function's
; stack pointer was at entry, patch the callee-saved registers, rsp, rsi and
; rdi in the context record accordingly, then share $L$common_seh_tail,
; which copies the context and calls RtlVirtualUnwind. Both return eax = 1.
;
; NOTE(review): field names in the comments below (Rax, Rip, Rsp, ImageBase,
; HandlerData, ...) follow the Windows x64 CONTEXT and DISPATCHER_CONTEXT
; layouts and are inferred from the numeric offsets -- confirm against
; winnt.h.
;
; mul_handler is used by bn_mul_mont and bn_mul4x_mont. Its handler data is
; two image-relative addresses: body label and epilogue label.
; ---------------------------------------------------------------------------
1307ALIGN 16
1308mul_handler:
1309 push rsi
1310 push rdi
1311 push rbx
1312 push rbp
1313 push r12
1314 push r13
1315 push r14
1316 push r15
1317 pushfq
1318 sub rsp,64 ; 32 bytes of shadow space plus four stack arguments for the call below
1319
1320 mov rax,QWORD[120+r8] ; context Rax: still the entry rsp until the body label is reached
1321 mov rbx,QWORD[248+r8] ; context Rip
1322
1323 mov rsi,QWORD[8+r9] ; ImageBase
1324 mov r11,QWORD[56+r9] ; HandlerData
1325
1326 mov r10d,DWORD[r11] ; HandlerData[0] = body label (image-relative)
1327 lea r10,[r10*1+rsi]
1328 cmp rbx,r10
1329 jb NEAR $L$common_seh_tail ; Rip before the body label
1330
1331 mov rax,QWORD[152+r8] ; context Rsp
1332
1333 mov r10d,DWORD[4+r11] ; HandlerData[1] = epilogue label
1334 lea r10,[r10*1+rsi]
1335 cmp rbx,r10
1336 jae NEAR $L$common_seh_tail ; Rip at or past the epilogue label
1337
; Inside the body: the entry rsp was stored at [8 + num*8 + rsp], with num
; taken from the interrupted context's R9.
1338 mov r10,QWORD[192+r8] ; context R9
1339 mov rax,QWORD[8+r10*8+rax]
1340
1341 jmp NEAR $L$common_pop_regs
1342
1343
1344
; sqr_handler is used by bn_sqr8x_mont and bn_mulx4x_mont. Its handler data
; is three image-relative addresses: prologue, body and epilogue labels.
1345ALIGN 16
1346sqr_handler:
1347 push rsi
1348 push rdi
1349 push rbx
1350 push rbp
1351 push r12
1352 push r13
1353 push r14
1354 push r15
1355 pushfq
1356 sub rsp,64
1357
1358 mov rax,QWORD[120+r8] ; context Rax
1359 mov rbx,QWORD[248+r8] ; context Rip
1360
1361 mov rsi,QWORD[8+r9] ; ImageBase
1362 mov r11,QWORD[56+r9] ; HandlerData
1363
1364 mov r10d,DWORD[r11] ; HandlerData[0] = prologue label (after the pushes)
1365 lea r10,[r10*1+rsi]
1366 cmp rbx,r10
1367 jb NEAR $L$common_seh_tail ; Rip before the prologue label
1368
1369 mov r10d,DWORD[4+r11] ; HandlerData[1] = body label
1370 lea r10,[r10*1+rsi]
1371 cmp rbx,r10
1372 jb NEAR $L$common_pop_regs ; between prologue and body: rax is still the entry rsp
1373
1374 mov rax,QWORD[152+r8] ; context Rsp
1375
1376 mov r10d,DWORD[8+r11] ; HandlerData[2] = epilogue label
1377 lea r10,[r10*1+rsi]
1378 cmp rbx,r10
1379 jae NEAR $L$common_seh_tail ; Rip at or past the epilogue label
1380
1381 mov rax,QWORD[40+rax] ; entry rsp from the frame slot [40+rsp]
1382
1383$L$common_pop_regs:
; rax = entry rsp. Fetch the six registers pushed just below it and write
; them into the context record.
1384 mov rbx,QWORD[((-8))+rax]
1385 mov rbp,QWORD[((-16))+rax]
1386 mov r12,QWORD[((-24))+rax]
1387 mov r13,QWORD[((-32))+rax]
1388 mov r14,QWORD[((-40))+rax]
1389 mov r15,QWORD[((-48))+rax]
1390 mov QWORD[144+r8],rbx
1391 mov QWORD[160+r8],rbp
1392 mov QWORD[216+r8],r12
1393 mov QWORD[224+r8],r13
1394 mov QWORD[232+r8],r14
1395 mov QWORD[240+r8],r15
1396
1397$L$common_seh_tail:
; rdi/rsi were saved at [8+rsp]/[16+rsp] by each WIN64 prologue.
1398 mov rdi,QWORD[8+rax]
1399 mov rsi,QWORD[16+rax]
1400 mov QWORD[152+r8],rax ; context Rsp
1401 mov QWORD[168+r8],rsi ; context Rsi
1402 mov QWORD[176+r8],rdi ; context Rdi
1403
; Copy 154 qwords (1232 bytes) from the context record at r8 to the pointer
; stored at [40+r9].
1404 mov rdi,QWORD[40+r9]
1405 mov rsi,r8
1406 mov ecx,154
1407 DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
1408
; Call RtlVirtualUnwind: first four arguments in rcx, rdx, r8, r9 and four
; more on the stack above the 32-byte shadow space.
1409 mov rsi,r9
1410 xor rcx,rcx ; first argument = 0
1411 mov rdx,QWORD[8+rsi]
1412 mov r8,QWORD[rsi]
1413 mov r9,QWORD[16+rsi]
1414 mov r10,QWORD[40+rsi]
1415 lea r11,[56+rsi]
1416 lea r12,[24+rsi]
1417 mov QWORD[32+rsp],r10
1418 mov QWORD[40+rsp],r11
1419 mov QWORD[48+rsp],r12
1420 mov QWORD[56+rsp],rcx ; last argument = 0
1421 call QWORD[__imp_RtlVirtualUnwind]
1422
1423 mov eax,1 ; NOTE(review): presumably ExceptionContinueSearch -- confirm
1424 add rsp,64
1425 popfq
1426 pop r15
1427 pop r14
1428 pop r13
1429 pop r12
1430 pop rbp
1431 pop rbx
1432 pop rdi
1433 pop rsi
1434 DB 0F3h,0C3h ;repret
1435
1436
; .pdata: one three-dword record per function -- begin address, end address
; and the address of its .xdata record, all image-relative.
1437section .pdata rdata align=4
1438ALIGN 4
1439 DD $L$SEH_begin_bn_mul_mont wrt ..imagebase
1440 DD $L$SEH_end_bn_mul_mont wrt ..imagebase
1441 DD $L$SEH_info_bn_mul_mont wrt ..imagebase
1442
1443 DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
1444 DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase
1445 DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase
1446
1447 DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
1448 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
1449 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase
1450 DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
1451 DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase
1452 DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase
; .xdata: per-function records. Each starts with the four bytes 9,0,0,0,
; followed by the handler address and the handler data that the handler
; reads as an array of image-relative dwords.
; NOTE(review): 9 is presumably UNWIND_INFO version 1 with the
; exception-handler flag set -- confirm against the Windows x64 unwind
; documentation.
1453section .xdata rdata align=8
1454ALIGN 8
1455$L$SEH_info_bn_mul_mont:
1456DB 9,0,0,0
1457 DD mul_handler wrt ..imagebase
1458 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase ; body, epilogue
1459$L$SEH_info_bn_mul4x_mont:
1460DB 9,0,0,0
1461 DD mul_handler wrt ..imagebase
1462 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase ; body, epilogue
1463$L$SEH_info_bn_sqr8x_mont:
1464DB 9,0,0,0
1465 DD sqr_handler wrt ..imagebase
1466 DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase ; prologue, body, epilogue
1467ALIGN 8
; bn_mulx4x_mont uses sqr_handler because it keeps its entry rsp at
; [40+rsp], the slot sqr_handler reads.
1468$L$SEH_info_bn_mulx4x_mont:
1469DB 9,0,0,0
1470 DD sqr_handler wrt ..imagebase
1471 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase ; prologue, body, epilogue
1472ALIGN 8
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette