VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1k/crypto/genasm-macosx/rsaz-x86_64.S@ 91789

最後變更 在這個檔案從91789是 83531,由 vboxsync 提交於 5 年 前

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

檔案大小: 17.7 KB
 
1.text
2
3
4
# _rsaz_512_sqr: square an 8-limb (512-bit) value one or more times, passing
# each 16-limb square through __rsaz_512_reduce and __rsaz_512_subtract.
#
# Inputs, as consumed by the code below:
#   %rdi  output buffer; __rsaz_512_subtract stores the 8 result limbs here
#   %rsi  input operand a[0..7], least-significant limb first
#   %rdx  copied to %rbp; the helpers read eight qwords through it
#         (presumably the modulus -- confirm against the caller)
#   %rcx  64-bit constant saved at 128(%rsp) and used by __rsaz_512_reduce
#         as its per-limb multiplier (presumably the Montgomery n0 value)
#   %r8d  number of squarings; the loop body runs before the count is
#         tested, so it must be at least 1
#
# Stack frame after the prologue:
#   0..127(%rsp)   16-limb square
#   128(%rsp)      saved %rcx
#   128+8(%rsp)    remaining iteration count
5.globl _rsaz_512_sqr
6
7.p2align 5
8_rsaz_512_sqr:
9
# Save the callee-saved registers used below and reserve the frame.
10 pushq %rbx
11
12 pushq %rbp
13
14 pushq %r12
15
16 pushq %r13
17
18 pushq %r14
19
20 pushq %r15
21
22
23 subq $128+24,%rsp
24
25L$sqr_body:
# rbp = third argument; preload a[0] into rdx and a[1] into rax, which is
# the state every loop iteration expects on entry.
26 movq %rdx,%rbp
27 movq (%rsi),%rdx
28 movq 8(%rsi),%rax
29 movq %rcx,128(%rsp)
30 jmp L$oop_sqr
31
32.p2align 5
33L$oop_sqr:
# Spill the iteration count; r8 is reused as an accumulator below.
34 movl %r8d,128+8(%rsp)
35
# Row 0: a[0]*a[1..7]; the cross-product words accumulate in r8..r15.
36 movq %rdx,%rbx
37 mulq %rdx
38 movq %rax,%r8
39 movq 16(%rsi),%rax
40 movq %rdx,%r9
41
42 mulq %rbx
43 addq %rax,%r9
44 movq 24(%rsi),%rax
45 movq %rdx,%r10
46 adcq $0,%r10
47
48 mulq %rbx
49 addq %rax,%r10
50 movq 32(%rsi),%rax
51 movq %rdx,%r11
52 adcq $0,%r11
53
54 mulq %rbx
55 addq %rax,%r11
56 movq 40(%rsi),%rax
57 movq %rdx,%r12
58 adcq $0,%r12
59
60 mulq %rbx
61 addq %rax,%r12
62 movq 48(%rsi),%rax
63 movq %rdx,%r13
64 adcq $0,%r13
65
66 mulq %rbx
67 addq %rax,%r13
68 movq 56(%rsi),%rax
69 movq %rdx,%r14
70 adcq $0,%r14
71
72 mulq %rbx
73 addq %rax,%r14
74 movq %rbx,%rax
75 movq %rdx,%r15
76 adcq $0,%r15
77
# Double the two lowest cross-product words. rcx keeps the undoubled r9 so
# that its top bit can be carried into the next doubled word later.
78 addq %r8,%r8
79 movq %r9,%rcx
80 adcq %r9,%r9
81
# Add a[0]^2 and store result words 0 and 1.
82 mulq %rax
83 movq %rax,(%rsp)
84 addq %rdx,%r8
85 adcq $0,%r9
86
87 movq %r8,8(%rsp)
88 shrq $63,%rcx
89
90
# Row 1: a[1]*a[2..7] added into r10..r15; the final carry word lands in r8.
91 movq 8(%rsi),%r8
92 movq 16(%rsi),%rax
93 mulq %r8
94 addq %rax,%r10
95 movq 24(%rsi),%rax
96 movq %rdx,%rbx
97 adcq $0,%rbx
98
99 mulq %r8
100 addq %rax,%r11
101 movq 32(%rsi),%rax
102 adcq $0,%rdx
103 addq %rbx,%r11
104 movq %rdx,%rbx
105 adcq $0,%rbx
106
107 mulq %r8
108 addq %rax,%r12
109 movq 40(%rsi),%rax
110 adcq $0,%rdx
111 addq %rbx,%r12
112 movq %rdx,%rbx
113 adcq $0,%rbx
114
115 mulq %r8
116 addq %rax,%r13
117 movq 48(%rsi),%rax
118 adcq $0,%rdx
119 addq %rbx,%r13
120 movq %rdx,%rbx
121 adcq $0,%rbx
122
123 mulq %r8
124 addq %rax,%r14
125 movq 56(%rsi),%rax
126 adcq $0,%rdx
127 addq %rbx,%r14
128 movq %rdx,%rbx
129 adcq $0,%rbx
130
131 mulq %r8
132 addq %rax,%r15
133 movq %r8,%rax
134 adcq $0,%rdx
135 addq %rbx,%r15
136 movq %rdx,%r8
137 movq %r10,%rdx
138 adcq $0,%r8
139
# Double r10 and r11: the addq on the rdx copy only produces the carry out
# of r10, while leaq computes 2*r10 plus the bit saved in rcx without
# touching flags. rbx keeps the undoubled r11 for the next carry bit.
140 addq %rdx,%rdx
141 leaq (%rcx,%r10,2),%r10
142 movq %r11,%rbx
143 adcq %r11,%r11
144
# Add a[1]^2 and store result words 2 and 3.
145 mulq %rax
146 addq %rax,%r9
147 adcq %rdx,%r10
148 adcq $0,%r11
149
150 movq %r9,16(%rsp)
151 movq %r10,24(%rsp)
152 shrq $63,%rbx
153
154
# Row 2: a[2]*a[3..7] added into r12..r15 and r8; carry word in r9.
# The doubling of r12 and r13 is interleaved with the multiplies via leaq.
155 movq 16(%rsi),%r9
156 movq 24(%rsi),%rax
157 mulq %r9
158 addq %rax,%r12
159 movq 32(%rsi),%rax
160 movq %rdx,%rcx
161 adcq $0,%rcx
162
163 mulq %r9
164 addq %rax,%r13
165 movq 40(%rsi),%rax
166 adcq $0,%rdx
167 addq %rcx,%r13
168 movq %rdx,%rcx
169 adcq $0,%rcx
170
171 mulq %r9
172 addq %rax,%r14
173 movq 48(%rsi),%rax
174 adcq $0,%rdx
175 addq %rcx,%r14
176 movq %rdx,%rcx
177 adcq $0,%rcx
178
179 mulq %r9
180 movq %r12,%r10
181 leaq (%rbx,%r12,2),%r12
182 addq %rax,%r15
183 movq 56(%rsi),%rax
184 adcq $0,%rdx
185 addq %rcx,%r15
186 movq %rdx,%rcx
187 adcq $0,%rcx
188
189 mulq %r9
190 shrq $63,%r10
191 addq %rax,%r8
192 movq %r9,%rax
193 adcq $0,%rdx
194 addq %rcx,%r8
195 movq %rdx,%r9
196 adcq $0,%r9
197
198 movq %r13,%rcx
199 leaq (%r10,%r13,2),%r13
200
# Add a[2]^2 and store result words 4 and 5.
201 mulq %rax
202 addq %rax,%r11
203 adcq %rdx,%r12
204 adcq $0,%r13
205
206 movq %r11,32(%rsp)
207 movq %r12,40(%rsp)
208 shrq $63,%rcx
209
210
# Row 3: a[3]*a[4..7] added into r14, r15, r8, r9; carry word in r10.
211 movq 24(%rsi),%r10
212 movq 32(%rsi),%rax
213 mulq %r10
214 addq %rax,%r14
215 movq 40(%rsi),%rax
216 movq %rdx,%rbx
217 adcq $0,%rbx
218
219 mulq %r10
220 addq %rax,%r15
221 movq 48(%rsi),%rax
222 adcq $0,%rdx
223 addq %rbx,%r15
224 movq %rdx,%rbx
225 adcq $0,%rbx
226
227 mulq %r10
228 movq %r14,%r12
229 leaq (%rcx,%r14,2),%r14
230 addq %rax,%r8
231 movq 56(%rsi),%rax
232 adcq $0,%rdx
233 addq %rbx,%r8
234 movq %rdx,%rbx
235 adcq $0,%rbx
236
237 mulq %r10
238 shrq $63,%r12
239 addq %rax,%r9
240 movq %r10,%rax
241 adcq $0,%rdx
242 addq %rbx,%r9
243 movq %rdx,%r10
244 adcq $0,%r10
245
246 movq %r15,%rbx
247 leaq (%r12,%r15,2),%r15
248
# Add a[3]^2 and store result words 6 and 7.
249 mulq %rax
250 addq %rax,%r13
251 adcq %rdx,%r14
252 adcq $0,%r15
253
254 movq %r13,48(%rsp)
255 movq %r14,56(%rsp)
256 shrq $63,%rbx
257
258
# Row 4: a[4]*a[5..7] added into r8, r9, r10; carry word in r11.
259 movq 32(%rsi),%r11
260 movq 40(%rsi),%rax
261 mulq %r11
262 addq %rax,%r8
263 movq 48(%rsi),%rax
264 movq %rdx,%rcx
265 adcq $0,%rcx
266
267 mulq %r11
268 addq %rax,%r9
269 movq 56(%rsi),%rax
270 adcq $0,%rdx
271 movq %r8,%r12
272 leaq (%rbx,%r8,2),%r8
273 addq %rcx,%r9
274 movq %rdx,%rcx
275 adcq $0,%rcx
276
277 mulq %r11
278 shrq $63,%r12
279 addq %rax,%r10
280 movq %r11,%rax
281 adcq $0,%rdx
282 addq %rcx,%r10
283 movq %rdx,%r11
284 adcq $0,%r11
285
286 movq %r9,%rcx
287 leaq (%r12,%r9,2),%r9
288
# Add a[4]^2 and store result words 8 and 9.
289 mulq %rax
290 addq %rax,%r15
291 adcq %rdx,%r8
292 adcq $0,%r9
293
294 movq %r15,64(%rsp)
295 movq %r8,72(%rsp)
296 shrq $63,%rcx
297
298
# Row 5: a[5]*a[6..7] added into r10 and r11; carry word in r12.
299 movq 40(%rsi),%r12
300 movq 48(%rsi),%rax
301 mulq %r12
302 addq %rax,%r10
303 movq 56(%rsi),%rax
304 movq %rdx,%rbx
305 adcq $0,%rbx
306
307 mulq %r12
308 addq %rax,%r11
309 movq %r12,%rax
310 movq %r10,%r15
311 leaq (%rcx,%r10,2),%r10
312 adcq $0,%rdx
313 shrq $63,%r15
314 addq %rbx,%r11
315 movq %rdx,%r12
316 adcq $0,%r12
317
318 movq %r11,%rbx
319 leaq (%r15,%r11,2),%r11
320
# Add a[5]^2 and store result words 10 and 11.
321 mulq %rax
322 addq %rax,%r9
323 adcq %rdx,%r10
324 adcq $0,%r11
325
326 movq %r9,80(%rsp)
327 movq %r10,88(%rsp)
328
329
# Row 6: a[6]*a[7] added into r12; carry word in r13.
330 movq 48(%rsi),%r13
331 movq 56(%rsi),%rax
332 mulq %r13
333 addq %rax,%r12
334 movq %r13,%rax
335 movq %rdx,%r13
336 adcq $0,%r13
337
# shlq moves the top bit of the undoubled r11 (held in rbx) into CF, and
# the adc chain doubles r12:r13 with that bit shifted in; r14 catches the
# bit shifted out of r13.
338 xorq %r14,%r14
339 shlq $1,%rbx
340 adcq %r12,%r12
341 adcq %r13,%r13
342 adcq %r14,%r14
343
# Add a[6]^2 and store result words 12 and 13.
344 mulq %rax
345 addq %rax,%r11
346 adcq %rdx,%r12
347 adcq $0,%r13
348
349 movq %r11,96(%rsp)
350 movq %r12,104(%rsp)
351
352
# Add a[7]^2 and store result words 14 and 15.
353 movq 56(%rsi),%rax
354 mulq %rax
355 addq %rax,%r13
356 adcq $0,%rdx
357
358 addq %rdx,%r14
359
360 movq %r13,112(%rsp)
361 movq %r14,120(%rsp)
362
# Load the low eight words of the square as input to the reduction helper.
363 movq (%rsp),%r8
364 movq 8(%rsp),%r9
365 movq 16(%rsp),%r10
366 movq 24(%rsp),%r11
367 movq 32(%rsp),%r12
368 movq 40(%rsp),%r13
369 movq 48(%rsp),%r14
370 movq 56(%rsp),%r15
371
372 call __rsaz_512_reduce
373
# Add the high eight words of the square. sbbq turns the final carry into a
# mask in rcx (0 or all ones) that __rsaz_512_subtract uses.
374 addq 64(%rsp),%r8
375 adcq 72(%rsp),%r9
376 adcq 80(%rsp),%r10
377 adcq 88(%rsp),%r11
378 adcq 96(%rsp),%r12
379 adcq 104(%rsp),%r13
380 adcq 112(%rsp),%r14
381 adcq 120(%rsp),%r15
382 sbbq %rcx,%rcx
383
384 call __rsaz_512_subtract
385
# Set up the next iteration: result words 0 and 1 go into rdx and rax (the
# state L$oop_sqr expects), and the output buffer becomes the new input.
386 movq %r8,%rdx
387 movq %r9,%rax
388 movl 128+8(%rsp),%r8d
389 movq %rdi,%rsi
390
391 decl %r8d
392 jnz L$oop_sqr
393
# Epilogue: rax points just above the six saved registers; restore them and
# release the frame.
394 leaq 128+24+48(%rsp),%rax
395
396 movq -48(%rax),%r15
397
398 movq -40(%rax),%r14
399
400 movq -32(%rax),%r13
401
402 movq -24(%rax),%r12
403
404 movq -16(%rax),%rbp
405
406 movq -8(%rax),%rbx
407
408 leaq (%rax),%rsp
409
410L$sqr_epilogue:
# 0xf3,0xc3 is the encoding of "rep ret".
411 .byte 0xf3,0xc3
412
413
# _rsaz_512_mul: multiply two 8-limb values with __rsaz_512_mul, then run the
# 16-limb product through __rsaz_512_reduce and __rsaz_512_subtract.
#
# Inputs, as consumed by the code below:
#   %rdi  output buffer for the 8 result limbs
#   %rsi  first operand, 8 limbs
#   %rdx  second operand, 8 limbs (walked through %rbp by __rsaz_512_mul)
#   %rcx  pointer handed to the reduce/subtract helpers in %rbp
#         (presumably the modulus -- confirm against the caller)
#   %r8   64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
#         (presumably the Montgomery n0 value)
414.globl _rsaz_512_mul
415
416.p2align 5
417_rsaz_512_mul:
418
419 pushq %rbx
420
421 pushq %rbp
422
423 pushq %r12
424
425 pushq %r13
426
427 pushq %r14
428
429 pushq %r15
430
431
432 subq $128+24,%rsp
433
434L$mul_body:
# The two .byte sequences encode "movq %rdi,%xmm0" and "movq %rcx,%xmm1":
# both pointers are parked in xmm registers because __rsaz_512_mul
# overwrites %rdi and %rcx.
435.byte 102,72,15,110,199
436.byte 102,72,15,110,201
437 movq %r8,128(%rsp)
# rbx = first limb of the second operand, rbp = pointer to it.
438 movq (%rdx),%rbx
439 movq %rdx,%rbp
440 call __rsaz_512_mul
441
# Encoded "movq %xmm0,%rdi" and "movq %xmm1,%rbp": restore the output
# pointer and load the saved %rcx argument into rbp for the helpers.
442.byte 102,72,15,126,199
443.byte 102,72,15,126,205
444
# Load the low eight words of the product as input to the reduction helper.
445 movq (%rsp),%r8
446 movq 8(%rsp),%r9
447 movq 16(%rsp),%r10
448 movq 24(%rsp),%r11
449 movq 32(%rsp),%r12
450 movq 40(%rsp),%r13
451 movq 48(%rsp),%r14
452 movq 56(%rsp),%r15
453
454 call __rsaz_512_reduce
# Add the high eight words; sbbq turns the final carry into a 0 / all-ones
# mask in rcx for __rsaz_512_subtract.
455 addq 64(%rsp),%r8
456 adcq 72(%rsp),%r9
457 adcq 80(%rsp),%r10
458 adcq 88(%rsp),%r11
459 adcq 96(%rsp),%r12
460 adcq 104(%rsp),%r13
461 adcq 112(%rsp),%r14
462 adcq 120(%rsp),%r15
463 sbbq %rcx,%rcx
464
465 call __rsaz_512_subtract
466
# Epilogue: restore the six saved registers and release the frame.
467 leaq 128+24+48(%rsp),%rax
468
469 movq -48(%rax),%r15
470
471 movq -40(%rax),%r14
472
473 movq -32(%rax),%r13
474
475 movq -24(%rax),%r12
476
477 movq -16(%rax),%rbp
478
479 movq -8(%rax),%rbx
480
481 leaq (%rax),%rsp
482
483L$mul_epilogue:
# 0xf3,0xc3 is the encoding of "rep ret".
484 .byte 0xf3,0xc3
485
486
# _rsaz_512_mul_gather4: like _rsaz_512_mul, but each limb of the second
# operand is selected from a table by index. Every table row is read in
# full and combined with compare masks, so the memory access pattern does
# not depend on the index.
#
# Inputs, as consumed by the code below:
#   %rdi  output buffer (saved at 128+8(%rsp))
#   %rsi  first operand, 8 limbs
#   %rdx  table: 8 rows of 128 bytes (16 qwords per row); read with movdqa,
#         so it must be 16-byte aligned
#   %rcx  pointer saved at 128+16(%rsp) and handed to the reduce/subtract
#         helpers in %rbp (presumably the modulus)
#   %r8   64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
#   %r9d  index of the qword to pick from each row; the masks built below
#         cover indices 0..15
487.globl _rsaz_512_mul_gather4
488
489.p2align 5
490_rsaz_512_mul_gather4:
491
492 pushq %rbx
493
494 pushq %rbp
495
496 pushq %r12
497
498 pushq %r13
499
500 pushq %r14
501
502 pushq %r15
503
504
505 subq $152,%rsp
506
507L$mul_gather4_body:
# Broadcast the index to all four dwords of xmm8, then build eight masks in
# xmm0..xmm7. Starting from {0,0,1,1} and stepping by {2,2,2,2}, each
# register is compared against the index, so exactly one 64-bit lane across
# xmm0..xmm7 ends up all ones (for an index in 0..15).
508 movd %r9d,%xmm8
509 movdqa L$inc+16(%rip),%xmm1
510 movdqa L$inc(%rip),%xmm0
511
512 pshufd $0,%xmm8,%xmm8
513 movdqa %xmm1,%xmm7
514 movdqa %xmm1,%xmm2
515 paddd %xmm0,%xmm1
516 pcmpeqd %xmm8,%xmm0
517 movdqa %xmm7,%xmm3
518 paddd %xmm1,%xmm2
519 pcmpeqd %xmm8,%xmm1
520 movdqa %xmm7,%xmm4
521 paddd %xmm2,%xmm3
522 pcmpeqd %xmm8,%xmm2
523 movdqa %xmm7,%xmm5
524 paddd %xmm3,%xmm4
525 pcmpeqd %xmm8,%xmm3
526 movdqa %xmm7,%xmm6
527 paddd %xmm4,%xmm5
528 pcmpeqd %xmm8,%xmm4
529 paddd %xmm5,%xmm6
530 pcmpeqd %xmm8,%xmm5
531 paddd %xmm6,%xmm7
532 pcmpeqd %xmm8,%xmm6
533 pcmpeqd %xmm8,%xmm7
534
# Select limb 0 of the second operand: load the whole first row, AND each
# 16-byte chunk with its mask, and OR everything together. rbp advances to
# the next row.
535 movdqa 0(%rdx),%xmm8
536 movdqa 16(%rdx),%xmm9
537 movdqa 32(%rdx),%xmm10
538 movdqa 48(%rdx),%xmm11
539 pand %xmm0,%xmm8
540 movdqa 64(%rdx),%xmm12
541 pand %xmm1,%xmm9
542 movdqa 80(%rdx),%xmm13
543 pand %xmm2,%xmm10
544 movdqa 96(%rdx),%xmm14
545 pand %xmm3,%xmm11
546 movdqa 112(%rdx),%xmm15
547 leaq 128(%rdx),%rbp
548 pand %xmm4,%xmm12
549 pand %xmm5,%xmm13
550 pand %xmm6,%xmm14
551 pand %xmm7,%xmm15
552 por %xmm10,%xmm8
553 por %xmm11,%xmm9
554 por %xmm12,%xmm8
555 por %xmm13,%xmm9
556 por %xmm14,%xmm8
557 por %xmm15,%xmm9
558
# Fold the two 64-bit halves together (pshufd $0x4e swaps them) so the
# selected qword is in the low half; the .byte sequence encodes
# "movq %xmm8,%rbx".
559 por %xmm9,%xmm8
560 pshufd $0x4e,%xmm8,%xmm9
561 por %xmm9,%xmm8
562.byte 102,76,15,126,195
563
# Spill the arguments whose registers are reused below.
564 movq %r8,128(%rsp)
565 movq %rdi,128+8(%rsp)
566 movq %rcx,128+16(%rsp)
567
# First row of the product: a[0..7] * rbx. Word 0 goes to (%rsp); the
# remaining partial sums stay in r8..r15.
568 movq (%rsi),%rax
569 movq 8(%rsi),%rcx
570 mulq %rbx
571 movq %rax,(%rsp)
572 movq %rcx,%rax
573 movq %rdx,%r8
574
575 mulq %rbx
576 addq %rax,%r8
577 movq 16(%rsi),%rax
578 movq %rdx,%r9
579 adcq $0,%r9
580
581 mulq %rbx
582 addq %rax,%r9
583 movq 24(%rsi),%rax
584 movq %rdx,%r10
585 adcq $0,%r10
586
587 mulq %rbx
588 addq %rax,%r10
589 movq 32(%rsi),%rax
590 movq %rdx,%r11
591 adcq $0,%r11
592
593 mulq %rbx
594 addq %rax,%r11
595 movq 40(%rsi),%rax
596 movq %rdx,%r12
597 adcq $0,%r12
598
599 mulq %rbx
600 addq %rax,%r12
601 movq 48(%rsi),%rax
602 movq %rdx,%r13
603 adcq $0,%r13
604
605 mulq %rbx
606 addq %rax,%r13
607 movq 56(%rsi),%rax
608 movq %rdx,%r14
609 adcq $0,%r14
610
611 mulq %rbx
612 addq %rax,%r14
613 movq (%rsi),%rax
614 movq %rdx,%r15
615 adcq $0,%r15
616
# rdi = address of the next product word to store; seven rows remain.
617 leaq 8(%rsp),%rdi
618 movl $7,%ecx
619 jmp L$oop_mul_gather
620
621.p2align 5
622L$oop_mul_gather:
# Select the next limb of the second operand from the current table row
# (same mask-and-OR scheme as above) and advance rbp to the following row.
623 movdqa 0(%rbp),%xmm8
624 movdqa 16(%rbp),%xmm9
625 movdqa 32(%rbp),%xmm10
626 movdqa 48(%rbp),%xmm11
627 pand %xmm0,%xmm8
628 movdqa 64(%rbp),%xmm12
629 pand %xmm1,%xmm9
630 movdqa 80(%rbp),%xmm13
631 pand %xmm2,%xmm10
632 movdqa 96(%rbp),%xmm14
633 pand %xmm3,%xmm11
634 movdqa 112(%rbp),%xmm15
635 leaq 128(%rbp),%rbp
636 pand %xmm4,%xmm12
637 pand %xmm5,%xmm13
638 pand %xmm6,%xmm14
639 pand %xmm7,%xmm15
640 por %xmm10,%xmm8
641 por %xmm11,%xmm9
642 por %xmm12,%xmm8
643 por %xmm13,%xmm9
644 por %xmm14,%xmm8
645 por %xmm15,%xmm9
646
647 por %xmm9,%xmm8
648 pshufd $0x4e,%xmm8,%xmm9
649 por %xmm9,%xmm8
# Encoded "movq %xmm8,%rbx".
650.byte 102,76,15,126,195
651
# Multiply a[0..7] by rbx and add into the running sums. The lowest word is
# final and is stored through rdi; the sums shift down by one register.
652 mulq %rbx
653 addq %rax,%r8
654 movq 8(%rsi),%rax
655 movq %r8,(%rdi)
656 movq %rdx,%r8
657 adcq $0,%r8
658
659 mulq %rbx
660 addq %rax,%r9
661 movq 16(%rsi),%rax
662 adcq $0,%rdx
663 addq %r9,%r8
664 movq %rdx,%r9
665 adcq $0,%r9
666
667 mulq %rbx
668 addq %rax,%r10
669 movq 24(%rsi),%rax
670 adcq $0,%rdx
671 addq %r10,%r9
672 movq %rdx,%r10
673 adcq $0,%r10
674
675 mulq %rbx
676 addq %rax,%r11
677 movq 32(%rsi),%rax
678 adcq $0,%rdx
679 addq %r11,%r10
680 movq %rdx,%r11
681 adcq $0,%r11
682
683 mulq %rbx
684 addq %rax,%r12
685 movq 40(%rsi),%rax
686 adcq $0,%rdx
687 addq %r12,%r11
688 movq %rdx,%r12
689 adcq $0,%r12
690
691 mulq %rbx
692 addq %rax,%r13
693 movq 48(%rsi),%rax
694 adcq $0,%rdx
695 addq %r13,%r12
696 movq %rdx,%r13
697 adcq $0,%r13
698
699 mulq %rbx
700 addq %rax,%r14
701 movq 56(%rsi),%rax
702 adcq $0,%rdx
703 addq %r14,%r13
704 movq %rdx,%r14
705 adcq $0,%r14
706
707 mulq %rbx
708 addq %rax,%r15
709 movq (%rsi),%rax
710 adcq $0,%rdx
711 addq %r15,%r14
712 movq %rdx,%r15
713 adcq $0,%r15
714
715 leaq 8(%rdi),%rdi
716
717 decl %ecx
718 jnz L$oop_mul_gather
719
# Store the upper eight words of the product.
720 movq %r8,(%rdi)
721 movq %r9,8(%rdi)
722 movq %r10,16(%rdi)
723 movq %r11,24(%rdi)
724 movq %r12,32(%rdi)
725 movq %r13,40(%rdi)
726 movq %r14,48(%rdi)
727 movq %r15,56(%rdi)
728
# Reload the output pointer, and the saved %rcx argument into rbp for the
# reduce/subtract helpers.
729 movq 128+8(%rsp),%rdi
730 movq 128+16(%rsp),%rbp
731
732 movq (%rsp),%r8
733 movq 8(%rsp),%r9
734 movq 16(%rsp),%r10
735 movq 24(%rsp),%r11
736 movq 32(%rsp),%r12
737 movq 40(%rsp),%r13
738 movq 48(%rsp),%r14
739 movq 56(%rsp),%r15
740
741 call __rsaz_512_reduce
# Add the high eight words; sbbq turns the final carry into a 0 / all-ones
# mask in rcx for __rsaz_512_subtract.
742 addq 64(%rsp),%r8
743 adcq 72(%rsp),%r9
744 adcq 80(%rsp),%r10
745 adcq 88(%rsp),%r11
746 adcq 96(%rsp),%r12
747 adcq 104(%rsp),%r13
748 adcq 112(%rsp),%r14
749 adcq 120(%rsp),%r15
750 sbbq %rcx,%rcx
751
752 call __rsaz_512_subtract
753
# Epilogue: restore the six saved registers and release the frame.
754 leaq 128+24+48(%rsp),%rax
755
756 movq -48(%rax),%r15
757
758 movq -40(%rax),%r14
759
760 movq -32(%rax),%r13
761
762 movq -24(%rax),%r12
763
764 movq -16(%rax),%rbp
765
766 movq -8(%rax),%rbx
767
768 leaq (%rax),%rsp
769
770L$mul_gather4_epilogue:
# 0xf3,0xc3 is the encoding of "rep ret".
771 .byte 0xf3,0xc3
772
773
# _rsaz_512_mul_scatter4: multiply, reduce and conditionally subtract like
# _rsaz_512_mul, then additionally scatter the 8 result limbs into a table
# at a 128-byte stride.
#
# Inputs, as consumed by the code below:
#   %rdi  second operand (8 limbs); the result is also written back here by
#         __rsaz_512_subtract
#   %rsi  first operand, 8 limbs
#   %rdx  pointer handed to the reduce/subtract helpers in %rbp
#         (presumably the modulus)
#   %rcx  64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
#   %r8   table base
#   %r9d  qword index of the table column to write
774.globl _rsaz_512_mul_scatter4
775
776.p2align 5
777_rsaz_512_mul_scatter4:
778
779 pushq %rbx
780
781 pushq %rbp
782
783 pushq %r12
784
785 pushq %r13
786
787 pushq %r14
788
789 pushq %r15
790
791
# Zero-extend the 32-bit index before it is used in address arithmetic.
792 movl %r9d,%r9d
793 subq $128+24,%rsp
794
795L$mul_scatter4_body:
# r8 = table + index*8. The .byte sequences encode "movq %rdi,%xmm0",
# "movq %rdx,%xmm1" and "movq %r8,%xmm2", parking three values in xmm
# registers across the helper calls.
796 leaq (%r8,%r9,8),%r8
797.byte 102,72,15,110,199
798.byte 102,72,15,110,202
799.byte 102,73,15,110,208
800 movq %rcx,128(%rsp)
801
# rbp = pointer to the second operand, rbx = its first limb.
802 movq %rdi,%rbp
803 movq (%rdi),%rbx
804 call __rsaz_512_mul
805
# Encoded "movq %xmm0,%rdi" and "movq %xmm1,%rbp".
806.byte 102,72,15,126,199
807.byte 102,72,15,126,205
808
809 movq (%rsp),%r8
810 movq 8(%rsp),%r9
811 movq 16(%rsp),%r10
812 movq 24(%rsp),%r11
813 movq 32(%rsp),%r12
814 movq 40(%rsp),%r13
815 movq 48(%rsp),%r14
816 movq 56(%rsp),%r15
817
818 call __rsaz_512_reduce
819 addq 64(%rsp),%r8
820 adcq 72(%rsp),%r9
821 adcq 80(%rsp),%r10
822 adcq 88(%rsp),%r11
823 adcq 96(%rsp),%r12
824 adcq 104(%rsp),%r13
825 adcq 112(%rsp),%r14
826 adcq 120(%rsp),%r15
# Encoded "movq %xmm2,%rsi" (rsi = table slot). movq leaves the flags
# alone, so the carry from the adc chain is still intact for sbbq.
827.byte 102,72,15,126,214
828 sbbq %rcx,%rcx
829
830 call __rsaz_512_subtract
831
# Scatter the result: one limb per 128-byte table row.
832 movq %r8,0(%rsi)
833 movq %r9,128(%rsi)
834 movq %r10,256(%rsi)
835 movq %r11,384(%rsi)
836 movq %r12,512(%rsi)
837 movq %r13,640(%rsi)
838 movq %r14,768(%rsi)
839 movq %r15,896(%rsi)
840
# Epilogue: restore the six saved registers and release the frame.
841 leaq 128+24+48(%rsp),%rax
842
843 movq -48(%rax),%r15
844
845 movq -40(%rax),%r14
846
847 movq -32(%rax),%r13
848
849 movq -24(%rax),%r12
850
851 movq -16(%rax),%rbp
852
853 movq -8(%rax),%rbx
854
855 leaq (%rax),%rsp
856
857L$mul_scatter4_epilogue:
# 0xf3,0xc3 is the encoding of "rep ret".
858 .byte 0xf3,0xc3
859
860
# _rsaz_512_mul_by_one: run an 8-limb value through __rsaz_512_reduce on its
# own (no multiplication, no high half added, no final subtraction) and
# store the 8 resulting limbs.
#
# Inputs, as consumed by the code below:
#   %rdi  output buffer for the 8 result limbs
#   %rsi  input, 8 limbs
#   %rdx  pointer handed to __rsaz_512_reduce in %rbp (presumably the
#         modulus)
#   %rcx  64-bit constant saved at 128(%rsp) for __rsaz_512_reduce
861.globl _rsaz_512_mul_by_one
862
863.p2align 5
864_rsaz_512_mul_by_one:
865
866 pushq %rbx
867
868 pushq %rbp
869
870 pushq %r12
871
872 pushq %r13
873
874 pushq %r14
875
876 pushq %r15
877
878
879 subq $128+24,%rsp
880
881L$mul_by_one_body:
882 movq %rdx,%rbp
883 movq %rcx,128(%rsp)
884
# Load the input limbs straight into the registers the helper works on.
885 movq (%rsi),%r8
886 pxor %xmm0,%xmm0
887 movq 8(%rsi),%r9
888 movq 16(%rsi),%r10
889 movq 24(%rsi),%r11
890 movq 32(%rsi),%r12
891 movq 40(%rsi),%r13
892 movq 48(%rsi),%r14
893 movq 56(%rsi),%r15
894
# Zero the first 112 bytes of the scratch area. Nothing in this function or
# in __rsaz_512_reduce reads these bytes back.
895 movdqa %xmm0,(%rsp)
896 movdqa %xmm0,16(%rsp)
897 movdqa %xmm0,32(%rsp)
898 movdqa %xmm0,48(%rsp)
899 movdqa %xmm0,64(%rsp)
900 movdqa %xmm0,80(%rsp)
901 movdqa %xmm0,96(%rsp)
902 call __rsaz_512_reduce
# Store the reduced limbs.
903 movq %r8,(%rdi)
904 movq %r9,8(%rdi)
905 movq %r10,16(%rdi)
906 movq %r11,24(%rdi)
907 movq %r12,32(%rdi)
908 movq %r13,40(%rdi)
909 movq %r14,48(%rdi)
910 movq %r15,56(%rdi)
911
# Epilogue: restore the six saved registers and release the frame.
912 leaq 128+24+48(%rsp),%rax
913
914 movq -48(%rax),%r15
915
916 movq -40(%rax),%r14
917
918 movq -32(%rax),%r13
919
920 movq -24(%rax),%r12
921
922 movq -16(%rax),%rbp
923
924 movq -8(%rax),%rbx
925
926 leaq (%rax),%rsp
927
928L$mul_by_one_epilogue:
# 0xf3,0xc3 is the encoding of "rep ret".
929 .byte 0xf3,0xc3
930
931
932
# __rsaz_512_reduce: file-local helper with a private register convention.
#
# In:   %r8..%r15  eight limbs, least significant in %r8
#       %rbp       pointer to eight qwords (presumably the modulus)
#       128+8(%rsp) 64-bit multiplier constant; because the return address
#                  is on the stack here, this is the caller's 128(%rsp)
# Out:  %r8..%r15  eight limbs after eight reduction rounds
# Uses: %rax, %rbx, %rcx, %rdx, %rsi and the flags are overwritten
#
# Each round computes rbx = (low limb * constant) mod 2^64, adds
# rbx * [%rbp][0..7] to the limbs and drops the lowest word, shifting the
# value down by one limb. This has the shape of word-by-word Montgomery
# reduction -- confirm against the caller's choice of constant.
933.p2align 5
934__rsaz_512_reduce:
935 movq %r8,%rbx
936 imulq 128+8(%rsp),%rbx
937 movq 0(%rbp),%rax
938 movl $8,%ecx
939 jmp L$reduction_loop
940
941.p2align 5
942L$reduction_loop:
# The low word of r8 + rbx*[rbp][0] is never formed: only its carry is
# needed. negq sets CF exactly when r8 is non-zero, which is that carry if
# the low word of the sum is zero (presumably guaranteed by the constant).
943 mulq %rbx
944 movq 8(%rbp),%rax
945 negq %r8
946 movq %rdx,%r8
947 adcq $0,%r8
948
949 mulq %rbx
950 addq %rax,%r9
951 movq 16(%rbp),%rax
952 adcq $0,%rdx
953 addq %r9,%r8
954 movq %rdx,%r9
955 adcq $0,%r9
956
957 mulq %rbx
958 addq %rax,%r10
959 movq 24(%rbp),%rax
960 adcq $0,%rdx
961 addq %r10,%r9
962 movq %rdx,%r10
963 adcq $0,%r10
964
965 mulq %rbx
966 addq %rax,%r11
967 movq 32(%rbp),%rax
968 adcq $0,%rdx
969 addq %r11,%r10
# Start computing the next round's multiplier: rsi = constant, multiplied
# by the new low limb (r8) a few instructions further down.
970 movq 128+8(%rsp),%rsi
971
972
973 adcq $0,%rdx
974 movq %rdx,%r11
975
976 mulq %rbx
977 addq %rax,%r12
978 movq 40(%rbp),%rax
979 adcq $0,%rdx
980 imulq %r8,%rsi
981 addq %r12,%r11
982 movq %rdx,%r12
983 adcq $0,%r12
984
985 mulq %rbx
986 addq %rax,%r13
987 movq 48(%rbp),%rax
988 adcq $0,%rdx
989 addq %r13,%r12
990 movq %rdx,%r13
991 adcq $0,%r13
992
993 mulq %rbx
994 addq %rax,%r14
995 movq 56(%rbp),%rax
996 adcq $0,%rdx
997 addq %r14,%r13
998 movq %rdx,%r14
999 adcq $0,%r14
1000
# Last multiply of the round; rbx then takes the next round's multiplier
# and rax is reloaded with [rbp][0].
1001 mulq %rbx
1002 movq %rsi,%rbx
1003 addq %rax,%r15
1004 movq 0(%rbp),%rax
1005 adcq $0,%rdx
1006 addq %r15,%r14
1007 movq %rdx,%r15
1008 adcq $0,%r15
1009
1010 decl %ecx
1011 jne L$reduction_loop
1012
# 0xf3,0xc3 is the encoding of "rep ret".
1013 .byte 0xf3,0xc3
1014
1015
# __rsaz_512_subtract: file-local helper with a private register convention.
#
# In:   %r8..%r15  eight limbs, least significant in %r8
#       %rdi       output buffer (8 qwords)
#       %rbp       pointer to eight qwords m[0..7] (presumably the modulus)
#       %rcx       mask: 0 or all ones (callers produce it with sbbq)
# Out:  the eight limbs plus (negated m AND mask), stored at (%rdi) and
#       also left in %r8..%r15; with an all-ones mask this subtracts m,
#       with a zero mask the value is stored unchanged.
#
# The selection is done with AND masks rather than a branch.
1016.p2align 5
1017__rsaz_512_subtract:
# Park the incoming limbs in the output buffer to free r8..r15.
1018 movq %r8,(%rdi)
1019 movq %r9,8(%rdi)
1020 movq %r10,16(%rdi)
1021 movq %r11,24(%rdi)
1022 movq %r12,32(%rdi)
1023 movq %r13,40(%rdi)
1024 movq %r14,48(%rdi)
1025 movq %r15,56(%rdi)
1026
# Build the masked negation of m: negq on limb 0 and notq on limbs 1..7.
# That equals the two's complement of m only when m[0] is non-zero (no
# carry out of the low limb) -- presumably m is odd; confirm with caller.
1027 movq 0(%rbp),%r8
1028 movq 8(%rbp),%r9
1029 negq %r8
1030 notq %r9
1031 andq %rcx,%r8
1032 movq 16(%rbp),%r10
1033 andq %rcx,%r9
1034 notq %r10
1035 movq 24(%rbp),%r11
1036 andq %rcx,%r10
1037 notq %r11
1038 movq 32(%rbp),%r12
1039 andq %rcx,%r11
1040 notq %r12
1041 movq 40(%rbp),%r13
1042 andq %rcx,%r12
1043 notq %r13
1044 movq 48(%rbp),%r14
1045 andq %rcx,%r13
1046 notq %r14
1047 movq 56(%rbp),%r15
1048 andq %rcx,%r14
1049 notq %r15
1050 andq %rcx,%r15
1051
# Add the parked limbs to the masked negation.
1052 addq (%rdi),%r8
1053 adcq 8(%rdi),%r9
1054 adcq 16(%rdi),%r10
1055 adcq 24(%rdi),%r11
1056 adcq 32(%rdi),%r12
1057 adcq 40(%rdi),%r13
1058 adcq 48(%rdi),%r14
1059 adcq 56(%rdi),%r15
1060
# Store the final result.
1061 movq %r8,(%rdi)
1062 movq %r9,8(%rdi)
1063 movq %r10,16(%rdi)
1064 movq %r11,24(%rdi)
1065 movq %r12,32(%rdi)
1066 movq %r13,40(%rdi)
1067 movq %r14,48(%rdi)
1068 movq %r15,56(%rdi)
1069
# 0xf3,0xc3 is the encoding of "rep ret".
1070 .byte 0xf3,0xc3
1071
1072
# __rsaz_512_mul: file-local helper with a private register convention.
# Schoolbook 8x8-limb multiplication.
#
# In:   %rsi  pointer to operand a[0..7]
#       %rbp  pointer to operand b[0..7]
#       %rbx  b[0], preloaded by the caller
# Out:  16-limb product written starting at 8(%rsp) as seen here, which is
#       the caller's (%rsp) because the return address is on the stack
# Uses: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15 and the flags are
#       overwritten
1073.p2align 5
1074__rsaz_512_mul:
1075 leaq 8(%rsp),%rdi
1076
# First row: a[0..7] * b[0]. Word 0 is stored at once; the remaining
# partial sums stay in r8..r15.
1077 movq (%rsi),%rax
1078 mulq %rbx
1079 movq %rax,(%rdi)
1080 movq 8(%rsi),%rax
1081 movq %rdx,%r8
1082
1083 mulq %rbx
1084 addq %rax,%r8
1085 movq 16(%rsi),%rax
1086 movq %rdx,%r9
1087 adcq $0,%r9
1088
1089 mulq %rbx
1090 addq %rax,%r9
1091 movq 24(%rsi),%rax
1092 movq %rdx,%r10
1093 adcq $0,%r10
1094
1095 mulq %rbx
1096 addq %rax,%r10
1097 movq 32(%rsi),%rax
1098 movq %rdx,%r11
1099 adcq $0,%r11
1100
1101 mulq %rbx
1102 addq %rax,%r11
1103 movq 40(%rsi),%rax
1104 movq %rdx,%r12
1105 adcq $0,%r12
1106
1107 mulq %rbx
1108 addq %rax,%r12
1109 movq 48(%rsi),%rax
1110 movq %rdx,%r13
1111 adcq $0,%r13
1112
1113 mulq %rbx
1114 addq %rax,%r13
1115 movq 56(%rsi),%rax
1116 movq %rdx,%r14
1117 adcq $0,%r14
1118
1119 mulq %rbx
1120 addq %rax,%r14
1121 movq (%rsi),%rax
1122 movq %rdx,%r15
1123 adcq $0,%r15
1124
# Advance to b[1] and to the next product word; seven rows remain.
1125 leaq 8(%rbp),%rbp
1126 leaq 8(%rdi),%rdi
1127
1128 movl $7,%ecx
1129 jmp L$oop_mul
1130
1131.p2align 5
1132L$oop_mul:
# Multiply a[0..7] by the current b limb and add into the running sums.
# The lowest word is final and is stored through rdi; the sums shift down
# by one register.
1133 movq (%rbp),%rbx
1134 mulq %rbx
1135 addq %rax,%r8
1136 movq 8(%rsi),%rax
1137 movq %r8,(%rdi)
1138 movq %rdx,%r8
1139 adcq $0,%r8
1140
1141 mulq %rbx
1142 addq %rax,%r9
1143 movq 16(%rsi),%rax
1144 adcq $0,%rdx
1145 addq %r9,%r8
1146 movq %rdx,%r9
1147 adcq $0,%r9
1148
1149 mulq %rbx
1150 addq %rax,%r10
1151 movq 24(%rsi),%rax
1152 adcq $0,%rdx
1153 addq %r10,%r9
1154 movq %rdx,%r10
1155 adcq $0,%r10
1156
1157 mulq %rbx
1158 addq %rax,%r11
1159 movq 32(%rsi),%rax
1160 adcq $0,%rdx
1161 addq %r11,%r10
1162 movq %rdx,%r11
1163 adcq $0,%r11
1164
1165 mulq %rbx
1166 addq %rax,%r12
1167 movq 40(%rsi),%rax
1168 adcq $0,%rdx
1169 addq %r12,%r11
1170 movq %rdx,%r12
1171 adcq $0,%r12
1172
1173 mulq %rbx
1174 addq %rax,%r13
1175 movq 48(%rsi),%rax
1176 adcq $0,%rdx
1177 addq %r13,%r12
1178 movq %rdx,%r13
1179 adcq $0,%r13
1180
1181 mulq %rbx
1182 addq %rax,%r14
1183 movq 56(%rsi),%rax
1184 adcq $0,%rdx
1185 addq %r14,%r13
1186 movq %rdx,%r14
# leaq advances the b pointer without disturbing the carry used by adcq.
1187 leaq 8(%rbp),%rbp
1188 adcq $0,%r14
1189
1190 mulq %rbx
1191 addq %rax,%r15
1192 movq (%rsi),%rax
1193 adcq $0,%rdx
1194 addq %r15,%r14
1195 movq %rdx,%r15
1196 adcq $0,%r15
1197
1198 leaq 8(%rdi),%rdi
1199
1200 decl %ecx
1201 jnz L$oop_mul
1202
# Store the upper eight words of the product.
1203 movq %r8,(%rdi)
1204 movq %r9,8(%rdi)
1205 movq %r10,16(%rdi)
1206 movq %r11,24(%rdi)
1207 movq %r12,32(%rdi)
1208 movq %r13,40(%rdi)
1209 movq %r14,48(%rdi)
1210 movq %r15,56(%rdi)
1211
# 0xf3,0xc3 is the encoding of "rep ret".
1212 .byte 0xf3,0xc3
1213
# _rsaz_512_scatter4: copy eight consecutive qwords into one column of a
# table whose rows are 128 bytes apart.
#
# In:   %rdi  table base
#       %rsi  source, 8 qwords
#       %rdx  qword index of the column (used as a 64-bit value)
# Uses: %rax, %r9, %rsi, %rdi and the flags are overwritten
1214.globl _rsaz_512_scatter4
1215
1216.p2align 4
1217_rsaz_512_scatter4:
# rdi = address of the selected column in the first row.
1218 leaq (%rdi,%rdx,8),%rdi
1219 movl $8,%r9d
1220 jmp L$oop_scatter
1221.p2align 4
1222L$oop_scatter:
# Copy one qword, then step the source by 8 and the destination by one row.
1223 movq (%rsi),%rax
1224 leaq 8(%rsi),%rsi
1225 movq %rax,(%rdi)
1226 leaq 128(%rdi),%rdi
1227 decl %r9d
1228 jnz L$oop_scatter
# 0xf3,0xc3 is the encoding of "rep ret".
1229 .byte 0xf3,0xc3
1230
1231
# _rsaz_512_gather4: read one column of a table (8 rows of 128 bytes) into
# eight consecutive qwords. Every row is loaded in full and the wanted
# qword is picked with compare masks, so the memory access pattern does not
# depend on the index.
#
# In:   %rdi  destination, 8 qwords
#       %rsi  table base; read with movdqa, so it must be 16-byte aligned
#       %edx  qword index of the column; the masks cover indices 0..15
# Uses: %r9, %rsi, %rdi, %xmm0..%xmm15 and the flags are overwritten
1232.globl _rsaz_512_gather4
1233
1234.p2align 4
1235_rsaz_512_gather4:
# Broadcast the index to all four dwords of xmm8, then build eight masks in
# xmm0..xmm7. Starting from {0,0,1,1} and stepping by {2,2,2,2}, each
# register is compared against the index, so exactly one 64-bit lane across
# xmm0..xmm7 ends up all ones (for an index in 0..15).
1236 movd %edx,%xmm8
1237 movdqa L$inc+16(%rip),%xmm1
1238 movdqa L$inc(%rip),%xmm0
1239
1240 pshufd $0,%xmm8,%xmm8
1241 movdqa %xmm1,%xmm7
1242 movdqa %xmm1,%xmm2
1243 paddd %xmm0,%xmm1
1244 pcmpeqd %xmm8,%xmm0
1245 movdqa %xmm7,%xmm3
1246 paddd %xmm1,%xmm2
1247 pcmpeqd %xmm8,%xmm1
1248 movdqa %xmm7,%xmm4
1249 paddd %xmm2,%xmm3
1250 pcmpeqd %xmm8,%xmm2
1251 movdqa %xmm7,%xmm5
1252 paddd %xmm3,%xmm4
1253 pcmpeqd %xmm8,%xmm3
1254 movdqa %xmm7,%xmm6
1255 paddd %xmm4,%xmm5
1256 pcmpeqd %xmm8,%xmm4
1257 paddd %xmm5,%xmm6
1258 pcmpeqd %xmm8,%xmm5
1259 paddd %xmm6,%xmm7
1260 pcmpeqd %xmm8,%xmm6
1261 pcmpeqd %xmm8,%xmm7
1262 movl $8,%r9d
1263 jmp L$oop_gather
1264.p2align 4
1265L$oop_gather:
# Load one full row, AND each 16-byte chunk with its mask, and OR the
# chunks together; rsi advances to the next row.
1266 movdqa 0(%rsi),%xmm8
1267 movdqa 16(%rsi),%xmm9
1268 movdqa 32(%rsi),%xmm10
1269 movdqa 48(%rsi),%xmm11
1270 pand %xmm0,%xmm8
1271 movdqa 64(%rsi),%xmm12
1272 pand %xmm1,%xmm9
1273 movdqa 80(%rsi),%xmm13
1274 pand %xmm2,%xmm10
1275 movdqa 96(%rsi),%xmm14
1276 pand %xmm3,%xmm11
1277 movdqa 112(%rsi),%xmm15
1278 leaq 128(%rsi),%rsi
1279 pand %xmm4,%xmm12
1280 pand %xmm5,%xmm13
1281 pand %xmm6,%xmm14
1282 pand %xmm7,%xmm15
1283 por %xmm10,%xmm8
1284 por %xmm11,%xmm9
1285 por %xmm12,%xmm8
1286 por %xmm13,%xmm9
1287 por %xmm14,%xmm8
1288 por %xmm15,%xmm9
1289
# Fold the two 64-bit halves together (pshufd $0x4e swaps them) and store
# the selected qword.
1290 por %xmm9,%xmm8
1291 pshufd $0x4e,%xmm8,%xmm9
1292 por %xmm9,%xmm8
1293 movq %xmm8,(%rdi)
1294 leaq 8(%rdi),%rdi
1295 decl %r9d
1296 jnz L$oop_gather
# 0xf3,0xc3 is the encoding of "rep ret".
1297 .byte 0xf3,0xc3
1298L$SEH_end_rsaz_512_gather4:
1299
1300
# L$inc: two 16-byte constants used to build the gather masks.
#   L$inc     = dwords {0,0,1,1}: the first pair of qword-lane indices
#   L$inc+16  = dwords {2,2,2,2}: the step added to reach each next pair
# Aligned to 64 bytes; the code loads both halves with movdqa.
1301.p2align 6
1302L$inc:
1303.long 0,0, 1,1
1304.long 2,2, 2,2
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette