// Copyright 2004-2017 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// ADDP is applied to every pointer argument before it is dereferenced.
// On HP-UX builds without _LP64 it expands to addp4 (the IA-64 "add
// pointer" instruction for 32-bit pointers); on every other target it
// is a plain add, so "ADDP rX=0,rX" leaves the register unchanged.
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define	ADDP	addp4
#else
#define	ADDP	add
#endif

// OPENSSL_cpuid_setup
// Takes no visible arguments and touches no registers: the single
// bundle returns straight to the caller through b0.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib;	br.ret.sptk.many	b0		};;
.endp	OPENSSL_cpuid_setup#

// OPENSSL_rdtsc
// Out: r8 = current value of the interval time counter (ar.itc).
// Reads no arguments and modifies nothing besides r8.
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib;	mov			r8=ar.itc
	br.ret.sptk.many	b0		};;
.endp	OPENSSL_rdtsc#

// OPENSSL_atomic_add
// Atomically adds r33 to the 32-bit word addressed by r32.
// In:    r32 = address of the 32-bit word, r33 = addend
// Out:   r8  = the value that was stored, sign-extended from 32 bits
// Uses:  r2, r3, p6, ar.ccv
// NOTE(review): presumably int OPENSSL_atomic_add(int *val, int amount);
// confirm against the C header.
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
// The pointer goes through ADDP first, exactly as the other routines in
// this file treat their pointer arguments; where ADDP is a plain add
// this changes nothing.
{ .mmi;	ADDP		r32=0,r32;;
	ld4		r2=[r32]		// r2 = value currently in memory
	nop.i		0		};;
.Lspin:
{ .mii;	mov		ar.ccv=r2		// cmpxchg succeeds only if memory still holds r2
	add		r8=r2,r33		// r8 = candidate new value
	mov		r3=r2		};;	// copy of the expected value for the check below
{ .mmi;	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv	// r2 = value actually found in memory
	nop.i		0		};;
{ .mib;	cmp.ne		p6,p0=r2,r3		// found != expected: the store did not happen
	nop.i		0
(p6)	br.dpnt		.Lspin		};;	// retry, r2 already holds the fresh value
{ .mib;	nop.m		0
	sxt4		r8=r8
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#

// Returns a structure comprising pointer to the top of stack of
// the caller and pointer beyond backing storage for the current
// register frame. The latter is required, because it might be
// insufficient to wipe backing storage for the current frame
// (as this procedure does), one might have to go further, toward
// higher addresses to reach for whole "retroactively" saved
// context...
//
// Out:   r8 = sp, r9 = address the backing-store sweep started from
// What is zeroed: r11, r14-r31, f6-f31, the 96 rotating general and
// floating-point registers (through r127/f127 inside the loop), and
// 97 eight-byte slots of memory below ar.bsp+96*8.
// Preserved: ar.lc (via r3) and the predicates selected by mask
// 0x1ffff (via r8).
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
// Claim a 96-register frame, all of it rotating, so the loop below can
// reach every stacked register through the fixed names r127/f127.
{ .mib;	alloc		r2=ar.pfs,0,96,0,96
	mov		r3=ar.lc		// saved, restored before return
	brp.loop.imp	.L_wipe_top,.L_wipe_end-16
					};;
{ .mii;	mov		r9=ar.bsp
	mov		r8=pr			// saved, restored before return
	mov		ar.lc=96	};;
	.body
{ .mii;	add		r9=96*8-8,r9		// start the sweep at bsp+96*8-8, moving downwards
	mov		ar.ec=1		};;

// One can sweep double as fast, but then we can't guarantee
// that backing storage is wiped...
.L_wipe_top:
{ .mfi;	st8		[r9]=r0,-8		// zero one backing-store slot, step down
	mov		f127=f0
	mov		r127=r0		}
{ .mfb;	nop.m		0
	nop.f		0
	br.ctop.sptk	.L_wipe_top	};;
.L_wipe_end:

// Static scratch registers are cleared explicitly.
{ .mfi;	mov		r11=r0
	mov		f6=f0
	mov		r14=r0		}
{ .mfi;	mov		r15=r0
	mov		f7=f0
	mov		r16=r0		}
{ .mfi;	mov		r17=r0
	mov		f8=f0
	mov		r18=r0		}
{ .mfi;	mov		r19=r0
	mov		f9=f0
	mov		r20=r0		}
{ .mfi;	mov		r21=r0
	mov		f10=f0
	mov		r22=r0		}
{ .mfi;	mov		r23=r0
	mov		f11=f0
	mov		r24=r0		}
{ .mfi;	mov		r25=r0
	mov		f12=f0
	mov		r26=r0		}
{ .mfi;	mov		r27=r0
	mov		f13=f0
	mov		r28=r0		}
{ .mfi;	mov		r29=r0
	mov		f14=f0
	mov		r30=r0		}
{ .mfi;	mov		r31=r0
	mov		f15=f0
	nop.i		0		}
{ .mfi;	mov		f16=f0		}
{ .mfi;	mov		f17=f0		}
{ .mfi;	mov		f18=f0		}
{ .mfi;	mov		f19=f0		}
{ .mfi;	mov		f20=f0		}
{ .mfi;	mov		f21=f0		}
{ .mfi;	mov		f22=f0		}
{ .mfi;	mov		f23=f0		}
{ .mfi;	mov		f24=f0		}
{ .mfi;	mov		f25=f0		}
{ .mfi;	mov		f26=f0		}
{ .mfi;	mov		f27=f0		}
{ .mfi;	mov		f28=f0		}
{ .mfi;	mov		f29=f0		}
{ .mfi;	mov		f30=f0		}
// 96*8+8 equals the 97 post-decrements of 8 applied by the loop, so r9
// returns to the address the sweep started from.
{ .mfi;	add		r9=96*8+8,r9
	mov		f31=f0
	mov		pr=r8,0x1ffff	}		// restore predicates saved on entry
{ .mib;	mov		r8=sp
	mov		ar.lc=r3
	br.ret.sptk	b0		};;
.endp	OPENSSL_wipe_cpu#

// OPENSSL_cleanse
// Stores zeros over len bytes starting at ptr.
// In:    r32 = ptr (passed through ADDP), r33 = len
// Out:   nothing
// Uses:  r2, p6, p7; r32 and r33 are advanced/decremented in place
// Strategy: fewer than 15 bytes are cleared one byte at a time.  Longer
// runs are cleared bytewise until the pointer is 8-byte aligned, then
// eight bytes at a time, and any tail goes back to the byte loop.
// NOTE(review): presumably void OPENSSL_cleanse(void *ptr, size_t len);
// confirm against the C header.
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib;	cmp.eq		p6,p0=0,r33	    // len==0
	ADDP		r32=0,r32
(p6)	br.ret.spnt	b0		};;
{ .mib;	and		r2=7,r32	    // r2 = misalignment of ptr
	cmp.leu		p6,p0=15,r33	    // len>=15
(p6)	br.cond.dptk	.Lot		};;

// Byte loop: entered with len>0, returns once the last byte is stored.
.Little:
{ .mib;	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33	}  // len>1
{ .mbb;	add		r33=-1,r33	   // len--
(p6)	br.cond.dptk	.Little
(p7)	br.ret.sptk.many	b0	};;

// Alignment loop: one byte per pass until the low three address bits
// are clear.
.Lot:
{ .mib;	cmp.eq		p6,p0=0,r2
(p6)	br.cond.dptk	.Laligned	};;
{ .mmi;	st1		[r32]=r0,1;;
	and		r2=7,r32	}
{ .mib;	add		r33=-1,r33
	br		.Lot		};;

// Word loop: r2 is computed from len before the decrement, so the loop
// repeats while at least eight more bytes remain after this store.
.Laligned:
{ .mmi;	st8		[r32]=r0,8
	and		r2=-8,r33	    // len&~7
	add		r33=-8,r33	};; // len-=8
{ .mib;	cmp.ltu		p6,p0=8,r2	    // ((len+8)&~7)>8
(p6)	br.cond.dptk	.Laligned	};;

{ .mbb;	cmp.eq		p6,p7=r0,r33	    // anything left over?
(p7)	br.cond.dpnt	.Little
(p6)	br.ret.sptk.many	b0	};;
.endp	OPENSSL_cleanse#

// CRYPTO_memcmp
// Compares len bytes at two addresses without leaving the loop early:
// every byte pair is XORed and the results are ORed together.
// In:    r32 = first buffer, r33 = second buffer, r34 = len
// Out:   r8 = 0 if all bytes are equal (or len==0), 1 otherwise
// Uses:  r2, r3, r9, r10, r16, r17, the rotating registers r32-r39,
//        p6; ar.lc and the predicates (mask 0x1ffff) are restored.
// NOTE(review): presumably
// int CRYPTO_memcmp(const void *a, const void *b, size_t len);
// confirm against the C header.
.global	CRYPTO_memcmp#
.proc	CRYPTO_memcmp#
.align	32
.skip	16
CRYPTO_memcmp:
	.prologue
{ .mib;	mov		r8=0
	cmp.eq		p6,p0=0,r34		// len==0?
(p6)	br.ret.spnt	b0		};;
	.save		ar.pfs,r2
{ .mib;	alloc		r2=ar.pfs,3,5,0,8	// 8 registers (r32-r39) rotate
	.save		ar.lc,r3
	mov		r3=ar.lc
	brp.loop.imp	.Loop_cmp_ctop,.Loop_cmp_cend-16
					}
{ .mib;	sub		r10=r34,r0,1		// r10 = len-1, the loop count
	.save		pr,r9
	mov		r9=pr		};;
// The arguments live in registers that are about to rotate, so the
// pointers are copied to r16/r17 first.
{ .mii;	ADDP		r16=0,r32
	mov		ar.lc=r10
	mov		ar.ec=4		}	// four pipeline stages
{ .mib;	ADDP		r17=0,r33
	mov		pr.rot=1<<16	};;	// only stage 0 (p16) is active at start

// Software-pipelined loop.  A byte loaded into r32/r36 under p16 is
// visible as r34/r38 two rotations later (XOR under p18), and the XOR
// result in r34 is visible as r35 one rotation after that (OR under p19).
.Loop_cmp_ctop:
{ .mib;	(p16)	ld1	r32=[r16],1
	(p18)	xor	r34=r34,r38	}
{ .mib;	(p16)	ld1	r36=[r17],1
	(p19)	or	r8=r8,r35
	br.ctop.sptk	.Loop_cmp_ctop	};;
.Loop_cmp_cend:

{ .mib;	cmp.ne		p6,p0=0,r8		// any differing bit seen?
	mov		ar.lc=r3	};;
{ .mib;
(p6)	mov		r8=1			// collapse the result to 0 or 1
	mov		pr=r9,0x1ffff
	br.ret.sptk.many	b0	};;
.endp	CRYPTO_memcmp#

// OPENSSL_instrument_bus
// For each of cnt consecutive 32-bit words: read ar.itc, take the
// difference from the previous reading, flush the word's cache line,
// reload the word and add the difference to it with cmpxchg4.
// In:    r32 = address of the first 32-bit word (passed through ADDP),
//        r33 = cnt
// Out:   r8 = cnt minus the remaining count (cnt once the loop finishes)
// Uses:  r2, r3, r9, r10, p6, ar.ccv
// The cmpxchg4 is attempted once per word; its result (r3) is not
// examined.
// NOTE(review): cnt==0 is not guarded.  r33 is decremented before the
// 32-bit test against zero, so a zero count would keep looping; callers
// presumably always pass cnt>0 -- confirm.
.global	OPENSSL_instrument_bus#
.proc	OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi;	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32	}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0			// first pass adds a zero difference
	mov		r9=r8		};;	// lasttick=tick

// Initial pass over the first word; the pointer is not advanced here.
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;
.Loop:
{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
	add		r33=-1,r33		// --cnt
	add		r32=4,r32	};;	// ++out
{ .mib;	cmp4.ne		p6,p0=0,r33
(p6)	br.cond.dptk	.Loop		};;

{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus#

// OPENSSL_instrument_bus2
// Like OPENSSL_instrument_bus, but the output pointer and the count
// only move when the measured tick difference differs from the
// previous one, and the number of probes is capped by max.
// In:    r32 = address of the first 32-bit word (passed through ADDP),
//        r33 = cnt, r34 = max
// Out:   r8 = cnt minus the remaining count, i.e. how many times the
//        output pointer was advanced
// Uses:  r2, r3, r9, r10, r11, p6, p7, ar.ccv
// The loop ends when cnt reaches zero or after max probes, whichever
// comes first.
.global	OPENSSL_instrument_bus2#
.proc	OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi;	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32	}
{ .mmi;	mov		r8=ar.itc;;
	mov		r10=r0			// first pass adds a zero difference
	mov		r9=r8		};;	// lasttick=tick

// Initial pass over the first word.
{ .mmi;	fc		r32;;
	ld4		r8=[r32]	};;
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmi;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
					};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
.Loop2:
{ .mmi;	mov		r11=r10			// lastdiff=diff
	add		r34=-1,r34	};;	// --max
{ .mmi;	fc		r32;;
	ld4		r8=[r32]
	cmp4.eq		p6,p0=0,r34	};;	// probe budget exhausted?
{ .mmi;	mf
	mov		ar.ccv=r8
	add		r8=r8,r10	};;
{ .mmb;	cmpxchg4.acq	r3=[r32],r8,ar.ccv
(p6)	br.cond.spnt	.Ldone2		};;

{ .mmi;	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8		};;	// lasttick=tick
{ .mmi;	cmp.ne		p6,p0=r10,r11;;		// diff!=lastdiff
(p6)	add		r33=-1,r33	};;	// conditional --cnt
{ .mib;	cmp4.ne		p7,p0=0,r33
(p6)	add		r32=4,r32		// conditional ++out
(p7)	br.cond.dptk	.Loop2		};;
.Ldone2:
{ .mib;	sub		r8=r2,r33
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_instrument_bus2#