VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.0/crypto/aes/asm/vpaes-ppc.pl@ 99371

Last change on this file since 99371 was 99366, checked in by vboxsync, 23 months ago

openssl-3.1.0: Applied and adjusted our OpenSSL changes to 3.0.7. bugref:10418

File size: 42.0 KB
 
1#! /usr/bin/env perl
2# Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10######################################################################
11## Constant-time SSSE3 AES core implementation.
12## version 0.1
13##
14## By Mike Hamburg (Stanford University), 2009
15## Public domain.
16##
17## For details see http://shiftleft.org/papers/vector_aes/ and
18## http://crypto.stanford.edu/vpaes/.
19
20# CBC encrypt/decrypt performance in cycles per byte processed with
21# 128-bit key.
22#
23#              aes-ppc.pl        this
24# PPC74x0/G4e  35.5/52.1/(23.8)  11.9(*)/15.4
25# PPC970/G5    37.9/55.0/(28.5)  22.2/28.5
26# POWER6       42.7/54.3/(28.2)  63.0/92.8(**)
27# POWER7       32.3/42.9/(18.4)  18.5/23.3
28#
29# (*) This is ~10% worse than reported in the paper. The reason is
30# twofold. First, this module makes no assumptions about
31# key schedule (or data, for that matter) alignment and handles
32# it in-line. Second, being transliterated from
33# vpaes-x86_64.pl, it relies on "nested inversion", which is better
34# suited for Intel CPUs.
35# (**) Inadequate POWER6 performance is due to astronomic AltiVec
36# latency, 9 cycles per simple logical operation.
37
38# $output is the last argument if it looks like a file (it has an extension)
39# $flavour is the first argument if it doesn't look like a file
40$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
41$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
42
43if ($flavour =~ /64/) {
44 $SIZE_T =8;
45 $LRSAVE =2*$SIZE_T;
46 $STU ="stdu";
47 $POP ="ld";
48 $PUSH ="std";
49 $UCMP ="cmpld";
50} elsif ($flavour =~ /32/) {
51 $SIZE_T =4;
52 $LRSAVE =$SIZE_T;
53 $STU ="stwu";
54 $POP ="lwz";
55 $PUSH ="stw";
56 $UCMP ="cmplw";
57} else { die "nonsense $flavour"; }
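
# Example invocation (illustrative; the flavour strings are the ones
# understood by ppc-xlate.pl / OpenSSL's Configure, e.g. "linux32",
# "linux64", "linux64le"):
#
#	perl vpaes-ppc.pl linux64le vpaes-ppc.s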
58
59$sp="r1";
60$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
61
62$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
63( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
64( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
65die "can't locate ppc-xlate.pl";
66
67open STDOUT,"| $^X $xlate $flavour \"$output\""
68 or die "can't call $xlate: $!";
69
70$code.=<<___;
71.machine "any"
72
73.text
74
75.align 7 # totally strategic alignment
76_vpaes_consts:
77Lk_mc_forward: # mc_forward
78 .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv
79 .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv
80 .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv
81 .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv
82Lk_mc_backward: # mc_backward
83 .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv
84 .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv
85 .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv
86 .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv
87Lk_sr: # sr
88 .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv
89 .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv
90 .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv
91 .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv
92
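##
## The ?inv/?rev/?asis tags are consumed by the Perl post-processing loop at
## the bottom of this file: every tagged .long row is re-emitted as .byte
## values, with ?inv XOR-ing each byte with 0xf and ?rev reversing the
## 16-byte row on little-endian builds; ?asis rows (and everything on
## big-endian) keep their natural byte order. E.g. on LE the first
## Lk_mc_forward row 0x01020300,... becomes .byte 0x0e,0x0d,0x0c,0x0f,...
##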
93##
94## "Hot" constants
95##
96Lk_inv: # inv, inva
97 .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev
98 .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev
99Lk_ipt: # input transform (lo, hi)
100 .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev
101 .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev
102Lk_sbo: # sbou, sbot
103 .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev
104 .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev
105Lk_sb1: # sb1u, sb1t
106 .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev
107 .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev
108Lk_sb2: # sb2u, sb2t
109 .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev
110 .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev
111
112##
113## Decryption stuff
114##
115Lk_dipt: # decryption input transform
116 .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev
117 .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev
118Lk_dsbo: # decryption sbox final output
119 .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev
120 .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev
121Lk_dsb9: # decryption sbox output *9*u, *9*t
122 .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev
123 .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev
124Lk_dsbd: # decryption sbox output *D*u, *D*t
125 .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev
126 .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev
127Lk_dsbb: # decryption sbox output *B*u, *B*t
128 .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev
129 .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev
130Lk_dsbe: # decryption sbox output *E*u, *E*t
131 .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev
132 .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev
133
134##
135## Key schedule constants
136##
137Lk_dksd: # decryption key schedule: invskew x*D
138 .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev
139 .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev
140Lk_dksb: # decryption key schedule: invskew x*B
141 .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev
142 .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev
143Lk_dkse: # decryption key schedule: invskew x*E + 0x63
144 .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev
145 .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev
146Lk_dks9: # decryption key schedule: invskew x*9
147 .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev
148 .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev
149
150Lk_rcon: # rcon
151 .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis
152Lk_s63:
153 .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis
154
155Lk_opt: # output transform
156 .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev
157 .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev
158Lk_deskew: # deskew tables: inverts the sbox's "skew"
159 .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev
160 .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev
161.align 5
162Lconsts:
163 mflr r0
164 bcl 20,31,\$+4
165 mflr r12 # r12 = address of this mflr (LR was set by the bcl above)
166 addi r12,r12,-0x308 # 0x308 = distance from _vpaes_consts to that point
167 mtlr r0
168 blr
169 .long 0
170 .byte 0,12,0x14,0,0,0,0,0
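# Lconsts returns the run-time address of _vpaes_consts in r12: the bcl/mflr
# pair is the usual PowerPC idiom for reading the current PC, so the table
# is located without relocations (hence the "-fPIC" remark below).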
171.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
172.align 6
173___
174
175
176my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));
177{
178my ($inp,$out,$key) = map("r$_",(3..5));
179
180my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));
181my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));
182my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));
183
184$code.=<<___;
185##
186## _aes_preheat
187##
188## Fills register %r10 -> .aes_consts (so you can -fPIC)
189## and %xmm9-%xmm15 as specified below.
190##
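## (PPC note: the table base is r12, set up by Lconsts, and the constants
## land in v7-v9 and v10-v19; %r10/%xmm register names in comments such as
## the one above refer to the original vpaes-x86_64.pl code.)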
191.align 4
192_vpaes_encrypt_preheat:
193 mflr r8
194 bl Lconsts
195 mtlr r8
196 li r11, 0xc0 # Lk_inv
197 li r10, 0xd0
198 li r9, 0xe0 # Lk_ipt
199 li r8, 0xf0
200 vxor v7, v7, v7 # 0x00..00
201 vspltisb v8,4 # 0x04..04
202 vspltisb v9,0x0f # 0x0f..0f
203 lvx $invlo, r12, r11
204 li r11, 0x100
205 lvx $invhi, r12, r10
206 li r10, 0x110
207 lvx $iptlo, r12, r9
208 li r9, 0x120
209 lvx $ipthi, r12, r8
210 li r8, 0x130
211 lvx $sbou, r12, r11
212 li r11, 0x140
213 lvx $sbot, r12, r10
214 li r10, 0x150
215 lvx $sb1u, r12, r9
216 lvx $sb1t, r12, r8
217 lvx $sb2u, r12, r11
218 lvx $sb2t, r12, r10
219 blr
220 .long 0
221 .byte 0,12,0x14,0,0,0,0,0
222
223##
224## _aes_encrypt_core
225##
226## AES-encrypt %xmm0.
227##
228## Inputs:
229## %xmm0 = input
230## %xmm9-%xmm15 as in _vpaes_preheat
231## (%rdx) = scheduled keys
232##
233## Output in %xmm0
234## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax
235##
236##
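## (PPC note: the state is in v0 on entry and exit, $key points at the
## scheduled keys, the round count is read from 240($key), and v1-v6 plus
## r8-r11 are clobbered. Each s-box lookup is done with vperm as a pair of
## 4-bit (low/high nibble) table lookups, which is what keeps the round
## free of data-dependent memory accesses, i.e. constant-time.)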
237.align 5
238_vpaes_encrypt_core:
239 lwz r8, 240($key) # pull rounds
240 li r9, 16
241 lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key
242 li r11, 0x10
243 lvx v6, r9, $key
244 addi r9, r9, 16
245 ?vperm v5, v5, v6, $keyperm # align round key
246 addi r10, r11, 0x40
247 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
248 vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1
249 vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2
250 vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0
251 vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
252 mtctr r8
253 b Lenc_entry
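 # ctr now holds the round count from 240($key); Lenc_loop below runs
 # ctr-1 middle rounds (the loop is entered at Lenc_entry), after which
 # execution falls through to the last-round code.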
254
255.align 4
256Lenc_loop:
257 # middle of middle round
258 vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
259 lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
260 addi r11, r11, 16
261 vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
262 vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
263 andi. r11, r11, 0x30 # and \$0x30, %r11 # ... mod 4
264 vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u
265 vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
266 vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
267 lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
268 addi r10, r11, 0x40
269 vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
270 vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
271 vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D
272 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B
273 vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C
274 vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D
275 vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D
276
277Lenc_entry:
278 # top of round
279 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
280 vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k
281 vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
282 vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
283 vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
284 vand v0, v0, v9
285 vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
286 vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
287 vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
288 vmr v5, v6
289 lvx v6, r9, $key # vmovdqu (%r9), %xmm5
290 vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
291 addi r9, r9, 16
292 vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
293 ?vperm v5, v5, v6, $keyperm # align round key
294 vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
295 bdnz Lenc_loop
296
297 # middle of last round
298 addi r10, r11, 0x80
299 # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
300 # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
301 vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
302 lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
303 vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
304 vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
305 vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
306 vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0
307 blr
308 .long 0
309 .byte 0,12,0x14,0,0,0,0,0
310
311.globl .vpaes_encrypt
312.align 5
313.vpaes_encrypt:
314 $STU $sp,-$FRAME($sp)
315 li r10,`15+6*$SIZE_T`
316 li r11,`31+6*$SIZE_T`
317 mflr r6
318 mfspr r7, 256 # save vrsave
319 stvx v20,r10,$sp
320 addi r10,r10,32
321 stvx v21,r11,$sp
322 addi r11,r11,32
323 stvx v22,r10,$sp
324 addi r10,r10,32
325 stvx v23,r11,$sp
326 addi r11,r11,32
327 stvx v24,r10,$sp
328 addi r10,r10,32
329 stvx v25,r11,$sp
330 addi r11,r11,32
331 stvx v26,r10,$sp
332 addi r10,r10,32
333 stvx v27,r11,$sp
334 addi r11,r11,32
335 stvx v28,r10,$sp
336 addi r10,r10,32
337 stvx v29,r11,$sp
338 addi r11,r11,32
339 stvx v30,r10,$sp
340 stvx v31,r11,$sp
341 stw r7,`$FRAME-4`($sp) # save vrsave
342 li r0, -1
343 $PUSH r6,`$FRAME+$LRSAVE`($sp)
344 mtspr 256, r0 # preserve all AltiVec registers
345
346 bl _vpaes_encrypt_preheat
347
348 ?lvsl $inpperm, 0, $inp # prepare for unaligned access
349 lvx v0, 0, $inp
350 addi $inp, $inp, 15 # 15 is not a typo
351 ?lvsr $outperm, 0, $out
352 ?lvsl $keyperm, 0, $key # prepare for unaligned access
353 lvx $inptail, 0, $inp # redundant in aligned case
354 ?vperm v0, v0, $inptail, $inpperm
355
356 bl _vpaes_encrypt_core
357
358 andi. r8, $out, 15
359 li r9, 16
360 beq Lenc_out_aligned
361
362 vperm v0, v0, v0, $outperm # rotate right/left
363 mtctr r9
364Lenc_out_unaligned:
365 stvebx v0, 0, $out
366 addi $out, $out, 1
367 bdnz Lenc_out_unaligned
368 b Lenc_done
369
370.align 4
371Lenc_out_aligned:
372 stvx v0, 0, $out
373Lenc_done:
374
375 li r10,`15+6*$SIZE_T`
376 li r11,`31+6*$SIZE_T`
377 mtlr r6
378 mtspr 256, r7 # restore vrsave
379 lvx v20,r10,$sp
380 addi r10,r10,32
381 lvx v21,r11,$sp
382 addi r11,r11,32
383 lvx v22,r10,$sp
384 addi r10,r10,32
385 lvx v23,r11,$sp
386 addi r11,r11,32
387 lvx v24,r10,$sp
388 addi r10,r10,32
389 lvx v25,r11,$sp
390 addi r11,r11,32
391 lvx v26,r10,$sp
392 addi r10,r10,32
393 lvx v27,r11,$sp
394 addi r11,r11,32
395 lvx v28,r10,$sp
396 addi r10,r10,32
397 lvx v29,r11,$sp
398 addi r11,r11,32
399 lvx v30,r10,$sp
400 lvx v31,r11,$sp
401 addi $sp,$sp,$FRAME
402 blr
403 .long 0
404 .byte 0,12,0x04,1,0x80,0,3,0
405 .long 0
406.size .vpaes_encrypt,.-.vpaes_encrypt
407
408.align 4
409_vpaes_decrypt_preheat:
410 mflr r8
411 bl Lconsts
412 mtlr r8
413 li r11, 0xc0 # Lk_inv
414 li r10, 0xd0
415 li r9, 0x160 # Ldipt
416 li r8, 0x170
417 vxor v7, v7, v7 # 0x00..00
418 vspltisb v8,4 # 0x04..04
419 vspltisb v9,0x0f # 0x0f..0f
420 lvx $invlo, r12, r11
421 li r11, 0x180
422 lvx $invhi, r12, r10
423 li r10, 0x190
424 lvx $iptlo, r12, r9
425 li r9, 0x1a0
426 lvx $ipthi, r12, r8
427 li r8, 0x1b0
428 lvx $sbou, r12, r11
429 li r11, 0x1c0
430 lvx $sbot, r12, r10
431 li r10, 0x1d0
432 lvx $sb9u, r12, r9
433 li r9, 0x1e0
434 lvx $sb9t, r12, r8
435 li r8, 0x1f0
436 lvx $sbdu, r12, r11
437 li r11, 0x200
438 lvx $sbdt, r12, r10
439 li r10, 0x210
440 lvx $sbbu, r12, r9
441 lvx $sbbt, r12, r8
442 lvx $sbeu, r12, r11
443 lvx $sbet, r12, r10
444 blr
445 .long 0
446 .byte 0,12,0x14,0,0,0,0,0
447
448##
449## Decryption core
450##
451## Same API as encryption core.
452##
453.align 4
454_vpaes_decrypt_core:
455 lwz r8, 240($key) # pull rounds
456 li r9, 16
457 lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key
458 li r11, 0x30
459 lvx v6, r9, $key
460 addi r9, r9, 16
461 ?vperm v5, v5, v6, $keyperm # align round key
462 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
463 vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
464 vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0
465 vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2
466 vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
467 mtctr r8
468 b Ldec_entry
469
470.align 4
471Ldec_loop:
472#
473# Inverse mix columns
474#
475 lvx v0, r12, r11 # v5 and v0 are flipped
476 # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
477 # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
478 vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u
479 subi r11, r11, 16
480 vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t
481 andi. r11, r11, 0x30
482 vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0
483 # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu
484 vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
485 # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt
486
487 vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu
488 vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
489 vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt
490 vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
491 # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu
492 vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
493 # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt
494
495 vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu
496 vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
497 vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt
498 vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
499 # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu
500 vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
501 # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet
502
503 vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu
504 vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
505 vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet
506 vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
507 vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
508
509Ldec_entry:
510 # top of round
511 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
512 vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
513 vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
514 vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
515 vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
516 vand v0, v0, v9
517 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
518 vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
519 vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
520 vmr v5, v6
521 lvx v6, r9, $key # vmovdqu (%r9), %xmm0
522 vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
523 addi r9, r9, 16
524 vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
525 ?vperm v5, v5, v6, $keyperm # align round key
526 vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
527 bdnz Ldec_loop
528
529 # middle of last round
530 addi r10, r11, 0x80
531 # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou
532 vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
533 # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot
534 lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
535 vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t
536 vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k
537 vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A
538 vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0
539 blr
540 .long 0
541 .byte 0,12,0x14,0,0,0,0,0
542
543.globl .vpaes_decrypt
544.align 5
545.vpaes_decrypt:
546 $STU $sp,-$FRAME($sp)
547 li r10,`15+6*$SIZE_T`
548 li r11,`31+6*$SIZE_T`
549 mflr r6
550 mfspr r7, 256 # save vrsave
551 stvx v20,r10,$sp
552 addi r10,r10,32
553 stvx v21,r11,$sp
554 addi r11,r11,32
555 stvx v22,r10,$sp
556 addi r10,r10,32
557 stvx v23,r11,$sp
558 addi r11,r11,32
559 stvx v24,r10,$sp
560 addi r10,r10,32
561 stvx v25,r11,$sp
562 addi r11,r11,32
563 stvx v26,r10,$sp
564 addi r10,r10,32
565 stvx v27,r11,$sp
566 addi r11,r11,32
567 stvx v28,r10,$sp
568 addi r10,r10,32
569 stvx v29,r11,$sp
570 addi r11,r11,32
571 stvx v30,r10,$sp
572 stvx v31,r11,$sp
573 stw r7,`$FRAME-4`($sp) # save vrsave
574 li r0, -1
575 $PUSH r6,`$FRAME+$LRSAVE`($sp)
576 mtspr 256, r0 # preserve all AltiVec registers
577
578 bl _vpaes_decrypt_preheat
579
580 ?lvsl $inpperm, 0, $inp # prepare for unaligned access
581 lvx v0, 0, $inp
582 addi $inp, $inp, 15 # 15 is not a typo
583 ?lvsr $outperm, 0, $out
584 ?lvsl $keyperm, 0, $key
585 lvx $inptail, 0, $inp # redundant in aligned case
586 ?vperm v0, v0, $inptail, $inpperm
587
588 bl _vpaes_decrypt_core
589
590 andi. r8, $out, 15
591 li r9, 16
592 beq Ldec_out_aligned
593
594 vperm v0, v0, v0, $outperm # rotate right/left
595 mtctr r9
596Ldec_out_unaligned:
597 stvebx v0, 0, $out
598 addi $out, $out, 1
599 bdnz Ldec_out_unaligned
600 b Ldec_done
601
602.align 4
603Ldec_out_aligned:
604 stvx v0, 0, $out
605Ldec_done:
606
607 li r10,`15+6*$SIZE_T`
608 li r11,`31+6*$SIZE_T`
609 mtlr r6
610 mtspr 256, r7 # restore vrsave
611 lvx v20,r10,$sp
612 addi r10,r10,32
613 lvx v21,r11,$sp
614 addi r11,r11,32
615 lvx v22,r10,$sp
616 addi r10,r10,32
617 lvx v23,r11,$sp
618 addi r11,r11,32
619 lvx v24,r10,$sp
620 addi r10,r10,32
621 lvx v25,r11,$sp
622 addi r11,r11,32
623 lvx v26,r10,$sp
624 addi r10,r10,32
625 lvx v27,r11,$sp
626 addi r11,r11,32
627 lvx v28,r10,$sp
628 addi r10,r10,32
629 lvx v29,r11,$sp
630 addi r11,r11,32
631 lvx v30,r10,$sp
632 lvx v31,r11,$sp
633 addi $sp,$sp,$FRAME
634 blr
635 .long 0
636 .byte 0,12,0x04,1,0x80,0,3,0
637 .long 0
638.size .vpaes_decrypt,.-.vpaes_decrypt
639
640.globl .vpaes_cbc_encrypt
641.align 5
642.vpaes_cbc_encrypt:
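 # arguments: r3 = in, r4 = out, r5 = byte count, r6 = key schedule,
 # r7 = iv, r8 = enc (non-zero = encrypt, zero = decrypt); the iv buffer
 # is updated in place with the last ciphertext block so calls can chain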
643 ${UCMP}i r5,16
644 bltlr-
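 # byte counts below 16 are a no-op (immediate return); only whole
 # 16-byte blocks are processed ("copy length&-16" below)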
645
646 $STU $sp,-`($FRAME+2*$SIZE_T)`($sp)
647 mflr r0
648 li r10,`15+6*$SIZE_T`
649 li r11,`31+6*$SIZE_T`
650 mfspr r12, 256
651 stvx v20,r10,$sp
652 addi r10,r10,32
653 stvx v21,r11,$sp
654 addi r11,r11,32
655 stvx v22,r10,$sp
656 addi r10,r10,32
657 stvx v23,r11,$sp
658 addi r11,r11,32
659 stvx v24,r10,$sp
660 addi r10,r10,32
661 stvx v25,r11,$sp
662 addi r11,r11,32
663 stvx v26,r10,$sp
664 addi r10,r10,32
665 stvx v27,r11,$sp
666 addi r11,r11,32
667 stvx v28,r10,$sp
668 addi r10,r10,32
669 stvx v29,r11,$sp
670 addi r11,r11,32
671 stvx v30,r10,$sp
672 stvx v31,r11,$sp
673 stw r12,`$FRAME-4`($sp) # save vrsave
674 $PUSH r30,`$FRAME+$SIZE_T*0`($sp)
675 $PUSH r31,`$FRAME+$SIZE_T*1`($sp)
676 li r9, -16
677 $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
678
679 and r30, r5, r9 # copy length&-16
680 andi. r9, $out, 15 # is $out aligned?
681 mr r5, r6 # copy pointer to key
682 mr r31, r7 # copy pointer to iv
683 li r6, -1
684 mcrf cr1, cr0 # put aside $out alignment flag
685 mr r7, r12 # copy vrsave
686 mtspr 256, r6 # preserve all AltiVec registers
687
688 lvx v24, 0, r31 # load [potentially unaligned] iv
689 li r9, 15
690 ?lvsl $inpperm, 0, r31
691 lvx v25, r9, r31
692 ?vperm v24, v24, v25, $inpperm
693
694 cmpwi r8, 0 # test direction
695 neg r8, $inp # prepare for unaligned access
696 vxor v7, v7, v7
697 ?lvsl $keyperm, 0, $key
698 ?lvsr $outperm, 0, $out
699 ?lvsr $inpperm, 0, r8 # -$inp
700 vnor $outmask, v7, v7 # 0xff..ff
701 lvx $inptail, 0, $inp
702 ?vperm $outmask, v7, $outmask, $outperm
703 addi $inp, $inp, 15 # 15 is not a typo
704
705 beq Lcbc_decrypt
706
707 bl _vpaes_encrypt_preheat
708 li r0, 16
709
710 beq cr1, Lcbc_enc_loop # $out is aligned
711
712 vmr v0, $inptail
713 lvx $inptail, 0, $inp
714 addi $inp, $inp, 16
715 ?vperm v0, v0, $inptail, $inpperm
716 vxor v0, v0, v24 # ^= iv
717
718 bl _vpaes_encrypt_core
719
720 andi. r8, $out, 15
721 vmr v24, v0 # put aside iv
722 sub r9, $out, r8
723 vperm $outhead, v0, v0, $outperm # rotate right/left
724
725Lcbc_enc_head:
726 stvebx $outhead, r8, r9
727 cmpwi r8, 15
728 addi r8, r8, 1
729 bne Lcbc_enc_head
730
731 sub. r30, r30, r0 # len -= 16
732 addi $out, $out, 16
733 beq Lcbc_unaligned_done
734
735Lcbc_enc_loop:
736 vmr v0, $inptail
737 lvx $inptail, 0, $inp
738 addi $inp, $inp, 16
739 ?vperm v0, v0, $inptail, $inpperm
740 vxor v0, v0, v24 # ^= iv
741
742 bl _vpaes_encrypt_core
743
744 vmr v24, v0 # put aside iv
745 sub. r30, r30, r0 # len -= 16
746 vperm v0, v0, v0, $outperm # rotate right/left
747 vsel v1, $outhead, v0, $outmask
748 vmr $outhead, v0
749 stvx v1, 0, $out
750 addi $out, $out, 16
751 bne Lcbc_enc_loop
752
753 b Lcbc_done
754
755.align 5
756Lcbc_decrypt:
757 bl _vpaes_decrypt_preheat
758 li r0, 16
759
760 beq cr1, Lcbc_dec_loop # $out is aligned
761
762 vmr v0, $inptail
763 lvx $inptail, 0, $inp
764 addi $inp, $inp, 16
765 ?vperm v0, v0, $inptail, $inpperm
766 vmr v25, v0 # put aside input
767
768 bl _vpaes_decrypt_core
769
770 andi. r8, $out, 15
771 vxor v0, v0, v24 # ^= iv
772 vmr v24, v25
773 sub r9, $out, r8
774 vperm $outhead, v0, v0, $outperm # rotate right/left
775
776Lcbc_dec_head:
777 stvebx $outhead, r8, r9
778 cmpwi r8, 15
779 addi r8, r8, 1
780 bne Lcbc_dec_head
781
782 sub. r30, r30, r0 # len -= 16
783 addi $out, $out, 16
784 beq Lcbc_unaligned_done
785
786Lcbc_dec_loop:
787 vmr v0, $inptail
788 lvx $inptail, 0, $inp
789 addi $inp, $inp, 16
790 ?vperm v0, v0, $inptail, $inpperm
791 vmr v25, v0 # put aside input
792
793 bl _vpaes_decrypt_core
794
795 vxor v0, v0, v24 # ^= iv
796 vmr v24, v25
797 sub. r30, r30, r0 # len -= 16
798 vperm v0, v0, v0, $outperm # rotate right/left
799 vsel v1, $outhead, v0, $outmask
800 vmr $outhead, v0
801 stvx v1, 0, $out
802 addi $out, $out, 16
803 bne Lcbc_dec_loop
804
805Lcbc_done:
806 beq cr1, Lcbc_write_iv # $out is aligned
807
808Lcbc_unaligned_done:
809 andi. r8, $out, 15
810 sub $out, $out, r8
811 li r9, 0
812Lcbc_tail:
813 stvebx $outhead, r9, $out
814 addi r9, r9, 1
815 cmpw r9, r8
816 bne Lcbc_tail
817
818Lcbc_write_iv:
819 neg r8, r31 # write [potentially unaligned] iv
820 li r10, 4
821 ?lvsl $outperm, 0, r8
822 li r11, 8
823 li r12, 12
824 vperm v24, v24, v24, $outperm # rotate right/left
825 stvewx v24, 0, r31 # ivp is at least 32-bit aligned
826 stvewx v24, r10, r31
827 stvewx v24, r11, r31
828 stvewx v24, r12, r31
829
830 mtspr 256, r7 # restore vrsave
831 li r10,`15+6*$SIZE_T`
832 li r11,`31+6*$SIZE_T`
833 lvx v20,r10,$sp
834 addi r10,r10,32
835 lvx v21,r11,$sp
836 addi r11,r11,32
837 lvx v22,r10,$sp
838 addi r10,r10,32
839 lvx v23,r11,$sp
840 addi r11,r11,32
841 lvx v24,r10,$sp
842 addi r10,r10,32
843 lvx v25,r11,$sp
844 addi r11,r11,32
845 lvx v26,r10,$sp
846 addi r10,r10,32
847 lvx v27,r11,$sp
848 addi r11,r11,32
849 lvx v28,r10,$sp
850 addi r10,r10,32
851 lvx v29,r11,$sp
852 addi r11,r11,32
853 lvx v30,r10,$sp
854 lvx v31,r11,$sp
855Lcbc_abort:
856 $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
857 $POP r30,`$FRAME+$SIZE_T*0`($sp)
858 $POP r31,`$FRAME+$SIZE_T*1`($sp)
859 mtlr r0
860 addi $sp,$sp,`$FRAME+$SIZE_T*2`
861 blr
862 .long 0
863 .byte 0,12,0x04,1,0x80,2,6,0
864 .long 0
865.size .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
866___
867}
868
869{
870my ($inp,$bits,$out)=map("r$_",(3..5));
871my $dir="cr1";
872my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));
873
874$code.=<<___;
875########################################################
876## ##
877## AES key schedule ##
878## ##
879########################################################
880.align 4
881_vpaes_key_preheat:
882 mflr r8
883 bl Lconsts
884 mtlr r8
885 li r11, 0xc0 # Lk_inv
886 li r10, 0xd0
887 li r9, 0xe0 # Lk_ipt
888 li r8, 0xf0
889
890 vspltisb v8,4 # 0x04..04
891 vxor v9,v9,v9 # 0x00..00
892 lvx $invlo, r12, r11 # Lk_inv
893 li r11, 0x120
894 lvx $invhi, r12, r10
895 li r10, 0x130
896 lvx $iptlo, r12, r9 # Lk_ipt
897 li r9, 0x220
898 lvx $ipthi, r12, r8
899 li r8, 0x230
900
901 lvx v14, r12, r11 # Lk_sb1
902 li r11, 0x240
903 lvx v15, r12, r10
904 li r10, 0x250
905
906 lvx v16, r12, r9 # Lk_dksd
907 li r9, 0x260
908 lvx v17, r12, r8
909 li r8, 0x270
910 lvx v18, r12, r11 # Lk_dksb
911 li r11, 0x280
912 lvx v19, r12, r10
913 li r10, 0x290
914 lvx v20, r12, r9 # Lk_dkse
915 li r9, 0x2a0
916 lvx v21, r12, r8
917 li r8, 0x2b0
918 lvx v22, r12, r11 # Lk_dks9
919 lvx v23, r12, r10
920
921 lvx v24, r12, r9 # Lk_rcon
922 lvx v25, 0, r12 # Lk_mc_forward[0]
923 lvx v26, r12, r8 # Lk_s63
924 blr
925 .long 0
926 .byte 0,12,0x14,0,0,0,0,0
927
928.align 4
929_vpaes_schedule_core:
930 mflr r7
931
932 bl _vpaes_key_preheat # load the tables
933
934 #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned)
935 neg r8, $inp # prepare for unaligned access
936 lvx v0, 0, $inp
937 addi $inp, $inp, 15 # 15 is not a typo
938 ?lvsr $inpperm, 0, r8 # -$inp
939 lvx v6, 0, $inp # v6 serves as inptail
940 addi $inp, $inp, 8
941 ?vperm v0, v0, v6, $inpperm
942
943 # input transform
944 vmr v3, v0 # vmovdqa %xmm0, %xmm3
945 bl _vpaes_schedule_transform
946 vmr v7, v0 # vmovdqa %xmm0, %xmm7
947
948 bne $dir, Lschedule_am_decrypting
949
950 # encrypting, output zeroth round key after transform
951 li r8, 0x30 # mov \$0x30,%r8d
952 li r9, 4
953 li r10, 8
954 li r11, 12
955
956 ?lvsr $outperm, 0, $out # prepare for unaligned access
957 vnor $outmask, v9, v9 # 0xff..ff
958 ?vperm $outmask, v9, $outmask, $outperm
959
960 #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx)
961 vperm $outhead, v0, v0, $outperm # rotate right/left
962 stvewx $outhead, 0, $out # some are superfluous
963 stvewx $outhead, r9, $out
964 stvewx $outhead, r10, $out
965 addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
966 stvewx $outhead, r11, $out
967 b Lschedule_go
968
969Lschedule_am_decrypting:
970 srwi r8, $bits, 1 # shr \$1,%r8d
971 andi. r8, r8, 32 # and \$32,%r8d
972 xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
973 addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
974 # decrypting, output zeroth round key after shiftrows
975 lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
976 li r9, 4
977 li r10, 8
978 li r11, 12
979 vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
980
981 neg r0, $out # prepare for unaligned access
982 ?lvsl $outperm, 0, r0
983 vnor $outmask, v9, v9 # 0xff..ff
984 ?vperm $outmask, $outmask, v9, $outperm
985
986 #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx)
987 vperm $outhead, v4, v4, $outperm # rotate right/left
988 stvewx $outhead, 0, $out # some are superfluous
989 stvewx $outhead, r9, $out
990 stvewx $outhead, r10, $out
991 addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
992 stvewx $outhead, r11, $out
993 addi $out, $out, 15 # 15 is not a typo
994 xori r8, r8, 0x30 # xor \$0x30, %r8
995
996Lschedule_go:
997 cmplwi $bits, 192 # cmp \$192, %esi
998 bgt Lschedule_256
999 beq Lschedule_192
1000 # 128: fall through
1001
1002##
1003## .schedule_128
1004##
1005## 128-bit specific part of key schedule.
1006##
1007## This schedule is really simple, because all its parts
1008## are accomplished by the subroutines.
1009##
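## (ctr is preloaded with 10: the first nine passes each write a round key
##  via _vpaes_schedule_mangle, the tenth branches to Lschedule_mangle_last,
##  which together with the zeroth key written earlier gives the 11 round
##  keys AES-128 needs.)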
1010Lschedule_128:
1011 li r0, 10 # mov \$10, %esi
1012 mtctr r0
1013
1014Loop_schedule_128:
1015 bl _vpaes_schedule_round
1016 bdz Lschedule_mangle_last # dec %esi
1017 bl _vpaes_schedule_mangle # write output
1018 b Loop_schedule_128
1019
1020##
1021## .aes_schedule_192
1022##
1023## 192-bit specific part of key schedule.
1024##
1025## The main body of this schedule is the same as the 128-bit
1026## schedule, but with more smearing. The long, high side is
1027## stored in %xmm7 as before, and the short, low side is in
1028## the high bits of %xmm6.
1029##
1030## This schedule is somewhat nastier, however, because each
1031## round produces 192 bits of key material, or 1.5 round keys.
1032## Therefore, on each cycle we do 2 rounds and produce 3 round
1033## keys.
1034##
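## (ctr is preloaded with 4: each full pass performs two schedule rounds
##  plus a smear and writes three round keys, the fourth pass is cut short
##  by bdz, and together with the zeroth key and the final key from
##  Lschedule_mangle_last this yields the 13 round keys AES-192 needs.)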
1035.align 4
1036Lschedule_192:
1037 li r0, 4 # mov \$4, %esi
1038 lvx v0, 0, $inp
1039 ?vperm v0, v6, v0, $inpperm
1040 ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
1041 bl _vpaes_schedule_transform # input transform
1042 ?vsldoi v6, v0, v9, 8
1043 ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros
1044 mtctr r0
1045
1046Loop_schedule_192:
1047 bl _vpaes_schedule_round
1048 ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0
1049 bl _vpaes_schedule_mangle # save key n
1050 bl _vpaes_schedule_192_smear
1051 bl _vpaes_schedule_mangle # save key n+1
1052 bl _vpaes_schedule_round
1053 bdz Lschedule_mangle_last # dec %esi
1054 bl _vpaes_schedule_mangle # save key n+2
1055 bl _vpaes_schedule_192_smear
1056 b Loop_schedule_192
1057
1058##
1059## .aes_schedule_256
1060##
1061## 256-bit specific part of key schedule.
1062##
1063## The structure here is very similar to the 128-bit
1064## schedule, but with an additional "low side" in
1065## %xmm6. The low side's rounds are the same as the
1066## high side's, except no rcon and no rotation.
1067##
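## (ctr is preloaded with 7: the loop alternates high and low rounds, and
##  together with the zeroth key written earlier and the final key from
##  Lschedule_mangle_last it produces the 15 round keys AES-256 needs.)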
1068.align 4
1069Lschedule_256:
1070 li r0, 7 # mov \$7, %esi
1071 addi $inp, $inp, 8
1072 lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
1073 ?vperm v0, v6, v0, $inpperm
1074 bl _vpaes_schedule_transform # input transform
1075 mtctr r0
1076
1077Loop_schedule_256:
1078 bl _vpaes_schedule_mangle # output low result
1079 vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6
1080
1081 # high round
1082 bl _vpaes_schedule_round
1083 bdz Lschedule_mangle_last # dec %esi
1084 bl _vpaes_schedule_mangle
1085
1086 # low round. swap xmm7 and xmm6
1087 ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
1088 vmr v5, v7 # vmovdqa %xmm7, %xmm5
1089 vmr v7, v6 # vmovdqa %xmm6, %xmm7
1090 bl _vpaes_schedule_low_round
1091 vmr v7, v5 # vmovdqa %xmm5, %xmm7
1092
1093 b Loop_schedule_256
1094##
1095## .aes_schedule_mangle_last
1096##
1097## Mangler for last round of key schedule
1098## Mangles %xmm0
1099## when encrypting, outputs out(%xmm0) ^ 63
1100## when decrypting, outputs unskew(%xmm0)
1101##
1102## Always called right before return... jumps to cleanup and exits
1103##
1104.align 4
1105Lschedule_mangle_last:
1106 # schedule last round key from xmm0
1107 li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11
1108 li r9, 0x2f0
1109 bne $dir, Lschedule_mangle_last_dec
1110
1111 # encrypting
1112 lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1
1113 li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform
1114 li r9, 0x2d0 # prepare to output transform
1115 vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute
1116
1117 lvx $iptlo, r11, r12 # reload $ipt
1118 lvx $ipthi, r9, r12
1119 addi $out, $out, 16 # add \$16, %rdx
1120 vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
1121 bl _vpaes_schedule_transform # output transform
1122
1123 #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
1124 vperm v0, v0, v0, $outperm # rotate right/left
1125 li r10, 4
1126 vsel v2, $outhead, v0, $outmask
1127 li r11, 8
1128 stvx v2, 0, $out
1129 li r12, 12
1130 stvewx v0, 0, $out # some (or all) are redundant
1131 stvewx v0, r10, $out
1132 stvewx v0, r11, $out
1133 stvewx v0, r12, $out
1134 b Lschedule_mangle_done
1135
1136.align 4
1137Lschedule_mangle_last_dec:
1138 lvx $iptlo, r11, r12 # reload $ipt
1139 lvx $ipthi, r9, r12
1140 addi $out, $out, -16 # add \$-16, %rdx
1141 vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
1142 bl _vpaes_schedule_transform # output transform
1143
1144 #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
1145 addi r9, $out, -15 # -15 is not a typo
1146 vperm v0, v0, v0, $outperm # rotate right/left
1147 li r10, 4
1148 vsel v2, $outhead, v0, $outmask
1149 li r11, 8
1150 stvx v2, 0, $out
1151 li r12, 12
1152 stvewx v0, 0, r9 # some (or all) are redundant
1153 stvewx v0, r10, r9
1154 stvewx v0, r11, r9
1155 stvewx v0, r12, r9
1156
1157
1158Lschedule_mangle_done:
1159 mtlr r7
1160 # cleanup
1161 vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0
1162 vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1
1163 vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2
1164 vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3
1165 vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
1166 vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5
1167 vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6
1168 vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7
1169
1170 blr
1171 .long 0
1172 .byte 0,12,0x14,0,0,0,0,0
1173
1174##
1175## .aes_schedule_192_smear
1176##
1177## Smear the short, low side in the 192-bit key schedule.
1178##
1179## Inputs:
1180## %xmm7: high side, b a x y
1181## %xmm6: low side, d c 0 0
1182## %xmm13: 0
1183##
1184## Outputs:
1185## %xmm6: b+c+d b+c 0 0
1186## %xmm0: b+c+d b+c b a
1187##
1188.align 4
1189_vpaes_schedule_192_smear:
1190 ?vspltw v0, v7, 3
1191 ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
1192 ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
1193 vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0
1194 vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a
1195 vmr v0, v6
1196 ?vsldoi v6, v6, v9, 8
1197 ?vsldoi v6, v9, v6, 8 # clobber low side with zeros
1198 blr
1199 .long 0
1200 .byte 0,12,0x14,0,0,0,0,0
1201
1202##
1203## .aes_schedule_round
1204##
1205## Runs one main round of the key schedule on %xmm0, %xmm7
1206##
1207## Specifically, runs subbytes on the high dword of %xmm0
1208## then rotates it by one byte and xors into the low dword of
1209## %xmm7.
1210##
1211## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
1212## next rcon.
1213##
1214## Smears the dwords of %xmm7 by xoring the low into the
1215## second low, result into third, result into highest.
1216##
1217## Returns results in %xmm7 = %xmm0.
1218## Clobbers %xmm1-%xmm4, %r11.
1219##
1220.align 4
1221_vpaes_schedule_round:
1222 # extract rcon from xmm8
1223 #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
1224 ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1
1225 ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8
1226 vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
1227
1228 # rotate
1229 ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
1230 ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0
1231
1232 # fall through...
1233
1234 # low round: same as high round, but no rotation and no rcon.
1235_vpaes_schedule_low_round:
1236 # smear xmm7
1237 ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1
1238 vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
1239 vspltisb v1, 0x0f # 0x0f..0f
1240 ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4
1241
1242 # subbytes
1243 vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k
1244 vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
1245 vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7
1246 vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
1247 vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
1248 vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
1249 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
1250 vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
1251 vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7
1252 vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak
1253 vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
1254 vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak
1255 vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io
1256 vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo
1257 vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou
1258 vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t
1259 vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output
1260
1261 # add in smeared stuff
1262 vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0
1263 vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7
1264 blr
1265 .long 0
1266 .byte 0,12,0x14,0,0,0,0,0
1267
1268##
1269## .aes_schedule_transform
1270##
1271## Linear-transform %xmm0 according to tables at (%r11)
1272##
1273## Requires that %xmm9 = 0x0F0F... as in preheat
1274## Output in %xmm0
1275## Clobbers %xmm2
1276##
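## (Per byte the transform below computes iptlo[b & 0x0f] ^ ipthi[b >> 4]:
##  the two vperm instructions are 16-entry table lookups indexed by the low
##  and high nibble respectively; masking the low-nibble index is unnecessary
##  because both vperm sources are the same register.)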
1277.align 4
1278_vpaes_schedule_transform:
1279 #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1
1280 vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
1281 # vmovdqa (%r11), %xmm2 # lo
1282 vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
1283 # vmovdqa 16(%r11), %xmm1 # hi
1284 vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0
1285 vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0
1286 blr
1287 .long 0
1288 .byte 0,12,0x14,0,0,0,0,0
1289
1290##
1291## .aes_schedule_mangle
1292##
1293## Mangle xmm0 from (basis-transformed) standard version
1294## to our version.
1295##
1296## On encrypt,
1297## xor with 0x63
1298## multiply by circulant 0,1,1,1
1299## apply shiftrows transform
1300##
1301## On decrypt,
1302## xor with 0x63
1303## multiply by "inverse mixcolumns" circulant E,B,D,9
1304## deskew
1305## apply shiftrows transform
1306##
1307##
1308## Writes out to (%rdx), and increments or decrements it
1309## Keeps track of round number mod 4 in %r8
1310## Preserves xmm0
1311## Clobbers xmm1-xmm5
1312##
1313.align 4
1314_vpaes_schedule_mangle:
1315 #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later
1316 # vmovdqa .Lk_mc_forward(%rip),%xmm5
1317 bne $dir, Lschedule_mangle_dec
1318
1319 # encrypting
1320 vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4
1321 addi $out, $out, 16 # add \$16, %rdx
1322 vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4
1323 vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1
1324 vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3
1325 vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4
1326 lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
1327 vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3
1328
1329 vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
1330 addi r8, r8, -16 # add \$-16, %r8
1331 andi. r8, r8, 0x30 # and \$0x30, %r8
1332
1333 #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
1334 vperm v1, v3, v3, $outperm # rotate right/left
1335 vsel v2, $outhead, v1, $outmask
1336 vmr $outhead, v1
1337 stvx v2, 0, $out
1338 blr
1339
1340.align 4
1341Lschedule_mangle_dec:
1342 # inverse mix columns
1343 # lea .Lk_dksd(%rip),%r11
1344 vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi
1345 #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo
1346
1347 # vmovdqa 0x00(%r11), %xmm2
1348 vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2
1349 # vmovdqa 0x10(%r11), %xmm3
1350 vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3
1351 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
1352 vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
1353
1354 # vmovdqa 0x20(%r11), %xmm2
1355 vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2
1356 vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
1357 # vmovdqa 0x30(%r11), %xmm3
1358 vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3
1359 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
1360 vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
1361
1362 # vmovdqa 0x40(%r11), %xmm2
1363 vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2
1364 vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
1365 # vmovdqa 0x50(%r11), %xmm3
1366 vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3
1367 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
1368
1369 # vmovdqa 0x60(%r11), %xmm2
1370 vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2
1371 vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
1372 # vmovdqa 0x70(%r11), %xmm4
1373 vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4
1374 lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
1375 vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
1376 vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3
1377
1378 addi $out, $out, -16 # add \$-16, %rdx
1379
1380 vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
1381 addi r8, r8, -16 # add \$-16, %r8
1382 andi. r8, r8, 0x30 # and \$0x30, %r8
1383
1384 #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
1385 vperm v1, v3, v3, $outperm # rotate right/left
1386 vsel v2, $outhead, v1, $outmask
1387 vmr $outhead, v1
1388 stvx v2, 0, $out
1389 blr
1390 .long 0
1391 .byte 0,12,0x14,0,0,0,0,0
1392
1393.globl .vpaes_set_encrypt_key
1394.align 5
1395.vpaes_set_encrypt_key:
1396 $STU $sp,-$FRAME($sp)
1397 li r10,`15+6*$SIZE_T`
1398 li r11,`31+6*$SIZE_T`
1399 mflr r0
1400 mfspr r6, 256 # save vrsave
1401 stvx v20,r10,$sp
1402 addi r10,r10,32
1403 stvx v21,r11,$sp
1404 addi r11,r11,32
1405 stvx v22,r10,$sp
1406 addi r10,r10,32
1407 stvx v23,r11,$sp
1408 addi r11,r11,32
1409 stvx v24,r10,$sp
1410 addi r10,r10,32
1411 stvx v25,r11,$sp
1412 addi r11,r11,32
1413 stvx v26,r10,$sp
1414 addi r10,r10,32
1415 stvx v27,r11,$sp
1416 addi r11,r11,32
1417 stvx v28,r10,$sp
1418 addi r10,r10,32
1419 stvx v29,r11,$sp
1420 addi r11,r11,32
1421 stvx v30,r10,$sp
1422 stvx v31,r11,$sp
1423 stw r6,`$FRAME-4`($sp) # save vrsave
1424 li r7, -1
1425 $PUSH r0, `$FRAME+$LRSAVE`($sp)
1426 mtspr 256, r7 # preserve all AltiVec registers
1427
1428 srwi r9, $bits, 5 # shr \$5,%eax
1429 addi r9, r9, 6 # add \$5,%eax
1430 stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
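 # note: r9 is nbits/32+6, i.e. the full 10/12/14 round count consumed
 # by mtctr in the encrypt/decrypt cores; the "+5" comments are carried
 # over from vpaes-x86_64.pl, which stores nbits/32+5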
1431
1432 cmplw $dir, $bits, $bits # set encrypt direction
1433 li r8, 0x30 # mov \$0x30,%r8d
1434 bl _vpaes_schedule_core
1435
1436 $POP r0, `$FRAME+$LRSAVE`($sp)
1437 li r10,`15+6*$SIZE_T`
1438 li r11,`31+6*$SIZE_T`
1439 mtspr 256, r6 # restore vrsave
1440 mtlr r0
1441 xor r3, r3, r3
1442 lvx v20,r10,$sp
1443 addi r10,r10,32
1444 lvx v21,r11,$sp
1445 addi r11,r11,32
1446 lvx v22,r10,$sp
1447 addi r10,r10,32
1448 lvx v23,r11,$sp
1449 addi r11,r11,32
1450 lvx v24,r10,$sp
1451 addi r10,r10,32
1452 lvx v25,r11,$sp
1453 addi r11,r11,32
1454 lvx v26,r10,$sp
1455 addi r10,r10,32
1456 lvx v27,r11,$sp
1457 addi r11,r11,32
1458 lvx v28,r10,$sp
1459 addi r10,r10,32
1460 lvx v29,r11,$sp
1461 addi r11,r11,32
1462 lvx v30,r10,$sp
1463 lvx v31,r11,$sp
1464 addi $sp,$sp,$FRAME
1465 blr
1466 .long 0
1467 .byte 0,12,0x04,1,0x80,0,3,0
1468 .long 0
1469.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
1470
1471.globl .vpaes_set_decrypt_key
1472.align 4
1473.vpaes_set_decrypt_key:
1474 $STU $sp,-$FRAME($sp)
1475 li r10,`15+6*$SIZE_T`
1476 li r11,`31+6*$SIZE_T`
1477 mflr r0
1478 mfspr r6, 256 # save vrsave
1479 stvx v20,r10,$sp
1480 addi r10,r10,32
1481 stvx v21,r11,$sp
1482 addi r11,r11,32
1483 stvx v22,r10,$sp
1484 addi r10,r10,32
1485 stvx v23,r11,$sp
1486 addi r11,r11,32
1487 stvx v24,r10,$sp
1488 addi r10,r10,32
1489 stvx v25,r11,$sp
1490 addi r11,r11,32
1491 stvx v26,r10,$sp
1492 addi r10,r10,32
1493 stvx v27,r11,$sp
1494 addi r11,r11,32
1495 stvx v28,r10,$sp
1496 addi r10,r10,32
1497 stvx v29,r11,$sp
1498 addi r11,r11,32
1499 stvx v30,r10,$sp
1500 stvx v31,r11,$sp
1501 stw r6,`$FRAME-4`($sp) # save vrsave
1502 li r7, -1
1503 $PUSH r0, `$FRAME+$LRSAVE`($sp)
1504 mtspr 256, r7 # preserve all AltiVec registers
1505
1506 srwi r9, $bits, 5 # shr \$5,%eax
1507 addi r9, r9, 6 # add \$5,%eax
1508 stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
1509
1510 slwi r9, r9, 4 # shl \$4,%eax
1511 add $out, $out, r9 # lea (%rdx,%rax),%rdx
1512
1513 cmplwi $dir, $bits, 0 # set decrypt direction
1514 srwi r8, $bits, 1 # shr \$1,%r8d
1515 andi. r8, r8, 32 # and \$32,%r8d
1516 xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
1517 bl _vpaes_schedule_core
1518
1519 $POP r0, `$FRAME+$LRSAVE`($sp)
1520 li r10,`15+6*$SIZE_T`
1521 li r11,`31+6*$SIZE_T`
1522 mtspr 256, r6 # restore vrsave
1523 mtlr r0
1524 xor r3, r3, r3
1525 lvx v20,r10,$sp
1526 addi r10,r10,32
1527 lvx v21,r11,$sp
1528 addi r11,r11,32
1529 lvx v22,r10,$sp
1530 addi r10,r10,32
1531 lvx v23,r11,$sp
1532 addi r11,r11,32
1533 lvx v24,r10,$sp
1534 addi r10,r10,32
1535 lvx v25,r11,$sp
1536 addi r11,r11,32
1537 lvx v26,r10,$sp
1538 addi r10,r10,32
1539 lvx v27,r11,$sp
1540 addi r11,r11,32
1541 lvx v28,r10,$sp
1542 addi r10,r10,32
1543 lvx v29,r11,$sp
1544 addi r11,r11,32
1545 lvx v30,r10,$sp
1546 lvx v31,r11,$sp
1547 addi $sp,$sp,$FRAME
1548 blr
1549 .long 0
1550 .byte 0,12,0x04,1,0x80,0,3,0
1551 .long 0
1552.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
1553___
1554}
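
# For reference, the C glue (e.g. OpenSSL's EVP AES bindings) is assumed to
# declare these entry points roughly as follows; this is an illustration of
# the ABI, not part of this file:
#
#   int  vpaes_set_encrypt_key(const unsigned char *userKey, int bits,
#                              AES_KEY *key);
#   int  vpaes_set_decrypt_key(const unsigned char *userKey, int bits,
#                              AES_KEY *key);
#   void vpaes_encrypt(const unsigned char *in, unsigned char *out,
#                      const AES_KEY *key);
#   void vpaes_decrypt(const unsigned char *in, unsigned char *out,
#                      const AES_KEY *key);
#   void vpaes_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                          size_t length, const AES_KEY *key,
#                          unsigned char *ivec, int enc);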
1555
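# Post-processing pass over $code: tagged .long constants are re-emitted as
# endianness-correct .byte sequences, and instructions prefixed with '?'
# (lvsl/lvsr, vperm, vsldoi, vspltw) are rewritten for little-endian
# flavours; on big-endian the '?' prefix is simply stripped.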
1556my $consts=1;
1557foreach (split("\n",$code)) {
1558 s/\`([^\`]*)\`/eval $1/geo;
1559
1560 # constants table endian-specific conversion
1561 if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
1562 my $conv=$2;
1563 my @bytes=();
1564
1565 # convert to endian-agnostic format
1566 foreach (split(/,\s+/,$1)) {
1567 my $l = /^0/?oct:int;
1568 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1569 }
1570
1571 # little-endian conversion
1572 if ($flavour =~ /le$/o) {
1573 SWITCH: for($conv) {
1574 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1575 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1576 }
1577 }
1578
1579 #emit
1580 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1581 next;
1582 }
1583 $consts=0 if (m/Lconsts:/o); # end of table
1584
1585 # instructions prefixed with '?' are endian-specific and need
1586 # to be adjusted accordingly...
1587 if ($flavour =~ /le$/o) { # little-endian
1588 s/\?lvsr/lvsl/o or
1589 s/\?lvsl/lvsr/o or
1590 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1591 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1592 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1593 } else { # big-endian
1594 s/\?([a-z]+)/$1/o;
1595 }
1596
1597 print $_,"\n";
1598}
1599
1600close STDOUT or die "error closing STDOUT: $!";