VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.4/crypto/aes/asm/aes-s390x.pl@ 103582

最後變更：在修訂版 103582 之前，此檔案的最後一次變更是修訂版 102863，由 vboxsync 於 13 個月前提交

openssl-3.1.4: Applied and adjusted our OpenSSL changes to 3.1.3. bugref:10577

檔案大小: 53.4 KB
 
1#! /usr/bin/env perl
2# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <[email protected]> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# AES for s390x.
18
19# April 2007.
20#
21# Software performance improvement over gcc-generated code is ~70% and
22# in absolute terms is ~73 cycles per byte processed with 128-bit key.
23# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
24# *strictly* in-order execution and issued instruction [in this case
25# load value from memory is critical] has to complete before execution
26# flow proceeds. S-boxes are compressed to 2KB[+256B].
27#
28# As for hardware acceleration support. It's basically a "teaser," as
29# it can and should be improved in several ways. Most notably support
30# for CBC is not utilized, nor multiple blocks are ever processed.
31# Then software key schedule can be postponed till hardware support
32# detection... Performance improvement over assembler is reportedly
33# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
34# support is implemented.
35
36# May 2007.
37#
38# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
39# for 128-bit keys, if hardware support is detected.
40
41# January 2009.
42#
43# Add support for hardware AES192/256 and reschedule instructions to
44# minimize/avoid Address Generation Interlock hazard and to favour
45# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
46# almost 50% on z9. The gain is smaller on z10, because being dual-
47# issue z10 makes it impossible to eliminate the interlock condition:
48# critical path is not long enough. Yet it spends ~24 cycles per byte
49# processed with 128-bit key.
50#
51# Unlike previous version hardware support detection takes place only
52# at the moment of key schedule setup, which is denoted in key->rounds.
53# This is done, because deferred key setup can't be made MT-safe, not
54# for keys longer than 128 bits.
55#
56# Add AES_cbc_encrypt, which gives incredible performance improvement,
57# it was measured to be ~6.6x. It's less than previously mentioned 8x,
58# because software implementation was optimized.
59
60# May 2010.
61#
62# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
63# performance improvement over "generic" counter mode routine relying
64# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
65# to the fact that exact throughput value depends on current stack
66# frame alignment within 4KB page. In worst case you get ~75% of the
67# maximum, but *on average* it would be as much as ~98%. Meaning that
# worst case is unlikely, it's like hitting a ravine on a plateau.
69
70# November 2010.
71#
72# Adapt for -m31 build. If kernel supports what's called "highgprs"
73# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
74# instructions and achieve "64-bit" performance even in 31-bit legacy
75# application context. The feature is not specific to any particular
76# processor, as long as it's "z-CPU". Latter implies that the code
77# remains z/Architecture specific. On z990 it was measured to perform
78# 2x better than code generated by gcc 4.3.
79
80# December 2010.
81#
82# Add support for z196 "cipher message with counter" instruction.
83# Note however that it's disengaged, because it was measured to
84# perform ~12% worse than vanilla km-based code...
85
86# February 2011.
87#
88# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
89# instructions, which deliver ~70% improvement at 8KB block size over
90# vanilla km-based code, 37% - at most like 512-bytes block size.
91
# Command-line convention shared by the OpenSSL perlasm drivers:
# the last argument is the output file iff it carries an extension,
# and the first argument is the flavour iff it does not look like a file.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : undef;

# 31/32-bit flavours use 4-byte pointers and drop the "g" suffix that
# selects 64-bit forms of load/store mnemonics; everything else is 64-bit.
if ($flavour =~ /3[12]/) {
	($SIZE_T, $g) = (4, "");
} else {
	($SIZE_T, $g) = (8, "g");
}

open STDOUT, ">$output" if $output;
106
$softonly=0;	# set non-zero to force the pure-software path (no km/kmc)

# Register allocation.  Note that the temporaries deliberately alias the
# argument registers: $t0..$t3 overlap %r0..%r3, which also carry
# $mask/$inp/$out/$bits, so a temporary is only live once the argument
# sharing its register has been consumed.
$t0="%r0";	$mask="%r0";
$t1="%r1";
$t2="%r2";	$inp="%r2";
$t3="%r3";	$out="%r3";	$bits="%r3";
$key="%r4";
$i1="%r5";
$i2="%r6";
$i3="%r7";
$s0="%r8";	# AES state words s0..s3
$s1="%r9";
$s2="%r10";
$s3="%r11";
$tbl="%r12";	# lookup-table base (AES_Te / AES_Td)
$rounds="%r13";
$ra="%r14";	# return address
$sp="%r15";	# stack pointer

# Standard frame: 16 pointer-sized register save slots plus 4*8 bytes
# (ABI-defined save area; exact layout per s390x calling convention).
$stdframe=16*$SIZE_T+4*8;
127
# Append each 32-bit argument to $code as a ".long" pair, emitting every
# word TWICE.  The doubled entries allow the encrypt/decrypt loops to read
# byte-rotated views of a table entry via 0..3 byte offsets.
#
# Note: the original declared this as "sub _data_word()" with an empty
# prototype, which promises zero arguments while the body consumes a list;
# it only worked because call sites use "&_data_word(...)", which bypasses
# prototypes.  The prototype is dropped here — existing "&" calls are
# unaffected, and plain calls now work too.
sub _data_word
{ my $i;
	while (defined($i = shift)) {
		$code .= sprintf ".long\t0x%08x,0x%08x\n", $i, $i;
	}
}
132
# AES_Te: encryption lookup tables, aligned to 256 bytes.  This header is
# followed by Te0 (256 words, each emitted twice by _data_word), the Te4
# S-box bytes and rcon[] — see the data that follows this statement.
$code=<<___;
#include "s390x_arch.h"

.text

.type	AES_Te,\@object
.align	256
AES_Te:
___
142&_data_word(
143 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
144 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
145 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
146 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
147 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
148 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
149 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
150 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
151 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
152 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
153 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
154 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
155 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
156 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
157 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
158 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
159 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
160 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
161 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
162 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
163 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
164 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
165 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
166 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
167 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
168 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
169 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
170 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
171 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
172 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
173 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
174 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
175 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
176 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
177 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
178 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
179 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
180 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
181 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
182 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
183 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
184 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
185 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
186 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
187 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
188 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
189 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
190 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
191 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
192 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
193 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
194 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
195 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
196 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
197 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
198 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
199 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
200 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
201 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
202 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
203 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
204 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
205 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
206 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
207$code.=<<___;
208# Te4[256]
209.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
210.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
211.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
212.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
213.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
214.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
215.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
216.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
217.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
218.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
219.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
220.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
221.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
222.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
223.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
224.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
225.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
226.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
227.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
228.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
229.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
230.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
231.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
232.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
233.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
234.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
235.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
236.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
237.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
238.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
239.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
240.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
241# rcon[]
242.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
243.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
244.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
245.align 256
246.size AES_Te,.-AES_Te
247
248# void AES_encrypt(const unsigned char *inp, unsigned char *out,
249# const AES_KEY *key) {
250.globl AES_encrypt
251.type AES_encrypt,\@function
252AES_encrypt:
253___
# Hardware-assisted encrypt path.  240($key) holds either a software round
# count (10/12/14) or a km function code (18/19/20, stored by
# AES_set_encrypt_key when km+kmc are available); values below 16 fall
# through to the software routine at .Lesoft, otherwise the KM ("cipher
# message") instruction processes the single block directly.
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Lesoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Lesoft:
___
270$code.=<<___;
271 stm${g} %r3,$ra,3*$SIZE_T($sp)
272
273 llgf $s0,0($inp)
274 llgf $s1,4($inp)
275 llgf $s2,8($inp)
276 llgf $s3,12($inp)
277
278 larl $tbl,AES_Te
279 bras $ra,_s390x_AES_encrypt
280
281 l${g} $out,3*$SIZE_T($sp)
282 st $s0,0($out)
283 st $s1,4($out)
284 st $s2,8($out)
285 st $s3,12($out)
286
287 lm${g} %r6,$ra,6*$SIZE_T($sp)
288 br $ra
289.size AES_encrypt,.-AES_encrypt
290
291.type _s390x_AES_encrypt,\@function
292.align 16
293_s390x_AES_encrypt:
294 st${g} $ra,15*$SIZE_T($sp)
295 x $s0,0($key)
296 x $s1,4($key)
297 x $s2,8($key)
298 x $s3,12($key)
299 l $rounds,240($key)
300 llill $mask,`0xff<<3`
301 aghi $rounds,-1
302 j .Lenc_loop
303.align 16
304.Lenc_loop:
305 sllg $t1,$s0,`0+3`
306 srlg $t2,$s0,`8-3`
307 srlg $t3,$s0,`16-3`
308 srl $s0,`24-3`
309 nr $s0,$mask
310 ngr $t1,$mask
311 nr $t2,$mask
312 nr $t3,$mask
313
314 srlg $i1,$s1,`16-3` # i0
315 sllg $i2,$s1,`0+3`
316 srlg $i3,$s1,`8-3`
317 srl $s1,`24-3`
318 nr $i1,$mask
319 nr $s1,$mask
320 ngr $i2,$mask
321 nr $i3,$mask
322
323 l $s0,0($s0,$tbl) # Te0[s0>>24]
324 l $t1,1($t1,$tbl) # Te3[s0>>0]
325 l $t2,2($t2,$tbl) # Te2[s0>>8]
326 l $t3,3($t3,$tbl) # Te1[s0>>16]
327
328 x $s0,3($i1,$tbl) # Te1[s1>>16]
329 l $s1,0($s1,$tbl) # Te0[s1>>24]
330 x $t2,1($i2,$tbl) # Te3[s1>>0]
331 x $t3,2($i3,$tbl) # Te2[s1>>8]
332
333 srlg $i1,$s2,`8-3` # i0
334 srlg $i2,$s2,`16-3` # i1
335 nr $i1,$mask
336 nr $i2,$mask
337 sllg $i3,$s2,`0+3`
338 srl $s2,`24-3`
339 nr $s2,$mask
340 ngr $i3,$mask
341
342 xr $s1,$t1
343 srlg $ra,$s3,`8-3` # i1
344 sllg $t1,$s3,`0+3` # i0
345 nr $ra,$mask
346 la $key,16($key)
347 ngr $t1,$mask
348
349 x $s0,2($i1,$tbl) # Te2[s2>>8]
350 x $s1,3($i2,$tbl) # Te1[s2>>16]
351 l $s2,0($s2,$tbl) # Te0[s2>>24]
352 x $t3,1($i3,$tbl) # Te3[s2>>0]
353
354 srlg $i3,$s3,`16-3` # i2
355 xr $s2,$t2
356 srl $s3,`24-3`
357 nr $i3,$mask
358 nr $s3,$mask
359
360 x $s0,0($key)
361 x $s1,4($key)
362 x $s2,8($key)
363 x $t3,12($key)
364
365 x $s0,1($t1,$tbl) # Te3[s3>>0]
366 x $s1,2($ra,$tbl) # Te2[s3>>8]
367 x $s2,3($i3,$tbl) # Te1[s3>>16]
368 l $s3,0($s3,$tbl) # Te0[s3>>24]
369 xr $s3,$t3
370
371 brct $rounds,.Lenc_loop
372 .align 16
373
374 sllg $t1,$s0,`0+3`
375 srlg $t2,$s0,`8-3`
376 ngr $t1,$mask
377 srlg $t3,$s0,`16-3`
378 srl $s0,`24-3`
379 nr $s0,$mask
380 nr $t2,$mask
381 nr $t3,$mask
382
383 srlg $i1,$s1,`16-3` # i0
384 sllg $i2,$s1,`0+3`
385 ngr $i2,$mask
386 srlg $i3,$s1,`8-3`
387 srl $s1,`24-3`
388 nr $i1,$mask
389 nr $s1,$mask
390 nr $i3,$mask
391
392 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
393 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
394 sll $s0,24
395 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
396 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
397 sll $t2,8
398 sll $t3,16
399
400 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
401 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
402 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
403 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
404 sll $i1,16
405 sll $s1,24
406 sll $i3,8
407 or $s0,$i1
408 or $s1,$t1
409 or $t2,$i2
410 or $t3,$i3
411
412 srlg $i1,$s2,`8-3` # i0
413 srlg $i2,$s2,`16-3` # i1
414 nr $i1,$mask
415 nr $i2,$mask
416 sllg $i3,$s2,`0+3`
417 srl $s2,`24-3`
418 ngr $i3,$mask
419 nr $s2,$mask
420
421 sllg $t1,$s3,`0+3` # i0
422 srlg $ra,$s3,`8-3` # i1
423 ngr $t1,$mask
424
425 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
426 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
427 sll $i1,8
428 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
429 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
430 sll $i2,16
431 nr $ra,$mask
432 sll $s2,24
433 or $s0,$i1
434 or $s1,$i2
435 or $s2,$t2
436 or $t3,$i3
437
438 srlg $i3,$s3,`16-3` # i2
439 srl $s3,`24-3`
440 nr $i3,$mask
441 nr $s3,$mask
442
443 l $t0,16($key)
444 l $t2,20($key)
445
446 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
447 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
448 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
449 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
450 sll $i2,8
451 sll $i3,16
452 sll $s3,24
453 or $s0,$i1
454 or $s1,$i2
455 or $s2,$i3
456 or $s3,$t3
457
458 l${g} $ra,15*$SIZE_T($sp)
459 xr $s0,$t0
460 xr $s1,$t2
461 x $s2,24($key)
462 x $s3,28($key)
463
464 br $ra
465.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
466___
467
# AES_Td: decryption lookup tables, aligned to 256 bytes; followed by the
# Td0 word table (emitted doubled by _data_word) and the Td4 byte table.
$code.=<<___;
.type	AES_Td,\@object
.align	256
AES_Td:
___
473&_data_word(
474 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
475 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
476 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
477 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
478 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
479 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
480 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
481 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
482 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
483 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
484 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
485 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
486 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
487 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
488 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
489 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
490 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
491 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
492 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
493 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
494 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
495 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
496 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
497 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
498 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
499 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
500 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
501 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
502 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
503 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
504 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
505 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
506 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
507 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
508 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
509 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
510 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
511 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
512 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
513 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
514 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
515 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
516 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
517 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
518 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
519 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
520 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
521 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
522 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
523 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
524 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
525 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
526 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
527 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
528 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
529 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
530 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
531 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
532 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
533 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
534 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
535 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
536 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
537 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
538$code.=<<___;
539# Td4[256]
540.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
541.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
542.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
543.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
544.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
545.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
546.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
547.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
548.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
549.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
550.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
551.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
552.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
553.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
554.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
555.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
556.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
557.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
558.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
559.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
560.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
561.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
562.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
563.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
564.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
565.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
566.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
567.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
568.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
569.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
570.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
571.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
572.size AES_Td,.-AES_Td
573
574# void AES_decrypt(const unsigned char *inp, unsigned char *out,
575# const AES_KEY *key) {
576.globl AES_decrypt
577.type AES_decrypt,\@function
578AES_decrypt:
579___
# Hardware-assisted decrypt path, mirror image of the AES_encrypt stub:
# 240($key) < 16 means software schedule (branch to .Ldsoft), otherwise it
# is a km function code (with the decrypt bit set by AES_set_decrypt_key)
# and KM handles the block directly.
$code.=<<___ if (!$softonly);
	l	%r0,240($key)
	lhi	%r1,16
	clr	%r0,%r1
	jl	.Ldsoft

	la	%r1,0($key)
	#la	%r2,0($inp)
	la	%r4,0($out)
	lghi	%r3,16		# single block length
	.long	0xb92e0042	# km %r4,%r2
	brc	1,.-4		# can this happen?
	br	%r14
.align	64
.Ldsoft:
___
596$code.=<<___;
597 stm${g} %r3,$ra,3*$SIZE_T($sp)
598
599 llgf $s0,0($inp)
600 llgf $s1,4($inp)
601 llgf $s2,8($inp)
602 llgf $s3,12($inp)
603
604 larl $tbl,AES_Td
605 bras $ra,_s390x_AES_decrypt
606
607 l${g} $out,3*$SIZE_T($sp)
608 st $s0,0($out)
609 st $s1,4($out)
610 st $s2,8($out)
611 st $s3,12($out)
612
613 lm${g} %r6,$ra,6*$SIZE_T($sp)
614 br $ra
615.size AES_decrypt,.-AES_decrypt
616
617.type _s390x_AES_decrypt,\@function
618.align 16
619_s390x_AES_decrypt:
620 st${g} $ra,15*$SIZE_T($sp)
621 x $s0,0($key)
622 x $s1,4($key)
623 x $s2,8($key)
624 x $s3,12($key)
625 l $rounds,240($key)
626 llill $mask,`0xff<<3`
627 aghi $rounds,-1
628 j .Ldec_loop
629.align 16
630.Ldec_loop:
631 srlg $t1,$s0,`16-3`
632 srlg $t2,$s0,`8-3`
633 sllg $t3,$s0,`0+3`
634 srl $s0,`24-3`
635 nr $s0,$mask
636 nr $t1,$mask
637 nr $t2,$mask
638 ngr $t3,$mask
639
640 sllg $i1,$s1,`0+3` # i0
641 srlg $i2,$s1,`16-3`
642 srlg $i3,$s1,`8-3`
643 srl $s1,`24-3`
644 ngr $i1,$mask
645 nr $s1,$mask
646 nr $i2,$mask
647 nr $i3,$mask
648
649 l $s0,0($s0,$tbl) # Td0[s0>>24]
650 l $t1,3($t1,$tbl) # Td1[s0>>16]
651 l $t2,2($t2,$tbl) # Td2[s0>>8]
652 l $t3,1($t3,$tbl) # Td3[s0>>0]
653
654 x $s0,1($i1,$tbl) # Td3[s1>>0]
655 l $s1,0($s1,$tbl) # Td0[s1>>24]
656 x $t2,3($i2,$tbl) # Td1[s1>>16]
657 x $t3,2($i3,$tbl) # Td2[s1>>8]
658
659 srlg $i1,$s2,`8-3` # i0
660 sllg $i2,$s2,`0+3` # i1
661 srlg $i3,$s2,`16-3`
662 srl $s2,`24-3`
663 nr $i1,$mask
664 ngr $i2,$mask
665 nr $s2,$mask
666 nr $i3,$mask
667
668 xr $s1,$t1
669 srlg $ra,$s3,`8-3` # i1
670 srlg $t1,$s3,`16-3` # i0
671 nr $ra,$mask
672 la $key,16($key)
673 nr $t1,$mask
674
675 x $s0,2($i1,$tbl) # Td2[s2>>8]
676 x $s1,1($i2,$tbl) # Td3[s2>>0]
677 l $s2,0($s2,$tbl) # Td0[s2>>24]
678 x $t3,3($i3,$tbl) # Td1[s2>>16]
679
680 sllg $i3,$s3,`0+3` # i2
681 srl $s3,`24-3`
682 ngr $i3,$mask
683 nr $s3,$mask
684
685 xr $s2,$t2
686 x $s0,0($key)
687 x $s1,4($key)
688 x $s2,8($key)
689 x $t3,12($key)
690
691 x $s0,3($t1,$tbl) # Td1[s3>>16]
692 x $s1,2($ra,$tbl) # Td2[s3>>8]
693 x $s2,1($i3,$tbl) # Td3[s3>>0]
694 l $s3,0($s3,$tbl) # Td0[s3>>24]
695 xr $s3,$t3
696
697 brct $rounds,.Ldec_loop
698 .align 16
699
700 l $t1,`2048+0`($tbl) # prefetch Td4
701 l $t2,`2048+64`($tbl)
702 l $t3,`2048+128`($tbl)
703 l $i1,`2048+192`($tbl)
704 llill $mask,0xff
705
706 srlg $i3,$s0,24 # i0
707 srlg $t1,$s0,16
708 srlg $t2,$s0,8
709 nr $s0,$mask # i3
710 nr $t1,$mask
711
712 srlg $i1,$s1,24
713 nr $t2,$mask
714 srlg $i2,$s1,16
715 srlg $ra,$s1,8
716 nr $s1,$mask # i0
717 nr $i2,$mask
718 nr $ra,$mask
719
720 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
721 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
722 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
723 sll $t1,16
724 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
725 sllg $s0,$i3,24
726 sll $t2,8
727
728 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
729 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
730 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
731 sll $i1,24
732 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
733 sll $i2,16
734 sll $i3,8
735 or $s0,$s1
736 or $t1,$i1
737 or $t2,$i2
738 or $t3,$i3
739
740 srlg $i1,$s2,8 # i0
741 srlg $i2,$s2,24
742 srlg $i3,$s2,16
743 nr $s2,$mask # i1
744 nr $i1,$mask
745 nr $i3,$mask
746 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
747 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
748 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
749 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
750 sll $i1,8
751 sll $i2,24
752 or $s0,$i1
753 sll $i3,16
754 or $t2,$i2
755 or $t3,$i3
756
757 srlg $i1,$s3,16 # i0
758 srlg $i2,$s3,8 # i1
759 srlg $i3,$s3,24
760 nr $s3,$mask # i2
761 nr $i1,$mask
762 nr $i2,$mask
763
764 l${g} $ra,15*$SIZE_T($sp)
765 or $s1,$t1
766 l $t0,16($key)
767 l $t1,20($key)
768
769 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
770 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
771 sll $i1,16
772 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
773 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
774 sll $i2,8
775 sll $s3,24
776 or $s0,$i1
777 or $s1,$i2
778 or $s2,$t2
779 or $s3,$t3
780
781 xr $s0,$t0
782 xr $s1,$t1
783 x $s2,24($key)
784 x $s3,28($key)
785
786 br $ra
787.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
788___
789
# AES_set_encrypt_key entry: argument validation only.  NULL input or key
# returns -1 (.Lminus1); a bit length other than 128/192/256 returns -2;
# valid arguments fall through to .Lproceed.
$code.=<<___;
# void AES_set_encrypt_key(const unsigned char *in, int bits,
#		 AES_KEY *key) {
.globl	AES_set_encrypt_key
.type	AES_set_encrypt_key,\@function
.align	16
AES_set_encrypt_key:
_s390x_AES_set_encrypt_key:
	lghi	$t0,0
	cl${g}r	$inp,$t0
	je	.Lminus1
	cl${g}r	$key,$t0
	je	.Lminus1

	lghi	$t0,128
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,192
	clr	$bits,$t0
	je	.Lproceed
	lghi	$t0,256
	clr	$bits,$t0
	je	.Lproceed
	lghi	%r2,-2
	br	%r14

.align	16
.Lproceed:
___
# Hardware key setup: map 128/192/256 bits to km function codes 18/19/20,
# then probe OPENSSL_s390xcap_P for BOTH km and kmc support at that key
# length.  If either is missing, fall through to the software schedule
# (.Lekey_internal); otherwise just copy the raw key bits and record the
# km code at 240($key) in place of a round count.
$code.=<<___ if (!$softonly);
	# convert bits to km(c) code, [128,192,256]->[18,19,20]
	lhi	%r5,-128
	lhi	%r0,18
	ar	%r5,$bits
	srl	%r5,6
	ar	%r5,%r0

	larl	%r1,OPENSSL_s390xcap_P
	llihh	%r0,0x8000
	srlg	%r0,%r0,0(%r5)
	ng	%r0,S390X_KM(%r1)	# check availability of both km...
	ng	%r0,S390X_KMC(%r1)	# ...and kmc support for given key length
	jz	.Lekey_internal

	lmg	%r0,%r1,0($inp)	# just copy 128 bits...
	stmg	%r0,%r1,0($key)
	lhi	%r0,192
	cr	$bits,%r0
	jl	1f
	lg	%r1,16($inp)
	stg	%r1,16($key)
	je	1f
	lg	%r1,24($inp)
	stg	%r1,24($key)
1:	st	$bits,236($key)	# save bits [for debugging purposes]
	lgr	$t0,%r5
	st	%r5,240($key)	# save km(c) code
	lghi	%r2,0
	br	%r14
___
850$code.=<<___;
851.align 16
852.Lekey_internal:
853 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
854
855 larl $tbl,AES_Te+2048
856
857 llgf $s0,0($inp)
858 llgf $s1,4($inp)
859 llgf $s2,8($inp)
860 llgf $s3,12($inp)
861 st $s0,0($key)
862 st $s1,4($key)
863 st $s2,8($key)
864 st $s3,12($key)
865 lghi $t0,128
866 cr $bits,$t0
867 jne .Lnot128
868
869 llill $mask,0xff
870 lghi $t3,0 # i=0
871 lghi $rounds,10
872 st $rounds,240($key)
873
874 llgfr $t2,$s3 # temp=rk[3]
875 srlg $i1,$s3,8
876 srlg $i2,$s3,16
877 srlg $i3,$s3,24
878 nr $t2,$mask
879 nr $i1,$mask
880 nr $i2,$mask
881
882.align 16
883.L128_loop:
884 la $t2,0($t2,$tbl)
885 la $i1,0($i1,$tbl)
886 la $i2,0($i2,$tbl)
887 la $i3,0($i3,$tbl)
888 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
889 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
890 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
891 icm $t2,1,0($i3) # Te4[rk[3]>>24]
892 x $t2,256($t3,$tbl) # rcon[i]
893 xr $s0,$t2 # rk[4]=rk[0]^...
894 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
895 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
896 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
897
898 llgfr $t2,$s3 # temp=rk[3]
899 srlg $i1,$s3,8
900 srlg $i2,$s3,16
901 nr $t2,$mask
902 nr $i1,$mask
903 srlg $i3,$s3,24
904 nr $i2,$mask
905
906 st $s0,16($key)
907 st $s1,20($key)
908 st $s2,24($key)
909 st $s3,28($key)
910 la $key,16($key) # key+=4
911 la $t3,4($t3) # i++
912 brct $rounds,.L128_loop
913 lghi $t0,10
914 lghi %r2,0
915 lm${g} %r4,%r13,4*$SIZE_T($sp)
916 br $ra
917
918.align 16
919.Lnot128:
920 llgf $t0,16($inp)
921 llgf $t1,20($inp)
922 st $t0,16($key)
923 st $t1,20($key)
924 lghi $t0,192
925 cr $bits,$t0
926 jne .Lnot192
927
928 llill $mask,0xff
929 lghi $t3,0 # i=0
930 lghi $rounds,12
931 st $rounds,240($key)
932 lghi $rounds,8
933
934 srlg $i1,$t1,8
935 srlg $i2,$t1,16
936 srlg $i3,$t1,24
937 nr $t1,$mask
938 nr $i1,$mask
939 nr $i2,$mask
940
941.align 16
942.L192_loop:
943 la $t1,0($t1,$tbl)
944 la $i1,0($i1,$tbl)
945 la $i2,0($i2,$tbl)
946 la $i3,0($i3,$tbl)
947 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
948 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
949 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
950 icm $t1,1,0($i3) # Te4[rk[5]>>24]
951 x $t1,256($t3,$tbl) # rcon[i]
952 xr $s0,$t1 # rk[6]=rk[0]^...
953 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
954 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
955 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
956
957 st $s0,24($key)
958 st $s1,28($key)
959 st $s2,32($key)
960 st $s3,36($key)
961 brct $rounds,.L192_continue
962 lghi $t0,12
963 lghi %r2,0
964 lm${g} %r4,%r13,4*$SIZE_T($sp)
965 br $ra
966
967.align 16
968.L192_continue:
969 lgr $t1,$s3
970 x $t1,16($key) # rk[10]=rk[4]^rk[9]
971 st $t1,40($key)
972 x $t1,20($key) # rk[11]=rk[5]^rk[10]
973 st $t1,44($key)
974
975 srlg $i1,$t1,8
976 srlg $i2,$t1,16
977 srlg $i3,$t1,24
978 nr $t1,$mask
979 nr $i1,$mask
980 nr $i2,$mask
981
982 la $key,24($key) # key+=6
983 la $t3,4($t3) # i++
984 j .L192_loop
985
986.align 16
987.Lnot192:
988 llgf $t0,24($inp)
989 llgf $t1,28($inp)
990 st $t0,24($key)
991 st $t1,28($key)
992 llill $mask,0xff
993 lghi $t3,0 # i=0
994 lghi $rounds,14
995 st $rounds,240($key)
996 lghi $rounds,7
997
998 srlg $i1,$t1,8
999 srlg $i2,$t1,16
1000 srlg $i3,$t1,24
1001 nr $t1,$mask
1002 nr $i1,$mask
1003 nr $i2,$mask
1004
1005.align 16
1006.L256_loop:
1007 la $t1,0($t1,$tbl)
1008 la $i1,0($i1,$tbl)
1009 la $i2,0($i2,$tbl)
1010 la $i3,0($i3,$tbl)
1011 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
1012 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
1013 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
1014 icm $t1,1,0($i3) # Te4[rk[7]>>24]
1015 x $t1,256($t3,$tbl) # rcon[i]
1016 xr $s0,$t1 # rk[8]=rk[0]^...
1017 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
1018 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
1019 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
1020 st $s0,32($key)
1021 st $s1,36($key)
1022 st $s2,40($key)
1023 st $s3,44($key)
1024 brct $rounds,.L256_continue
1025 lghi $t0,14
1026 lghi %r2,0
1027 lm${g} %r4,%r13,4*$SIZE_T($sp)
1028 br $ra
1029
1030.align 16
1031.L256_continue:
1032 lgr $t1,$s3 # temp=rk[11]
1033 srlg $i1,$s3,8
1034 srlg $i2,$s3,16
1035 srlg $i3,$s3,24
1036 nr $t1,$mask
1037 nr $i1,$mask
1038 nr $i2,$mask
1039 la $t1,0($t1,$tbl)
1040 la $i1,0($i1,$tbl)
1041 la $i2,0($i2,$tbl)
1042 la $i3,0($i3,$tbl)
1043 llgc $t1,0($t1) # Te4[rk[11]>>0]
1044 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
1045 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
1046 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
1047 x $t1,16($key) # rk[12]=rk[4]^...
1048 st $t1,48($key)
1049 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1050 st $t1,52($key)
1051 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1052 st $t1,56($key)
1053 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1054 st $t1,60($key)
1055
1056 srlg $i1,$t1,8
1057 srlg $i2,$t1,16
1058 srlg $i3,$t1,24
1059 nr $t1,$mask
1060 nr $i1,$mask
1061 nr $i2,$mask
1062
1063 la $key,32($key) # key+=8
1064 la $t3,4($t3) # i++
1065 j .L256_loop
1066
1067.Lminus1:
1068 lghi %r2,-1
1069 br $ra
1070.size AES_set_encrypt_key,.-AES_set_encrypt_key
1071
1072# void AES_set_decrypt_key(const unsigned char *in, int bits,
1073# AES_KEY *key) {
1074.globl AES_set_decrypt_key
1075.type AES_set_decrypt_key,\@function
1076.align 16
1077AES_set_decrypt_key:
1078 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
1079 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
1080 bras $ra,_s390x_AES_set_encrypt_key
1081 #l${g} $key,4*$SIZE_T($sp)
1082 l${g} $ra,14*$SIZE_T($sp)
1083 ltgr %r2,%r2
1084 bnzr $ra
1085___
# Hardware schedule case ($t0 still holds 240($key) from the encrypt-key
# call): values >= 16 are km function codes, so no inverse schedule is
# required — just OR in the S390X_DECRYPT bit and return.  Values < 16 are
# software round counts and branch to the inverse-schedule code at .Lgo.
$code.=<<___ if (!$softonly);
	#l	$t0,240($key)
	lhi	$t1,16
	cr	$t0,$t1
	jl	.Lgo
	oill	$t0,S390X_DECRYPT	# set "decrypt" bit
	st	$t0,240($key)
	br	$ra
___
1095$code.=<<___;
1096.align 16
1097.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key)
1098 la $i1,0($key)
1099 sllg $i2,$rounds,4
1100 la $i2,0($i2,$key)
1101 srl $rounds,1
1102 lghi $t1,-16
1103
1104.align 16
1105.Linv: lmg $s0,$s1,0($i1)
1106 lmg $s2,$s3,0($i2)
1107 stmg $s0,$s1,0($i2)
1108 stmg $s2,$s3,0($i1)
1109 la $i1,16($i1)
1110 la $i2,0($t1,$i2)
1111 brct $rounds,.Linv
1112___
1113$mask80=$i1;
1114$mask1b=$i2;
1115$maskfe=$i3;
1116$code.=<<___;
1117 llgf $rounds,240($key)
1118 aghi $rounds,-1
1119 sll $rounds,2 # (rounds-1)*4
1120 llilh $mask80,0x8080
1121 llilh $mask1b,0x1b1b
1122 llilh $maskfe,0xfefe
1123 oill $mask80,0x8080
1124 oill $mask1b,0x1b1b
1125 oill $maskfe,0xfefe
1126
1127.align 16
1128.Lmix: l $s0,16($key) # tp1
1129 lr $s1,$s0
1130 ngr $s1,$mask80
1131 srlg $t1,$s1,7
1132 slr $s1,$t1
1133 nr $s1,$mask1b
1134 sllg $t1,$s0,1
1135 nr $t1,$maskfe
1136 xr $s1,$t1 # tp2
1137
1138 lr $s2,$s1
1139 ngr $s2,$mask80
1140 srlg $t1,$s2,7
1141 slr $s2,$t1
1142 nr $s2,$mask1b
1143 sllg $t1,$s1,1
1144 nr $t1,$maskfe
1145 xr $s2,$t1 # tp4
1146
1147 lr $s3,$s2
1148 ngr $s3,$mask80
1149 srlg $t1,$s3,7
1150 slr $s3,$t1
1151 nr $s3,$mask1b
1152 sllg $t1,$s2,1
1153 nr $t1,$maskfe
1154 xr $s3,$t1 # tp8
1155
1156 xr $s1,$s0 # tp2^tp1
1157 xr $s2,$s0 # tp4^tp1
1158 rll $s0,$s0,24 # = ROTATE(tp1,8)
1159 xr $s2,$s3 # ^=tp8
1160 xr $s0,$s1 # ^=tp2^tp1
1161 xr $s1,$s3 # tp2^tp1^tp8
1162 xr $s0,$s2 # ^=tp4^tp1^tp8
1163 rll $s1,$s1,8
1164 rll $s2,$s2,16
1165 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
1166 rll $s3,$s3,24
1167 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
1168 xr $s0,$s3 # ^= ROTATE(tp8,8)
1169
1170 st $s0,16($key)
1171 la $key,4($key)
1172 brct $rounds,.Lmix
1173
1174 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
1175 lghi %r2,0
1176 br $ra
1177.size AES_set_decrypt_key,.-AES_set_decrypt_key
1178___
1179
1180########################################################################
1181# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1182#                     size_t length, const AES_KEY *key,
1183#                     unsigned char *ivec, const int enc)
# Lexical scope so the register aliases below do not leak into the
# generators that follow.  NOTE the ABI quirk: the caller passes
# (in, out, length, ...) but this code wants length in %r3 and out in
# %r4, hence the three-xgr swap at the entry point.
{
1185my $inp="%r2";
1186my $out="%r4";	# length and out are swapped
1187my $len="%r3";
1188my $key="%r5";
1189my $ivp="%r6";
1190
1191$code.=<<___;
1192.globl	AES_cbc_encrypt
1193.type	AES_cbc_encrypt,\@function
1194.align	16
1195AES_cbc_encrypt:
1196	xgr	%r3,%r4		# flip %r3 and %r4, out and len
1197	xgr	%r4,%r3
1198	xgr	%r3,%r4
1199___
# Hardware fast path (!$softonly): when key+240 holds a KMC function
# code (<=16), copy ivec+key into a parameter block on the stack and
# let the KMC instruction process the bulk.  A trailing partial block
# (len%16) is handled by padding via mvc/ex into a stack buffer
# (.Lkmc_truncated / .Lkmc_truncated_dec).
1200$code.=<<___ if (!$softonly);
1201	lhi	%r0,16
1202	cl	%r0,240($key)
1203	jh	.Lcbc_software
1204
1205	lg	%r0,0($ivp)	# copy ivec
1206	lg	%r1,8($ivp)
1207	stmg	%r0,%r1,16($sp)
1208	lmg	%r0,%r1,0($key)	# copy key, cover 256 bit
1209	stmg	%r0,%r1,32($sp)
1210	lmg	%r0,%r1,16($key)
1211	stmg	%r0,%r1,48($sp)
1212	l	%r0,240($key)	# load kmc code
1213	lghi	$key,15		# res=len%16, len-=res;
1214	ngr	$key,$len
1215	sl${g}r	$len,$key
1216	la	%r1,16($sp)	# parameter block - ivec || key
1217	jz	.Lkmc_truncated
1218	.long	0xb92f0042	# kmc %r4,%r2
1219	brc	1,.-4		# pay attention to "partial completion"
1220	ltr	$key,$key
1221	jnz	.Lkmc_truncated
1222.Lkmc_done:
1223	lmg	%r0,%r1,16($sp)	# copy ivec to caller
1224	stg	%r0,0($ivp)
1225	stg	%r1,8($ivp)
1226	br	$ra
1227.align	16
1228.Lkmc_truncated:
1229	ahi	$key,-1		# it's the way it's encoded in mvc
1230	tmll	%r0,S390X_DECRYPT
1231	jnz	.Lkmc_truncated_dec
1232	lghi	%r1,0
1233	stg	%r1,16*$SIZE_T($sp)
1234	stg	%r1,16*$SIZE_T+8($sp)
1235	bras	%r1,1f
1236	mvc	16*$SIZE_T(1,$sp),0($inp)
12371:	ex	$key,0(%r1)
1238	la	%r1,16($sp)	# restore parameter block
1239	la	$inp,16*$SIZE_T($sp)
1240	lghi	$len,16
1241	.long	0xb92f0042	# kmc %r4,%r2
1242	j	.Lkmc_done
1243.align	16
1244.Lkmc_truncated_dec:
1245	st${g}	$out,4*$SIZE_T($sp)
1246	la	$out,16*$SIZE_T($sp)
1247	lghi	$len,16
1248	.long	0xb92f0042	# kmc %r4,%r2
1249	l${g}	$out,4*$SIZE_T($sp)
1250	bras	%r1,2f
1251	mvc	0(1,$out),16*$SIZE_T($sp)
12522:	ex	$key,0(%r1)
1253	j	.Lkmc_done
1254.align	16
1255.Lcbc_software:
1256___
# Software fallback: classic CBC built on _s390x_AES_encrypt /
# _s390x_AES_decrypt one block at a time; encrypt keeps the chaining
# value in registers, decrypt keeps the previous ciphertext block on
# the stack.  Tail blocks are zero-padded (enc) or copied out byte-wise
# (dec) with the same mvc/ex trick as above.
1257$code.=<<___;
1258	stm${g}	$key,$ra,5*$SIZE_T($sp)
1259	lhi	%r0,0
1260	cl	%r0,`$stdframe+$SIZE_T-4`($sp)
1261	je	.Lcbc_decrypt
1262
1263	larl	$tbl,AES_Te
1264
1265	llgf	$s0,0($ivp)
1266	llgf	$s1,4($ivp)
1267	llgf	$s2,8($ivp)
1268	llgf	$s3,12($ivp)
1269
1270	lghi	$t0,16
1271	sl${g}r	$len,$t0
1272	brc	4,.Lcbc_enc_tail	# if borrow
1273.Lcbc_enc_loop:
1274	stm${g}	$inp,$out,2*$SIZE_T($sp)
1275	x	$s0,0($inp)
1276	x	$s1,4($inp)
1277	x	$s2,8($inp)
1278	x	$s3,12($inp)
1279	lgr	%r4,$key
1280
1281	bras	$ra,_s390x_AES_encrypt
1282
1283	lm${g}	$inp,$key,2*$SIZE_T($sp)
1284	st	$s0,0($out)
1285	st	$s1,4($out)
1286	st	$s2,8($out)
1287	st	$s3,12($out)
1288
1289	la	$inp,16($inp)
1290	la	$out,16($out)
1291	lghi	$t0,16
1292	lt${g}r	$len,$len
1293	jz	.Lcbc_enc_done
1294	sl${g}r	$len,$t0
1295	brc	4,.Lcbc_enc_tail	# if borrow
1296	j	.Lcbc_enc_loop
1297.align	16
1298.Lcbc_enc_done:
1299	l${g}	$ivp,6*$SIZE_T($sp)
1300	st	$s0,0($ivp)
1301	st	$s1,4($ivp)
1302	st	$s2,8($ivp)
1303	st	$s3,12($ivp)
1304
1305	lm${g}	%r7,$ra,7*$SIZE_T($sp)
1306	br	$ra
1307
1308.align	16
1309.Lcbc_enc_tail:
1310	aghi	$len,15
1311	lghi	$t0,0
1312	stg	$t0,16*$SIZE_T($sp)
1313	stg	$t0,16*$SIZE_T+8($sp)
1314	bras	$t1,3f
1315	mvc	16*$SIZE_T(1,$sp),0($inp)
13163:	ex	$len,0($t1)
1317	lghi	$len,0
1318	la	$inp,16*$SIZE_T($sp)
1319	j	.Lcbc_enc_loop
1320
1321.align	16
1322.Lcbc_decrypt:
1323	larl	$tbl,AES_Td
1324
1325	lg	$t0,0($ivp)
1326	lg	$t1,8($ivp)
1327	stmg	$t0,$t1,16*$SIZE_T($sp)
1328
1329.Lcbc_dec_loop:
1330	stm${g}	$inp,$out,2*$SIZE_T($sp)
1331	llgf	$s0,0($inp)
1332	llgf	$s1,4($inp)
1333	llgf	$s2,8($inp)
1334	llgf	$s3,12($inp)
1335	lgr	%r4,$key
1336
1337	bras	$ra,_s390x_AES_decrypt
1338
1339	lm${g}	$inp,$key,2*$SIZE_T($sp)
1340	sllg	$s0,$s0,32
1341	sllg	$s2,$s2,32
1342	lr	$s0,$s1
1343	lr	$s2,$s3
1344
1345	lg	$t0,0($inp)
1346	lg	$t1,8($inp)
1347	xg	$s0,16*$SIZE_T($sp)
1348	xg	$s2,16*$SIZE_T+8($sp)
1349	lghi	$s1,16
1350	sl${g}r	$len,$s1
1351	brc	4,.Lcbc_dec_tail	# if borrow
1352	brc	2,.Lcbc_dec_done	# if zero
1353	stg	$s0,0($out)
1354	stg	$s2,8($out)
1355	stmg	$t0,$t1,16*$SIZE_T($sp)
1356
1357	la	$inp,16($inp)
1358	la	$out,16($out)
1359	j	.Lcbc_dec_loop
1360
1361.Lcbc_dec_done:
1362	stg	$s0,0($out)
1363	stg	$s2,8($out)
1364.Lcbc_dec_exit:
1365	lm${g}	%r6,$ra,6*$SIZE_T($sp)
1366	stmg	$t0,$t1,0($ivp)
1367
1368	br	$ra
1369
1370.align	16
1371.Lcbc_dec_tail:
1372	aghi	$len,15
1373	stg	$s0,16*$SIZE_T($sp)
1374	stg	$s2,16*$SIZE_T+8($sp)
1375	bras	$s1,4f
1376	mvc	0(1,$out),16*$SIZE_T($sp)
13774:	ex	$len,0($s1)
1378	j	.Lcbc_dec_exit
1379.size	AES_cbc_encrypt,.-AES_cbc_encrypt
1380___
1381}
1382########################################################################
1383# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1384#                     size_t blocks, const AES_KEY *key,
1385#                     const unsigned char *ivec)
# Lexical scope for the CTR generator's register aliases.  As with CBC,
# the caller's (out, blocks) arguments arrive swapped relative to where
# the code wants them, hence the xgr swap at the entry point.  $key and
# $iv0 deliberately alias %r5: $key is only needed until the parameter
# block is built, after which %r5 is reused to hold the IV high half.
{
1387my $inp="%r2";
1388my $out="%r4";	# blocks and out are swapped
1389my $len="%r3";
1390my $key="%r5";	my $iv0="%r5";
1391my $ivp="%r6";
1392my $fp ="%r7";
1393
1394$code.=<<___;
1395.globl	AES_ctr32_encrypt
1396.type	AES_ctr32_encrypt,\@function
1397.align	16
1398AES_ctr32_encrypt:
1399	xgr	%r3,%r4	# flip %r3 and %r4, $out and $len
1400	xgr	%r4,%r3
1401	xgr	%r3,%r4
1402	llgfr	$len,$len	# safe in ctr32 subroutine even in 64-bit case
1403___
# Hardware path 1 (!$softonly): for more than 16 blocks, probe the KMA
# capability vector and, if available, run the whole request through a
# single KMA (GCM engine, HS+LAAD flags, no AAD) with key/iv copied
# into a stack parameter block that is wiped afterwards.  Otherwise
# fall through to .Lctr32_nokma, which builds a page-aligned stack
# buffer of counter blocks and encrypts them with KM, XORing the
# keystream into the data (.Lctr32_km_loop below).
1404$code.=<<___ if (!$softonly);
1405	l	%r0,240($key)
1406	lhi	%r1,16
1407	clr	%r0,%r1
1408	jl	.Lctr32_software
1409
1410	st${g}	$s2,10*$SIZE_T($sp)
1411	st${g}	$s3,11*$SIZE_T($sp)
1412
1413	clr	$len,%r1	# does work even in 64-bit mode
1414	jle	.Lctr32_nokma	# kma is slower for <= 16 blocks
1415
1416	larl	%r1,OPENSSL_s390xcap_P
1417	lr	$s2,%r0
1418	llihh	$s3,0x8000
1419	srlg	$s3,$s3,0($s2)
1420	ng	$s3,S390X_KMA(%r1)	# check kma capability vector
1421	jz	.Lctr32_nokma
1422
1423	l${g}hi	%r1,-$stdframe-112
1424	l${g}r	$s3,$sp
1425	la	$sp,0(%r1,$sp)	# prepare parameter block
1426
1427	lhi	%r1,0x0600
1428	sllg	$len,$len,4
1429	or	%r0,%r1	# set HS and LAAD flags
1430
1431	st${g}	$s3,0($sp)	# backchain
1432	la	%r1,$stdframe($sp)
1433
1434	lmg	$s2,$s3,0($key)	# copy key
1435	stg	$s2,$stdframe+80($sp)
1436	stg	$s3,$stdframe+88($sp)
1437	lmg	$s2,$s3,16($key)
1438	stg	$s2,$stdframe+96($sp)
1439	stg	$s3,$stdframe+104($sp)
1440
1441	lmg	$s2,$s3,0($ivp)	# copy iv
1442	stg	$s2,$stdframe+64($sp)
1443	ahi	$s3,-1	# kma requires counter-1
1444	stg	$s3,$stdframe+72($sp)
1445	st	$s3,$stdframe+12($sp)	# copy counter
1446
1447	lghi	$s2,0	# no AAD
1448	lghi	$s3,0
1449
1450	.long	0xb929a042	# kma $out,$s2,$inp
1451	brc	1,.-4	# pay attention to "partial completion"
1452
1453	stg	%r0,$stdframe+80($sp)	# wipe key
1454	stg	%r0,$stdframe+88($sp)
1455	stg	%r0,$stdframe+96($sp)
1456	stg	%r0,$stdframe+104($sp)
1457	la	$sp,$stdframe+112($sp)
1458
1459	lm${g}	$s2,$s3,10*$SIZE_T($sp)
1460	br	$ra
1461
1462.align	16
1463.Lctr32_nokma:
1464	stm${g}	%r6,$s1,6*$SIZE_T($sp)
1465
1466	slgr	$out,$inp
1467	la	%r1,0($key)	# %r1 is permanent copy of $key
1468	lg	$iv0,0($ivp)	# load ivec
1469	lg	$ivp,8($ivp)
1470
1471	# prepare and allocate stack frame at the top of 4K page
1472	# with 1K reserved for eventual signal handling
1473	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
1474	lghi	$s1,-4096
1475	algr	$s0,$sp
1476	lgr	$fp,$sp
1477	ngr	$s0,$s1	# align at page boundary
1478	slgr	$fp,$s0	# total buffer size
1479	lgr	$s2,$sp
1480	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
1481	slgr	$fp,$s1	# deduct reservation to get usable buffer size
1482	# buffer size is at lest 256 and at most 3072+256-16
1483
1484	la	$sp,1024($s0)	# alloca
1485	srlg	$fp,$fp,4	# convert bytes to blocks, minimum 16
1486	st${g}	$s2,0($sp)	# back-chain
1487	st${g}	$fp,$SIZE_T($sp)
1488
1489	slgr	$len,$fp
1490	brc	1,.Lctr32_hw_switch	# not zero, no borrow
1491	algr	$fp,$len	# input is shorter than allocated buffer
1492	lghi	$len,0
1493	st${g}	$fp,$SIZE_T($sp)
1494
1495.Lctr32_hw_switch:
1496___
# Hardware path 2: KMCTR variant.  Deliberately compiled out via the
# "&& 0" guard -- the comment records that it measured ~12% slower than
# the KM-based loop; kept in the source for reference only.
1497$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
1498	llgfr	$s0,%r0
1499	lgr	$s1,%r1
1500	larl	%r1,OPENSSL_s390xcap_P
1501	llihh	%r0,0x8000	# check if kmctr supports the function code
1502	srlg	%r0,%r0,0($s0)
1503	ng	%r0,S390X_KMCTR(%r1)	# check kmctr capability vector
1504	lgr	%r0,$s0
1505	lgr	%r1,$s1
1506	jz	.Lctr32_km_loop
1507
1508####### kmctr code
1509	algr	$out,$inp	# restore $out
1510	lgr	$s1,$len	# $s1 undertakes $len
1511	j	.Lctr32_kmctr_loop
1512.align	16
1513.Lctr32_kmctr_loop:
1514	la	$s2,16($sp)
1515	lgr	$s3,$fp
1516.Lctr32_kmctr_prepare:
1517	stg	$iv0,0($s2)
1518	stg	$ivp,8($s2)
1519	la	$s2,16($s2)
1520	ahi	$ivp,1	# 32-bit increment, preserves upper half
1521	brct	$s3,.Lctr32_kmctr_prepare
1522
1523	#la	$inp,0($inp)	# inp
1524	sllg	$len,$fp,4	# len
1525	#la	$out,0($out)	# out
1526	la	$s2,16($sp)	# iv
1527	.long	0xb92da042	# kmctr $out,$s2,$inp
1528	brc	1,.-4	# pay attention to "partial completion"
1529
1530	slgr	$s1,$fp
1531	brc	1,.Lctr32_kmctr_loop	# not zero, no borrow
1532	algr	$fp,$s1
1533	lghi	$s1,0
1534	brc	4+1,.Lctr32_kmctr_loop	# not zero
1535
1536	l${g}	$sp,0($sp)
1537	lm${g}	%r6,$s3,6*$SIZE_T($sp)
1538	br	$ra
1539.align	16
1540___
# Hardware path 3: KM loop.  Fill the stack buffer with successive
# counter blocks, encrypt them in one KM call, then XOR the resulting
# keystream with the input.  The buffer is zapped before returning.
1541$code.=<<___ if (!$softonly);
1542.Lctr32_km_loop:
1543	la	$s2,16($sp)
1544	lgr	$s3,$fp
1545.Lctr32_km_prepare:
1546	stg	$iv0,0($s2)
1547	stg	$ivp,8($s2)
1548	la	$s2,16($s2)
1549	ahi	$ivp,1	# 32-bit increment, preserves upper half
1550	brct	$s3,.Lctr32_km_prepare
1551
1552	la	$s0,16($sp)	# inp
1553	sllg	$s1,$fp,4	# len
1554	la	$s2,16($sp)	# out
1555	.long	0xb92e00a8	# km %r10,%r8
1556	brc	1,.-4	# pay attention to "partial completion"
1557
1558	la	$s2,16($sp)
1559	lgr	$s3,$fp
1560	slgr	$s2,$inp
1561.Lctr32_km_xor:
1562	lg	$s0,0($inp)
1563	lg	$s1,8($inp)
1564	xg	$s0,0($s2,$inp)
1565	xg	$s1,8($s2,$inp)
1566	stg	$s0,0($out,$inp)
1567	stg	$s1,8($out,$inp)
1568	la	$inp,16($inp)
1569	brct	$s3,.Lctr32_km_xor
1570
1571	slgr	$len,$fp
1572	brc	1,.Lctr32_km_loop	# not zero, no borrow
1573	algr	$fp,$len
1574	lghi	$len,0
1575	brc	4+1,.Lctr32_km_loop	# not zero
1576
1577	l${g}	$s0,0($sp)
1578	l${g}	$s1,$SIZE_T($sp)
1579	la	$s2,16($sp)
1580.Lctr32_km_zap:
1581	stg	$s0,0($s2)
1582	stg	$s0,8($s2)
1583	la	$s2,16($s2)
1584	brct	$s1,.Lctr32_km_zap
1585
1586	la	$sp,0($s0)
1587	lm${g}	%r6,$s3,6*$SIZE_T($sp)
1588	br	$ra
1589.align	16
1590.Lctr32_software:
1591___
# Software fallback: encrypt the counter block with _s390x_AES_encrypt
# per 16-byte block, XOR with input, bump the 32-bit counter word.
1592$code.=<<___;
1593	stm${g}	$key,$ra,5*$SIZE_T($sp)
1594	sl${g}r	$inp,$out
1595	larl	$tbl,AES_Te
1596	llgf	$t1,12($ivp)
1597
1598.Lctr32_loop:
1599	stm${g}	$inp,$out,2*$SIZE_T($sp)
1600	llgf	$s0,0($ivp)
1601	llgf	$s1,4($ivp)
1602	llgf	$s2,8($ivp)
1603	lgr	$s3,$t1
1604	st	$t1,16*$SIZE_T($sp)
1605	lgr	%r4,$key
1606
1607	bras	$ra,_s390x_AES_encrypt
1608
1609	lm${g}	$inp,$ivp,2*$SIZE_T($sp)
1610	llgf	$t1,16*$SIZE_T($sp)
1611	x	$s0,0($inp,$out)
1612	x	$s1,4($inp,$out)
1613	x	$s2,8($inp,$out)
1614	x	$s3,12($inp,$out)
1615	stm	$s0,$s3,0($out)
1616
1617	la	$out,16($out)
1618	ahi	$t1,1	# 32-bit increment
1619	brct	$len,.Lctr32_loop
1620
1621	lm${g}	%r6,$ra,6*$SIZE_T($sp)
1622	br	$ra
1623.size	AES_ctr32_encrypt,.-AES_ctr32_encrypt
1624___
1625}
1626
1627########################################################################
1628# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
1629#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
1630#	const unsigned char iv[16]);
1631#
# Lexical scope covering the shared XTS bulk helper (_s390x_xts_km) and
# both public entry points (AES_xts_encrypt / AES_xts_decrypt).  $tweak
# is the stack offset of the 16-byte tweak slot used throughout.
{
1633my $inp="%r2";
1634my $out="%r4";	# len and out are swapped
1635my $len="%r3";
1636my $key1="%r5";	# $i1
1637my $key2="%r6";	# $i2
1638my $fp="%r7";	# $i3
1639my $tweak=16*$SIZE_T+16;	# or $stdframe-16, bottom of the frame...
1640
1641$code.=<<___;
1642.type	_s390x_xts_km,\@function
1643.align	16
1644_s390x_xts_km:
1645___
# Fast variant of the helper: if the CPU's KM capability vector shows
# support for function code+32 (the dedicated XTS function codes), run
# the whole bulk through a single KM-XTS call with key material and
# tweak assembled in a parameter block; on return, reload the final
# tweak, wipe the key copy, and report the residual len%16 to the
# caller.  The `if(1)` guard keeps this path unconditionally compiled.
1646$code.=<<___ if(1);
1647	llgfr	$s0,%r0	# put aside the function code
1648	lghi	$s1,0x7f
1649	nr	$s1,%r0
1650	larl	%r1,OPENSSL_s390xcap_P
1651	llihh	%r0,0x8000
1652	srlg	%r0,%r0,32($s1)	# check for 32+function code
1653	ng	%r0,S390X_KM(%r1)	# check km capability vector
1654	lgr	%r0,$s0	# restore the function code
1655	la	%r1,0($key1)	# restore $key1
1656	jz	.Lxts_km_vanilla
1657
1658	lmg	$i2,$i3,$tweak($sp)	# put aside the tweak value
1659	algr	$out,$inp
1659
1661	oill	%r0,32	# switch to xts function code
1662	aghi	$s1,-18	#
1663	sllg	$s1,$s1,3	# (function code - 18)*8, 0 or 16
1664	la	%r1,$tweak-16($sp)
1665	slgr	%r1,$s1	# parameter block position
1666	lmg	$s0,$s3,0($key1)	# load 256 bits of key material,
1667	stmg	$s0,$s3,0(%r1)	# and copy it to parameter block.
1668				# yes, it contains junk and overlaps
1669				# with the tweak in 128-bit case.
1670				# it's done to avoid conditional
1671				# branch.
1672	stmg	$i2,$i3,$tweak($sp)	# "re-seat" the tweak value
1673
1674	.long	0xb92e0042	# km %r4,%r2
1675	brc	1,.-4	# pay attention to "partial completion"
1676
1677	lrvg	$s0,$tweak+0($sp)	# load the last tweak
1678	lrvg	$s1,$tweak+8($sp)
1679	stmg	%r0,%r3,$tweak-32($sp)	# wipe copy of the key
1680
1681	nill	%r0,0xffdf	# switch back to original function code
1682	la	%r1,0($key1)	# restore pointer to $key1
1683	slgr	$out,$inp
1684
1685	llgc	$len,2*$SIZE_T-1($sp)
1686	nill	$len,0x0f	# $len%=16
1687	br	$ra
1688
1689.align	16
1690.Lxts_km_vanilla:
1691___
# Vanilla variant: allocate a page-aligned stack buffer (same alloca
# scheme as the CTR code), fill it with a vector of successive tweaks
# generated by the GF(2^128) doubling (x^128+x^7+x^2+x+1, 0x87), XOR
# tweak into data, encrypt in bulk with KM, XOR tweak back out.  On
# exit the registers $s0/$s1 hold the next little-endian tweak and the
# condition code signals whether a ciphertext-stealing tail remains.
1692$code.=<<___;
1693	# prepare and allocate stack frame at the top of 4K page
1694	# with 1K reserved for eventual signal handling
1695	lghi	$s0,-1024-256-16# guarantee at least 256-bytes buffer
1696	lghi	$s1,-4096
1697	algr	$s0,$sp
1698	lgr	$fp,$sp
1699	ngr	$s0,$s1	# align at page boundary
1700	slgr	$fp,$s0	# total buffer size
1701	lgr	$s2,$sp
1702	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
1703	slgr	$fp,$s1	# deduct reservation to get usable buffer size
1704	# buffer size is at lest 256 and at most 3072+256-16
1705
1706	la	$sp,1024($s0)	# alloca
1707	nill	$fp,0xfff0	# round to 16*n
1708	st${g}	$s2,0($sp)	# back-chain
1709	nill	$len,0xfff0	# redundant
1710	st${g}	$fp,$SIZE_T($sp)
1711
1712	slgr	$len,$fp
1713	brc	1,.Lxts_km_go	# not zero, no borrow
1714	algr	$fp,$len	# input is shorter than allocated buffer
1715	lghi	$len,0
1716	st${g}	$fp,$SIZE_T($sp)
1717
1718.Lxts_km_go:
1719	lrvg	$s0,$tweak+0($s2)	# load the tweak value in little-endian
1720	lrvg	$s1,$tweak+8($s2)
1721
1722	la	$s2,16($sp)	# vector of ascending tweak values
1723	slgr	$s2,$inp
1724	srlg	$s3,$fp,4
1725	j	.Lxts_km_start
1726
1727.Lxts_km_loop:
1728	la	$s2,16($sp)
1729	slgr	$s2,$inp
1730	srlg	$s3,$fp,4
1731.Lxts_km_prepare:
1732	lghi	$i1,0x87
1733	srag	$i2,$s1,63	# broadcast upper bit
1734	ngr	$i1,$i2	# rem
1735	algr	$s0,$s0
1736	alcgr	$s1,$s1
1737	xgr	$s0,$i1
1738.Lxts_km_start:
1739	lrvgr	$i1,$s0	# flip byte order
1740	lrvgr	$i2,$s1
1741	stg	$i1,0($s2,$inp)
1742	stg	$i2,8($s2,$inp)
1743	xg	$i1,0($inp)
1744	xg	$i2,8($inp)
1745	stg	$i1,0($out,$inp)
1746	stg	$i2,8($out,$inp)
1747	la	$inp,16($inp)
1748	brct	$s3,.Lxts_km_prepare
1749
1750	slgr	$inp,$fp	# rewind $inp
1751	la	$s2,0($out,$inp)
1752	lgr	$s3,$fp
1753	.long	0xb92e00aa	# km $s2,$s2
1754	brc	1,.-4	# pay attention to "partial completion"
1755
1756	la	$s2,16($sp)
1757	slgr	$s2,$inp
1758	srlg	$s3,$fp,4
1759.Lxts_km_xor:
1760	lg	$i1,0($out,$inp)
1761	lg	$i2,8($out,$inp)
1762	xg	$i1,0($s2,$inp)
1763	xg	$i2,8($s2,$inp)
1764	stg	$i1,0($out,$inp)
1765	stg	$i2,8($out,$inp)
1766	la	$inp,16($inp)
1767	brct	$s3,.Lxts_km_xor
1768
1769	slgr	$len,$fp
1770	brc	1,.Lxts_km_loop	# not zero, no borrow
1771	algr	$fp,$len
1772	lghi	$len,0
1773	brc	4+1,.Lxts_km_loop	# not zero
1774
1775	l${g}	$i1,0($sp)	# back-chain
1776	llgf	$fp,`2*$SIZE_T-4`($sp)	# bytes used
1777	la	$i2,16($sp)
1778	srlg	$fp,$fp,4
1779.Lxts_km_zap:
1780	stg	$i1,0($i2)
1781	stg	$i1,8($i2)
1782	la	$i2,16($i2)
1783	brct	$fp,.Lxts_km_zap
1784
1785	la	$sp,0($i1)
1786	llgc	$len,2*$SIZE_T-1($i1)
1787	nill	$len,0x0f	# $len%=16
1788	bzr	$ra
1789
1790	# generate one more tweak...
1791	lghi	$i1,0x87
1792	srag	$i2,$s1,63	# broadcast upper bit
1793	ngr	$i1,$i2	# rem
1794	algr	$s0,$s0
1795	alcgr	$s1,$s1
1796	xgr	$s0,$i1
1797
1798	ltr	$len,$len	# clear zero flag
1799	br	$ra
1800.size	_s390x_xts_km,.-_s390x_xts_km
1801
1802.globl	AES_xts_encrypt
1803.type	AES_xts_encrypt,\@function
1804.align	16
1805AES_xts_encrypt:
1806	xgr	%r3,%r4	# flip %r3 and %r4, $out and $len
1807	xgr	%r4,%r3
1808	xgr	%r3,%r4
1809___
# 31-bit ABI only: zero-extend the length register.
1810$code.=<<___ if ($SIZE_T==4);
1811	llgfr	$len,$len
1812___
1813$code.=<<___;
1814	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
1815	srag	$len,$len,4	# formally wrong, because it expands
1816				# sign byte, but who can afford asking
1817				# to process more than 2^63-1 bytes?
1818				# I use it, because it sets condition
1819				# code...
1820	bcr	8,$ra	# abort if zero (i.e. less than 16)
1821___
# Hardware encrypt path: encrypt the IV with $key2 (KM) to form the
# initial tweak, drive the bulk through _s390x_xts_km, then handle a
# non-multiple-of-16 tail with ciphertext stealing
# (.Lxts_enc_km_steal) using one extra tweak left in $s0/$s1.
1822$code.=<<___ if (!$softonly);
1823	llgf	%r0,240($key2)
1824	lhi	%r1,16
1825	clr	%r0,%r1
1826	jl	.Lxts_enc_software
1827
1828	st${g}	$ra,5*$SIZE_T($sp)
1829	stm${g}	%r6,$s3,6*$SIZE_T($sp)
1830
1831	sllg	$len,$len,4	# $len&=~15
1832	slgr	$out,$inp
1833
1834	# generate the tweak value
1835	l${g}	$s3,$stdframe($sp)	# pointer to iv
1836	la	$s2,$tweak($sp)
1837	lmg	$s0,$s1,0($s3)
1838	lghi	$s3,16
1839	stmg	$s0,$s1,0($s2)
1840	la	%r1,0($key2)	# $key2 is not needed anymore
1841	.long	0xb92e00aa	# km $s2,$s2, generate the tweak
1842	brc	1,.-4	# can this happen?
1843
1844	l	%r0,240($key1)
1845	la	%r1,0($key1)	# $key1 is not needed anymore
1846	bras	$ra,_s390x_xts_km
1847	jz	.Lxts_enc_km_done
1848
1849	aghi	$inp,-16	# take one step back
1850	la	$i3,0($out,$inp)	# put aside real $out
1851.Lxts_enc_km_steal:
1852	llgc	$i1,16($inp)
1853	llgc	$i2,0($out,$inp)
1854	stc	$i1,0($out,$inp)
1855	stc	$i2,16($out,$inp)
1856	la	$inp,1($inp)
1857	brct	$len,.Lxts_enc_km_steal
1858
1859	la	$s2,0($i3)
1860	lghi	$s3,16
1861	lrvgr	$i1,$s0	# flip byte order
1862	lrvgr	$i2,$s1
1863	xg	$i1,0($s2)
1864	xg	$i2,8($s2)
1865	stg	$i1,0($s2)
1866	stg	$i2,8($s2)
1867	.long	0xb92e00aa	# km $s2,$s2
1868	brc	1,.-4	# can this happen?
1869	lrvgr	$i1,$s0	# flip byte order
1870	lrvgr	$i2,$s1
1871	xg	$i1,0($i3)
1872	xg	$i2,8($i3)
1873	stg	$i1,0($i3)
1874	stg	$i2,8($i3)
1875
1876.Lxts_enc_km_done:
1877	stg	$sp,$tweak+0($sp)	# wipe tweak
1878	stg	$sp,$tweak+8($sp)
1879	l${g}	$ra,5*$SIZE_T($sp)
1880	lm${g}	%r6,$s3,6*$SIZE_T($sp)
1881	br	$ra
1882.align	16
1883.Lxts_enc_software:
1884___
# Software encrypt path: tweak generated with _s390x_AES_encrypt under
# $key2, then per-block encrypt-and-XOR loop with the tweak doubled in
# little-endian each iteration; stealing tail handled at
# .Lxts_enc_steal.
1885$code.=<<___;
1886	stm${g}	%r6,$ra,6*$SIZE_T($sp)
1887
1888	slgr	$out,$inp
1889
1890	l${g}	$s3,$stdframe($sp)	# ivp
1891	llgf	$s0,0($s3)	# load iv
1892	llgf	$s1,4($s3)
1893	llgf	$s2,8($s3)
1894	llgf	$s3,12($s3)
1895	stm${g}	%r2,%r5,2*$SIZE_T($sp)
1896	la	$key,0($key2)
1897	larl	$tbl,AES_Te
1898	bras	$ra,_s390x_AES_encrypt	# generate the tweak
1899	lm${g}	%r2,%r5,2*$SIZE_T($sp)
1900	stm	$s0,$s3,$tweak($sp)	# save the tweak
1901	j	.Lxts_enc_enter
1902
1903.align	16
1904.Lxts_enc_loop:
1905	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
1906	lrvg	$s3,$tweak+8($sp)
1907	lghi	%r1,0x87
1908	srag	%r0,$s3,63	# broadcast upper bit
1909	ngr	%r1,%r0	# rem
1910	algr	$s1,$s1
1911	alcgr	$s3,$s3
1912	xgr	$s1,%r1
1913	lrvgr	$s1,$s1	# flip byte order
1914	lrvgr	$s3,$s3
1915	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
1916	stg	$s1,$tweak+0($sp)	# save the tweak
1917	llgfr	$s1,$s1
1918	srlg	$s2,$s3,32
1919	stg	$s3,$tweak+8($sp)
1920	llgfr	$s3,$s3
1921	la	$inp,16($inp)	# $inp+=16
1922.Lxts_enc_enter:
1923	x	$s0,0($inp)	# ^=*($inp)
1924	x	$s1,4($inp)
1925	x	$s2,8($inp)
1926	x	$s3,12($inp)
1927	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
1928	la	$key,0($key1)
1929	bras	$ra,_s390x_AES_encrypt
1930	lm${g}	%r2,%r5,2*$SIZE_T($sp)
1931	x	$s0,$tweak+0($sp)	# ^=tweak
1932	x	$s1,$tweak+4($sp)
1933	x	$s2,$tweak+8($sp)
1934	x	$s3,$tweak+12($sp)
1935	st	$s0,0($out,$inp)
1936	st	$s1,4($out,$inp)
1937	st	$s2,8($out,$inp)
1938	st	$s3,12($out,$inp)
1939	brct${g}	$len,.Lxts_enc_loop
1940
1941	llgc	$len,`2*$SIZE_T-1`($sp)
1942	nill	$len,0x0f	# $len%16
1943	jz	.Lxts_enc_done
1944
1945	la	$i3,0($inp,$out)	# put aside real $out
1946.Lxts_enc_steal:
1947	llgc	%r0,16($inp)
1948	llgc	%r1,0($out,$inp)
1949	stc	%r0,0($out,$inp)
1950	stc	%r1,16($out,$inp)
1951	la	$inp,1($inp)
1952	brct	$len,.Lxts_enc_steal
1953	la	$out,0($i3)	# restore real $out
1954
1955	# generate last tweak...
1956	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
1957	lrvg	$s3,$tweak+8($sp)
1958	lghi	%r1,0x87
1959	srag	%r0,$s3,63	# broadcast upper bit
1960	ngr	%r1,%r0	# rem
1961	algr	$s1,$s1
1962	alcgr	$s3,$s3
1963	xgr	$s1,%r1
1964	lrvgr	$s1,$s1	# flip byte order
1965	lrvgr	$s3,$s3
1966	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
1967	stg	$s1,$tweak+0($sp)	# save the tweak
1968	llgfr	$s1,$s1
1969	srlg	$s2,$s3,32
1970	stg	$s3,$tweak+8($sp)
1971	llgfr	$s3,$s3
1972
1973	x	$s0,0($out)	# ^=*(inp)|stolen cipther-text
1974	x	$s1,4($out)
1975	x	$s2,8($out)
1976	x	$s3,12($out)
1977	st${g}	$out,4*$SIZE_T($sp)
1978	la	$key,0($key1)
1979	bras	$ra,_s390x_AES_encrypt
1980	l${g}	$out,4*$SIZE_T($sp)
1981	x	$s0,`$tweak+0`($sp)	# ^=tweak
1982	x	$s1,`$tweak+4`($sp)
1983	x	$s2,`$tweak+8`($sp)
1984	x	$s3,`$tweak+12`($sp)
1985	st	$s0,0($out)
1986	st	$s1,4($out)
1987	st	$s2,8($out)
1988	st	$s3,12($out)
1989
1990.Lxts_enc_done:
1991	stg	$sp,$tweak+0($sp)	# wipe tweak
1992	stg	$sp,$tweak+8($sp)
1993	lm${g}	%r6,$ra,6*$SIZE_T($sp)
1994	br	$ra
1995.size	AES_xts_encrypt,.-AES_xts_encrypt
1996___
1997# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
1998#	size_t len, const AES_KEY *key1, const AES_KEY *key2,
1999#	const unsigned char iv[16]);
2000#
2001$code.=<<___;
2002.globl	AES_xts_decrypt
2003.type	AES_xts_decrypt,\@function
2004.align	16
2005AES_xts_decrypt:
2006	xgr	%r3,%r4	# flip %r3 and %r4, $out and $len
2007	xgr	%r4,%r3
2008	xgr	%r3,%r4
2009___
# 31-bit ABI only: zero-extend the length register.
2010$code.=<<___ if ($SIZE_T==4);
2011	llgfr	$len,$len
2012___
2013$code.=<<___;
2014	st${g}	$len,1*$SIZE_T($sp)	# save copy of $len
2015	aghi	$len,-16
2016	bcr	4,$ra	# abort if less than zero. formally
2017			# wrong, because $len is unsigned,
2018			# but who can afford asking to
2019			# process more than 2^63-1 bytes?
2020	tmll	$len,0x0f
2021	jnz	.Lxts_dec_proceed
2022	aghi	$len,16
2023.Lxts_dec_proceed:
2024___
# Hardware decrypt path: same structure as encrypt but note the XTS
# decrypt quirk -- with ciphertext stealing the LAST full block must be
# processed with the SECOND tweak (.Lxts_dec_km_2ndtweak) before the
# stolen tail is decrypted with the first tweak kept in $s2/$s3.
2025$code.=<<___ if (!$softonly);
2026	llgf	%r0,240($key2)
2027	lhi	%r1,16
2028	clr	%r0,%r1
2029	jl	.Lxts_dec_software
2030
2031	st${g}	$ra,5*$SIZE_T($sp)
2032	stm${g}	%r6,$s3,6*$SIZE_T($sp)
2033
2034	nill	$len,0xfff0	# $len&=~15
2035	slgr	$out,$inp
2036
2037	# generate the tweak value
2038	l${g}	$s3,$stdframe($sp)	# pointer to iv
2039	la	$s2,$tweak($sp)
2040	lmg	$s0,$s1,0($s3)
2041	lghi	$s3,16
2042	stmg	$s0,$s1,0($s2)
2043	la	%r1,0($key2)	# $key2 is not needed past this point
2044	.long	0xb92e00aa	# km $s2,$s2, generate the tweak
2045	brc	1,.-4	# can this happen?
2046
2047	l	%r0,240($key1)
2048	la	%r1,0($key1)	# $key1 is not needed anymore
2049
2050	ltgr	$len,$len
2051	jz	.Lxts_dec_km_short
2052	bras	$ra,_s390x_xts_km
2053	jz	.Lxts_dec_km_done
2054
2055	lrvgr	$s2,$s0	# make copy in reverse byte order
2056	lrvgr	$s3,$s1
2057	j	.Lxts_dec_km_2ndtweak
2058
2059.Lxts_dec_km_short:
2060	llgc	$len,`2*$SIZE_T-1`($sp)
2061	nill	$len,0x0f	# $len%=16
2062	lrvg	$s0,$tweak+0($sp)	# load the tweak
2063	lrvg	$s1,$tweak+8($sp)
2064	lrvgr	$s2,$s0	# make copy in reverse byte order
2065	lrvgr	$s3,$s1
2066
2067.Lxts_dec_km_2ndtweak:
2068	lghi	$i1,0x87
2069	srag	$i2,$s1,63	# broadcast upper bit
2070	ngr	$i1,$i2	# rem
2071	algr	$s0,$s0
2072	alcgr	$s1,$s1
2073	xgr	$s0,$i1
2074	lrvgr	$i1,$s0	# flip byte order
2075	lrvgr	$i2,$s1
2076
2077	xg	$i1,0($inp)
2078	xg	$i2,8($inp)
2079	stg	$i1,0($out,$inp)
2080	stg	$i2,8($out,$inp)
2081	la	$i2,0($out,$inp)
2082	lghi	$i3,16
2083	.long	0xb92e0066	# km $i2,$i2
2084	brc	1,.-4	# can this happen?
2085	lrvgr	$i1,$s0
2086	lrvgr	$i2,$s1
2087	xg	$i1,0($out,$inp)
2088	xg	$i2,8($out,$inp)
2089	stg	$i1,0($out,$inp)
2090	stg	$i2,8($out,$inp)
2091
2092	la	$i3,0($out,$inp)	# put aside real $out
2093.Lxts_dec_km_steal:
2094	llgc	$i1,16($inp)
2095	llgc	$i2,0($out,$inp)
2096	stc	$i1,0($out,$inp)
2097	stc	$i2,16($out,$inp)
2098	la	$inp,1($inp)
2099	brct	$len,.Lxts_dec_km_steal
2100
2101	lgr	$s0,$s2
2102	lgr	$s1,$s3
2103	xg	$s0,0($i3)
2104	xg	$s1,8($i3)
2105	stg	$s0,0($i3)
2106	stg	$s1,8($i3)
2107	la	$s0,0($i3)
2108	lghi	$s1,16
2109	.long	0xb92e0088	# km $s0,$s0
2110	brc	1,.-4	# can this happen?
2111	xg	$s2,0($i3)
2112	xg	$s3,8($i3)
2113	stg	$s2,0($i3)
2114	stg	$s3,8($i3)
2115.Lxts_dec_km_done:
2116	stg	$sp,$tweak+0($sp)	# wipe tweak
2117	stg	$sp,$tweak+8($sp)
2118	l${g}	$ra,5*$SIZE_T($sp)
2119	lm${g}	%r6,$s3,6*$SIZE_T($sp)
2120	br	$ra
2121.align	16
2122.Lxts_dec_software:
2123___
# Software decrypt path: mirror of the software encrypt loop but using
# AES_Td/_s390x_AES_decrypt, with the pair-of-tweaks dance for the
# stealing tail (1st tweak saved at $tweak, 2nd at $tweak-16).
2124$code.=<<___;
2125	stm${g}	%r6,$ra,6*$SIZE_T($sp)
2126
2127	srlg	$len,$len,4
2128	slgr	$out,$inp
2129
2130	l${g}	$s3,$stdframe($sp)	# ivp
2131	llgf	$s0,0($s3)	# load iv
2132	llgf	$s1,4($s3)
2133	llgf	$s2,8($s3)
2134	llgf	$s3,12($s3)
2135	stm${g}	%r2,%r5,2*$SIZE_T($sp)
2136	la	$key,0($key2)
2137	larl	$tbl,AES_Te
2138	bras	$ra,_s390x_AES_encrypt	# generate the tweak
2139	lm${g}	%r2,%r5,2*$SIZE_T($sp)
2140	larl	$tbl,AES_Td
2141	lt${g}r	$len,$len
2142	stm	$s0,$s3,$tweak($sp)	# save the tweak
2143	jz	.Lxts_dec_short
2144	j	.Lxts_dec_enter
2145
2146.align	16
2147.Lxts_dec_loop:
2148	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
2149	lrvg	$s3,$tweak+8($sp)
2150	lghi	%r1,0x87
2151	srag	%r0,$s3,63	# broadcast upper bit
2152	ngr	%r1,%r0	# rem
2153	algr	$s1,$s1
2154	alcgr	$s3,$s3
2155	xgr	$s1,%r1
2156	lrvgr	$s1,$s1	# flip byte order
2157	lrvgr	$s3,$s3
2158	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
2159	stg	$s1,$tweak+0($sp)	# save the tweak
2160	llgfr	$s1,$s1
2161	srlg	$s2,$s3,32
2162	stg	$s3,$tweak+8($sp)
2163	llgfr	$s3,$s3
2164.Lxts_dec_enter:
2165	x	$s0,0($inp)	# tweak^=*(inp)
2166	x	$s1,4($inp)
2167	x	$s2,8($inp)
2168	x	$s3,12($inp)
2169	stm${g}	%r2,%r3,2*$SIZE_T($sp)	# only two registers are changing
2170	la	$key,0($key1)
2171	bras	$ra,_s390x_AES_decrypt
2172	lm${g}	%r2,%r5,2*$SIZE_T($sp)
2173	x	$s0,$tweak+0($sp)	# ^=tweak
2174	x	$s1,$tweak+4($sp)
2175	x	$s2,$tweak+8($sp)
2176	x	$s3,$tweak+12($sp)
2177	st	$s0,0($out,$inp)
2178	st	$s1,4($out,$inp)
2179	st	$s2,8($out,$inp)
2180	st	$s3,12($out,$inp)
2181	la	$inp,16($inp)
2182	brct${g}	$len,.Lxts_dec_loop
2183
2184	llgc	$len,`2*$SIZE_T-1`($sp)
2185	nill	$len,0x0f	# $len%16
2186	jz	.Lxts_dec_done
2187
2188	# generate pair of tweaks...
2189	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
2190	lrvg	$s3,$tweak+8($sp)
2191	lghi	%r1,0x87
2192	srag	%r0,$s3,63	# broadcast upper bit
2193	ngr	%r1,%r0	# rem
2194	algr	$s1,$s1
2195	alcgr	$s3,$s3
2196	xgr	$s1,%r1
2197	lrvgr	$i2,$s1	# flip byte order
2198	lrvgr	$i3,$s3
2199	stmg	$i2,$i3,$tweak($sp)	# save the 1st tweak
2200	j	.Lxts_dec_2ndtweak
2201
2202.align	16
2203.Lxts_dec_short:
2204	llgc	$len,`2*$SIZE_T-1`($sp)
2205	nill	$len,0x0f	# $len%16
2206	lrvg	$s1,$tweak+0($sp)	# load the tweak in little-endian
2207	lrvg	$s3,$tweak+8($sp)
2208.Lxts_dec_2ndtweak:
2209	lghi	%r1,0x87
2210	srag	%r0,$s3,63	# broadcast upper bit
2211	ngr	%r1,%r0	# rem
2212	algr	$s1,$s1
2213	alcgr	$s3,$s3
2214	xgr	$s1,%r1
2215	lrvgr	$s1,$s1	# flip byte order
2216	lrvgr	$s3,$s3
2217	srlg	$s0,$s1,32	# smash the tweak to 4x32-bits
2218	stg	$s1,$tweak-16+0($sp)	# save the 2nd tweak
2219	llgfr	$s1,$s1
2220	srlg	$s2,$s3,32
2221	stg	$s3,$tweak-16+8($sp)
2222	llgfr	$s3,$s3
2223
2224	x	$s0,0($inp)	# tweak_the_2nd^=*(inp)
2225	x	$s1,4($inp)
2226	x	$s2,8($inp)
2227	x	$s3,12($inp)
2228	stm${g}	%r2,%r3,2*$SIZE_T($sp)
2229	la	$key,0($key1)
2230	bras	$ra,_s390x_AES_decrypt
2231	lm${g}	%r2,%r5,2*$SIZE_T($sp)
2232	x	$s0,$tweak-16+0($sp)	# ^=tweak_the_2nd
2233	x	$s1,$tweak-16+4($sp)
2234	x	$s2,$tweak-16+8($sp)
2235	x	$s3,$tweak-16+12($sp)
2236	st	$s0,0($out,$inp)
2237	st	$s1,4($out,$inp)
2238	st	$s2,8($out,$inp)
2239	st	$s3,12($out,$inp)
2240
2241	la	$i3,0($out,$inp)	# put aside real $out
2242.Lxts_dec_steal:
2243	llgc	%r0,16($inp)
2244	llgc	%r1,0($out,$inp)
2245	stc	%r0,0($out,$inp)
2246	stc	%r1,16($out,$inp)
2247	la	$inp,1($inp)
2248	brct	$len,.Lxts_dec_steal
2249	la	$out,0($i3)	# restore real $out
2250
2251	lm	$s0,$s3,$tweak($sp)	# load the 1st tweak
2252	x	$s0,0($out)	# tweak^=*(inp)|stolen cipher-text
2253	x	$s1,4($out)
2254	x	$s2,8($out)
2255	x	$s3,12($out)
2256	st${g}	$out,4*$SIZE_T($sp)
2257	la	$key,0($key1)
2258	bras	$ra,_s390x_AES_decrypt
2259	l${g}	$out,4*$SIZE_T($sp)
2260	x	$s0,$tweak+0($sp)	# ^=tweak
2261	x	$s1,$tweak+4($sp)
2262	x	$s2,$tweak+8($sp)
2263	x	$s3,$tweak+12($sp)
2264	st	$s0,0($out)
2265	st	$s1,4($out)
2266	st	$s2,8($out)
2267	st	$s3,12($out)
2268	stg	$sp,$tweak-16+0($sp)	# wipe 2nd tweak
2269	stg	$sp,$tweak-16+8($sp)
2270.Lxts_dec_done:
2271	stg	$sp,$tweak+0($sp)	# wipe tweak
2272	stg	$sp,$tweak+8($sp)
2273	lm${g}	%r6,$ra,6*$SIZE_T($sp)
2274	br	$ra
2275.size	AES_xts_decrypt,.-AES_xts_decrypt
2276___
2277}
# Append the identification banner to the generated assembly.
2278$code.=<<___;
2279.string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
2281
# Post-process: every `...` span accumulated in $code is a Perl
# expression (e.g. `$stdframe+$SIZE_T-4`) -- evaluate it in place
# (/e with eval) so the emitted assembly contains literal numbers,
# then write the result to stdout.  The checked close forces a flush
# and surfaces any buffered write error.
2282$code =~ s/\`([^\`]*)\`/eval $1/gem;
2283print $code;
2284close STDOUT or die "error closing STDOUT: $!";	# force flush
# NOTE(review): the lines below are Trac web-page footer residue from the
# repository-browser export (a navigation hint in Chinese, roughly "Note:
# see TracBrowser for help on using the repository browser", plus an Oracle
# copyright banner).  They are not part of the original source file and are
# commented out here so they cannot be mistaken for Perl code.
# 注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器
#
# © 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette