
[PATCH 1/3] Add POWER9 little-endian variant of PPC AES implementation
* configure.ac: Add 'rijndael-ppc9le.lo'.
* cipher/Makefile.am: Add 'rijndael-ppc9le.c', 'rijndael-ppc-common.h'
and 'rijndael-ppc-functions.h'.
* cipher/rijndael-internal.h (USE_PPC_CRYPTO_WITH_PPC9LE): New.
(RIJNDAEL_context_s): Add 'use_ppc9le_crypto'.
* cipher/rijndael.c (_gcry_aes_ppc9le_encrypt)
(_gcry_aes_ppc9le_decrypt, _gcry_aes_ppc9le_cfb_enc)
(_gcry_aes_ppc9le_cfb_dec, _gcry_aes_ppc9le_ctr_enc)
(_gcry_aes_ppc9le_cbc_enc, _gcry_aes_ppc9le_cbc_dec)
(_gcry_aes_ppc9le_ocb_crypt, _gcry_aes_ppc9le_ocb_auth)
(_gcry_aes_ppc9le_xts_crypt): New.
(do_setkey, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
(_gcry_aes_ctr_enc, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec)
(_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth, _gcry_aes_xts_crypt)
[USE_PPC_CRYPTO_WITH_PPC9LE]: New.
* cipher/rijndael-ppc.c: Split common code to headers
'rijndael-ppc-common.h' and 'rijndael-ppc-functions.h'.
* cipher/rijndael-ppc-common.h: Split from 'rijndael-ppc.c'.
(asm_add_uint64, asm_sra_int64, asm_swap_uint64_halfs): New.
* cipher/rijndael-ppc-functions.h: Split from 'rijndael-ppc.c'.
(CFB_ENC_FUNC, CBC_ENC_FUNC): Unroll loop by 2.
(XTS_CRYPT_FUNC, GEN_TWEAK): Tweak generation without vperm
instruction.
* cipher/rijndael-ppc9le.c: New.
--

Provide a POWER9 little-endian optimized variant of the PPC vcrypto AES
implementation. This implementation uses the 'lxvb16x' and 'stxvb16x'
instructions to load/store vectors directly in big-endian byte order.
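
The POWER9-specific part comes down to the vector load/store helpers. As a
rough sketch (assuming the new 'rijndael-ppc9le.c' follows the asm_* helper
style of 'rijndael-ppc.c'; see the new file for the exact code), the
asm_load_be_noswap/asm_store_be_noswap helpers used by the VEC_LOAD_BE /
VEC_STORE_BE macros can be implemented as:

  static ASM_FUNC_ATTR_INLINE block
  asm_load_be_noswap (unsigned long offset, const void *ptr)
  {
    block vec;
    /* lxvb16x loads 16 bytes in big-endian byte order regardless of the
     * processor endianness, so no vperm-based byte swap is needed. */
    __asm__ volatile ("lxvb16x %x0,%1,%2\n\t"
                      : "=wa" (vec)
                      : "r" (offset), "r" ((uintptr_t)ptr)
                      : "memory", "r0");
    return vec;
  }

  static ASM_FUNC_ATTR_INLINE void
  asm_store_be_noswap (block vec, unsigned long offset, void *ptr)
  {
    /* stxvb16x is the matching big-endian-order 16-byte store. */
    __asm__ volatile ("stxvb16x %x0,%1,%2\n\t"
                      :
                      : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
                      : "memory", "r0");
  }

With these, asm_be_swap() can become a no-op on the POWER9 path and the
generic code in 'rijndael-ppc-functions.h' is shared unchanged.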

Benchmark on POWER9 (~3.8GHz):

Before:
AES | nanosecs/byte mebibytes/sec cycles/byte
CBC enc | 1.04 ns/B 918.7 MiB/s 3.94 c/B
CBC dec | 0.222 ns/B 4292 MiB/s 0.844 c/B
CFB enc | 1.04 ns/B 916.9 MiB/s 3.95 c/B
CFB dec | 0.224 ns/B 4252 MiB/s 0.852 c/B
CTR enc | 0.226 ns/B 4218 MiB/s 0.859 c/B
CTR dec | 0.225 ns/B 4233 MiB/s 0.856 c/B
XTS enc | 0.500 ns/B 1907 MiB/s 1.90 c/B
XTS dec | 0.494 ns/B 1932 MiB/s 1.88 c/B
OCB enc | 0.288 ns/B 3312 MiB/s 1.09 c/B
OCB dec | 0.292 ns/B 3266 MiB/s 1.11 c/B
OCB auth | 0.267 ns/B 3567 MiB/s 1.02 c/B

After (CTR, OCB, CBC-dec & CFB-dec ~15% faster; XTS ~8% faster):
AES | nanosecs/byte mebibytes/sec cycles/byte
CBC enc | 1.04 ns/B 914.2 MiB/s 3.96 c/B
CBC dec | 0.191 ns/B 4984 MiB/s 0.727 c/B
CFB enc | 1.03 ns/B 930.0 MiB/s 3.90 c/B
CFB dec | 0.194 ns/B 4906 MiB/s 0.739 c/B
CTR enc | 0.196 ns/B 4868 MiB/s 0.744 c/B
CTR dec | 0.197 ns/B 4834 MiB/s 0.750 c/B
XTS enc | 0.460 ns/B 2075 MiB/s 1.75 c/B
XTS dec | 0.455 ns/B 2097 MiB/s 1.73 c/B
OCB enc | 0.250 ns/B 3812 MiB/s 0.951 c/B
OCB dec | 0.253 ns/B 3764 MiB/s 0.963 c/B
OCB auth | 0.232 ns/B 4106 MiB/s 0.883 c/B
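
For completeness, the runtime selection in do_setkey() is expected to look
roughly like the sketch below (sketch only; the HWF_PPC_VCRYPTO and
HWF_PPC_ARCH_3_00 hardware-feature checks are an assumption, mirroring the
existing USE_PPC_CRYPTO block):

  #ifdef USE_PPC_CRYPTO_WITH_PPC9LE
    else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00))
      {
        /* POWER9 LE vcrypto path: use the ppc9le bulk functions. */
        ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt;
        ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt;
        ctx->prefetch_enc_fn = NULL;
        ctx->prefetch_dec_fn = NULL;
        ctx->use_ppc9le_crypto = 1;
      }
  #endif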

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 10a5ab62f..ef83cc741 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -99,7 +99,8 @@ EXTRA_libcipher_la_SOURCES = \
rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \
rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \
- rijndael-ppc.c \
+ rijndael-ppc.c rijndael-ppc9le.c \
+ rijndael-ppc-common.h rijndael-ppc-functions.h \
rmd160.c \
rsa.c \
salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
@@ -221,6 +222,12 @@ rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile
rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile
`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `

+rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile
`echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `

diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 5150a69d7..bdd3bee14 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -105,13 +105,18 @@
#endif /* ENABLE_ARM_CRYPTO_SUPPORT */

/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
- * accelerated code. */
+ * accelerated code. USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to
+ * enable POWER9 optimized variant. */
#undef USE_PPC_CRYPTO
+#undef USE_PPC_CRYPTO_WITH_PPC9LE
#ifdef ENABLE_PPC_CRYPTO_SUPPORT
# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
# if __GNUC__ >= 4
# define USE_PPC_CRYPTO 1
+# if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00)
+# define USE_PPC_CRYPTO_WITH_PPC9LE 1
+# endif
# endif
# endif
#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
@@ -169,6 +174,9 @@ typedef struct RIJNDAEL_context_s
#ifdef USE_PPC_CRYPTO
unsigned int use_ppc_crypto:1; /* PowerPC crypto shall be used. */
#endif /*USE_PPC_CRYPTO*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ unsigned int use_ppc9le_crypto:1; /* POWER9 LE crypto shall be used. */
+#endif
rijndael_cryptfn_t encrypt_fn;
rijndael_cryptfn_t decrypt_fn;
rijndael_prefetchfn_t prefetch_enc_fn;
diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h
new file mode 100644
index 000000000..165dd9f71
--- /dev/null
+++ b/cipher/rijndael-ppc-common.h
@@ -0,0 +1,326 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#ifndef G10_RIJNDAEL_PPC_COMMON_H
+#define G10_RIJNDAEL_PPC_COMMON_H
+
+#include <altivec.h>
+
+
+typedef vector unsigned char block;
+
+typedef union
+{
+ u32 data32[4];
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#define ALIGNED_LOAD(in_ptr, offs) \
+ (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))
+
+#define ALIGNED_STORE(out_ptr, offs, vec) \
+ (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))
+
+#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))
+
+#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
+ (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
+ bige_const))
+
+#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
+ (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))
+
+#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
+ (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
+ (void *)(out_ptr)))
+
+#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
+ (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
+
+
+#define ROUND_KEY_VARIABLES \
+ block rkey0, rkeylast
+
+#define PRELOAD_ROUND_KEYS(nrounds) \
+ do { \
+ rkey0 = ALIGNED_LOAD (rk, 0); \
+ rkeylast = ALIGNED_LOAD (rk, nrounds); \
+ } while (0)
+
+#define AES_ENCRYPT(blk, nrounds) \
+ do { \
+ blk ^= rkey0; \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+ if (nrounds >= 12) \
+ { \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+ if (nrounds > 12) \
+ { \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+ blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+ } \
+ } \
+ blk = asm_cipherlast_be (blk, rkeylast); \
+ } while (0)
+
+#define AES_DECRYPT(blk, nrounds) \
+ do { \
+ blk ^= rkey0; \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+ if (nrounds >= 12) \
+ { \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+ if (nrounds > 12) \
+ { \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+ blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+ } \
+ } \
+ blk = asm_ncipherlast_be (blk, rkeylast); \
+ } while (0)
+
+
+#define ROUND_KEY_VARIABLES_ALL \
+ block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
+ rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast
+
+#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
+ do { \
+ rkey0 = ALIGNED_LOAD (rk, 0); \
+ rkey1 = ALIGNED_LOAD (rk, 1); \
+ rkey2 = ALIGNED_LOAD (rk, 2); \
+ rkey3 = ALIGNED_LOAD (rk, 3); \
+ rkey4 = ALIGNED_LOAD (rk, 4); \
+ rkey5 = ALIGNED_LOAD (rk, 5); \
+ rkey6 = ALIGNED_LOAD (rk, 6); \
+ rkey7 = ALIGNED_LOAD (rk, 7); \
+ rkey8 = ALIGNED_LOAD (rk, 8); \
+ rkey9 = ALIGNED_LOAD (rk, 9); \
+ if (nrounds >= 12) \
+ { \
+ rkey10 = ALIGNED_LOAD (rk, 10); \
+ rkey11 = ALIGNED_LOAD (rk, 11); \
+ if (nrounds > 12) \
+ { \
+ rkey12 = ALIGNED_LOAD (rk, 12); \
+ rkey13 = ALIGNED_LOAD (rk, 13); \
+ } \
+ } \
+ rkeylast = ALIGNED_LOAD (rk, nrounds); \
+ } while (0)
+
+#define AES_ENCRYPT_ALL(blk, nrounds) \
+ do { \
+ blk ^= rkey0; \
+ blk = asm_cipher_be (blk, rkey1); \
+ blk = asm_cipher_be (blk, rkey2); \
+ blk = asm_cipher_be (blk, rkey3); \
+ blk = asm_cipher_be (blk, rkey4); \
+ blk = asm_cipher_be (blk, rkey5); \
+ blk = asm_cipher_be (blk, rkey6); \
+ blk = asm_cipher_be (blk, rkey7); \
+ blk = asm_cipher_be (blk, rkey8); \
+ blk = asm_cipher_be (blk, rkey9); \
+ if (nrounds >= 12) \
+ { \
+ blk = asm_cipher_be (blk, rkey10); \
+ blk = asm_cipher_be (blk, rkey11); \
+ if (nrounds > 12) \
+ { \
+ blk = asm_cipher_be (blk, rkey12); \
+ blk = asm_cipher_be (blk, rkey13); \
+ } \
+ } \
+ blk = asm_cipherlast_be (blk, rkeylast); \
+ } while (0)
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_aligned_ld(unsigned long offset, const void *ptr)
+{
+ block vec;
+ __asm__ volatile ("lvx %0,%1,%2\n\t"
+ : "=v" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_aligned_st(block vec, unsigned long offset, void *ptr)
+{
+ __asm__ volatile ("stvx %0,%1,%2\n\t"
+ :
+ : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_vperm1(block vec, block mask)
+{
+ block o;
+ __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
+ : "=v" (o)
+ : "v" (vec), "v" (mask));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint128(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vadduqm %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint64(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vaddudm %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_sra_int64(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vsrad %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_swap_uint64_halfs(block a)
+{
+ block res;
+ __asm__ volatile ("xxswapd %x0, %x1"
+ : "=wa" (res)
+ : "wa" (a));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_xor(block a, block b)
+{
+ block res;
+ __asm__ volatile ("vxor %0,%1,%2\n\t"
+ : "=v" (res)
+ : "v" (a), "v" (b));
+ return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipher_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vcipher %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipherlast_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipher_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vncipher %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipherlast_be(block b, block rk)
+{
+ block o;
+ __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
+ : "=v" (o)
+ : "v" (b), "v" (rk));
+ return o;
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
+{
+ u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+ u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+ int rounds = ctx->rounds;
+ int rr;
+ int r;
+
+ for (r = 0, rr = rounds; r <= rounds; r++, rr--)
+ {
+ ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
+ }
+}
+
+#endif /* G10_RIJNDAEL_PPC_COMMON_H */
diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h
new file mode 100644
index 000000000..72f31852b
--- /dev/null
+++ b/cipher/rijndael-ppc-functions.h
@@ -0,0 +1,2020 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+{
+ const block bige_const = asm_load_be_const();
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block b;
+
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ AES_ENCRYPT (b, rounds);
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ return 0; /* does not use stack */
+}
+
+
+unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+ unsigned char *out,
+ const unsigned char *in)
+{
+ const block bige_const = asm_load_be_const();
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block b;
+
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ AES_DECRYPT (b, rounds);
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ return 0; /* does not use stack */
+}
+
+
+void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES_ALL;
+ block rkeylast_orig;
+ block iv;
+
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS_ALL (rounds);
+ rkeylast_orig = rkeylast;
+
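+ /* CFB encryption is inherently serial, so unroll only by 2: the next
+ * input block is loaded early and folded into rkeylast, so that the
+ * final vcipherlast produces C_i = E(IV_i) ^ P_i directly. */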
+ for (; nblocks >= 2; nblocks -= 2)
+ {
+ block in2, iv1;
+
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+ in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+ in += 2;
+
+ AES_ENCRYPT_ALL (iv, rounds);
+
+ iv1 = iv;
+ rkeylast = rkeylast_orig ^ in2;
+
+ AES_ENCRYPT_ALL (iv, rounds);
+
+ VEC_STORE_BE (out++, 0, iv1, bige_const);
+ VEC_STORE_BE (out++, 0, iv, bige_const);
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+ AES_ENCRYPT_ALL (iv, rounds);
+
+ VEC_STORE_BE (out++, 0, iv, bige_const);
+ }
+
+ VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block iv, b, bin;
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block rkey;
+
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ in0 = iv;
+ in1 = VEC_LOAD_BE_NOSWAP (in, 0);
+ in2 = VEC_LOAD_BE_NOSWAP (in, 1);
+ in3 = VEC_LOAD_BE_NOSWAP (in, 2);
+ in4 = VEC_LOAD_BE_NOSWAP (in, 3);
+ in1 = VEC_BE_SWAP (in1, bige_const);
+ in2 = VEC_BE_SWAP (in2, bige_const);
+ in5 = VEC_LOAD_BE_NOSWAP (in, 4);
+ in6 = VEC_LOAD_BE_NOSWAP (in, 5);
+ in3 = VEC_BE_SWAP (in3, bige_const);
+ in4 = VEC_BE_SWAP (in4, bige_const);
+ in7 = VEC_LOAD_BE_NOSWAP (in, 6);
+ iv = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ in5 = VEC_BE_SWAP (in5, bige_const);
+ in6 = VEC_BE_SWAP (in6, bige_const);
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ in7 = VEC_BE_SWAP (in7, bige_const);
+ iv = VEC_BE_SWAP (iv, bige_const);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+ b4 = asm_xor (rkey0, in4);
+ b5 = asm_xor (rkey0, in5);
+ b6 = asm_xor (rkey0, in6);
+ b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+ in4 = asm_xor (rkeylast, in4);
+ b0 = asm_cipherlast_be (b0, in1);
+ b1 = asm_cipherlast_be (b1, in2);
+ in5 = asm_xor (rkeylast, in5);
+ in6 = asm_xor (rkeylast, in6);
+ b2 = asm_cipherlast_be (b2, in3);
+ b3 = asm_cipherlast_be (b3, in4);
+ in7 = asm_xor (rkeylast, in7);
+ in0 = asm_xor (rkeylast, iv);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_cipherlast_be (b4, in5);
+ b5 = asm_cipherlast_be (b5, in6);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_cipherlast_be (b6, in7);
+ b7 = asm_cipherlast_be (b7, in0);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ in0 = iv;
+ in1 = VEC_LOAD_BE (in, 0, bige_const);
+ in2 = VEC_LOAD_BE (in, 1, bige_const);
+ in3 = VEC_LOAD_BE (in, 2, bige_const);
+ iv = VEC_LOAD_BE (in, 3, bige_const);
+
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+ in0 = asm_xor (rkeylast, iv);
+ b0 = asm_cipherlast_be (b0, in1);
+ b1 = asm_cipherlast_be (b1, in2);
+ b2 = asm_cipherlast_be (b2, in3);
+ b3 = asm_cipherlast_be (b3, in0);
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ bin = VEC_LOAD_BE (in, 0, bige_const);
+ rkeylast = rkeylast_orig ^ bin;
+ b = iv;
+ iv = bin;
+
+ AES_ENCRYPT (b, rounds);
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ byte *out = (byte *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES_ALL;
+ block lastiv, b;
+ unsigned int outadd = -(!cbc_mac) & 16;
+
+ lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS_ALL (rounds);
+
+ for (; nblocks >= 2; nblocks -= 2)
+ {
+ block in2, lastiv1;
+
+ b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const);
+ in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+ in += 2;
+
+ AES_ENCRYPT_ALL (b, rounds);
+
+ lastiv1 = b;
+ b = lastiv1 ^ in2;
+
+ AES_ENCRYPT_ALL (b, rounds);
+
+ lastiv = b;
+ VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const);
+ out += outadd;
+ VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const);
+ out += outadd;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+ AES_ENCRYPT_ALL (b, rounds);
+
+ lastiv = b;
+ VEC_STORE_BE ((u128_t *)out, 0, b, bige_const);
+ out += outadd;
+ }
+
+ VEC_STORE_BE (iv_arg, 0, lastiv, bige_const);
+}
+
+void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block rkey;
+ block iv, b;
+
+ if (!ctx->decryption_prepared)
+ {
+ internal_aes_ppc_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+ rkeylast_orig = rkeylast;
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ in0 = VEC_BE_SWAP (in0, bige_const);
+ in1 = VEC_BE_SWAP (in1, bige_const);
+ in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ in2 = VEC_BE_SWAP (in2, bige_const);
+ in3 = VEC_BE_SWAP (in3, bige_const);
+ in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ in4 = VEC_BE_SWAP (in4, bige_const);
+ in5 = VEC_BE_SWAP (in5, bige_const);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+ in6 = VEC_BE_SWAP (in6, bige_const);
+ in7 = VEC_BE_SWAP (in7, bige_const);
+ b4 = asm_xor (rkey0, in4);
+ b5 = asm_xor (rkey0, in5);
+ b6 = asm_xor (rkey0, in6);
+ b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey); \
+ b4 = asm_ncipher_be (b4, rkey); \
+ b5 = asm_ncipher_be (b5, rkey); \
+ b6 = asm_ncipher_be (b6, rkey); \
+ b7 = asm_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ iv = asm_xor (rkeylast, iv);
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ b0 = asm_ncipherlast_be (b0, iv);
+ iv = in7;
+ b1 = asm_ncipherlast_be (b1, in0);
+ in3 = asm_xor (rkeylast, in3);
+ in4 = asm_xor (rkeylast, in4);
+ b2 = asm_ncipherlast_be (b2, in1);
+ b3 = asm_ncipherlast_be (b3, in2);
+ in5 = asm_xor (rkeylast, in5);
+ in6 = asm_xor (rkeylast, in6);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_ncipherlast_be (b4, in3);
+ b5 = asm_ncipherlast_be (b5, in4);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_ncipherlast_be (b6, in5);
+ b7 = asm_ncipherlast_be (b7, in6);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ in0 = VEC_LOAD_BE (in, 0, bige_const);
+ in1 = VEC_LOAD_BE (in, 1, bige_const);
+ in2 = VEC_LOAD_BE (in, 2, bige_const);
+ in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ b0 = asm_xor (rkey0, in0);
+ b1 = asm_xor (rkey0, in1);
+ b2 = asm_xor (rkey0, in2);
+ b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ iv = asm_xor (rkeylast, iv);
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+
+ b0 = asm_ncipherlast_be (b0, iv);
+ iv = in3;
+ b1 = asm_ncipherlast_be (b1, in0);
+ b2 = asm_ncipherlast_be (b2, in1);
+ b3 = asm_ncipherlast_be (b3, in2);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ rkeylast = rkeylast_orig ^ iv;
+
+ iv = VEC_LOAD_BE (in, 0, bige_const);
+ b = iv;
+ AES_DECRYPT (b, rounds);
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in++;
+ out++;
+ }
+
+ VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ static const unsigned char vec_one_const[16] =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ ROUND_KEY_VARIABLES;
+ block rkeylast_orig;
+ block ctr, b, one;
+
+ ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const);
+ one = VEC_LOAD_BE (&vec_one_const, 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+ rkeylast_orig = rkeylast;
+
+ if (nblocks >= 4)
+ {
+ block in0, in1, in2, in3, in4, in5, in6, in7;
+ block b0, b1, b2, b3, b4, b5, b6, b7;
+ block two, three, four;
+ block rkey;
+
+ two = asm_add_uint128 (one, one);
+ three = asm_add_uint128 (two, one);
+ four = asm_add_uint128 (two, two);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b1 = asm_add_uint128 (ctr, one);
+ b2 = asm_add_uint128 (ctr, two);
+ b3 = asm_add_uint128 (ctr, three);
+ b4 = asm_add_uint128 (ctr, four);
+ b5 = asm_add_uint128 (b1, four);
+ b6 = asm_add_uint128 (b2, four);
+ b7 = asm_add_uint128 (b3, four);
+ b0 = asm_xor (rkey0, ctr);
+ rkey = ALIGNED_LOAD (rk, 1);
+ ctr = asm_add_uint128 (b4, four);
+ b1 = asm_xor (rkey0, b1);
+ b2 = asm_xor (rkey0, b2);
+ b3 = asm_xor (rkey0, b3);
+ b0 = asm_cipher_be (b0, rkey);
+ b1 = asm_cipher_be (b1, rkey);
+ b2 = asm_cipher_be (b2, rkey);
+ b3 = asm_cipher_be (b3, rkey);
+ b4 = asm_xor (rkey0, b4);
+ b5 = asm_xor (rkey0, b5);
+ b6 = asm_xor (rkey0, b6);
+ b7 = asm_xor (rkey0, b7);
+ b4 = asm_cipher_be (b4, rkey);
+ b5 = asm_cipher_be (b5, rkey);
+ b6 = asm_cipher_be (b6, rkey);
+ b7 = asm_cipher_be (b7, rkey);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ DO_ROUND(2);
+ in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ DO_ROUND(3);
+ in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ DO_ROUND(4);
+ in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ DO_ROUND(5);
+ in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ DO_ROUND(6);
+ in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ DO_ROUND(7);
+ in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ DO_ROUND(8);
+ in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ DO_ROUND(9);
+
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in0 = VEC_BE_SWAP (in0, bige_const);
+ in1 = VEC_BE_SWAP (in1, bige_const);
+ in2 = VEC_BE_SWAP (in2, bige_const);
+ in3 = VEC_BE_SWAP (in3, bige_const);
+ in4 = VEC_BE_SWAP (in4, bige_const);
+ in5 = VEC_BE_SWAP (in5, bige_const);
+ in6 = VEC_BE_SWAP (in6, bige_const);
+ in7 = VEC_BE_SWAP (in7, bige_const);
+
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+ b0 = asm_cipherlast_be (b0, in0);
+ b1 = asm_cipherlast_be (b1, in1);
+ in4 = asm_xor (rkeylast, in4);
+ in5 = asm_xor (rkeylast, in5);
+ b2 = asm_cipherlast_be (b2, in2);
+ b3 = asm_cipherlast_be (b3, in3);
+ in6 = asm_xor (rkeylast, in6);
+ in7 = asm_xor (rkeylast, in7);
+ b4 = asm_cipherlast_be (b4, in4);
+ b5 = asm_cipherlast_be (b5, in5);
+ b6 = asm_cipherlast_be (b6, in6);
+ b7 = asm_cipherlast_be (b7, in7);
+
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ b1 = asm_add_uint128 (ctr, one);
+ b2 = asm_add_uint128 (ctr, two);
+ b3 = asm_add_uint128 (ctr, three);
+ b0 = asm_xor (rkey0, ctr);
+ ctr = asm_add_uint128 (ctr, four);
+ b1 = asm_xor (rkey0, b1);
+ b2 = asm_xor (rkey0, b2);
+ b3 = asm_xor (rkey0, b3);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+
+ in0 = VEC_LOAD_BE (in, 0, bige_const);
+ in1 = VEC_LOAD_BE (in, 1, bige_const);
+ in2 = VEC_LOAD_BE (in, 2, bige_const);
+ in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ in0 = asm_xor (rkeylast, in0);
+ in1 = asm_xor (rkeylast, in1);
+ in2 = asm_xor (rkeylast, in2);
+ in3 = asm_xor (rkeylast, in3);
+
+ b0 = asm_cipherlast_be (b0, in0);
+ b1 = asm_cipherlast_be (b1, in1);
+ b2 = asm_cipherlast_be (b2, in2);
+ b3 = asm_cipherlast_be (b3, in3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ b = ctr;
+ ctr = asm_add_uint128 (ctr, one);
+ rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+
+ AES_ENCRYPT (b, rounds);
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ out++;
+ in++;
+ }
+
+ VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
+}
+
+
+size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ u64 data_nblocks = c->u_mode.ocb.data_nblocks;
+ block l0, l1, l2, l;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b;
+ block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+ block rkey, rkeylf;
+ block ctr, iv;
+ ROUND_KEY_VARIABLES;
+
+ iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const);
+ ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const);
+
+ l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+ l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+ l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+ if (encrypt)
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ b ^= iv;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+ l = VEC_BE_SWAP(l, bige_const);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+ iv ^= rkey0;
+
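+ /* With data_nblocks % 8 == 0, the ntz() sequence for the next 8 blocks
+ * is fixed (L0, L1, L0, L2, L0, L1, L0, L_{ntz(i+8)}), so all offsets
+ * are formed by XOR from l0/l1/l2/l. rkey0 was folded into iv above so
+ * that the offset XOR also applies the first round key; it is removed
+ * again when the chaining value is carried to the next iteration. */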
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l2;
+ iv4 = iv ^ l1 ^ l2 ^ l0;
+ iv5 = iv ^ l2 ^ l0;
+ iv6 = iv ^ l2;
+ iv7 = iv ^ l2 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ b4 ^= iv4;
+ b5 ^= iv5;
+ b6 ^= iv6;
+ b7 ^= iv7;
+ iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ iv0 = asm_xor (rkeylf, iv0);
+ iv1 = asm_xor (rkeylf, iv1);
+ iv2 = asm_xor (rkeylf, iv2);
+ iv3 = asm_xor (rkeylf, iv3);
+ iv4 = asm_xor (rkeylf, iv4);
+ iv5 = asm_xor (rkeylf, iv5);
+ iv6 = asm_xor (rkeylf, iv6);
+ iv7 = asm_xor (rkeylf, iv7);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_cipherlast_be (b0, iv0);
+ b1 = asm_cipherlast_be (b1, iv1);
+ b2 = asm_cipherlast_be (b2, iv2);
+ b3 = asm_cipherlast_be (b3, iv3);
+ b4 = asm_cipherlast_be (b4, iv4);
+ b5 = asm_cipherlast_be (b5, iv5);
+ b6 = asm_cipherlast_be (b6, iv6);
+ b7 = asm_cipherlast_be (b7, iv7);
+
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4 && (data_nblocks % 4) == 0)
+ {
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+ iv ^= rkey0;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast ^ rkey0;
+ b0 = asm_cipherlast_be (b0, rkey ^ iv0);
+ b1 = asm_cipherlast_be (b1, rkey ^ iv1);
+ b2 = asm_cipherlast_be (b2, rkey ^ iv2);
+ b3 = asm_cipherlast_be (b3, rkey ^ iv3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ b ^= iv;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+ }
+ else
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+ if (!ctx->decryption_prepared)
+ {
+ internal_aes_ppc_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ b ^= iv;
+ AES_DECRYPT (b, rounds);
+ b ^= iv;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+ l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+ l = VEC_BE_SWAP(l, bige_const);
+
+ iv ^= rkey0;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l2;
+ iv4 = iv ^ l1 ^ l2 ^ l0;
+ iv5 = iv ^ l2 ^ l0;
+ iv6 = iv ^ l2;
+ iv7 = iv ^ l2 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ b4 ^= iv4;
+ b5 ^= iv5;
+ b6 ^= iv6;
+ b7 ^= iv7;
+ iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey); \
+ b4 = asm_ncipher_be (b4, rkey); \
+ b5 = asm_ncipher_be (b5, rkey); \
+ b6 = asm_ncipher_be (b6, rkey); \
+ b7 = asm_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ iv0 = asm_xor (rkeylf, iv0);
+ iv1 = asm_xor (rkeylf, iv1);
+ iv2 = asm_xor (rkeylf, iv2);
+ iv3 = asm_xor (rkeylf, iv3);
+ iv4 = asm_xor (rkeylf, iv4);
+ iv5 = asm_xor (rkeylf, iv5);
+ iv6 = asm_xor (rkeylf, iv6);
+ iv7 = asm_xor (rkeylf, iv7);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_ncipherlast_be (b0, iv0);
+ b1 = asm_ncipherlast_be (b1, iv1);
+ b2 = asm_ncipherlast_be (b2, iv2);
+ b3 = asm_ncipherlast_be (b3, iv3);
+ b4 = asm_ncipherlast_be (b4, iv4);
+ b5 = asm_ncipherlast_be (b5, iv5);
+ b6 = asm_ncipherlast_be (b6, iv6);
+ b7 = asm_ncipherlast_be (b7, iv7);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4 && (data_nblocks % 4) == 0)
+ {
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+ iv ^= rkey0;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast ^ rkey0;
+ b0 = asm_ncipherlast_be (b0, rkey ^ iv0);
+ b1 = asm_ncipherlast_be (b1, rkey ^ iv1);
+ b2 = asm_ncipherlast_be (b2, rkey ^ iv2);
+ b3 = asm_ncipherlast_be (b3, rkey ^ iv3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (in, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ b ^= iv;
+ AES_DECRYPT (b, rounds);
+ b ^= iv;
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ctr ^= b;
+
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in += 1;
+ out += 1;
+ }
+ }
+
+ VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const);
+ VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
+ c->u_mode.ocb.data_nblocks = data_nblocks;
+
+ return 0;
+}
+
+size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
+{
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+ const u128_t *abuf = (const u128_t *)abuf_arg;
+ int rounds = ctx->rounds;
+ u64 data_nblocks = c->u_mode.ocb.aad_nblocks;
+ block l0, l1, l2, l;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b;
+ block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+ block rkey, frkey;
+ block ctr, iv;
+ ROUND_KEY_VARIABLES;
+
+ iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const);
+ ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const);
+
+ l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+ l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+ l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ ctr ^= b;
+
+ abuf += 1;
+ }
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+ b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+ b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+ b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+ b4 = VEC_LOAD_BE (abuf, 4, bige_const);
+ b5 = VEC_LOAD_BE (abuf, 5, bige_const);
+ b6 = VEC_LOAD_BE (abuf, 6, bige_const);
+ b7 = VEC_LOAD_BE (abuf, 7, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const);
+
+ frkey = rkey0;
+ iv ^= frkey;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l2;
+ iv4 = iv ^ l1 ^ l2 ^ l0;
+ iv5 = iv ^ l2 ^ l0;
+ iv6 = iv ^ l2;
+ iv7 = iv ^ l2 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ b4 ^= iv4;
+ b5 ^= iv5;
+ b6 ^= iv6;
+ b7 ^= iv7;
+ iv = iv7 ^ frkey;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_cipherlast_be (b0, rkey);
+ b1 = asm_cipherlast_be (b1, rkey);
+ b2 = asm_cipherlast_be (b2, rkey);
+ b3 = asm_cipherlast_be (b3, rkey);
+ b4 = asm_cipherlast_be (b4, rkey);
+ b5 = asm_cipherlast_be (b5, rkey);
+ b6 = asm_cipherlast_be (b6, rkey);
+ b7 = asm_cipherlast_be (b7, rkey);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+ abuf += 8;
+ }
+
+ if (nblocks >= 4 && (data_nblocks % 4) == 0)
+ {
+ b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+ b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+ b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+ b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+
+ l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+ frkey = rkey0;
+ iv ^= frkey;
+
+ iv0 = iv ^ l0;
+ iv1 = iv ^ l0 ^ l1;
+ iv2 = iv ^ l1;
+ iv3 = iv ^ l1 ^ l;
+
+ b0 ^= iv0;
+ b1 ^= iv1;
+ b2 ^= iv2;
+ b3 ^= iv3;
+ iv = iv3 ^ frkey;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_cipherlast_be (b0, rkey);
+ b1 = asm_cipherlast_be (b1, rkey);
+ b2 = asm_cipherlast_be (b2, rkey);
+ b3 = asm_cipherlast_be (b3, rkey);
+
+ ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+ abuf += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+ b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ iv ^= l;
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ b ^= iv;
+ AES_ENCRYPT (b, rounds);
+ ctr ^= b;
+
+ abuf += 1;
+ }
+
+ VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const);
+ VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
+ c->u_mode.ocb.aad_nblocks = data_nblocks;
+
+ return 0;
+}
+
+
+void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+#ifdef WORDS_BIGENDIAN
+ static const block vec_bswap128_const =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+#else
+ static const block vec_bswap128_const =
+ { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 };
+#endif
+ static const unsigned char vec_tweak_const[16] =
+ { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
+ static const vector unsigned long long vec_shift63_const =
+ { 63, 63 };
+ const block bige_const = asm_load_be_const();
+ RIJNDAEL_context *ctx = context;
+ const u128_t *in = (const u128_t *)inbuf_arg;
+ u128_t *out = (u128_t *)outbuf_arg;
+ int rounds = ctx->rounds;
+ block tweak;
+ block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf;
+ block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
+ block tweak_const, bswap128_const, shift63_const;
+ ROUND_KEY_VARIABLES;
+
+ tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const);
+ bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0);
+ shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0);
+
+ tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const);
+ tweak = asm_vperm1 (tweak, bswap128_const);
+
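+/* GEN_TWEAK computes the next XTS tweak, i.e. multiplication by x in
+ * GF(2^128): 'vaddudm' doubles both 64-bit halves, and the arithmetic
+ * shift of the swapped halves by 63 turns each half's sign bit into a
+ * mask that, ANDed with vec_tweak_const, supplies the carry between the
+ * halves and the 0x87 reduction when the tweak overflows. This way the
+ * next tweak is generated without the vperm instruction. */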
+#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \
+ do { \
+ block tmp1, tmp2; \
+ tmp1 = asm_swap_uint64_halfs(tin); \
+ tmp2 = asm_add_uint64(tin, tin); \
+ tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \
+ tout = asm_xor(tmp1, tmp2); \
+ } while (0)
+
+ if (encrypt)
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ GEN_TWEAK (tweak2, tweak1);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ GEN_TWEAK (tweak3, tweak2);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ GEN_TWEAK (tweak4, tweak3);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ GEN_TWEAK (tweak5, tweak4);
+ tweak4 = asm_vperm1 (tweak4, bswap128_const);
+ GEN_TWEAK (tweak6, tweak5);
+ tweak5 = asm_vperm1 (tweak5, bswap128_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ GEN_TWEAK (tweak7, tweak6);
+ tweak6 = asm_vperm1 (tweak6, bswap128_const);
+ GEN_TWEAK (tweak, tweak7);
+ tweak7 = asm_vperm1 (tweak7, bswap128_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+
+ tweak0 = asm_xor (tweak0, rkey0);
+ tweak1 = asm_xor (tweak1, rkey0);
+ tweak2 = asm_xor (tweak2, rkey0);
+ tweak3 = asm_xor (tweak3, rkey0);
+ tweak4 = asm_xor (tweak4, rkey0);
+ tweak5 = asm_xor (tweak5, rkey0);
+ tweak6 = asm_xor (tweak6, rkey0);
+ tweak7 = asm_xor (tweak7, rkey0);
+
+ b0 = asm_xor (b0, tweak0);
+ b1 = asm_xor (b1, tweak1);
+ b2 = asm_xor (b2, tweak2);
+ b3 = asm_xor (b3, tweak3);
+ b4 = asm_xor (b4, tweak4);
+ b5 = asm_xor (b5, tweak5);
+ b6 = asm_xor (b6, tweak6);
+ b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey); \
+ b4 = asm_cipher_be (b4, rkey); \
+ b5 = asm_cipher_be (b5, rkey); \
+ b6 = asm_cipher_be (b6, rkey); \
+ b7 = asm_cipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ tweak0 = asm_xor (tweak0, rkeylf);
+ tweak1 = asm_xor (tweak1, rkeylf);
+ tweak2 = asm_xor (tweak2, rkeylf);
+ tweak3 = asm_xor (tweak3, rkeylf);
+ tweak4 = asm_xor (tweak4, rkeylf);
+ tweak5 = asm_xor (tweak5, rkeylf);
+ tweak6 = asm_xor (tweak6, rkeylf);
+ tweak7 = asm_xor (tweak7, rkeylf);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_cipherlast_be (b0, tweak0);
+ b1 = asm_cipherlast_be (b1, tweak1);
+ b2 = asm_cipherlast_be (b2, tweak2);
+ b3 = asm_cipherlast_be (b3, tweak3);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_cipherlast_be (b4, tweak4);
+ b5 = asm_cipherlast_be (b5, tweak5);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_cipherlast_be (b6, tweak6);
+ b7 = asm_cipherlast_be (b7, tweak7);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ GEN_TWEAK (tweak2, tweak1);
+ GEN_TWEAK (tweak3, tweak2);
+ GEN_TWEAK (tweak, tweak3);
+
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+ b0 ^= tweak0 ^ rkey0;
+ b1 ^= tweak1 ^ rkey0;
+ b2 ^= tweak2 ^ rkey0;
+ b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_cipher_be (b0, rkey); \
+ b1 = asm_cipher_be (b1, rkey); \
+ b2 = asm_cipher_be (b2, rkey); \
+ b3 = asm_cipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_cipherlast_be (b0, rkey ^ tweak0);
+ b1 = asm_cipherlast_be (b1, rkey ^ tweak1);
+ b2 = asm_cipherlast_be (b2, rkey ^ tweak2);
+ b3 = asm_cipherlast_be (b3, rkey ^ tweak3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
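+ /* The running tweak is kept in byte-swapped order for GEN_TWEAK;
+ convert it to block byte order before XORing it into the data. */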
+ tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+ /* Generate next tweak. */
+ GEN_TWEAK (tweak, tweak);
+
+ AES_ENCRYPT (b, rounds);
+
+ b ^= tweak0;
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in++;
+ out++;
+ }
+ }
+ else
+ {
+ const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
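+ /* Expand the decryption key schedule on first use. */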
+ if (!ctx->decryption_prepared)
+ {
+ internal_aes_ppc_prepare_decryption (ctx);
+ ctx->decryption_prepared = 1;
+ }
+
+ PRELOAD_ROUND_KEYS (rounds);
+
+ for (; nblocks >= 8; nblocks -= 8)
+ {
+ b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+ b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+ b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+ b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+ b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+ GEN_TWEAK (tweak2, tweak1);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+ b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+ in += 8;
+
+ b0 = VEC_BE_SWAP(b0, bige_const);
+ b1 = VEC_BE_SWAP(b1, bige_const);
+ GEN_TWEAK (tweak3, tweak2);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ GEN_TWEAK (tweak4, tweak3);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+ b2 = VEC_BE_SWAP(b2, bige_const);
+ b3 = VEC_BE_SWAP(b3, bige_const);
+ GEN_TWEAK (tweak5, tweak4);
+ tweak4 = asm_vperm1 (tweak4, bswap128_const);
+ GEN_TWEAK (tweak6, tweak5);
+ tweak5 = asm_vperm1 (tweak5, bswap128_const);
+ b4 = VEC_BE_SWAP(b4, bige_const);
+ b5 = VEC_BE_SWAP(b5, bige_const);
+ GEN_TWEAK (tweak7, tweak6);
+ tweak6 = asm_vperm1 (tweak6, bswap128_const);
+ GEN_TWEAK (tweak, tweak7);
+ tweak7 = asm_vperm1 (tweak7, bswap128_const);
+ b6 = VEC_BE_SWAP(b6, bige_const);
+ b7 = VEC_BE_SWAP(b7, bige_const);
+
+ tweak0 = asm_xor (tweak0, rkey0);
+ tweak1 = asm_xor (tweak1, rkey0);
+ tweak2 = asm_xor (tweak2, rkey0);
+ tweak3 = asm_xor (tweak3, rkey0);
+ tweak4 = asm_xor (tweak4, rkey0);
+ tweak5 = asm_xor (tweak5, rkey0);
+ tweak6 = asm_xor (tweak6, rkey0);
+ tweak7 = asm_xor (tweak7, rkey0);
+
+ b0 = asm_xor (b0, tweak0);
+ b1 = asm_xor (b1, tweak1);
+ b2 = asm_xor (b2, tweak2);
+ b3 = asm_xor (b3, tweak3);
+ b4 = asm_xor (b4, tweak4);
+ b5 = asm_xor (b5, tweak5);
+ b6 = asm_xor (b6, tweak6);
+ b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey); \
+ b4 = asm_ncipher_be (b4, rkey); \
+ b5 = asm_ncipher_be (b5, rkey); \
+ b6 = asm_ncipher_be (b6, rkey); \
+ b7 = asm_ncipher_be (b7, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+
+ rkeylf = asm_xor (rkeylast, rkey0);
+
+ DO_ROUND(8);
+
+ tweak0 = asm_xor (tweak0, rkeylf);
+ tweak1 = asm_xor (tweak1, rkeylf);
+ tweak2 = asm_xor (tweak2, rkeylf);
+ tweak3 = asm_xor (tweak3, rkeylf);
+ tweak4 = asm_xor (tweak4, rkeylf);
+ tweak5 = asm_xor (tweak5, rkeylf);
+ tweak6 = asm_xor (tweak6, rkeylf);
+ tweak7 = asm_xor (tweak7, rkeylf);
+
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ b0 = asm_ncipherlast_be (b0, tweak0);
+ b1 = asm_ncipherlast_be (b1, tweak1);
+ b2 = asm_ncipherlast_be (b2, tweak2);
+ b3 = asm_ncipherlast_be (b3, tweak3);
+ b0 = VEC_BE_SWAP (b0, bige_const);
+ b1 = VEC_BE_SWAP (b1, bige_const);
+ b4 = asm_ncipherlast_be (b4, tweak4);
+ b5 = asm_ncipherlast_be (b5, tweak5);
+ b2 = VEC_BE_SWAP (b2, bige_const);
+ b3 = VEC_BE_SWAP (b3, bige_const);
+ b6 = asm_ncipherlast_be (b6, tweak6);
+ b7 = asm_ncipherlast_be (b7, tweak7);
+ VEC_STORE_BE_NOSWAP (out, 0, b0);
+ VEC_STORE_BE_NOSWAP (out, 1, b1);
+ b4 = VEC_BE_SWAP (b4, bige_const);
+ b5 = VEC_BE_SWAP (b5, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 2, b2);
+ VEC_STORE_BE_NOSWAP (out, 3, b3);
+ b6 = VEC_BE_SWAP (b6, bige_const);
+ b7 = VEC_BE_SWAP (b7, bige_const);
+ VEC_STORE_BE_NOSWAP (out, 4, b4);
+ VEC_STORE_BE_NOSWAP (out, 5, b5);
+ VEC_STORE_BE_NOSWAP (out, 6, b6);
+ VEC_STORE_BE_NOSWAP (out, 7, b7);
+ out += 8;
+ }
+
+ if (nblocks >= 4)
+ {
+ tweak0 = tweak;
+ GEN_TWEAK (tweak1, tweak0);
+ GEN_TWEAK (tweak2, tweak1);
+ GEN_TWEAK (tweak3, tweak2);
+ GEN_TWEAK (tweak, tweak3);
+
+ b0 = VEC_LOAD_BE (in, 0, bige_const);
+ b1 = VEC_LOAD_BE (in, 1, bige_const);
+ b2 = VEC_LOAD_BE (in, 2, bige_const);
+ b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+ tweak0 = asm_vperm1 (tweak0, bswap128_const);
+ tweak1 = asm_vperm1 (tweak1, bswap128_const);
+ tweak2 = asm_vperm1 (tweak2, bswap128_const);
+ tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+ b0 ^= tweak0 ^ rkey0;
+ b1 ^= tweak1 ^ rkey0;
+ b2 ^= tweak2 ^ rkey0;
+ b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+ rkey = ALIGNED_LOAD (rk, r); \
+ b0 = asm_ncipher_be (b0, rkey); \
+ b1 = asm_ncipher_be (b1, rkey); \
+ b2 = asm_ncipher_be (b2, rkey); \
+ b3 = asm_ncipher_be (b3, rkey);
+
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ if (rounds >= 12)
+ {
+ DO_ROUND(10);
+ DO_ROUND(11);
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ }
+ }
+
+#undef DO_ROUND
+
+ rkey = rkeylast;
+ b0 = asm_ncipherlast_be (b0, rkey ^ tweak0);
+ b1 = asm_ncipherlast_be (b1, rkey ^ tweak1);
+ b2 = asm_ncipherlast_be (b2, rkey ^ tweak2);
+ b3 = asm_ncipherlast_be (b3, rkey ^ tweak3);
+
+ VEC_STORE_BE (out, 0, b0, bige_const);
+ VEC_STORE_BE (out, 1, b1, bige_const);
+ VEC_STORE_BE (out, 2, b2, bige_const);
+ VEC_STORE_BE (out, 3, b3, bige_const);
+
+ in += 4;
+ out += 4;
+ nblocks -= 4;
+ }
+
+ for (; nblocks; nblocks--)
+ {
+ tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+ /* Xor-Encrypt/Decrypt-Xor block. */
+ b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+ /* Generate next tweak. */
+ GEN_TWEAK (tweak, tweak);
+
+ AES_DECRYPT (b, rounds);
+
+ b ^= tweak0;
+ VEC_STORE_BE (out, 0, b, bige_const);
+
+ in++;
+ out++;
+ }
+ }
+
+ tweak = asm_vperm1 (tweak, bswap128_const);
+ VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);
+
+#undef GEN_TWEAK
+}
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index a8bcae468..3e727628b 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -1,6 +1,6 @@
/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
* Copyright (C) 2019 Shawn Landden <shawn@git.icu>
- * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -31,162 +31,7 @@

#ifdef USE_PPC_CRYPTO

-#include <altivec.h>
-
-
-typedef vector unsigned char block;
-
-typedef union
-{
- u32 data32[4];
-} __attribute__((packed, aligned(1), may_alias)) u128_t;
-
-
-#define ALWAYS_INLINE inline __attribute__((always_inline))
-#define NO_INLINE __attribute__((noinline))
-#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
-
-#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
-#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
-#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
-
-
-#define ALIGNED_LOAD(in_ptr, offs) \
- (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))
-
-#define ALIGNED_STORE(out_ptr, offs, vec) \
- (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))
-
-#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))
-
-#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
- (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
- bige_const))
-
-#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
- (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))
-
-#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
- (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
- (void *)(out_ptr)))
-
-#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
- (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
-
-
-#define ROUND_KEY_VARIABLES \
- block rkey0, rkeylast
-
-#define PRELOAD_ROUND_KEYS(nrounds) \
- do { \
- rkey0 = ALIGNED_LOAD (rk, 0); \
- rkeylast = ALIGNED_LOAD (rk, nrounds); \
- } while (0)
-
-#define AES_ENCRYPT(blk, nrounds) \
- do { \
- blk ^= rkey0; \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
- if (nrounds >= 12) \
- { \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
- if (rounds > 12) \
- { \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
- blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
- } \
- } \
- blk = asm_cipherlast_be (blk, rkeylast); \
- } while (0)
-
-#define AES_DECRYPT(blk, nrounds) \
- do { \
- blk ^= rkey0; \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
- if (nrounds >= 12) \
- { \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
- if (rounds > 12) \
- { \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
- blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
- } \
- } \
- blk = asm_ncipherlast_be (blk, rkeylast); \
- } while (0)
-
-
-#define ROUND_KEY_VARIABLES_ALL \
- block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
- rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast
-
-#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
- do { \
- rkey0 = ALIGNED_LOAD (rk, 0); \
- rkey1 = ALIGNED_LOAD (rk, 1); \
- rkey2 = ALIGNED_LOAD (rk, 2); \
- rkey3 = ALIGNED_LOAD (rk, 3); \
- rkey4 = ALIGNED_LOAD (rk, 4); \
- rkey5 = ALIGNED_LOAD (rk, 5); \
- rkey6 = ALIGNED_LOAD (rk, 6); \
- rkey7 = ALIGNED_LOAD (rk, 7); \
- rkey8 = ALIGNED_LOAD (rk, 8); \
- rkey9 = ALIGNED_LOAD (rk, 9); \
- if (nrounds >= 12) \
- { \
- rkey10 = ALIGNED_LOAD (rk, 10); \
- rkey11 = ALIGNED_LOAD (rk, 11); \
- if (rounds > 12) \
- { \
- rkey12 = ALIGNED_LOAD (rk, 12); \
- rkey13 = ALIGNED_LOAD (rk, 13); \
- } \
- } \
- rkeylast = ALIGNED_LOAD (rk, nrounds); \
- } while (0)
-
-#define AES_ENCRYPT_ALL(blk, nrounds) \
- do { \
- blk ^= rkey0; \
- blk = asm_cipher_be (blk, rkey1); \
- blk = asm_cipher_be (blk, rkey2); \
- blk = asm_cipher_be (blk, rkey3); \
- blk = asm_cipher_be (blk, rkey4); \
- blk = asm_cipher_be (blk, rkey5); \
- blk = asm_cipher_be (blk, rkey6); \
- blk = asm_cipher_be (blk, rkey7); \
- blk = asm_cipher_be (blk, rkey8); \
- blk = asm_cipher_be (blk, rkey9); \
- if (nrounds >= 12) \
- { \
- blk = asm_cipher_be (blk, rkey10); \
- blk = asm_cipher_be (blk, rkey11); \
- if (rounds > 12) \
- { \
- blk = asm_cipher_be (blk, rkey12); \
- blk = asm_cipher_be (blk, rkey13); \
- } \
- } \
- blk = asm_cipherlast_be (blk, rkeylast); \
- } while (0)
+#include "rijndael-ppc-common.h"


#ifdef WORDS_BIGENDIAN
@@ -198,26 +43,6 @@ static const block vec_bswap32_const_neg =
#endif


-static ASM_FUNC_ATTR_INLINE block
-asm_aligned_ld(unsigned long offset, const void *ptr)
-{
- block vec;
- __asm__ volatile ("lvx %0,%1,%2\n\t"
- : "=v" (vec)
- : "r" (offset), "r" ((uintptr_t)ptr)
- : "memory", "r0");
- return vec;
-}
-
-static ASM_FUNC_ATTR_INLINE void
-asm_aligned_st(block vec, unsigned long offset, void *ptr)
-{
- __asm__ volatile ("stvx %0,%1,%2\n\t"
- :
- : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
- : "memory", "r0");
-}
-
static ASM_FUNC_ATTR_INLINE block
asm_load_be_const(void)
{
@@ -229,16 +54,6 @@ asm_load_be_const(void)
#endif
}

-static ASM_FUNC_ATTR_INLINE block
-asm_vperm1(block vec, block mask)
-{
- block o;
- __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
- : "=v" (o)
- : "v" (vec), "v" (mask));
- return o;
-}
-
static ASM_FUNC_ATTR_INLINE block
asm_be_swap(block vec, block be_bswap_const)
{
@@ -272,66 +87,6 @@ asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
: "memory", "r0");
}

-static ASM_FUNC_ATTR_INLINE block
-asm_add_uint128(block a, block b)
-{
- block res;
- __asm__ volatile ("vadduqm %0,%1,%2\n\t"
- : "=v" (res)
- : "v" (a), "v" (b));
- return res;
-}
-
-static ASM_FUNC_ATTR_INLINE block
-asm_xor(block a, block b)
-{
- block res;
- __asm__ volatile ("vxor %0,%1,%2\n\t"
- : "=v" (res)
- : "v" (a), "v" (b));
- return res;
-}
-
-static ASM_FUNC_ATTR_INLINE block
-asm_cipher_be(block b, block rk)
-{
- block o;
- __asm__ volatile ("vcipher %0, %1, %2\n\t"
- : "=v" (o)
- : "v" (b), "v" (rk));
- return o;
-}
-
-static ASM_FUNC_ATTR_INLINE block
-asm_cipherlast_be(block b, block rk)
-{
- block o;
- __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
- : "=v" (o)
- : "v" (b), "v" (rk));
- return o;
-}
-
-static ASM_FUNC_ATTR_INLINE block
-asm_ncipher_be(block b, block rk)
-{
- block o;
- __asm__ volatile ("vncipher %0, %1, %2\n\t"
- : "=v" (o)
- : "v" (b), "v" (rk));
- return o;
-}
-
-static ASM_FUNC_ATTR_INLINE block
-asm_ncipherlast_be(block b, block rk)
-{
- block o;
- __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
- : "=v" (o)
- : "v" (b), "v" (rk));
- return o;
-}
-

static ASM_FUNC_ATTR_INLINE u32
_gcry_aes_sbox4_ppc8(u32 fourbytes)
@@ -439,7 +194,7 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
}
}

- rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b);
+ rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b);
}

/* Store in big-endian order. */
@@ -450,7 +205,7 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
#else
block rvec = ALIGNED_LOAD (ekey, r);
ALIGNED_STORE (ekey, r,
- vec_perm(rvec, rvec, vec_bswap32_const));
+ vec_perm(rvec, rvec, vec_bswap32_const));
(void)bige_const;
#endif
}
@@ -464,2012 +219,25 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
wipememory(&tkk, sizeof(tkk));
}

-
-/* Make a decryption key from an encryption key. */
-static ASM_FUNC_ATTR_INLINE void
-aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
-{
- u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
- u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
- int rounds = ctx->rounds;
- int rr;
- int r;
-
- r = 0;
- rr = rounds;
- for (r = 0, rr = rounds; r <= rounds; r++, rr--)
- {
- ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
- }
-}
-
-
void
_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
{
- aes_ppc8_prepare_decryption (ctx);
-}
-
-
-unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx,
- unsigned char *out,
- const unsigned char *in)
-{
- const block bige_const = asm_load_be_const();
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES;
- block b;
-
- b = VEC_LOAD_BE (in, 0, bige_const);
-
- PRELOAD_ROUND_KEYS (rounds);
-
- AES_ENCRYPT (b, rounds);
- VEC_STORE_BE (out, 0, b, bige_const);
-
- return 0; /* does not use stack */
-}
-
-
-unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
- unsigned char *out,
- const unsigned char *in)
-{
- const block bige_const = asm_load_be_const();
- const u128_t *rk = (u128_t *)&ctx->keyschdec;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES;
- block b;
-
- b = VEC_LOAD_BE (in, 0, bige_const);
-
- PRELOAD_ROUND_KEYS (rounds);
-
- AES_DECRYPT (b, rounds);
- VEC_STORE_BE (out, 0, b, bige_const);
-
- return 0; /* does not use stack */
-}
-
-
-void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
-{
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = context;
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES_ALL;
- block rkeylast_orig;
- block iv;
-
- iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
-
- PRELOAD_ROUND_KEYS_ALL (rounds);
- rkeylast_orig = rkeylast;
-
- for (; nblocks; nblocks--)
- {
- rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
-
- AES_ENCRYPT_ALL (iv, rounds);
-
- VEC_STORE_BE (out, 0, iv, bige_const);
-
- out++;
- in++;
- }
-
- VEC_STORE_BE (iv_arg, 0, iv, bige_const);
-}
-
-void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
-{
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = context;
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES;
- block rkeylast_orig;
- block iv, b, bin;
- block in0, in1, in2, in3, in4, in5, in6, in7;
- block b0, b1, b2, b3, b4, b5, b6, b7;
- block rkey;
-
- iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
-
- PRELOAD_ROUND_KEYS (rounds);
- rkeylast_orig = rkeylast;
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- in0 = iv;
- in1 = VEC_LOAD_BE_NOSWAP (in, 0);
- in2 = VEC_LOAD_BE_NOSWAP (in, 1);
- in3 = VEC_LOAD_BE_NOSWAP (in, 2);
- in4 = VEC_LOAD_BE_NOSWAP (in, 3);
- in1 = VEC_BE_SWAP (in1, bige_const);
- in2 = VEC_BE_SWAP (in2, bige_const);
- in5 = VEC_LOAD_BE_NOSWAP (in, 4);
- in6 = VEC_LOAD_BE_NOSWAP (in, 5);
- in3 = VEC_BE_SWAP (in3, bige_const);
- in4 = VEC_BE_SWAP (in4, bige_const);
- in7 = VEC_LOAD_BE_NOSWAP (in, 6);
- iv = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
- in5 = VEC_BE_SWAP (in5, bige_const);
- in6 = VEC_BE_SWAP (in6, bige_const);
- b0 = asm_xor (rkey0, in0);
- b1 = asm_xor (rkey0, in1);
- in7 = VEC_BE_SWAP (in7, bige_const);
- iv = VEC_BE_SWAP (iv, bige_const);
- b2 = asm_xor (rkey0, in2);
- b3 = asm_xor (rkey0, in3);
- b4 = asm_xor (rkey0, in4);
- b5 = asm_xor (rkey0, in5);
- b6 = asm_xor (rkey0, in6);
- b7 = asm_xor (rkey0, in7);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey); \
- b4 = asm_cipher_be (b4, rkey); \
- b5 = asm_cipher_be (b5, rkey); \
- b6 = asm_cipher_be (b6, rkey); \
- b7 = asm_cipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- in1 = asm_xor (rkeylast, in1);
- in2 = asm_xor (rkeylast, in2);
- in3 = asm_xor (rkeylast, in3);
- in4 = asm_xor (rkeylast, in4);
- b0 = asm_cipherlast_be (b0, in1);
- b1 = asm_cipherlast_be (b1, in2);
- in5 = asm_xor (rkeylast, in5);
- in6 = asm_xor (rkeylast, in6);
- b2 = asm_cipherlast_be (b2, in3);
- b3 = asm_cipherlast_be (b3, in4);
- in7 = asm_xor (rkeylast, in7);
- in0 = asm_xor (rkeylast, iv);
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b4 = asm_cipherlast_be (b4, in5);
- b5 = asm_cipherlast_be (b5, in6);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b6 = asm_cipherlast_be (b6, in7);
- b7 = asm_cipherlast_be (b7, in0);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
-
- if (nblocks >= 4)
- {
- in0 = iv;
- in1 = VEC_LOAD_BE (in, 0, bige_const);
- in2 = VEC_LOAD_BE (in, 1, bige_const);
- in3 = VEC_LOAD_BE (in, 2, bige_const);
- iv = VEC_LOAD_BE (in, 3, bige_const);
-
- b0 = asm_xor (rkey0, in0);
- b1 = asm_xor (rkey0, in1);
- b2 = asm_xor (rkey0, in2);
- b3 = asm_xor (rkey0, in3);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- in1 = asm_xor (rkeylast, in1);
- in2 = asm_xor (rkeylast, in2);
- in3 = asm_xor (rkeylast, in3);
- in0 = asm_xor (rkeylast, iv);
- b0 = asm_cipherlast_be (b0, in1);
- b1 = asm_cipherlast_be (b1, in2);
- b2 = asm_cipherlast_be (b2, in3);
- b3 = asm_cipherlast_be (b3, in0);
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- bin = VEC_LOAD_BE (in, 0, bige_const);
- rkeylast = rkeylast_orig ^ bin;
- b = iv;
- iv = bin;
-
- AES_ENCRYPT (b, rounds);
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- out++;
- in++;
- }
-
- VEC_STORE_BE (iv_arg, 0, iv, bige_const);
-}
-
-
-void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks, int cbc_mac)
-{
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = context;
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES_ALL;
- block lastiv, b;
- unsigned int outadd = !cbc_mac;
-
- lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const);
-
- PRELOAD_ROUND_KEYS_ALL (rounds);
-
- for (; nblocks; nblocks--)
- {
- b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const);
-
- AES_ENCRYPT_ALL (b, rounds);
-
- lastiv = b;
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in++;
- out += outadd;
- }
-
- VEC_STORE_BE (iv_arg, 0, lastiv, bige_const);
-}
-
-void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
-{
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = context;
- const u128_t *rk = (u128_t *)&ctx->keyschdec;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES;
- block rkeylast_orig;
- block in0, in1, in2, in3, in4, in5, in6, in7;
- block b0, b1, b2, b3, b4, b5, b6, b7;
- block rkey;
- block iv, b;
-
- if (!ctx->decryption_prepared)
- {
- aes_ppc8_prepare_decryption (ctx);
- ctx->decryption_prepared = 1;
- }
-
- iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
-
- PRELOAD_ROUND_KEYS (rounds);
- rkeylast_orig = rkeylast;
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- in0 = VEC_LOAD_BE_NOSWAP (in, 0);
- in1 = VEC_LOAD_BE_NOSWAP (in, 1);
- in2 = VEC_LOAD_BE_NOSWAP (in, 2);
- in3 = VEC_LOAD_BE_NOSWAP (in, 3);
- in0 = VEC_BE_SWAP (in0, bige_const);
- in1 = VEC_BE_SWAP (in1, bige_const);
- in4 = VEC_LOAD_BE_NOSWAP (in, 4);
- in5 = VEC_LOAD_BE_NOSWAP (in, 5);
- in2 = VEC_BE_SWAP (in2, bige_const);
- in3 = VEC_BE_SWAP (in3, bige_const);
- in6 = VEC_LOAD_BE_NOSWAP (in, 6);
- in7 = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
- b0 = asm_xor (rkey0, in0);
- b1 = asm_xor (rkey0, in1);
- in4 = VEC_BE_SWAP (in4, bige_const);
- in5 = VEC_BE_SWAP (in5, bige_const);
- b2 = asm_xor (rkey0, in2);
- b3 = asm_xor (rkey0, in3);
- in6 = VEC_BE_SWAP (in6, bige_const);
- in7 = VEC_BE_SWAP (in7, bige_const);
- b4 = asm_xor (rkey0, in4);
- b5 = asm_xor (rkey0, in5);
- b6 = asm_xor (rkey0, in6);
- b7 = asm_xor (rkey0, in7);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_ncipher_be (b0, rkey); \
- b1 = asm_ncipher_be (b1, rkey); \
- b2 = asm_ncipher_be (b2, rkey); \
- b3 = asm_ncipher_be (b3, rkey); \
- b4 = asm_ncipher_be (b4, rkey); \
- b5 = asm_ncipher_be (b5, rkey); \
- b6 = asm_ncipher_be (b6, rkey); \
- b7 = asm_ncipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- iv = asm_xor (rkeylast, iv);
- in0 = asm_xor (rkeylast, in0);
- in1 = asm_xor (rkeylast, in1);
- in2 = asm_xor (rkeylast, in2);
- b0 = asm_ncipherlast_be (b0, iv);
- iv = in7;
- b1 = asm_ncipherlast_be (b1, in0);
- in3 = asm_xor (rkeylast, in3);
- in4 = asm_xor (rkeylast, in4);
- b2 = asm_ncipherlast_be (b2, in1);
- b3 = asm_ncipherlast_be (b3, in2);
- in5 = asm_xor (rkeylast, in5);
- in6 = asm_xor (rkeylast, in6);
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b4 = asm_ncipherlast_be (b4, in3);
- b5 = asm_ncipherlast_be (b5, in4);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b6 = asm_ncipherlast_be (b6, in5);
- b7 = asm_ncipherlast_be (b7, in6);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
-
- if (nblocks >= 4)
- {
- in0 = VEC_LOAD_BE (in, 0, bige_const);
- in1 = VEC_LOAD_BE (in, 1, bige_const);
- in2 = VEC_LOAD_BE (in, 2, bige_const);
- in3 = VEC_LOAD_BE (in, 3, bige_const);
-
- b0 = asm_xor (rkey0, in0);
- b1 = asm_xor (rkey0, in1);
- b2 = asm_xor (rkey0, in2);
- b3 = asm_xor (rkey0, in3);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_ncipher_be (b0, rkey); \
- b1 = asm_ncipher_be (b1, rkey); \
- b2 = asm_ncipher_be (b2, rkey); \
- b3 = asm_ncipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- iv = asm_xor (rkeylast, iv);
- in0 = asm_xor (rkeylast, in0);
- in1 = asm_xor (rkeylast, in1);
- in2 = asm_xor (rkeylast, in2);
-
- b0 = asm_ncipherlast_be (b0, iv);
- iv = in3;
- b1 = asm_ncipherlast_be (b1, in0);
- b2 = asm_ncipherlast_be (b2, in1);
- b3 = asm_ncipherlast_be (b3, in2);
-
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- rkeylast = rkeylast_orig ^ iv;
-
- iv = VEC_LOAD_BE (in, 0, bige_const);
- b = iv;
- AES_DECRYPT (b, rounds);
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in++;
- out++;
- }
-
- VEC_STORE_BE (iv_arg, 0, iv, bige_const);
-}
-
-
-void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
-{
- static const unsigned char vec_one_const[16] =
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = context;
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- ROUND_KEY_VARIABLES;
- block rkeylast_orig;
- block ctr, b, one;
-
- ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const);
- one = VEC_LOAD_BE (&vec_one_const, 0, bige_const);
-
- PRELOAD_ROUND_KEYS (rounds);
- rkeylast_orig = rkeylast;
-
- if (nblocks >= 4)
- {
- block in0, in1, in2, in3, in4, in5, in6, in7;
- block b0, b1, b2, b3, b4, b5, b6, b7;
- block two, three, four;
- block rkey;
-
- two = asm_add_uint128 (one, one);
- three = asm_add_uint128 (two, one);
- four = asm_add_uint128 (two, two);
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- b1 = asm_add_uint128 (ctr, one);
- b2 = asm_add_uint128 (ctr, two);
- b3 = asm_add_uint128 (ctr, three);
- b4 = asm_add_uint128 (ctr, four);
- b5 = asm_add_uint128 (b1, four);
- b6 = asm_add_uint128 (b2, four);
- b7 = asm_add_uint128 (b3, four);
- b0 = asm_xor (rkey0, ctr);
- rkey = ALIGNED_LOAD (rk, 1);
- ctr = asm_add_uint128 (b4, four);
- b1 = asm_xor (rkey0, b1);
- b2 = asm_xor (rkey0, b2);
- b3 = asm_xor (rkey0, b3);
- b0 = asm_cipher_be (b0, rkey);
- b1 = asm_cipher_be (b1, rkey);
- b2 = asm_cipher_be (b2, rkey);
- b3 = asm_cipher_be (b3, rkey);
- b4 = asm_xor (rkey0, b4);
- b5 = asm_xor (rkey0, b5);
- b6 = asm_xor (rkey0, b6);
- b7 = asm_xor (rkey0, b7);
- b4 = asm_cipher_be (b4, rkey);
- b5 = asm_cipher_be (b5, rkey);
- b6 = asm_cipher_be (b6, rkey);
- b7 = asm_cipher_be (b7, rkey);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey); \
- b4 = asm_cipher_be (b4, rkey); \
- b5 = asm_cipher_be (b5, rkey); \
- b6 = asm_cipher_be (b6, rkey); \
- b7 = asm_cipher_be (b7, rkey);
-
- in0 = VEC_LOAD_BE_NOSWAP (in, 0);
- DO_ROUND(2);
- in1 = VEC_LOAD_BE_NOSWAP (in, 1);
- DO_ROUND(3);
- in2 = VEC_LOAD_BE_NOSWAP (in, 2);
- DO_ROUND(4);
- in3 = VEC_LOAD_BE_NOSWAP (in, 3);
- DO_ROUND(5);
- in4 = VEC_LOAD_BE_NOSWAP (in, 4);
- DO_ROUND(6);
- in5 = VEC_LOAD_BE_NOSWAP (in, 5);
- DO_ROUND(7);
- in6 = VEC_LOAD_BE_NOSWAP (in, 6);
- DO_ROUND(8);
- in7 = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
- DO_ROUND(9);
-
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- in0 = VEC_BE_SWAP (in0, bige_const);
- in1 = VEC_BE_SWAP (in1, bige_const);
- in2 = VEC_BE_SWAP (in2, bige_const);
- in3 = VEC_BE_SWAP (in3, bige_const);
- in4 = VEC_BE_SWAP (in4, bige_const);
- in5 = VEC_BE_SWAP (in5, bige_const);
- in6 = VEC_BE_SWAP (in6, bige_const);
- in7 = VEC_BE_SWAP (in7, bige_const);
-
- in0 = asm_xor (rkeylast, in0);
- in1 = asm_xor (rkeylast, in1);
- in2 = asm_xor (rkeylast, in2);
- in3 = asm_xor (rkeylast, in3);
- b0 = asm_cipherlast_be (b0, in0);
- b1 = asm_cipherlast_be (b1, in1);
- in4 = asm_xor (rkeylast, in4);
- in5 = asm_xor (rkeylast, in5);
- b2 = asm_cipherlast_be (b2, in2);
- b3 = asm_cipherlast_be (b3, in3);
- in6 = asm_xor (rkeylast, in6);
- in7 = asm_xor (rkeylast, in7);
- b4 = asm_cipherlast_be (b4, in4);
- b5 = asm_cipherlast_be (b5, in5);
- b6 = asm_cipherlast_be (b6, in6);
- b7 = asm_cipherlast_be (b7, in7);
-
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
-
- if (nblocks >= 4)
- {
- b1 = asm_add_uint128 (ctr, one);
- b2 = asm_add_uint128 (ctr, two);
- b3 = asm_add_uint128 (ctr, three);
- b0 = asm_xor (rkey0, ctr);
- ctr = asm_add_uint128 (ctr, four);
- b1 = asm_xor (rkey0, b1);
- b2 = asm_xor (rkey0, b2);
- b3 = asm_xor (rkey0, b3);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
-
- in0 = VEC_LOAD_BE (in, 0, bige_const);
- in1 = VEC_LOAD_BE (in, 1, bige_const);
- in2 = VEC_LOAD_BE (in, 2, bige_const);
- in3 = VEC_LOAD_BE (in, 3, bige_const);
-
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- in0 = asm_xor (rkeylast, in0);
- in1 = asm_xor (rkeylast, in1);
- in2 = asm_xor (rkeylast, in2);
- in3 = asm_xor (rkeylast, in3);
-
- b0 = asm_cipherlast_be (b0, in0);
- b1 = asm_cipherlast_be (b1, in1);
- b2 = asm_cipherlast_be (b2, in2);
- b3 = asm_cipherlast_be (b3, in3);
-
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
- }
-
- for (; nblocks; nblocks--)
- {
- b = ctr;
- ctr = asm_add_uint128 (ctr, one);
- rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
-
- AES_ENCRYPT (b, rounds);
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- out++;
- in++;
- }
-
- VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
-}
-
-
-size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
- const void *inbuf_arg, size_t nblocks,
- int encrypt)
-{
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = (void *)&c->context.c;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- u64 data_nblocks = c->u_mode.ocb.data_nblocks;
- block l0, l1, l2, l;
- block b0, b1, b2, b3, b4, b5, b6, b7, b;
- block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
- block rkey, rkeylf;
- block ctr, iv;
- ROUND_KEY_VARIABLES;
-
- iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const);
- ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const);
-
- l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
- l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
- l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
-
- if (encrypt)
- {
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
-
- PRELOAD_ROUND_KEYS (rounds);
-
- for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
- {
- l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
- b = VEC_LOAD_BE (in, 0, bige_const);
-
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- iv ^= l;
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ctr ^= b;
- /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
- b ^= iv;
- AES_ENCRYPT (b, rounds);
- b ^= iv;
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in += 1;
- out += 1;
- }
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- b0 = VEC_LOAD_BE_NOSWAP (in, 0);
- b1 = VEC_LOAD_BE_NOSWAP (in, 1);
- b2 = VEC_LOAD_BE_NOSWAP (in, 2);
- b3 = VEC_LOAD_BE_NOSWAP (in, 3);
- b4 = VEC_LOAD_BE_NOSWAP (in, 4);
- b5 = VEC_LOAD_BE_NOSWAP (in, 5);
- b6 = VEC_LOAD_BE_NOSWAP (in, 6);
- b7 = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
- l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
- b0 = VEC_BE_SWAP(b0, bige_const);
- b1 = VEC_BE_SWAP(b1, bige_const);
- b2 = VEC_BE_SWAP(b2, bige_const);
- b3 = VEC_BE_SWAP(b3, bige_const);
- b4 = VEC_BE_SWAP(b4, bige_const);
- b5 = VEC_BE_SWAP(b5, bige_const);
- b6 = VEC_BE_SWAP(b6, bige_const);
- b7 = VEC_BE_SWAP(b7, bige_const);
- l = VEC_BE_SWAP(l, bige_const);
-
- ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
-
- iv ^= rkey0;
-
- iv0 = iv ^ l0;
- iv1 = iv ^ l0 ^ l1;
- iv2 = iv ^ l1;
- iv3 = iv ^ l1 ^ l2;
- iv4 = iv ^ l1 ^ l2 ^ l0;
- iv5 = iv ^ l2 ^ l0;
- iv6 = iv ^ l2;
- iv7 = iv ^ l2 ^ l;
-
- b0 ^= iv0;
- b1 ^= iv1;
- b2 ^= iv2;
- b3 ^= iv3;
- b4 ^= iv4;
- b5 ^= iv5;
- b6 ^= iv6;
- b7 ^= iv7;
- iv = iv7 ^ rkey0;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey); \
- b4 = asm_cipher_be (b4, rkey); \
- b5 = asm_cipher_be (b5, rkey); \
- b6 = asm_cipher_be (b6, rkey); \
- b7 = asm_cipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
-
- rkeylf = asm_xor (rkeylast, rkey0);
-
- DO_ROUND(8);
-
- iv0 = asm_xor (rkeylf, iv0);
- iv1 = asm_xor (rkeylf, iv1);
- iv2 = asm_xor (rkeylf, iv2);
- iv3 = asm_xor (rkeylf, iv3);
- iv4 = asm_xor (rkeylf, iv4);
- iv5 = asm_xor (rkeylf, iv5);
- iv6 = asm_xor (rkeylf, iv6);
- iv7 = asm_xor (rkeylf, iv7);
-
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- b0 = asm_cipherlast_be (b0, iv0);
- b1 = asm_cipherlast_be (b1, iv1);
- b2 = asm_cipherlast_be (b2, iv2);
- b3 = asm_cipherlast_be (b3, iv3);
- b4 = asm_cipherlast_be (b4, iv4);
- b5 = asm_cipherlast_be (b5, iv5);
- b6 = asm_cipherlast_be (b6, iv6);
- b7 = asm_cipherlast_be (b7, iv7);
-
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
-
- if (nblocks >= 4 && (data_nblocks % 4) == 0)
- {
- b0 = VEC_LOAD_BE (in, 0, bige_const);
- b1 = VEC_LOAD_BE (in, 1, bige_const);
- b2 = VEC_LOAD_BE (in, 2, bige_const);
- b3 = VEC_LOAD_BE (in, 3, bige_const);
-
- l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
-
- ctr ^= b0 ^ b1 ^ b2 ^ b3;
-
- iv ^= rkey0;
-
- iv0 = iv ^ l0;
- iv1 = iv ^ l0 ^ l1;
- iv2 = iv ^ l1;
- iv3 = iv ^ l1 ^ l;
-
- b0 ^= iv0;
- b1 ^= iv1;
- b2 ^= iv2;
- b3 ^= iv3;
- iv = iv3 ^ rkey0;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- rkey = rkeylast ^ rkey0;
- b0 = asm_cipherlast_be (b0, rkey ^ iv0);
- b1 = asm_cipherlast_be (b1, rkey ^ iv1);
- b2 = asm_cipherlast_be (b2, rkey ^ iv2);
- b3 = asm_cipherlast_be (b3, rkey ^ iv3);
-
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
- b = VEC_LOAD_BE (in, 0, bige_const);
-
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- iv ^= l;
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ctr ^= b;
- /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
- b ^= iv;
- AES_ENCRYPT (b, rounds);
- b ^= iv;
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in += 1;
- out += 1;
- }
- }
- else
- {
- const u128_t *rk = (u128_t *)&ctx->keyschdec;
-
- if (!ctx->decryption_prepared)
- {
- aes_ppc8_prepare_decryption (ctx);
- ctx->decryption_prepared = 1;
- }
-
- PRELOAD_ROUND_KEYS (rounds);
-
- for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
- {
- l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
- b = VEC_LOAD_BE (in, 0, bige_const);
-
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- iv ^= l;
- /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
- b ^= iv;
- AES_DECRYPT (b, rounds);
- b ^= iv;
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ctr ^= b;
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in += 1;
- out += 1;
- }
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- b0 = VEC_LOAD_BE_NOSWAP (in, 0);
- b1 = VEC_LOAD_BE_NOSWAP (in, 1);
- b2 = VEC_LOAD_BE_NOSWAP (in, 2);
- b3 = VEC_LOAD_BE_NOSWAP (in, 3);
- b4 = VEC_LOAD_BE_NOSWAP (in, 4);
- b5 = VEC_LOAD_BE_NOSWAP (in, 5);
- b6 = VEC_LOAD_BE_NOSWAP (in, 6);
- b7 = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
- l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
- b0 = VEC_BE_SWAP(b0, bige_const);
- b1 = VEC_BE_SWAP(b1, bige_const);
- b2 = VEC_BE_SWAP(b2, bige_const);
- b3 = VEC_BE_SWAP(b3, bige_const);
- b4 = VEC_BE_SWAP(b4, bige_const);
- b5 = VEC_BE_SWAP(b5, bige_const);
- b6 = VEC_BE_SWAP(b6, bige_const);
- b7 = VEC_BE_SWAP(b7, bige_const);
- l = VEC_BE_SWAP(l, bige_const);
-
- iv ^= rkey0;
-
- iv0 = iv ^ l0;
- iv1 = iv ^ l0 ^ l1;
- iv2 = iv ^ l1;
- iv3 = iv ^ l1 ^ l2;
- iv4 = iv ^ l1 ^ l2 ^ l0;
- iv5 = iv ^ l2 ^ l0;
- iv6 = iv ^ l2;
- iv7 = iv ^ l2 ^ l;
-
- b0 ^= iv0;
- b1 ^= iv1;
- b2 ^= iv2;
- b3 ^= iv3;
- b4 ^= iv4;
- b5 ^= iv5;
- b6 ^= iv6;
- b7 ^= iv7;
- iv = iv7 ^ rkey0;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_ncipher_be (b0, rkey); \
- b1 = asm_ncipher_be (b1, rkey); \
- b2 = asm_ncipher_be (b2, rkey); \
- b3 = asm_ncipher_be (b3, rkey); \
- b4 = asm_ncipher_be (b4, rkey); \
- b5 = asm_ncipher_be (b5, rkey); \
- b6 = asm_ncipher_be (b6, rkey); \
- b7 = asm_ncipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
-
- rkeylf = asm_xor (rkeylast, rkey0);
-
- DO_ROUND(8);
-
- iv0 = asm_xor (rkeylf, iv0);
- iv1 = asm_xor (rkeylf, iv1);
- iv2 = asm_xor (rkeylf, iv2);
- iv3 = asm_xor (rkeylf, iv3);
- iv4 = asm_xor (rkeylf, iv4);
- iv5 = asm_xor (rkeylf, iv5);
- iv6 = asm_xor (rkeylf, iv6);
- iv7 = asm_xor (rkeylf, iv7);
-
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- b0 = asm_ncipherlast_be (b0, iv0);
- b1 = asm_ncipherlast_be (b1, iv1);
- b2 = asm_ncipherlast_be (b2, iv2);
- b3 = asm_ncipherlast_be (b3, iv3);
- b4 = asm_ncipherlast_be (b4, iv4);
- b5 = asm_ncipherlast_be (b5, iv5);
- b6 = asm_ncipherlast_be (b6, iv6);
- b7 = asm_ncipherlast_be (b7, iv7);
-
- ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
-
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
-
- if (nblocks >= 4 && (data_nblocks % 4) == 0)
- {
- b0 = VEC_LOAD_BE (in, 0, bige_const);
- b1 = VEC_LOAD_BE (in, 1, bige_const);
- b2 = VEC_LOAD_BE (in, 2, bige_const);
- b3 = VEC_LOAD_BE (in, 3, bige_const);
-
- l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
-
- iv ^= rkey0;
-
- iv0 = iv ^ l0;
- iv1 = iv ^ l0 ^ l1;
- iv2 = iv ^ l1;
- iv3 = iv ^ l1 ^ l;
-
- b0 ^= iv0;
- b1 ^= iv1;
- b2 ^= iv2;
- b3 ^= iv3;
- iv = iv3 ^ rkey0;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_ncipher_be (b0, rkey); \
- b1 = asm_ncipher_be (b1, rkey); \
- b2 = asm_ncipher_be (b2, rkey); \
- b3 = asm_ncipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- rkey = rkeylast ^ rkey0;
- b0 = asm_ncipherlast_be (b0, rkey ^ iv0);
- b1 = asm_ncipherlast_be (b1, rkey ^ iv1);
- b2 = asm_ncipherlast_be (b2, rkey ^ iv2);
- b3 = asm_ncipherlast_be (b3, rkey ^ iv3);
-
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- ctr ^= b0 ^ b1 ^ b2 ^ b3;
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
- b = VEC_LOAD_BE (in, 0, bige_const);
-
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- iv ^= l;
- /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
- b ^= iv;
- AES_DECRYPT (b, rounds);
- b ^= iv;
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ctr ^= b;
-
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in += 1;
- out += 1;
- }
- }
-
- VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const);
- VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
- c->u_mode.ocb.data_nblocks = data_nblocks;
-
- return 0;
-}
-
-size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
- size_t nblocks)
-{
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = (void *)&c->context.c;
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
- const u128_t *abuf = (const u128_t *)abuf_arg;
- int rounds = ctx->rounds;
- u64 data_nblocks = c->u_mode.ocb.aad_nblocks;
- block l0, l1, l2, l;
- block b0, b1, b2, b3, b4, b5, b6, b7, b;
- block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
- block rkey, frkey;
- block ctr, iv;
- ROUND_KEY_VARIABLES;
-
- iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const);
- ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const);
-
- l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
- l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
- l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
-
- PRELOAD_ROUND_KEYS (rounds);
-
- for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
- {
- l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
- b = VEC_LOAD_BE (abuf, 0, bige_const);
-
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- iv ^= l;
- /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
- b ^= iv;
- AES_ENCRYPT (b, rounds);
- ctr ^= b;
-
- abuf += 1;
- }
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- b0 = VEC_LOAD_BE (abuf, 0, bige_const);
- b1 = VEC_LOAD_BE (abuf, 1, bige_const);
- b2 = VEC_LOAD_BE (abuf, 2, bige_const);
- b3 = VEC_LOAD_BE (abuf, 3, bige_const);
- b4 = VEC_LOAD_BE (abuf, 4, bige_const);
- b5 = VEC_LOAD_BE (abuf, 5, bige_const);
- b6 = VEC_LOAD_BE (abuf, 6, bige_const);
- b7 = VEC_LOAD_BE (abuf, 7, bige_const);
-
- l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const);
-
- frkey = rkey0;
- iv ^= frkey;
-
- iv0 = iv ^ l0;
- iv1 = iv ^ l0 ^ l1;
- iv2 = iv ^ l1;
- iv3 = iv ^ l1 ^ l2;
- iv4 = iv ^ l1 ^ l2 ^ l0;
- iv5 = iv ^ l2 ^ l0;
- iv6 = iv ^ l2;
- iv7 = iv ^ l2 ^ l;
-
- b0 ^= iv0;
- b1 ^= iv1;
- b2 ^= iv2;
- b3 ^= iv3;
- b4 ^= iv4;
- b5 ^= iv5;
- b6 ^= iv6;
- b7 ^= iv7;
- iv = iv7 ^ frkey;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey); \
- b4 = asm_cipher_be (b4, rkey); \
- b5 = asm_cipher_be (b5, rkey); \
- b6 = asm_cipher_be (b6, rkey); \
- b7 = asm_cipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- rkey = rkeylast;
- b0 = asm_cipherlast_be (b0, rkey);
- b1 = asm_cipherlast_be (b1, rkey);
- b2 = asm_cipherlast_be (b2, rkey);
- b3 = asm_cipherlast_be (b3, rkey);
- b4 = asm_cipherlast_be (b4, rkey);
- b5 = asm_cipherlast_be (b5, rkey);
- b6 = asm_cipherlast_be (b6, rkey);
- b7 = asm_cipherlast_be (b7, rkey);
-
- ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
-
- abuf += 8;
- }
-
- if (nblocks >= 4 && (data_nblocks % 4) == 0)
- {
- b0 = VEC_LOAD_BE (abuf, 0, bige_const);
- b1 = VEC_LOAD_BE (abuf, 1, bige_const);
- b2 = VEC_LOAD_BE (abuf, 2, bige_const);
- b3 = VEC_LOAD_BE (abuf, 3, bige_const);
-
- l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
-
- frkey = rkey0;
- iv ^= frkey;
-
- iv0 = iv ^ l0;
- iv1 = iv ^ l0 ^ l1;
- iv2 = iv ^ l1;
- iv3 = iv ^ l1 ^ l;
-
- b0 ^= iv0;
- b1 ^= iv1;
- b2 ^= iv2;
- b3 ^= iv3;
- iv = iv3 ^ frkey;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- rkey = rkeylast;
- b0 = asm_cipherlast_be (b0, rkey);
- b1 = asm_cipherlast_be (b1, rkey);
- b2 = asm_cipherlast_be (b2, rkey);
- b3 = asm_cipherlast_be (b3, rkey);
-
- ctr ^= b0 ^ b1 ^ b2 ^ b3;
-
- abuf += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
- b = VEC_LOAD_BE (abuf, 0, bige_const);
-
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- iv ^= l;
- /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
- b ^= iv;
- AES_ENCRYPT (b, rounds);
- ctr ^= b;
-
- abuf += 1;
- }
-
- VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const);
- VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
- c->u_mode.ocb.aad_nblocks = data_nblocks;
-
- return 0;
+ internal_aes_ppc_prepare_decryption (ctx);
}


-void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks, int encrypt)
-{
-#ifdef WORDS_BIGENDIAN
- static const block vec_bswap64_const =
- { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
- static const block vec_bswap128_const =
- { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
-#else
- static const block vec_bswap64_const =
- { ~8, ~9, ~10, ~11, ~12, ~13, ~14, ~15, ~0, ~1, ~2, ~3, ~4, ~5, ~6, ~7 };
- static const block vec_bswap128_const =
- { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 };
- static const block vec_tweakin_swap_const =
- { ~12, ~13, ~14, ~15, ~8, ~9, ~10, ~11, ~4, ~5, ~6, ~7, ~0, ~1, ~2, ~3 };
-#endif
- static const unsigned char vec_tweak_const[16] =
- { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
- static const vector unsigned long long vec_shift63_const =
- { 63, 63 };
- static const vector unsigned long long vec_shift1_const =
- { 1, 1 };
- const block bige_const = asm_load_be_const();
- RIJNDAEL_context *ctx = context;
- const u128_t *in = (const u128_t *)inbuf_arg;
- u128_t *out = (u128_t *)outbuf_arg;
- int rounds = ctx->rounds;
- block tweak;
- block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf;
- block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
- block tweak_const, bswap64_const, bswap128_const;
- vector unsigned long long shift63_const, shift1_const;
- ROUND_KEY_VARIABLES;
-
- tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const);
- bswap64_const = ALIGNED_LOAD (&vec_bswap64_const, 0);
- bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0);
- shift63_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift63_const, 0);
- shift1_const = (vector unsigned long long)ALIGNED_LOAD (&vec_shift1_const, 0);
-
-#ifdef WORDS_BIGENDIAN
- tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const);
- tweak = asm_vperm1 (tweak, bswap128_const);
-#else
- tweak = VEC_LOAD_BE (tweak_arg, 0, vec_tweakin_swap_const);
-#endif
-
-#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \
- do { \
- block tmp1, tmp2; \
- tmp1 = asm_vperm1((tin), bswap64_const); \
- tmp2 = (block)vec_sl((vector unsigned long long)(tin), shift1_const); \
- tmp1 = (block)(vec_sra((vector unsigned long long)tmp1, shift63_const)) & \
- tweak_const; \
- tout = asm_xor(tmp1, tmp2); \
- } while (0)
-
- if (encrypt)
- {
- const u128_t *rk = (u128_t *)&ctx->keyschenc;
-
- PRELOAD_ROUND_KEYS (rounds);
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- b0 = VEC_LOAD_BE_NOSWAP (in, 0);
- b1 = VEC_LOAD_BE_NOSWAP (in, 1);
- b2 = VEC_LOAD_BE_NOSWAP (in, 2);
- b3 = VEC_LOAD_BE_NOSWAP (in, 3);
- tweak0 = tweak;
- GEN_TWEAK (tweak1, tweak0);
- tweak0 = asm_vperm1 (tweak0, bswap128_const);
- b4 = VEC_LOAD_BE_NOSWAP (in, 4);
- b5 = VEC_LOAD_BE_NOSWAP (in, 5);
- GEN_TWEAK (tweak2, tweak1);
- tweak1 = asm_vperm1 (tweak1, bswap128_const);
- b6 = VEC_LOAD_BE_NOSWAP (in, 6);
- b7 = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
-
- b0 = VEC_BE_SWAP(b0, bige_const);
- b1 = VEC_BE_SWAP(b1, bige_const);
- GEN_TWEAK (tweak3, tweak2);
- tweak2 = asm_vperm1 (tweak2, bswap128_const);
- GEN_TWEAK (tweak4, tweak3);
- tweak3 = asm_vperm1 (tweak3, bswap128_const);
- b2 = VEC_BE_SWAP(b2, bige_const);
- b3 = VEC_BE_SWAP(b3, bige_const);
- GEN_TWEAK (tweak5, tweak4);
- tweak4 = asm_vperm1 (tweak4, bswap128_const);
- GEN_TWEAK (tweak6, tweak5);
- tweak5 = asm_vperm1 (tweak5, bswap128_const);
- b4 = VEC_BE_SWAP(b4, bige_const);
- b5 = VEC_BE_SWAP(b5, bige_const);
- GEN_TWEAK (tweak7, tweak6);
- tweak6 = asm_vperm1 (tweak6, bswap128_const);
- GEN_TWEAK (tweak, tweak7);
- tweak7 = asm_vperm1 (tweak7, bswap128_const);
- b6 = VEC_BE_SWAP(b6, bige_const);
- b7 = VEC_BE_SWAP(b7, bige_const);
-
- tweak0 = asm_xor (tweak0, rkey0);
- tweak1 = asm_xor (tweak1, rkey0);
- tweak2 = asm_xor (tweak2, rkey0);
- tweak3 = asm_xor (tweak3, rkey0);
- tweak4 = asm_xor (tweak4, rkey0);
- tweak5 = asm_xor (tweak5, rkey0);
- tweak6 = asm_xor (tweak6, rkey0);
- tweak7 = asm_xor (tweak7, rkey0);
-
- b0 = asm_xor (b0, tweak0);
- b1 = asm_xor (b1, tweak1);
- b2 = asm_xor (b2, tweak2);
- b3 = asm_xor (b3, tweak3);
- b4 = asm_xor (b4, tweak4);
- b5 = asm_xor (b5, tweak5);
- b6 = asm_xor (b6, tweak6);
- b7 = asm_xor (b7, tweak7);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey); \
- b4 = asm_cipher_be (b4, rkey); \
- b5 = asm_cipher_be (b5, rkey); \
- b6 = asm_cipher_be (b6, rkey); \
- b7 = asm_cipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
-
- rkeylf = asm_xor (rkeylast, rkey0);
-
- DO_ROUND(8);
-
- tweak0 = asm_xor (tweak0, rkeylf);
- tweak1 = asm_xor (tweak1, rkeylf);
- tweak2 = asm_xor (tweak2, rkeylf);
- tweak3 = asm_xor (tweak3, rkeylf);
- tweak4 = asm_xor (tweak4, rkeylf);
- tweak5 = asm_xor (tweak5, rkeylf);
- tweak6 = asm_xor (tweak6, rkeylf);
- tweak7 = asm_xor (tweak7, rkeylf);
-
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- b0 = asm_cipherlast_be (b0, tweak0);
- b1 = asm_cipherlast_be (b1, tweak1);
- b2 = asm_cipherlast_be (b2, tweak2);
- b3 = asm_cipherlast_be (b3, tweak3);
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b4 = asm_cipherlast_be (b4, tweak4);
- b5 = asm_cipherlast_be (b5, tweak5);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b6 = asm_cipherlast_be (b6, tweak6);
- b7 = asm_cipherlast_be (b7, tweak7);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
+#define GCRY_AES_PPC8 1
+#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc8_encrypt
+#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc8_decrypt
+#define CFB_ENC_FUNC _gcry_aes_ppc8_cfb_enc
+#define CFB_DEC_FUNC _gcry_aes_ppc8_cfb_dec
+#define CBC_ENC_FUNC _gcry_aes_ppc8_cbc_enc
+#define CBC_DEC_FUNC _gcry_aes_ppc8_cbc_dec
+#define CTR_ENC_FUNC _gcry_aes_ppc8_ctr_enc
+#define OCB_CRYPT_FUNC _gcry_aes_ppc8_ocb_crypt
+#define OCB_AUTH_FUNC _gcry_aes_ppc8_ocb_auth
+#define XTS_CRYPT_FUNC _gcry_aes_ppc8_xts_crypt

- if (nblocks >= 4)
- {
- tweak0 = tweak;
- GEN_TWEAK (tweak1, tweak0);
- GEN_TWEAK (tweak2, tweak1);
- GEN_TWEAK (tweak3, tweak2);
- GEN_TWEAK (tweak, tweak3);
-
- b0 = VEC_LOAD_BE (in, 0, bige_const);
- b1 = VEC_LOAD_BE (in, 1, bige_const);
- b2 = VEC_LOAD_BE (in, 2, bige_const);
- b3 = VEC_LOAD_BE (in, 3, bige_const);
-
- tweak0 = asm_vperm1 (tweak0, bswap128_const);
- tweak1 = asm_vperm1 (tweak1, bswap128_const);
- tweak2 = asm_vperm1 (tweak2, bswap128_const);
- tweak3 = asm_vperm1 (tweak3, bswap128_const);
-
- b0 ^= tweak0 ^ rkey0;
- b1 ^= tweak1 ^ rkey0;
- b2 ^= tweak2 ^ rkey0;
- b3 ^= tweak3 ^ rkey0;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_cipher_be (b0, rkey); \
- b1 = asm_cipher_be (b1, rkey); \
- b2 = asm_cipher_be (b2, rkey); \
- b3 = asm_cipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- rkey = rkeylast;
- b0 = asm_cipherlast_be (b0, rkey ^ tweak0);
- b1 = asm_cipherlast_be (b1, rkey ^ tweak1);
- b2 = asm_cipherlast_be (b2, rkey ^ tweak2);
- b3 = asm_cipherlast_be (b3, rkey ^ tweak3);
-
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- tweak0 = asm_vperm1 (tweak, bswap128_const);
-
- /* Xor-Encrypt/Decrypt-Xor block. */
- b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
-
- /* Generate next tweak. */
- GEN_TWEAK (tweak, tweak);
-
- AES_ENCRYPT (b, rounds);
-
- b ^= tweak0;
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in++;
- out++;
- }
- }
- else
- {
- const u128_t *rk = (u128_t *)&ctx->keyschdec;
-
- if (!ctx->decryption_prepared)
- {
- aes_ppc8_prepare_decryption (ctx);
- ctx->decryption_prepared = 1;
- }
-
- PRELOAD_ROUND_KEYS (rounds);
-
- for (; nblocks >= 8; nblocks -= 8)
- {
- b0 = VEC_LOAD_BE_NOSWAP (in, 0);
- b1 = VEC_LOAD_BE_NOSWAP (in, 1);
- b2 = VEC_LOAD_BE_NOSWAP (in, 2);
- b3 = VEC_LOAD_BE_NOSWAP (in, 3);
- tweak0 = tweak;
- GEN_TWEAK (tweak1, tweak0);
- tweak0 = asm_vperm1 (tweak0, bswap128_const);
- b4 = VEC_LOAD_BE_NOSWAP (in, 4);
- b5 = VEC_LOAD_BE_NOSWAP (in, 5);
- GEN_TWEAK (tweak2, tweak1);
- tweak1 = asm_vperm1 (tweak1, bswap128_const);
- b6 = VEC_LOAD_BE_NOSWAP (in, 6);
- b7 = VEC_LOAD_BE_NOSWAP (in, 7);
- in += 8;
-
- b0 = VEC_BE_SWAP(b0, bige_const);
- b1 = VEC_BE_SWAP(b1, bige_const);
- GEN_TWEAK (tweak3, tweak2);
- tweak2 = asm_vperm1 (tweak2, bswap128_const);
- GEN_TWEAK (tweak4, tweak3);
- tweak3 = asm_vperm1 (tweak3, bswap128_const);
- b2 = VEC_BE_SWAP(b2, bige_const);
- b3 = VEC_BE_SWAP(b3, bige_const);
- GEN_TWEAK (tweak5, tweak4);
- tweak4 = asm_vperm1 (tweak4, bswap128_const);
- GEN_TWEAK (tweak6, tweak5);
- tweak5 = asm_vperm1 (tweak5, bswap128_const);
- b4 = VEC_BE_SWAP(b4, bige_const);
- b5 = VEC_BE_SWAP(b5, bige_const);
- GEN_TWEAK (tweak7, tweak6);
- tweak6 = asm_vperm1 (tweak6, bswap128_const);
- GEN_TWEAK (tweak, tweak7);
- tweak7 = asm_vperm1 (tweak7, bswap128_const);
- b6 = VEC_BE_SWAP(b6, bige_const);
- b7 = VEC_BE_SWAP(b7, bige_const);
-
- tweak0 = asm_xor (tweak0, rkey0);
- tweak1 = asm_xor (tweak1, rkey0);
- tweak2 = asm_xor (tweak2, rkey0);
- tweak3 = asm_xor (tweak3, rkey0);
- tweak4 = asm_xor (tweak4, rkey0);
- tweak5 = asm_xor (tweak5, rkey0);
- tweak6 = asm_xor (tweak6, rkey0);
- tweak7 = asm_xor (tweak7, rkey0);
-
- b0 = asm_xor (b0, tweak0);
- b1 = asm_xor (b1, tweak1);
- b2 = asm_xor (b2, tweak2);
- b3 = asm_xor (b3, tweak3);
- b4 = asm_xor (b4, tweak4);
- b5 = asm_xor (b5, tweak5);
- b6 = asm_xor (b6, tweak6);
- b7 = asm_xor (b7, tweak7);
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_ncipher_be (b0, rkey); \
- b1 = asm_ncipher_be (b1, rkey); \
- b2 = asm_ncipher_be (b2, rkey); \
- b3 = asm_ncipher_be (b3, rkey); \
- b4 = asm_ncipher_be (b4, rkey); \
- b5 = asm_ncipher_be (b5, rkey); \
- b6 = asm_ncipher_be (b6, rkey); \
- b7 = asm_ncipher_be (b7, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
-
- rkeylf = asm_xor (rkeylast, rkey0);
-
- DO_ROUND(8);
-
- tweak0 = asm_xor (tweak0, rkeylf);
- tweak1 = asm_xor (tweak1, rkeylf);
- tweak2 = asm_xor (tweak2, rkeylf);
- tweak3 = asm_xor (tweak3, rkeylf);
- tweak4 = asm_xor (tweak4, rkeylf);
- tweak5 = asm_xor (tweak5, rkeylf);
- tweak6 = asm_xor (tweak6, rkeylf);
- tweak7 = asm_xor (tweak7, rkeylf);
-
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- b0 = asm_ncipherlast_be (b0, tweak0);
- b1 = asm_ncipherlast_be (b1, tweak1);
- b2 = asm_ncipherlast_be (b2, tweak2);
- b3 = asm_ncipherlast_be (b3, tweak3);
- b0 = VEC_BE_SWAP (b0, bige_const);
- b1 = VEC_BE_SWAP (b1, bige_const);
- b4 = asm_ncipherlast_be (b4, tweak4);
- b5 = asm_ncipherlast_be (b5, tweak5);
- b2 = VEC_BE_SWAP (b2, bige_const);
- b3 = VEC_BE_SWAP (b3, bige_const);
- b6 = asm_ncipherlast_be (b6, tweak6);
- b7 = asm_ncipherlast_be (b7, tweak7);
- VEC_STORE_BE_NOSWAP (out, 0, b0);
- VEC_STORE_BE_NOSWAP (out, 1, b1);
- b4 = VEC_BE_SWAP (b4, bige_const);
- b5 = VEC_BE_SWAP (b5, bige_const);
- VEC_STORE_BE_NOSWAP (out, 2, b2);
- VEC_STORE_BE_NOSWAP (out, 3, b3);
- b6 = VEC_BE_SWAP (b6, bige_const);
- b7 = VEC_BE_SWAP (b7, bige_const);
- VEC_STORE_BE_NOSWAP (out, 4, b4);
- VEC_STORE_BE_NOSWAP (out, 5, b5);
- VEC_STORE_BE_NOSWAP (out, 6, b6);
- VEC_STORE_BE_NOSWAP (out, 7, b7);
- out += 8;
- }
-
- if (nblocks >= 4)
- {
- tweak0 = tweak;
- GEN_TWEAK (tweak1, tweak0);
- GEN_TWEAK (tweak2, tweak1);
- GEN_TWEAK (tweak3, tweak2);
- GEN_TWEAK (tweak, tweak3);
-
- b0 = VEC_LOAD_BE (in, 0, bige_const);
- b1 = VEC_LOAD_BE (in, 1, bige_const);
- b2 = VEC_LOAD_BE (in, 2, bige_const);
- b3 = VEC_LOAD_BE (in, 3, bige_const);
-
- tweak0 = asm_vperm1 (tweak0, bswap128_const);
- tweak1 = asm_vperm1 (tweak1, bswap128_const);
- tweak2 = asm_vperm1 (tweak2, bswap128_const);
- tweak3 = asm_vperm1 (tweak3, bswap128_const);
-
- b0 ^= tweak0 ^ rkey0;
- b1 ^= tweak1 ^ rkey0;
- b2 ^= tweak2 ^ rkey0;
- b3 ^= tweak3 ^ rkey0;
-
-#define DO_ROUND(r) \
- rkey = ALIGNED_LOAD (rk, r); \
- b0 = asm_ncipher_be (b0, rkey); \
- b1 = asm_ncipher_be (b1, rkey); \
- b2 = asm_ncipher_be (b2, rkey); \
- b3 = asm_ncipher_be (b3, rkey);
-
- DO_ROUND(1);
- DO_ROUND(2);
- DO_ROUND(3);
- DO_ROUND(4);
- DO_ROUND(5);
- DO_ROUND(6);
- DO_ROUND(7);
- DO_ROUND(8);
- DO_ROUND(9);
- if (rounds >= 12)
- {
- DO_ROUND(10);
- DO_ROUND(11);
- if (rounds > 12)
- {
- DO_ROUND(12);
- DO_ROUND(13);
- }
- }
-
-#undef DO_ROUND
-
- rkey = rkeylast;
- b0 = asm_ncipherlast_be (b0, rkey ^ tweak0);
- b1 = asm_ncipherlast_be (b1, rkey ^ tweak1);
- b2 = asm_ncipherlast_be (b2, rkey ^ tweak2);
- b3 = asm_ncipherlast_be (b3, rkey ^ tweak3);
-
- VEC_STORE_BE (out, 0, b0, bige_const);
- VEC_STORE_BE (out, 1, b1, bige_const);
- VEC_STORE_BE (out, 2, b2, bige_const);
- VEC_STORE_BE (out, 3, b3, bige_const);
-
- in += 4;
- out += 4;
- nblocks -= 4;
- }
-
- for (; nblocks; nblocks--)
- {
- tweak0 = asm_vperm1 (tweak, bswap128_const);
-
- /* Xor-Encrypt/Decrypt-Xor block. */
- b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
-
- /* Generate next tweak. */
- GEN_TWEAK (tweak, tweak);
-
- AES_DECRYPT (b, rounds);
-
- b ^= tweak0;
- VEC_STORE_BE (out, 0, b, bige_const);
-
- in++;
- out++;
- }
- }
-
-#ifdef WORDS_BIGENDIAN
- tweak = asm_vperm1 (tweak, bswap128_const);
- VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);
-#else
- VEC_STORE_BE (tweak_arg, 0, tweak, vec_tweakin_swap_const);
-#endif
-
-#undef GEN_TWEAK
-}
+#include <rijndael-ppc-functions.h>

#endif /* USE_PPC_CRYPTO */
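
A note on the XTS bulk path that moves into rijndael-ppc-functions.h above: the
GEN_TWEAK macro used throughout it is the standard XTS tweak update, i.e.
multiplication of the 128-bit tweak by x in GF(2^128). A minimal scalar sketch
of the same operation in portable C (illustrative only; the vector code in the
patch computes it branch-free across the whole 128-bit lane with 64-bit
add/shift/swap helpers):

  /* Double the XTS tweak: treat the 16 bytes as a 128-bit little-endian
   * integer, shift left by one bit and, on overflow, reduce by the XTS
   * polynomial x^128 + x^7 + x^2 + x + 1 (constant 0x87). */
  #include <stdint.h>

  static void
  xts_double_tweak (uint8_t tweak[16])
  {
    uint8_t carry = 0;
    int i;

    for (i = 0; i < 16; i++)
      {
        uint8_t next_carry = tweak[i] >> 7;
        tweak[i] = (uint8_t)((tweak[i] << 1) | carry);
        carry = next_carry;
      }

    if (carry)
      tweak[0] ^= 0x87;
  }
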
diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c
new file mode 100644
index 000000000..facdedd4f
--- /dev/null
+++ b/cipher/rijndael-ppc9le.c
@@ -0,0 +1,102 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+
+#include "rijndael-ppc-common.h"
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+ static const block vec_dummy = { 0 };
+ return vec_dummy;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+ (void)be_bswap_const;
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+ block vec;
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("lxvb16x %x0,0,%1\n\t"
+ : "=wa" (vec)
+ : "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("lxvb16x %x0,%1,%2\n\t"
+ : "=wa" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+ return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+ if (__builtin_constant_p (offset) && offset == 0)
+ __asm__ volatile ("stxvb16x %x0,0,%1\n\t"
+ :
+ : "wa" (vec), "r" ((uintptr_t)ptr)
+ : "memory");
+ else
+#endif
+ __asm__ volatile ("stxvb16x %x0,%1,%2\n\t"
+ :
+ : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory", "r0");
+}
+
+
+#define GCRY_AES_PPC9LE 1
+#define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc9le_encrypt
+#define DECRYPT_BLOCK_FUNC _gcry_aes_ppc9le_decrypt
+#define CFB_ENC_FUNC _gcry_aes_ppc9le_cfb_enc
+#define CFB_DEC_FUNC _gcry_aes_ppc9le_cfb_dec
+#define CBC_ENC_FUNC _gcry_aes_ppc9le_cbc_enc
+#define CBC_DEC_FUNC _gcry_aes_ppc9le_cbc_dec
+#define CTR_ENC_FUNC _gcry_aes_ppc9le_ctr_enc
+#define OCB_CRYPT_FUNC _gcry_aes_ppc9le_ocb_crypt
+#define OCB_AUTH_FUNC _gcry_aes_ppc9le_ocb_auth
+#define XTS_CRYPT_FUNC _gcry_aes_ppc9le_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO_WITH_PPC9LE */
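
The only pieces rijndael-ppc9le.c overrides are these load/store helpers:
POWER9 (ISA 3.0) provides lxvb16x/stxvb16x, which move a vector to and from
memory directly in big-endian byte order, so asm_be_swap degenerates to a
no-op and the per-block byte-swap permute of the POWER8 path disappears. A
semantic sketch in portable C of what such a big-endian-order load means on a
little-endian target (names hypothetical, not the patch code):

  #include <stdint.h>
  #include <string.h>

  typedef struct { uint8_t b[16]; } be_block;

  /* Equivalent, on a little-endian CPU, to a native 16-byte vector load
   * followed by a full byte reversal of the register -- which is what the
   * POWER8 path does with a load plus vperm, and what lxvb16x achieves in
   * a single instruction on POWER9. */
  static be_block
  load_be_order (const void *ptr)
  {
    be_block v;
    uint8_t raw[16];
    int i;

    memcpy (raw, ptr, 16);
    for (i = 0; i < 16; i++)
      v.b[i] = raw[15 - i];

    return v;
  }
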
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index ebd1a11a5..a1c4cfc1a 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -239,6 +239,43 @@ extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak,
size_t nblocks, int encrypt);
#endif /*USE_PPC_CRYPTO*/

+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+/* Power9 little-endian crypto implementations of AES */
+extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+
+extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
+extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c,
+ const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak,
+ void *outbuf_arg,
+ const void *inbuf_arg,
+ size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+
static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
const unsigned char *ax);
static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
@@ -384,6 +421,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
#ifdef USE_PPC_CRYPTO
ctx->use_ppc_crypto = 0;
#endif
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ ctx->use_ppc9le_crypto = 0;
+#endif

if (0)
{
@@ -464,6 +504,28 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
}
}
#endif
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00))
+ {
+ ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt;
+ ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->use_ppc_crypto = 1; /* same key-setup as USE_PPC_CRYPTO */
+ ctx->use_ppc9le_crypto = 1;
+ if (hd)
+ {
+ hd->bulk.cfb_enc = _gcry_aes_ppc9le_cfb_enc;
+ hd->bulk.cfb_dec = _gcry_aes_ppc9le_cfb_dec;
+ hd->bulk.cbc_enc = _gcry_aes_ppc9le_cbc_enc;
+ hd->bulk.cbc_dec = _gcry_aes_ppc9le_cbc_dec;
+ hd->bulk.ctr_enc = _gcry_aes_ppc9le_ctr_enc;
+ hd->bulk.ocb_crypt = _gcry_aes_ppc9le_ocb_crypt;
+ hd->bulk.ocb_auth = _gcry_aes_ppc9le_ocb_auth;
+ hd->bulk.xts_crypt = _gcry_aes_ppc9le_xts_crypt;
+ }
+ }
+#endif
#ifdef USE_PPC_CRYPTO
else if (hwfeatures & HWF_PPC_VCRYPTO)
{
@@ -924,6 +986,13 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ _gcry_aes_ppc9le_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -992,6 +1061,13 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ _gcry_aes_ppc9le_cbc_enc (ctx, iv, outbuf, inbuf, nblocks, cbc_mac);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -1067,6 +1143,13 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ _gcry_aes_ppc9le_ctr_enc (ctx, ctr, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -1317,6 +1400,13 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ _gcry_aes_ppc9le_cfb_dec (ctx, iv, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -1382,6 +1472,13 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ _gcry_aes_ppc9le_cbc_dec (ctx, iv, outbuf, inbuf, nblocks);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -1450,6 +1547,12 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
return _gcry_aes_armv8_ce_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ return _gcry_aes_ppc9le_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -1550,6 +1653,12 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
return _gcry_aes_armv8_ce_ocb_auth (c, abuf, nblocks);
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ return _gcry_aes_ppc9le_ocb_auth (c, abuf, nblocks);
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
@@ -1619,6 +1728,13 @@ _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
return;
}
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+ else if (ctx->use_ppc9le_crypto)
+ {
+ _gcry_aes_ppc9le_xts_crypt (ctx, tweak, outbuf, inbuf, nblocks, encrypt);
+ return;
+ }
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
#ifdef USE_PPC_CRYPTO
else if (ctx->use_ppc_crypto)
{
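
Every bulk entry point in rijndael.c gains the same guard shape: the POWER9
branch is tested before the generic POWER8 vcrypto branch, so ISA 3.0 hardware
takes the ppc9le functions while older POWER8 machines fall through unchanged.
A sketch of that dispatch shape (aes_cfb_enc_dispatch is a hypothetical
stand-in for the real _gcry_aes_cfb_enc and assumes the rijndael.c context):

  /* Sketch only.  The ppc9le test must come first: the POWER9 setup in
   * do_setkey also sets ctx->use_ppc_crypto, since both variants share
   * the same key schedule. */
  static void
  aes_cfb_enc_dispatch (void *context, unsigned char *iv,
                        void *outbuf, const void *inbuf, size_t nblocks)
  {
    RIJNDAEL_context *ctx = context;

  #ifdef USE_PPC_CRYPTO_WITH_PPC9LE
    if (ctx->use_ppc9le_crypto)
      {
        _gcry_aes_ppc9le_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
        return;
      }
  #endif
  #ifdef USE_PPC_CRYPTO
    if (ctx->use_ppc_crypto)
      {
        _gcry_aes_ppc8_cfb_enc (ctx, iv, outbuf, inbuf, nblocks);
        return;
      }
  #endif

    /* ... generic table-based fallback ... */
  }
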
diff --git a/configure.ac b/configure.ac
index f31b75586..f9d3dd718 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2348,6 +2348,7 @@ if test "$found" = "1" ; then
powerpc64le-*-*)
# Build with the crypto extension implementation
GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc9le.lo"
;;
powerpc64-*-*)
# Big-Endian.

