
[PATCH 3/6] rijndael-ppc: add key setup and enable single block PowerPC AES
* cipher/Makefile.am: Add 'rijndael-ppc.c'.
* cipher/rijndael-internal.h (USE_PPC_CRYPTO): New.
(RIJNDAEL_context): Add 'use_ppc_crypto'.
* cipher/rijndael-ppc.c (backwards, swap_if_le): Remove.
(u128_t, ALWAYS_INLINE, NO_INLINE, NO_INSTRUMENT_FUNCTION)
(ASM_FUNC_ATTR, ASM_FUNC_ATTR_INLINE, ASM_FUNC_ATTR_NOINLINE)
(ALIGNED_LOAD, ALIGNED_STORE, VEC_LOAD_BE, VEC_STORE_BE)
(vec_bswap32_const, vec_aligned_ld, vec_load_be_const)
(vec_load_be, vec_aligned_st, vec_store_be, _gcry_aes_sbox4_ppc8)
(_gcry_aes_ppc8_setkey, _gcry_aes_ppc8_prepare_decryption)
(aes_ppc8_encrypt_altivec, aes_ppc8_decrypt_altivec): New.
(_gcry_aes_ppc8_encrypt, _gcry_aes_ppc8_decrypt): Rewrite.
(_gcry_aes_ppc8_ocb_crypt): Comment out.
* cipher/rijndael.c [USE_PPC_CRYPTO] (_gcry_aes_ppc8_setkey)
(_gcry_aes_ppc8_prepare_decryption, _gcry_aes_ppc8_encrypt)
(_gcry_aes_ppc8_decrypt): New prototypes.
(do_setkey) [USE_PPC_CRYPTO]: Add setup for PowerPC AES.
(prepare_decryption) [USE_PPC_CRYPTO]: Ditto.
* configure.ac: Add 'rijndael-ppc.lo'.
(gcry_cv_ppc_altivec, gcry_cv_cc_ppc_altivec_cflags)
(gcry_cv_gcc_inline_asm_ppc_altivec)
(gcry_cv_gcc_inline_asm_ppc_arch_3_00): New checks.
--

Benchmark on POWER8 ~3.8 GHz:
Before:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        ECB enc |      7.27 ns/B     131.2 MiB/s     27.61 c/B
        ECB dec |      7.70 ns/B     123.8 MiB/s     29.28 c/B
        CBC enc |      6.38 ns/B     149.5 MiB/s     24.24 c/B
        CBC dec |      6.17 ns/B     154.5 MiB/s     23.45 c/B
        CFB enc |      6.45 ns/B     147.9 MiB/s     24.51 c/B
        CFB dec |      6.20 ns/B     153.8 MiB/s     23.57 c/B
        OFB enc |      7.36 ns/B     129.6 MiB/s     27.96 c/B
        OFB dec |      7.36 ns/B     129.6 MiB/s     27.96 c/B
        CTR enc |      6.22 ns/B     153.2 MiB/s     23.65 c/B
        CTR dec |      6.22 ns/B     153.3 MiB/s     23.65 c/B
        XTS enc |      6.67 ns/B     142.9 MiB/s     25.36 c/B
        XTS dec |      6.70 ns/B     142.3 MiB/s     25.46 c/B
        CCM enc |     12.61 ns/B     75.60 MiB/s     47.93 c/B
        CCM dec |     12.62 ns/B     75.56 MiB/s     47.96 c/B
       CCM auth |      6.41 ns/B     148.8 MiB/s     24.36 c/B
        EAX enc |     12.62 ns/B     75.55 MiB/s     47.96 c/B
        EAX dec |     12.62 ns/B     75.55 MiB/s     47.97 c/B
       EAX auth |      6.39 ns/B     149.2 MiB/s     24.30 c/B
        GCM enc |      9.81 ns/B     97.24 MiB/s     37.27 c/B
        GCM dec |      9.81 ns/B     97.20 MiB/s     37.28 c/B
       GCM auth |      3.59 ns/B     265.8 MiB/s     13.63 c/B
        OCB enc |      6.39 ns/B     149.3 MiB/s     24.27 c/B
        OCB dec |      6.38 ns/B     149.5 MiB/s     24.25 c/B
       OCB auth |      6.35 ns/B     150.2 MiB/s     24.13 c/B

After:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        ECB enc |      1.29 ns/B     737.7 MiB/s      4.91 c/B
        ECB dec |      1.34 ns/B     711.1 MiB/s      5.10 c/B
        CBC enc |      2.13 ns/B     448.5 MiB/s      8.08 c/B
        CBC dec |      1.05 ns/B     908.0 MiB/s      3.99 c/B
        CFB enc |      2.17 ns/B     439.9 MiB/s      8.24 c/B
        CFB dec |      2.22 ns/B     429.8 MiB/s      8.43 c/B
        OFB enc |      1.49 ns/B     640.1 MiB/s      5.66 c/B
        OFB dec |      1.49 ns/B     640.1 MiB/s      5.66 c/B
        CTR enc |      2.21 ns/B     432.5 MiB/s      8.38 c/B
        CTR dec |      2.20 ns/B     432.5 MiB/s      8.38 c/B
        XTS enc |      2.32 ns/B     410.6 MiB/s      8.83 c/B
        XTS dec |      2.33 ns/B     409.7 MiB/s      8.85 c/B
        CCM enc |      4.36 ns/B     218.7 MiB/s     16.57 c/B
        CCM dec |      4.36 ns/B     218.8 MiB/s     16.56 c/B
       CCM auth |      2.17 ns/B     440.4 MiB/s      8.23 c/B
        EAX enc |      4.37 ns/B     218.3 MiB/s     16.60 c/B
        EAX dec |      4.36 ns/B     218.7 MiB/s     16.57 c/B
       EAX auth |      2.16 ns/B     440.7 MiB/s      8.22 c/B
        GCM enc |      5.78 ns/B     165.0 MiB/s     21.96 c/B
        GCM dec |      5.78 ns/B     165.0 MiB/s     21.96 c/B
       GCM auth |      3.59 ns/B     265.9 MiB/s     13.63 c/B
        OCB enc |      2.33 ns/B     410.1 MiB/s      8.84 c/B
        OCB dec |      2.34 ns/B     407.2 MiB/s      8.90 c/B
       OCB auth |      2.32 ns/B     411.1 MiB/s      8.82 c/B
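
(For scale: cycles/byte is nanosecs/byte times the ~3.8 GHz clock, e.g.
1.29 ns/B * 3.8 GHz ~= 4.91 c/B for ECB enc, so single-block ECB is about
5.6x faster than before and CBC dec about 5.9x. GCM improves less because
the GHASH/auth path is unchanged by this patch: 13.63 c/B both before and
after.)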

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---

diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 2aae82e27..1f2d8ec97 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -96,6 +96,7 @@ EXTRA_libcipher_la_SOURCES = \
rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \
rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \
+ rijndael-ppc.c \
rmd160.c \
rsa.c \
salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
@@ -197,3 +198,15 @@ crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile

crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile
`echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS
+ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto
+else
+ppc_vcrypto_cflags =
+endif
+
+rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile
+ `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile
+ `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
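
Note on the build rules above: rijndael-ppc.c is the only object compiled
with -maltivec -mvsx -mcrypto, so vector-crypto code generation cannot leak
into the generic code paths, and ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS is set only
when the configure checks later in this patch find that the compiler needs
those explicit flags. As an illustrative sketch (not literal Makefile
output), the .lo rule then expands to roughly:

  libtool --mode=compile gcc ... -maltivec -mvsx -mcrypto -c rijndael-ppc.c

with the command piped through the instrumentation_munging filter, as for
the other accelerated modules, to strip sanitizer/profiling flags.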
diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h
index 78b08e8f8..5150a69d7 100644
--- a/cipher/rijndael-internal.h
+++ b/cipher/rijndael-internal.h
@@ -75,7 +75,7 @@
# define USE_PADLOCK 1
# endif
# endif
-#endif /*ENABLE_PADLOCK_SUPPORT*/
+#endif /* ENABLE_PADLOCK_SUPPORT */

/* USE_AESNI inidicates whether to compile with Intel AES-NI code. We
need the vector-size attribute which seems to be available since
@@ -104,6 +104,18 @@
# endif
#endif /* ENABLE_ARM_CRYPTO_SUPPORT */

+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code. */
+#undef USE_PPC_CRYPTO
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+ defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+# if __GNUC__ >= 4
+# define USE_PPC_CRYPTO 1
+# endif
+# endif
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
+
struct RIJNDAEL_context_s;

typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s *ctx,
@@ -154,6 +166,9 @@ typedef struct RIJNDAEL_context_s
#ifdef USE_ARM_CE
unsigned int use_arm_ce:1; /* ARMv8 CE shall be used. */
#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ unsigned int use_ppc_crypto:1; /* PowerPC crypto shall be used. */
+#endif /*USE_PPC_CRYPTO*/
rijndael_cryptfn_t encrypt_fn;
rijndael_cryptfn_t decrypt_fn;
rijndael_prefetchfn_t prefetch_enc_fn;
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index 2e5dd2f89..a7c47a876 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -1,5 +1,6 @@
-/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
* Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -24,138 +25,397 @@

#include <config.h>

-/* PPC AES extensions */
-#include <altivec.h>
#include "rijndael-internal.h"
#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO
+
+#include <altivec.h>
+

typedef vector unsigned char block;
-static const vector unsigned char backwards =
- { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
-
-#ifdef __LITTLE_ENDIAN__
-#define swap_if_le(a) \
- vec_perm(a, a, backwards)
-#elif __BIG_ENDIAN__
-#define swap_if_le(a) (a)
+
+typedef union
+{
+ u32 data32[4];
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#define ALIGNED_LOAD(in_ptr) \
+ (vec_aligned_ld (0, (const unsigned char *)(in_ptr)))
+
+#define ALIGNED_STORE(out_ptr, vec) \
+ (vec_aligned_st ((vec), 0, (unsigned char *)(out_ptr)))
+
+#define VEC_LOAD_BE(in_ptr, bige_const) \
+ (vec_load_be (0, (const unsigned char *)(in_ptr), bige_const))
+
+#define VEC_STORE_BE(out_ptr, vec, bige_const) \
+ (vec_store_be ((vec), 0, (unsigned char *)(out_ptr), bige_const))
+
+
+static const block vec_bswap32_const =
+ { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+
+
+static ASM_FUNC_ATTR_INLINE block
+vec_aligned_ld(unsigned long offset, const unsigned char *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+ block vec;
+ __asm__ ("lvx %0,%1,%2\n\t"
+ : "=v" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory");
+ return vec;
#else
-#error "What endianness?"
+ return vec_vsx_ld (offset, ptr);
#endif
+}

-/* Passes in AltiVec registers (big-endian)
- * sadly compilers don't know how to unroll outer loops into
- * inner loops with more registers on static functions,
- * so that this can be properly optimized for OOO multi-issue
- * without having to hand-unroll.
- */
-static block _gcry_aes_ppc8_encrypt_altivec (const RIJNDAEL_context *ctx,
- block a)
+
+static ASM_FUNC_ATTR_INLINE block
+vec_load_be_const(void)
+{
+#ifndef WORDS_BIGENDIAN
+ return ~ALIGNED_LOAD(&vec_bswap32_const);
+#else
+ static const block vec_dummy = { 0 };
+ return vec_dummy;
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE block
+vec_load_be(unsigned long offset, const unsigned char *ptr,
+ block be_bswap_const)
+{
+#ifndef WORDS_BIGENDIAN
+ block vec;
+ /* GCC vec_vsx_ld is generating two instructions on little-endian. Use
+ * lxvw4x directly instead. */
+ __asm__ ("lxvw4x %x0,%1,%2\n\t"
+ : "=wa" (vec)
+ : "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory");
+ __asm__ ("vperm %0,%1,%1,%2\n\t"
+ : "=v" (vec)
+ : "v" (vec), "v" (be_bswap_const));
+ return vec;
+#else
+ (void)be_bswap_const;
+ return vec_vsx_ld (offset, ptr);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_aligned_st(block vec, unsigned long offset, unsigned char *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+ __asm__ ("stvx %0,%1,%2\n\t"
+ :
+ : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory");
+#else
+ vec_vsx_st (vec, offset, ptr);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_store_be(block vec, unsigned long offset, unsigned char *ptr,
+ block be_bswap_const)
+{
+#ifndef WORDS_BIGENDIAN
+ /* GCC vec_vsx_st is generating two instructions on little-endian. Use
+ * stxvw4x directly instead. */
+ __asm__ ("vperm %0,%1,%1,%2\n\t"
+ : "=v" (vec)
+ : "v" (vec), "v" (be_bswap_const));
+ __asm__ ("stxvw4x %x0,%1,%2\n\t"
+ :
+ : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+ : "memory");
+#else
+ (void)be_bswap_const;
+ vec_vsx_st (vec, offset, ptr);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+_gcry_aes_sbox4_ppc8(u32 fourbytes)
+{
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ block data_vec;
+ u32 data32[4];
+ } u;
+
+ u.data32[0] = fourbytes;
+ u.data_vec = vec_sbox_be(u.data_vec);
+ return u.data32[0];
+}
+
+void
+_gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+ const block bige_const = vec_load_be_const();
+ union
+ {
+ PROPERLY_ALIGNED_TYPE dummy;
+ byte data[MAXKC][4];
+ u32 data32[MAXKC];
+ } tkk[2];
+ unsigned int rounds = ctx->rounds;
+ int KC = rounds - 6;
+ unsigned int keylen = KC * 4;
+ u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+ unsigned int i, r, t;
+ byte rcon = 1;
+ int j;
+#define k tkk[0].data
+#define k_u32 tkk[0].data32
+#define tk tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W (ctx->keyschenc)
+#define W_u32 (ctx->keyschenc32)
+
+ for (i = 0; i < keylen; i++)
+ {
+ k[i >> 2][i & 3] = key[i];
+ }
+
+ for (j = KC-1; j >= 0; j--)
+ {
+ tk_u32[j] = k_u32[j];
+ }
+ r = 0;
+ t = 0;
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+ while (r < rounds + 1)
+ {
+ tk_u32[0] ^=
+ le_bswap32(
+ _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon);
+
+ if (KC != 8)
+ {
+ for (j = 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+ else
+ {
+ for (j = 1; j < KC/2; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+
+ tk_u32[KC/2] ^=
+ le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1])));
+
+ for (j = KC/2 + 1; j < KC; j++)
+ {
+ tk_u32[j] ^= tk_u32[j-1];
+ }
+ }
+
+ /* Copy values into round key array. */
+ for (j = 0; (j < KC) && (r < rounds + 1); )
+ {
+ for (; (j < KC) && (t < 4); j++, t++)
+ {
+ W_u32[r][t] = le_bswap32(tk_u32[j]);
+ }
+ if (t == 4)
+ {
+ r++;
+ t = 0;
+ }
+ }
+
+ rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b);
+ }
+
+ /* Store in big-endian order. */
+ for (r = 0; r <= rounds; r++)
+ {
+#ifndef WORDS_BIGENDIAN
+ VEC_STORE_BE(&ekey[r], ALIGNED_LOAD(&ekey[r]), bige_const);
+#else
+ block rvec = ALIGNED_LOAD(&ekey[r]);
+ ALIGNED_STORE(&ekey[r],
+ vec_perm(rvec, rvec, vec_bswap32_const));
+ (void)bige_const;
+#endif
+ }
+
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+ wipememory(&tkk, sizeof(tkk));
+}
+
+
+/* Make a decryption key from an encryption key. */
+void
+_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
{
+ u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+ u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+ int rounds = ctx->rounds;
+ int rr;
int r;
+
+ r = 0;
+ rr = rounds;
+ for (r = 0, rr = rounds; r <= rounds; r++, rr--)
+ {
+ ALIGNED_STORE(&dkey[r], ALIGNED_LOAD(&ekey[rr]));
+ }
+}
+
+
+static ASM_FUNC_ATTR_INLINE block
+aes_ppc8_encrypt_altivec (const RIJNDAEL_context *ctx, block a)
+{
+ u128_t *rk = (u128_t *)ctx->keyschenc;
int rounds = ctx->rounds;
- block *rk = (block*)ctx->keyschenc;
+ int r;

- a = rk[0] ^ a;
- for (r = 1;r < rounds;r++)
+#define DO_ROUND(r) (a = vec_cipher_be (a, ALIGNED_LOAD (&rk[r])))
+
+ a = ALIGNED_LOAD(&rk[0]) ^ a;
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ r = 10;
+ if (rounds >= 12)
{
- __asm__ volatile ("vcipher %0, %0, %1\n\t"
- :"+v" (a)
- :"v" (rk[r])
- );
+ DO_ROUND(10);
+ DO_ROUND(11);
+ r = 12;
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ r = 14;
+ }
}
- __asm__ volatile ("vcipherlast %0, %0, %1\n\t"
- :"+v" (a)
- :"v" (rk[r])
- );
+ a = vec_cipherlast_be(a, ALIGNED_LOAD(&rk[r]));
+
+#undef DO_ROUND
+
return a;
}


-static block _gcry_aes_ppc8_decrypt_altivec (const RIJNDAEL_context *ctx,
- block a)
+static ASM_FUNC_ATTR_INLINE block
+aes_ppc8_decrypt_altivec (const RIJNDAEL_context *ctx, block a)
{
- int r;
+ u128_t *rk = (u128_t *)ctx->keyschdec;
int rounds = ctx->rounds;
- block *rk = (block*)ctx->keyschdec;
+ int r;

- a = rk[0] ^ a;
- for (r = 1;r < rounds;r++)
+#define DO_ROUND(r) (a = vec_ncipher_be (a, ALIGNED_LOAD (&rk[r])))
+
+ a = ALIGNED_LOAD(&rk[0]) ^ a;
+ DO_ROUND(1);
+ DO_ROUND(2);
+ DO_ROUND(3);
+ DO_ROUND(4);
+ DO_ROUND(5);
+ DO_ROUND(6);
+ DO_ROUND(7);
+ DO_ROUND(8);
+ DO_ROUND(9);
+ r = 10;
+ if (rounds >= 12)
{
- __asm__ volatile ("vncipher %0, %0, %1\n\t"
- :"+v" (a)
- :"v" (rk[r])
- );
+ DO_ROUND(10);
+ DO_ROUND(11);
+ r = 12;
+ if (rounds > 12)
+ {
+ DO_ROUND(12);
+ DO_ROUND(13);
+ r = 14;
+ }
}
- __asm__ volatile ("vncipherlast %0, %0, %1\n\t"
- :"+v" (a)
- :"v" (rk[r])
- );
+ a = vec_ncipherlast_be(a, ALIGNED_LOAD(&rk[r]));
+
+#undef DO_ROUND
+
return a;
}

+
unsigned int _gcry_aes_ppc8_encrypt (const RIJNDAEL_context *ctx,
unsigned char *b,
const unsigned char *a)
{
- uintptr_t zero = 0;
+ const block bige_const = vec_load_be_const();
block sa;

- if ((uintptr_t)a % 16 == 0)
- {
- sa = vec_ld (0, a);
- }
- else
- {
- block unalignedprev, unalignedcur;
- unalignedprev = vec_ld (0, a);
- unalignedcur = vec_ld (16, a);
- sa = vec_perm (unalignedprev, unalignedcur, vec_lvsl(0, a));
- }
-
- sa = swap_if_le(sa);
- sa = _gcry_aes_ppc8_encrypt_altivec(ctx, sa);
-
- __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
- :
- : "wa" (sa), "r" (zero), "r" ((uintptr_t)b));
+ sa = VEC_LOAD_BE (a, bige_const);
+ sa = aes_ppc8_encrypt_altivec (ctx, sa);
+ VEC_STORE_BE (b, sa, bige_const);

return 0; /* does not use stack */
}

+
unsigned int _gcry_aes_ppc8_decrypt (const RIJNDAEL_context *ctx,
unsigned char *b,
const unsigned char *a)
{
- uintptr_t zero = 0;
- block sa, unalignedprev, unalignedcur;
-
- if ((uintptr_t)a % 16 == 0)
- {
- sa = vec_ld(0, a);
- }
- else
- {
- unalignedprev = vec_ld (0, a);
- unalignedcur = vec_ld (16, a);
- sa = vec_perm (unalignedprev, unalignedcur, vec_lvsl(0, a));
- }
+ const block bige_const = vec_load_be_const();
+ block sa;

- sa = swap_if_le (sa);
- sa = _gcry_aes_ppc8_decrypt_altivec (ctx, sa);
+ sa = VEC_LOAD_BE (a, bige_const);
+ sa = aes_ppc8_decrypt_altivec (ctx, sa);
+ VEC_STORE_BE (b, sa, bige_const);

- if ((uintptr_t)b % 16 == 0)
- {
- vec_vsx_st(swap_if_le(sa), 0, b);
- }
- else
- {
- __asm__ volatile ("stxvb16x %x0, %1, %2\n\t"
- :
- : "wa" (sa), "r" (zero), "r" ((uintptr_t)b));
- }
return 0; /* does not use stack */
}

+
+#if 0
size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
const void *inbuf_arg, size_t nblocks,
int encrypt)
@@ -673,4 +933,6 @@ size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
}
return 0;
}
+#endif

+#endif /* USE_PPC_CRYPTO */
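
For readability, the unrolled DO_ROUND sequences above are equivalent to the
loop form below. This is a minimal illustrative sketch, not code from the
patch; it assumes the round keys are already expanded in big-endian order and
uses the same vec_cipher_be/vec_cipherlast_be built-ins as the patch (GCC,
compiled with -maltivec -mvsx -mcrypto):

  #include <altivec.h>

  typedef vector unsigned char block;

  /* Encrypt one 16-byte block; rk[0..nrounds] are the expanded round
   * keys (nrounds = 10/12/14 for AES-128/192/256). */
  static block
  aes_encrypt_block (const block *rk, int nrounds, block b)
  {
    int r;

    b ^= rk[0];                      /* initial AddRoundKey */
    for (r = 1; r < nrounds; r++)
      b = vec_cipher_be (b, rk[r]);  /* one full AES round per instruction */
    return vec_cipherlast_be (b, rk[nrounds]); /* final round, no MixColumns */
  }

The patch unrolls this by hand because, as the removed comment noted,
compilers will not unroll such loops well enough on their own to interleave
rounds across blocks for out-of-order multi-issue cores.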
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 2c9aa6733..8a27dfe0b 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -199,6 +199,19 @@ extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
size_t nblocks, int encrypt);
#endif /*USE_ARM_ASM*/

+#ifdef USE_PPC_CRYPTO
+/* PowerPC Crypto implementations of AES */
+extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src);
+#endif /*USE_PPC_CRYPTO*/
+
static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
const unsigned char *ax);
static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
@@ -280,7 +293,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
int i,j, r, t, rconpointer = 0;
int KC;
#if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \
- || defined(USE_ARM_CE)
+ || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO)
unsigned int hwfeatures;
#endif

@@ -324,7 +337,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
ctx->rounds = rounds;

#if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) \
- || defined(USE_ARM_CE)
+ || defined(USE_ARM_CE) || defined(USE_PPC_CRYPTO)
hwfeatures = _gcry_get_hw_features ();
#endif

@@ -341,6 +354,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
#ifdef USE_ARM_CE
ctx->use_arm_ce = 0;
#endif
+#ifdef USE_PPC_CRYPTO
+ ctx->use_ppc_crypto = 0;
+#endif

if (0)
{
@@ -420,6 +436,19 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
hd->bulk.xts_crypt = _gcry_aes_armv8_ce_xts_crypt;
}
}
+#endif
+#ifdef USE_PPC_CRYPTO
+ else if (hwfeatures & HWF_PPC_VCRYPTO)
+ {
+ ctx->encrypt_fn = _gcry_aes_ppc8_encrypt;
+ ctx->decrypt_fn = _gcry_aes_ppc8_decrypt;
+ ctx->prefetch_enc_fn = NULL;
+ ctx->prefetch_dec_fn = NULL;
+ ctx->use_ppc_crypto = 1;
+ if (hd)
+ {
+ }
+ }
#endif
else
{
@@ -446,6 +475,10 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
#ifdef USE_ARM_CE
else if (ctx->use_arm_ce)
_gcry_aes_armv8_ce_setkey (ctx, key);
+#endif
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ _gcry_aes_ppc8_setkey (ctx, key);
#endif
else
{
@@ -584,7 +617,19 @@ prepare_decryption( RIJNDAEL_context *ctx )
{
_gcry_aes_armv8_ce_prepare_decryption (ctx);
}
-#endif /*USE_SSSE3*/
+#endif /*USE_ARM_CE*/
+#ifdef USE_ARM_CE
+ else if (ctx->use_arm_ce)
+ {
+ _gcry_aes_armv8_ce_prepare_decryption (ctx);
+ }
+#endif /*USE_ARM_CE*/
+#ifdef USE_PPC_CRYPTO
+ else if (ctx->use_ppc_crypto)
+ {
+ _gcry_aes_ppc8_prepare_decryption (ctx);
+ }
+#endif
#ifdef USE_PADLOCK
else if (ctx->use_padlock)
{
diff --git a/configure.ac b/configure.ac
index 6980f381a..586145aa4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1655,6 +1655,7 @@ if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then
[Defined if underlying assembler is compatible with Intel syntax assembly implementations])
fi

+
#
# Check whether compiler is configured for ARMv6 or newer architecture
#
@@ -1831,6 +1832,112 @@ if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then
fi


+#
+# Check whether compiler supports PowerPC AltiVec/VSX intrinsics
+#
+AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
+ [gcry_cv_cc_ppc_altivec],
+ [if test "$mpi_cpu_arch" != "ppc" ; then
+ gcry_cv_cc_ppc_altivec="n/a"
+ else
+ gcry_cv_cc_ppc_altivec=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[#include <altivec.h>
+ typedef vector unsigned char block;
+ block fn(block in)
+ {
+ block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+ return vec_cipher_be (t, in);
+ }
+ ]])],
+ [gcry_cv_cc_ppc_altivec=yes])
+ fi])
+if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
+ [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics])
+fi
+
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -maltivec -mvsx -mcrypto"
+
+if test "$gcry_cv_cc_ppc_altivec" = "no" &&
+ test "$mpi_cpu_arch" = "ppc" ; then
+ AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags],
+ [gcry_cv_cc_ppc_altivec_cflags],
+ [gcry_cv_cc_ppc_altivec_cflags=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[#include <altivec.h>
+ typedef vector unsigned char block;
+ block fn(block in)
+ {
+ block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+ return vec_cipher_be (t, in);
+ }]])],
+ [gcry_cv_cc_ppc_altivec_cflags=yes])])
+ if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
+ AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
+ [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics])
+ AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1,
+ [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags])
+ fi
+fi
+
+AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS,
+ test "$gcry_cv_cc_ppc_altivec_cflags" = "yes")
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto instructions],
+ [gcry_cv_gcc_inline_asm_ppc_altivec],
+ [if test "$mpi_cpu_arch" != "ppc" ; then
+ gcry_cv_gcc_inline_asm_ppc_altivec="n/a"
+ else
+ gcry_cv_gcc_inline_asm_ppc_altivec=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[__asm__(".globl testfn;\n"
+ "testfn:\n"
+ "stvx %v31,%r12,%r0;\n"
+ "lvx %v20,%r12,%r0;\n"
+ "vcipher %v0, %v1, %v22;\n"
+ "lxvw4x %vs32, %r0, %r1;\n"
+ );
+ ]])],
+ [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
+ fi])
+if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1,
+ [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports PowerISA 3.00 instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 instructions],
+ [gcry_cv_gcc_inline_asm_ppc_arch_3_00],
+ [if test "$mpi_cpu_arch" != "ppc" ; then
+ gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a"
+ else
+ gcry_cv_gcc_inline_asm_ppc_arch_3_00=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[__asm__(".globl testfn;\n"
+ "testfn:\n"
+ "stxvb16x %r1,%v12,%v30;\n"
+ );
+ ]])],
+ [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes])
+ fi])
+if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1,
+ [Defined if inline assembler supports PowerISA 3.00 instructions])
+fi
+
+
#######################################
#### Checks for library functions. ####
#######################################
@@ -2229,6 +2336,20 @@ if test "$found" = "1" ; then
GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-ce.lo"
GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv8-aarch64-ce.lo"
;;
+ powerpc64le-*-*)
+ # Build with the crypto extension implementation
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+ ;;
+ powerpc64-*-*)
+ # Big-Endian.
+ # Build with the crypto extension implementation
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+ ;;
+ powerpc-*-*)
+ # Big-Endian.
+ # Build with the crypto extension implementation
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-ppc.lo"
+ ;;
esac

case "$mpi_cpu_arch" in
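
At runtime, rijndael.c above only selects the new code when the
HWF_PPC_VCRYPTO hardware-feature bit is set (the detection code itself is
outside this patch). For reference, a small illustrative probe -- not
libgcrypt code -- showing the same capability bit as Linux exposes it in
the auxiliary vector:

  #include <stdio.h>
  #include <sys/auxv.h>

  #ifndef PPC_FEATURE2_VEC_CRYPTO
  # define PPC_FEATURE2_VEC_CRYPTO 0x02000000  /* from <asm/cputable.h> */
  #endif

  int
  main (void)
  {
    unsigned long hwcap2 = getauxval (AT_HWCAP2);

    printf ("POWER8 vector crypto: %savailable\n",
            (hwcap2 & PPC_FEATURE2_VEC_CRYPTO) ? "" : "not ");
    return 0;
  }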

