Mailing List Archive

[PATCH 4/5] aes-ppc: use target and optimize attributes for P8 and P9
* cipher/rijndael-ppc-functions.h: Add PPC_OPT_ATTR attribute
macro for all functions.
* cipher/rijndael-ppc.c (FUNC_ATTR_OPT, PPC_OPT_ATTR): New.
(_gcry_aes_ppc8_setkey, _gcry_aes_ppc8_prepare_decryption): Add
PPC_OPT_ATTR attribute macro.
* cipher/rijndael-ppc9le.c (FUNC_ATTR_OPT, PPC_OPT_ATTR): New.
--

This change makes sure that the PPC-accelerated AES code gets compiled
with the proper optimization level and the right target setting.

Benchmark on POWER9:
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 0.305 ns/B 3129 MiB/s 0.701 c/B
ECB dec | 0.305 ns/B 3127 MiB/s 0.701 c/B
CBC enc | 1.66 ns/B 575.3 MiB/s 3.81 c/B
CBC dec | 0.318 ns/B 2997 MiB/s 0.732 c/B
CFB enc | 1.66 ns/B 574.7 MiB/s 3.82 c/B
CFB dec | 0.319 ns/B 2987 MiB/s 0.734 c/B
OFB enc | 2.15 ns/B 443.4 MiB/s 4.95 c/B
OFB dec | 2.15 ns/B 443.3 MiB/s 4.95 c/B
CTR enc | 0.328 ns/B 2907 MiB/s 0.754 c/B
CTR dec | 0.328 ns/B 2906 MiB/s 0.755 c/B
XTS enc | 0.516 ns/B 1849 MiB/s 1.19 c/B
XTS dec | 0.515 ns/B 1850 MiB/s 1.19 c/B
CCM enc | 1.98 ns/B 480.6 MiB/s 4.56 c/B
CCM dec | 1.98 ns/B 480.5 MiB/s 4.56 c/B
CCM auth | 1.66 ns/B 574.9 MiB/s 3.82 c/B
EAX enc | 1.99 ns/B 480.2 MiB/s 4.57 c/B
EAX dec | 1.99 ns/B 480.2 MiB/s 4.57 c/B
EAX auth | 1.66 ns/B 575.2 MiB/s 3.81 c/B
GCM enc | 0.552 ns/B 1727 MiB/s 1.27 c/B
GCM dec | 0.552 ns/B 1728 MiB/s 1.27 c/B
GCM auth | 0.225 ns/B 4240 MiB/s 0.517 c/B
OCB enc | 0.381 ns/B 2504 MiB/s 0.876 c/B
OCB dec | 0.385 ns/B 2477 MiB/s 0.886 c/B
OCB auth | 0.356 ns/B 2682 MiB/s 0.818 c/B
SIV enc | 1.98 ns/B 480.9 MiB/s 4.56 c/B
SIV dec | 2.11 ns/B 452.9 MiB/s 4.84 c/B
SIV auth | 1.66 ns/B 575.4 MiB/s 3.81 c/B
GCM-SIV enc | 0.726 ns/B 1314 MiB/s 1.67 c/B
GCM-SIV dec | 0.843 ns/B 1131 MiB/s 1.94 c/B
GCM-SIV auth | 0.377 ns/B 2527 MiB/s 0.868 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
cipher/rijndael-ppc-functions.h | 71 ++++++++++++++++++---------------
cipher/rijndael-ppc.c | 17 +++++++-
cipher/rijndael-ppc9le.c | 13 ++++++
3 files changed, 66 insertions(+), 35 deletions(-)

diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h
index 79eca7a2..ec5cda73 100644
--- a/cipher/rijndael-ppc-functions.h
+++ b/cipher/rijndael-ppc-functions.h
@@ -1,6 +1,6 @@
/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
* Copyright (C) 2019 Shawn Landden <shawn@git.icu>
- * Copyright (C) 2019-2020, 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2019-2020, 2022-2023 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -23,9 +23,9 @@
* is released under.
*/

-unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
- unsigned char *out,
- const unsigned char *in)
+unsigned int PPC_OPT_ATTR
+ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
+ const unsigned char *in)
{
const block bige_const = asm_load_be_const();
const u128_t *rk = (u128_t *)&ctx->keyschenc;
@@ -44,9 +44,9 @@ unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
}


-unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
- unsigned char *out,
- const unsigned char *in)
+unsigned int PPC_OPT_ATTR
+DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
+ const unsigned char *in)
{
const block bige_const = asm_load_be_const();
const u128_t *rk = (u128_t *)&ctx->keyschdec;
@@ -65,9 +65,9 @@ unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
}


-void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
+void PPC_OPT_ATTR
+CFB_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = context;
@@ -119,8 +119,9 @@ void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
}


-void ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks, int encrypt)
+void PPC_OPT_ATTR
+ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = context;
@@ -375,9 +376,9 @@ void ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
}


-void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
+void PPC_OPT_ATTR
+CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = context;
@@ -573,9 +574,9 @@ void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
}


-void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks, int cbc_mac)
+void PPC_OPT_ATTR
+CBC_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int cbc_mac)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = context;
@@ -641,9 +642,10 @@ void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
}

-void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
+
+void PPC_OPT_ATTR
+CBC_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = context;
@@ -846,9 +848,9 @@ void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
}


-void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks)
+void PPC_OPT_ATTR
+CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
{
static const unsigned char vec_one_const[16] =
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
@@ -1079,9 +1081,9 @@ void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
}


-size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
- const void *inbuf_arg, size_t nblocks,
- int encrypt)
+size_t PPC_OPT_ATTR
+OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = (void *)&c->context.c;
@@ -1585,7 +1587,9 @@ size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
return 0;
}

-size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
+
+size_t PPC_OPT_ATTR
+OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
{
const block bige_const = asm_load_be_const();
RIJNDAEL_context *ctx = (void *)&c->context.c;
@@ -1794,9 +1798,9 @@ size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
}


-void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
- void *outbuf_arg, const void *inbuf_arg,
- size_t nblocks, int encrypt)
+void PPC_OPT_ATTR
+XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
{
#ifdef WORDS_BIGENDIAN
static const block vec_bswap128_const =
@@ -2294,8 +2298,9 @@ void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
}


-void CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg,
- const void *inbuf_arg, size_t nblocks)
+void PPC_OPT_ATTR
+CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
{
#ifndef WORDS_BIGENDIAN
static const vec_u32 vec_u32_one = { 1, 0, 0, 0 };
diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c
index d16fbb40..f376e0f1 100644
--- a/cipher/rijndael-ppc.c
+++ b/cipher/rijndael-ppc.c
@@ -34,6 +34,19 @@
#include "rijndael-ppc-common.h"


+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT
+#endif
+
+#ifdef HAVE_GCC_ATTRIBUTE_PPC_TARGET
+# define PPC_OPT_ATTR __attribute__((target("cpu=power8"))) FUNC_ATTR_OPT
+#else
+# define PPC_OPT_ATTR FUNC_ATTR_OPT
+#endif
+
+
#ifndef WORDS_BIGENDIAN
static const block vec_bswap32_const_neg =
{ ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 };
@@ -124,7 +137,7 @@ keysched_idx(unsigned int in)
}


-void
+void PPC_OPT_ATTR
_gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
{
u32 tk_u32[MAXKC];
@@ -179,7 +192,7 @@ _gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
}


-void
+void PPC_OPT_ATTR
_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
{
internal_aes_ppc_prepare_decryption (ctx);
diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c
index f7055290..e462befc 100644
--- a/cipher/rijndael-ppc9le.c
+++ b/cipher/rijndael-ppc9le.c
@@ -34,6 +34,19 @@
#include "rijndael-ppc-common.h"


+#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE
+# define FUNC_ATTR_OPT __attribute__((optimize("-O2")))
+#else
+# define FUNC_ATTR_OPT
+#endif
+
+#ifdef HAVE_GCC_ATTRIBUTE_PPC_TARGET
+# define PPC_OPT_ATTR __attribute__((target("cpu=power9"))) FUNC_ATTR_OPT
+#else
+# define PPC_OPT_ATTR FUNC_ATTR_OPT
+#endif
+
+
static ASM_FUNC_ATTR_INLINE block
asm_load_be_const(void)
{
--
2.37.2


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
https://lists.gnupg.org/mailman/listinfo/gcrypt-devel