Mailing List Archive

[PATCH 4/7] Move bulk OCB L pointer array setup code to common header
* cipher/bulkhelp.h: New.
* cipher/camellia-glue.c (_gcry_camellia_ocb_crypt)
(_gcry_camellia_ocb_auth): Use new
`bulk_ocb_prepare_L_pointers_array_blkXX` function for OCB L pointer
array setup.
* cipher/serpent.c (_gcry_serpent_ocb_crypt)
(_gcry_serpent_ocb_auth): Likewise.
* cipher/sm4.c (_gcry_sm4_ocb_crypt, _gcry_sm4_ocb_auth): Likewise.
* cipher/twofish.c (_gcry_twofish_ocb_crypt)
(_gcry_twofish_ocb_auth): Likewise.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
cipher/bulkhelp.h | 103 +++++++++++++++++++++++++++++++++++++++++
cipher/camellia-glue.c | 78 ++-----------------------------
cipher/serpent.c | 99 +++++++--------------------------------
cipher/sm4.c | 63 ++-----------------------
cipher/twofish.c | 37 ++-------------
5 files changed, 132 insertions(+), 248 deletions(-)
create mode 100644 cipher/bulkhelp.h

diff --git a/cipher/bulkhelp.h b/cipher/bulkhelp.h
new file mode 100644
index 00000000..72668d42
--- /dev/null
+++ b/cipher/bulkhelp.h
@@ -0,0 +1,103 @@
+/* bulkhelp.h - Some bulk processing helpers
+ * Copyright (C) 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRYPT_BULKHELP_H
+#define GCRYPT_BULKHELP_H
+
+
+#include "g10lib.h"
+#include "cipher-internal.h"
+
+
+#ifdef __x86_64__
+/* Use u64 to store pointers for x32 support (assembly function assumes
+ * 64-bit pointers). */
+typedef u64 ocb_L_uintptr_t;
+#else
+typedef uintptr_t ocb_L_uintptr_t; /* Other targets: plain uintptr_t. */
+#endif /* __x86_64__ */
+
+/* Fill Ls[32] with c->u_mode.ocb.L[] pointers, rotated by BLKN modulo 32. */
+static inline ocb_L_uintptr_t *
+bulk_ocb_prepare_L_pointers_array_blk32 (gcry_cipher_hd_t c,
+ ocb_L_uintptr_t Ls[32], u64 blkn)
+{
+ unsigned int n = 32 - (blkn % 32); /* Rotation added to every index. */
+ unsigned int i;
+ /* Per group of 8, slots 0..6 get L[0],L[1],L[0],L[2],L[0],L[1],L[0]. */
+ for (i = 0; i < 32; i += 8)
+ {
+ Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+ /* Slot 7 of the first three groups; slot 31 is not written here. */
+ Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
+ Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ return &Ls[(31 + n) % 32]; /* Address of the unwritten slot. */
+}
+
+/* Fill Ls[16] with c->u_mode.ocb.L[] pointers, rotated by BLKN modulo 16. */
+static inline ocb_L_uintptr_t *
+bulk_ocb_prepare_L_pointers_array_blk16 (gcry_cipher_hd_t c,
+ ocb_L_uintptr_t Ls[16], u64 blkn)
+{
+ unsigned int n = 16 - (blkn % 16); /* Rotation added to every index. */
+ unsigned int i;
+ /* Per group of 8, slots 0..6 get L[0],L[1],L[0],L[2],L[0],L[1],L[0]. */
+ for (i = 0; i < 16; i += 8)
+ {
+ Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ }
+ /* Slot 7 gets L[3]; slot 15 is not written here. */
+ Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ return &Ls[(15 + n) % 16]; /* Address of the unwritten slot. */
+}
+
+/* Fill Ls[8] with c->u_mode.ocb.L[] pointers, rotated by BLKN modulo 8. */
+static inline ocb_L_uintptr_t *
+bulk_ocb_prepare_L_pointers_array_blk8 (gcry_cipher_hd_t c,
+ ocb_L_uintptr_t Ls[8], u64 blkn)
+{
+ unsigned int n = 8 - (blkn % 8); /* Rotation added to every index. */
+ /* Slots 0..7 get L[0],L[1],L[0],L[2],L[0],L[1],L[0],L[3]. */
+ Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+ Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+ /* Slot 7 starts as L[3]; its address is returned to the caller. */
+ return &Ls[(7 + n) % 8];
+}
+
+
+#endif /*GCRYPT_BULKHELP_H*/
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 7f009db4..7f6e92d2 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -65,6 +65,7 @@
#include "bufhelp.h"
#include "cipher-internal.h"
#include "cipher-selftest.h"
+#include "bulkhelp.h"

/* Helper macro to force alignment to 16 bytes. */
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
@@ -788,9 +789,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_aesni_avx2 = 0;
u64 Ls[32];
- unsigned int n = 32 - (blkn % 32);
u64 *l;
- int i;

if (nblocks >= 32)
{
@@ -808,24 +807,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
bulk_ocb_fn = encrypt ? _gcry_camellia_gfni_avx2_ocb_enc
: _gcry_camellia_gfni_avx2_ocb_dec;
#endif
-
- for (i = 0; i < 32; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
- Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(31 + n) % 32];
+ l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);

/* Process data in 32 block chunks. */
while (nblocks >= 32)
@@ -860,27 +842,11 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_aesni_avx = 0;
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
@@ -947,9 +913,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_aesni_avx2 = 0;
u64 Ls[32];
- unsigned int n = 32 - (blkn % 32);
u64 *l;
- int i;

if (nblocks >= 32)
{
@@ -965,23 +929,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
bulk_auth_fn = _gcry_camellia_gfni_avx2_ocb_auth;
#endif

- for (i = 0; i < 32; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
- Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(31 + n) % 32];
+ l = bulk_ocb_prepare_L_pointers_array_blk32 (c, Ls, blkn);

/* Process data in 32 block chunks. */
while (nblocks >= 32)
@@ -1016,27 +964,11 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_aesni_avx = 0;
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
diff --git a/cipher/serpent.c b/cipher/serpent.c
index 159d889f..dfe5cc28 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -31,6 +31,7 @@
#include "bufhelp.h"
#include "cipher-internal.h"
#include "cipher-selftest.h"
+#include "bulkhelp.h"


/* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
@@ -1272,27 +1273,11 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_avx2 = 0;
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
@@ -1329,21 +1314,11 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_sse2 = 0;
u64 Ls[8];
- unsigned int n = 8 - (blkn % 8);
u64 *l;

if (nblocks >= 8)
{
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- l = &Ls[(7 + n) % 8];
+ l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

/* Process data in 8 block chunks. */
while (nblocks >= 8)
@@ -1380,33 +1355,25 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
if (ctx->use_neon)
{
int did_use_neon = 0;
- const void *Ls[8];
- unsigned int n = 8 - (blkn % 8);
- const void **l;
+ uintptr_t Ls[8];
+ uintptr_t *l;

if (nblocks >= 8)
{
- Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
- Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
- Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
- Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
- Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
- Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
- Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
- l = &Ls[(7 + n) % 8];
+ l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

/* Process data in 8 block chunks. */
while (nblocks >= 8)
{
blkn += 8;
- *l = ocb_get_l(c, blkn - blkn % 8);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

if (encrypt)
_gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
+ c->u_ctr.ctr, (void **)Ls);
else
_gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
+ c->u_ctr.ctr, (void **)Ls);

nblocks -= 8;
outbuf += 8 * sizeof(serpent_block_t);
@@ -1456,27 +1423,11 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_avx2 = 0;
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
@@ -1508,21 +1459,11 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_sse2 = 0;
u64 Ls[8];
- unsigned int n = 8 - (blkn % 8);
u64 *l;

if (nblocks >= 8)
{
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- l = &Ls[(7 + n) % 8];
+ l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

/* Process data in 8 block chunks. */
while (nblocks >= 8)
@@ -1554,29 +1495,21 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
if (ctx->use_neon)
{
int did_use_neon = 0;
- const void *Ls[8];
- unsigned int n = 8 - (blkn % 8);
- const void **l;
+ uintptr_t Ls[8];
+ uintptr_t *l;

if (nblocks >= 8)
{
- Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
- Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
- Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
- Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
- Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
- Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
- Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
- l = &Ls[(7 + n) % 8];
+ l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

/* Process data in 8 block chunks. */
while (nblocks >= 8)
{
blkn += 8;
- *l = ocb_get_l(c, blkn - blkn % 8);
+ *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);

_gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum, Ls);
+ c->u_mode.ocb.aad_sum, (void **)Ls);

nblocks -= 8;
abuf += 8 * sizeof(serpent_block_t);
diff --git a/cipher/sm4.c b/cipher/sm4.c
index d36d9ceb..0148365c 100644
--- a/cipher/sm4.c
+++ b/cipher/sm4.c
@@ -30,6 +30,7 @@
#include "bufhelp.h"
#include "cipher-internal.h"
#include "cipher-selftest.h"
+#include "bulkhelp.h"

/* Helper macro to force alignment to 64 bytes. */
#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
@@ -1030,27 +1031,11 @@ _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
if (ctx->use_aesni_avx2)
{
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
@@ -1077,22 +1062,11 @@ _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
if (ctx->use_aesni_avx)
{
u64 Ls[8];
- unsigned int n = 8 - (blkn % 8);
u64 *l;

if (nblocks >= 8)
{
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(7 + n) % 8];
+ l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

/* Process data in 8 block chunks. */
while (nblocks >= 8)
@@ -1184,27 +1158,11 @@ _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
if (ctx->use_aesni_avx2)
{
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
@@ -1227,22 +1185,11 @@ _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
if (ctx->use_aesni_avx)
{
u64 Ls[8];
- unsigned int n = 8 - (blkn % 8);
u64 *l;

if (nblocks >= 8)
{
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(7 + n) % 8];
+ l = bulk_ocb_prepare_L_pointers_array_blk8 (c, Ls, blkn);

/* Process data in 8 block chunks. */
while (nblocks >= 8)
diff --git a/cipher/twofish.c b/cipher/twofish.c
index d19e0790..4ae5d5a6 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -47,6 +47,7 @@
#include "bufhelp.h"
#include "cipher-internal.h"
#include "cipher-selftest.h"
+#include "bulkhelp.h"


#define TWOFISH_BLOCKSIZE 16
@@ -1358,27 +1359,11 @@ _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_avx2 = 0;
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
@@ -1471,27 +1456,11 @@ _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_avx2 = 0;
u64 Ls[16];
- unsigned int n = 16 - (blkn % 16);
u64 *l;
- int i;

if (nblocks >= 16)
{
- for (i = 0; i < 16; i += 8)
- {
- /* Use u64 to store pointers for x32 support (assembly function
- * assumes 64-bit pointers). */
- Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
- Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
- Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
- }
-
- Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
- l = &Ls[(15 + n) % 16];
+ l = bulk_ocb_prepare_L_pointers_array_blk16 (c, Ls, blkn);

/* Process data in 16 block chunks. */
while (nblocks >= 16)
--
2.34.1


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@lists.gnupg.org
https://lists.gnupg.org/mailman/listinfo/gcrypt-devel