Mailing List Archive

[PATCH 4/8] camellia: add CTR-mode byte addition for AVX/AVX2/AVX512 impl.
* cipher/camellia-aesni-avx-amd64.S
(_gcry_camellia_aesni_avx_ctr_enc): Add byte addition fast-path.
* cipher/camellia-aesni-avx2-amd64.h (ctr_enc): Likewise.
* cipher/camellia-gfni-avx512-amd64.S
(_gcry_camellia_gfni_avx512_ctr_enc): Likewise.
* cipher/camellia-glue.c (CAMELLIA_context): Add 'use_avx2'.
(camellia_setkey, _gcry_camellia_ctr_enc, _gcry_camellia_cbc_dec)
(_gcry_camellia_cfb_dec, _gcry_camellia_ocb_crypt)
(_gcry_camellia_ocb_auth) [USE_AESNI_AVX2]: Use 'use_avx2' to check
if any of the AVX2 implementations is enabled.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
cipher/camellia-aesni-avx-amd64.S | 78 +++++++++++++++++++++++
cipher/camellia-aesni-avx2-amd64.h | 83 ++++++++++++++++++++++--
cipher/camellia-gfni-avx512-amd64.S | 97 +++++++++++++++++++++++++++--
cipher/camellia-glue.c | 14 +++--
4 files changed, 257 insertions(+), 15 deletions(-)

diff --git a/cipher/camellia-aesni-avx-amd64.S b/cipher/camellia-aesni-avx-amd64.S
index 93c96791..5ec33b9b 100644
--- a/cipher/camellia-aesni-avx-amd64.S
+++ b/cipher/camellia-aesni-avx-amd64.S
@@ -761,6 +761,38 @@ _camellia_aesni_avx_data:
.Ltranspose_8x8_shuf:
.byte 0, 1, 4, 5, 2, 3, 6, 7, 8+0, 8+1, 8+4, 8+5, 8+2, 8+3, 8+6, 8+7

+/* CTR byte addition constants */
+.Lbige_addb_1:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+.Lbige_addb_2:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2
+.Lbige_addb_3:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3
+.Lbige_addb_4:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4
+.Lbige_addb_5:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5
+.Lbige_addb_6:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6
+.Lbige_addb_7:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
+.Lbige_addb_8:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
+.Lbige_addb_9:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9
+.Lbige_addb_10:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10
+.Lbige_addb_11:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11
+.Lbige_addb_12:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12
+.Lbige_addb_13:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13
+.Lbige_addb_14:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14
+.Lbige_addb_15:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15
+
.align 4
/* 4-bit mask */
.L0f0f0f0f:
@@ -930,6 +962,9 @@ _gcry_camellia_aesni_avx_ctr_enc:
andq $~31, %rsp;
movq %rsp, %rax;

+ cmpb $(0x100 - 16), 15(%rcx);
+ jbe .Lctr_byteadd;
+
vmovdqa .Lbswap128_mask rRIP, %xmm14;

/* load IV and byteswap */
@@ -978,6 +1013,8 @@ _gcry_camellia_aesni_avx_ctr_enc:
vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */
vmovdqu %xmm13, (%rcx);

+.align 8
+.Lload_ctr_done:
/* inpack16_pre: */
vmovq (key_table)(CTX), %xmm15;
vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
@@ -1026,6 +1063,47 @@ _gcry_camellia_aesni_avx_ctr_enc:
leave;
CFI_LEAVE();
ret_spec_stop;
+
+.align 8
+.Lctr_byteadd_full_ctr_carry:
+ movq 8(%rcx), %r11;
+ movq (%rcx), %r10;
+ bswapq %r11;
+ bswapq %r10;
+ addq $16, %r11;
+ adcq $0, %r10;
+ bswapq %r11;
+ bswapq %r10;
+ movq %r11, 8(%rcx);
+ movq %r10, (%rcx);
+ jmp .Lctr_byteadd_xmm;
+.align 8
+.Lctr_byteadd:
+ vmovdqu (%rcx), %xmm15;
+ je .Lctr_byteadd_full_ctr_carry;
+ addb $16, 15(%rcx);
+.Lctr_byteadd_xmm:
+ vmovdqa %xmm15, %xmm0;
+ vpaddb .Lbige_addb_1 rRIP, %xmm15, %xmm14;
+ vmovdqu %xmm15, 15 * 16(%rax);
+ vpaddb .Lbige_addb_2 rRIP, %xmm15, %xmm13;
+ vmovdqu %xmm14, 14 * 16(%rax);
+ vpaddb .Lbige_addb_3 rRIP, %xmm15, %xmm12;
+ vmovdqu %xmm13, 13 * 16(%rax);
+ vpaddb .Lbige_addb_4 rRIP, %xmm15, %xmm11;
+ vpaddb .Lbige_addb_5 rRIP, %xmm15, %xmm10;
+ vpaddb .Lbige_addb_6 rRIP, %xmm15, %xmm9;
+ vpaddb .Lbige_addb_7 rRIP, %xmm15, %xmm8;
+ vpaddb .Lbige_addb_8 rRIP, %xmm0, %xmm7;
+ vpaddb .Lbige_addb_9 rRIP, %xmm0, %xmm6;
+ vpaddb .Lbige_addb_10 rRIP, %xmm0, %xmm5;
+ vpaddb .Lbige_addb_11 rRIP, %xmm0, %xmm4;
+ vpaddb .Lbige_addb_12 rRIP, %xmm0, %xmm3;
+ vpaddb .Lbige_addb_13 rRIP, %xmm0, %xmm2;
+ vpaddb .Lbige_addb_14 rRIP, %xmm0, %xmm1;
+ vpaddb .Lbige_addb_15 rRIP, %xmm0, %xmm0;
+
+ jmp .Lload_ctr_done;
CFI_ENDPROC();
ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)

diff --git a/cipher/camellia-aesni-avx2-amd64.h b/cipher/camellia-aesni-avx2-amd64.h
index c92a0559..7d451c09 100644
--- a/cipher/camellia-aesni-avx2-amd64.h
+++ b/cipher/camellia-aesni-avx2-amd64.h
@@ -805,6 +805,36 @@ ELF(.type FUNC_NAME(_constants),@object;)
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0

+/* CTR byte addition constants */
+.align 32
+.Lbige_addb_0_1:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+.Lbige_addb_2_3:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3
+.Lbige_addb_4_5:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5
+.Lbige_addb_6_7:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
+.Lbige_addb_8_9:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9
+.Lbige_addb_10_11:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11
+.Lbige_addb_12_13:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13
+.Lbige_addb_14_15:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15
+.Lbige_addb_16_16:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16
+
#ifdef CAMELLIA_GFNI_BUILD

/* Pre-filters and post-filters bit-matrixes for Camellia sboxes s1, s2, s3
@@ -1151,9 +1181,6 @@ FUNC_NAME(ctr_enc):
movq %rsp, %rbp;
CFI_DEF_CFA_REGISTER(%rbp);

- movq 8(%rcx), %r11;
- bswapq %r11;
-
cmpl $128, key_bitlength(CTX);
movl $32, %r8d;
movl $24, %eax;
@@ -1163,6 +1190,12 @@ FUNC_NAME(ctr_enc):
andq $~63, %rsp;
movq %rsp, %rax;

+ cmpb $(0x100 - 32), 15(%rcx);
+ jbe .Lctr_byteadd;
+
+ movq 8(%rcx), %r11;
+ bswapq %r11;
+
vpcmpeqd %ymm15, %ymm15, %ymm15;
vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */

@@ -1275,7 +1308,7 @@ FUNC_NAME(ctr_enc):
vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13;
vmovdqu %xmm13, (%rcx);

-.align 4
+.align 8
.Lload_ctr_done:
/* inpack32_pre: */
vpbroadcastq (key_table)(CTX), %ymm15;
@@ -1325,6 +1358,48 @@ FUNC_NAME(ctr_enc):
leave;
CFI_LEAVE();
ret_spec_stop;
+
+.align 8
+.Lctr_byteadd_full_ctr_carry:
+ movq 8(%rcx), %r11;
+ movq (%rcx), %r10;
+ bswapq %r11;
+ bswapq %r10;
+ addq $32, %r11;
+ adcq $0, %r10;
+ bswapq %r11;
+ bswapq %r10;
+ movq %r11, 8(%rcx);
+ movq %r10, (%rcx);
+ jmp .Lctr_byteadd_ymm;
+.align 8
+.Lctr_byteadd:
+ vbroadcasti128 (%rcx), %ymm8;
+ je .Lctr_byteadd_full_ctr_carry;
+ addb $32, 15(%rcx);
+.Lctr_byteadd_ymm:
+ vpaddb .Lbige_addb_16_16 rRIP, %ymm8, %ymm0;
+ vpaddb .Lbige_addb_0_1 rRIP, %ymm8, %ymm15;
+ vpaddb .Lbige_addb_2_3 rRIP, %ymm8, %ymm14;
+ vmovdqu %ymm15, 15 * 32(%rax);
+ vpaddb .Lbige_addb_4_5 rRIP, %ymm8, %ymm13;
+ vmovdqu %ymm14, 14 * 32(%rax);
+ vpaddb .Lbige_addb_6_7 rRIP, %ymm8, %ymm12;
+ vmovdqu %ymm13, 13 * 32(%rax);
+ vpaddb .Lbige_addb_8_9 rRIP, %ymm8, %ymm11;
+ vpaddb .Lbige_addb_10_11 rRIP, %ymm8, %ymm10;
+ vpaddb .Lbige_addb_12_13 rRIP, %ymm8, %ymm9;
+ vpaddb .Lbige_addb_14_15 rRIP, %ymm8, %ymm8;
+ vpaddb .Lbige_addb_0_1 rRIP, %ymm0, %ymm7;
+ vpaddb .Lbige_addb_2_3 rRIP, %ymm0, %ymm6;
+ vpaddb .Lbige_addb_4_5 rRIP, %ymm0, %ymm5;
+ vpaddb .Lbige_addb_6_7 rRIP, %ymm0, %ymm4;
+ vpaddb .Lbige_addb_8_9 rRIP, %ymm0, %ymm3;
+ vpaddb .Lbige_addb_10_11 rRIP, %ymm0, %ymm2;
+ vpaddb .Lbige_addb_12_13 rRIP, %ymm0, %ymm1;
+ vpaddb .Lbige_addb_14_15 rRIP, %ymm0, %ymm0;
+
+ jmp .Lload_ctr_done;
CFI_ENDPROC();
ELF(.size FUNC_NAME(ctr_enc),.-FUNC_NAME(ctr_enc);)

diff --git a/cipher/camellia-gfni-avx512-amd64.S b/cipher/camellia-gfni-avx512-amd64.S
index 64fef8b6..c62b7848 100644
--- a/cipher/camellia-gfni-avx512-amd64.S
+++ b/cipher/camellia-gfni-avx512-amd64.S
@@ -689,6 +689,35 @@ ELF(.type _gcry_camellia_gfni_avx512__constants,@object;)
BV8(0, 0, 0, 1, 1, 1, 0, 0),
BV8(0, 0, 0, 0, 0, 0, 0, 1))

+/* CTR byte addition constants */
+.align 64
+.Lbige_addb_0_1:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+.Lbige_addb_2_3:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3
+.Lbige_addb_4_5:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5
+.Lbige_addb_6_7:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
+.Lbige_addb_8_9:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9
+.Lbige_addb_10_11:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11
+.Lbige_addb_12_13:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13
+.Lbige_addb_14_15:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15
+.Lbige_addb_16:
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16
+
ELF(.size _gcry_camellia_gfni_avx512__constants,.-_gcry_camellia_gfni_avx512__constants;)

.text
@@ -836,6 +865,14 @@ _gcry_camellia_gfni_avx512_ctr_enc:
CFI_STARTPROC();
spec_stop_avx512;

+ cmpl $128, key_bitlength(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ cmpb $(0x100 - 64), 15(%rcx);
+ jbe .Lctr_byteadd;
+
vbroadcasti64x2 .Lbswap128_mask rRIP, %zmm19;
vmovdqa64 .Lcounter0123_lo rRIP, %zmm21;
vbroadcasti64x2 .Lcounter4444_lo rRIP, %zmm22;
@@ -851,11 +888,6 @@ _gcry_camellia_gfni_avx512_ctr_enc:
vbroadcasti64x2 (%rcx), %zmm0;
vpshufb %zmm19, %zmm0, %zmm0;

- cmpl $128, key_bitlength(CTX);
- movl $32, %r8d;
- movl $24, %eax;
- cmovel %eax, %r8d; /* max */
-
/* check need for handling 64-bit overflow and carry */
cmpq $(0xffffffffffffffff - 64), %r11;
ja .Lload_ctr_carry;
@@ -901,8 +933,9 @@ _gcry_camellia_gfni_avx512_ctr_enc:

.align 4
.Lload_ctr_done:
+ vbroadcasti64x2 .Lpack_bswap rRIP, %zmm17;
vpbroadcastq (key_table)(CTX), %zmm16;
- vpshufb .Lpack_bswap rRIP, %zmm16, %zmm16;
+ vpshufb %zmm17, %zmm16, %zmm16;

/* Byte-swap IVs and update counter. */
addq $64, %r11;
@@ -928,6 +961,8 @@ _gcry_camellia_gfni_avx512_ctr_enc:
movq %r11, 8(%rcx);
movq %r10, (%rcx);

+.align 16
+.Lctr_inpack64_pre:
/* inpack64_pre: */
vpxorq %zmm0, %zmm16, %zmm0;
vpxorq %zmm1, %zmm16, %zmm1;
@@ -972,6 +1007,56 @@ _gcry_camellia_gfni_avx512_ctr_enc:
clear_regs();

ret_spec_stop;
+
+.align 16
+.Lctr_byteadd_full_ctr_carry:
+ movq 8(%rcx), %r11;
+ movq (%rcx), %r10;
+ bswapq %r11;
+ bswapq %r10;
+ addq $64, %r11;
+ adcq $0, %r10;
+ bswapq %r11;
+ bswapq %r10;
+ movq %r11, 8(%rcx);
+ movq %r10, (%rcx);
+ jmp .Lctr_byteadd_zmm;
+.align 16
+.Lctr_byteadd:
+ vbroadcasti64x2 (%rcx), %zmm12;
+ je .Lctr_byteadd_full_ctr_carry;
+ addb $64, 15(%rcx);
+.Lctr_byteadd_zmm:
+ vbroadcasti64x2 .Lbige_addb_16 rRIP, %zmm16;
+ vmovdqa64 .Lbige_addb_0_1 rRIP, %zmm17;
+ vmovdqa64 .Lbige_addb_4_5 rRIP, %zmm18;
+ vmovdqa64 .Lbige_addb_8_9 rRIP, %zmm19;
+ vmovdqa64 .Lbige_addb_12_13 rRIP, %zmm20;
+ vpaddb %zmm16, %zmm12, %zmm8;
+ vpaddb %zmm17, %zmm12, %zmm15;
+ vpaddb %zmm18, %zmm12, %zmm14;
+ vpaddb %zmm19, %zmm12, %zmm13;
+ vpaddb %zmm20, %zmm12, %zmm12;
+ vpaddb %zmm16, %zmm8, %zmm4;
+ vpaddb %zmm17, %zmm8, %zmm11;
+ vpaddb %zmm18, %zmm8, %zmm10;
+ vpaddb %zmm19, %zmm8, %zmm9;
+ vpaddb %zmm20, %zmm8, %zmm8;
+ vpaddb %zmm16, %zmm4, %zmm0;
+ vpaddb %zmm17, %zmm4, %zmm7;
+ vpaddb %zmm18, %zmm4, %zmm6;
+ vpaddb %zmm19, %zmm4, %zmm5;
+ vpaddb %zmm20, %zmm4, %zmm4;
+ vpaddb %zmm17, %zmm0, %zmm3;
+ vpaddb %zmm18, %zmm0, %zmm2;
+ vpaddb %zmm19, %zmm0, %zmm1;
+ vpaddb %zmm20, %zmm0, %zmm0;
+
+ vbroadcasti64x2 .Lpack_bswap rRIP, %zmm17
+ vpbroadcastq (key_table)(CTX), %zmm16;
+ vpshufb %zmm17, %zmm16, %zmm16;
+
+ jmp .Lctr_inpack64_pre;
CFI_ENDPROC();
ELF(.size _gcry_camellia_gfni_avx512_ctr_enc,.-_gcry_camellia_gfni_avx512_ctr_enc;)

diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 8b4b4b3c..76a09eb1 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -117,6 +117,7 @@ typedef struct
unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used. */
#endif /*USE_AESNI_AVX*/
#ifdef USE_AESNI_AVX2
+ unsigned int use_avx2:1; /* Set if any AVX2 implementation is enabled. */
unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used. */
unsigned int use_vaes_avx2:1; /* VAES/AVX2 implementation shall be used. */
unsigned int use_gfni_avx2:1; /* GFNI/AVX2 implementation shall be used. */
@@ -463,12 +464,15 @@ camellia_setkey(void *c, const byte *key, unsigned keylen,
ctx->use_vaes_avx2 = 0;
ctx->use_gfni_avx2 = 0;
ctx->use_gfni_avx512 = 0;
+ ctx->use_avx2 = ctx->use_aesni_avx2;
#endif
#ifdef USE_VAES_AVX2
ctx->use_vaes_avx2 = (hwf & HWF_INTEL_VAES_VPCLMUL) && (hwf & HWF_INTEL_AVX2);
+ ctx->use_avx2 |= ctx->use_vaes_avx2;
#endif
#ifdef USE_GFNI_AVX2
ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
+ ctx->use_avx2 |= ctx->use_gfni_avx2;
#endif
#ifdef USE_GFNI_AVX512
ctx->use_gfni_avx512 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX512);
@@ -838,7 +842,7 @@ _gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
#endif

#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2)
+ if (ctx->use_avx2)
{
int did_use_aesni_avx2 = 0;
typeof (&_gcry_camellia_aesni_avx2_ctr_enc) bulk_ctr_fn =
@@ -956,7 +960,7 @@ _gcry_camellia_cbc_dec(void *context, unsigned char *iv,
#endif

#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2)
+ if (ctx->use_avx2)
{
int did_use_aesni_avx2 = 0;
typeof (&_gcry_camellia_aesni_avx2_cbc_dec) bulk_cbc_fn =
@@ -1074,7 +1078,7 @@ _gcry_camellia_cfb_dec(void *context, unsigned char *iv,
#endif

#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2)
+ if (ctx->use_avx2)
{
int did_use_aesni_avx2 = 0;
typeof (&_gcry_camellia_aesni_avx2_cfb_dec) bulk_cfb_fn =
@@ -1301,7 +1305,7 @@ _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
#endif

#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2)
+ if (ctx->use_avx2)
{
int did_use_aesni_avx2 = 0;
u64 Ls[32];
@@ -1435,7 +1439,7 @@ _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
#endif

#ifdef USE_AESNI_AVX2
- if (ctx->use_aesni_avx2)
+ if (ctx->use_avx2)
{
int did_use_aesni_avx2 = 0;
u64 Ls[32];
--
2.37.2


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
https://lists.gnupg.org/mailman/listinfo/gcrypt-devel