Mailing List Archive

[PATCH 5/8] camellia-avx2: add fast path for full 32 block ECB input
* cipher/camellia-aesni-avx2-amd64.h (enc_blk1_32, dec_blk1_32): Add
fast path for 32 block input.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
cipher/camellia-aesni-avx2-amd64.h | 41 ++++++++++++++++++++++++------
1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/cipher/camellia-aesni-avx2-amd64.h b/cipher/camellia-aesni-avx2-amd64.h
index 7d451c09..92f0ce5f 100644
--- a/cipher/camellia-aesni-avx2-amd64.h
+++ b/cipher/camellia-aesni-avx2-amd64.h
@@ -2127,12 +2127,9 @@ FUNC_NAME(enc_blk1_32):

cmpl $31, %ecx;
vpxor %xmm0, %xmm0, %xmm0;
- ja 1f;
+ ja .Lenc_blk32;
jb 2f;
vmovdqu 15 * 32(%rdx), %xmm0;
- jmp 2f;
- 1:
- vmovdqu 15 * 32(%rdx), %ymm0;
2:
vmovdqu %ymm0, (%rax);

@@ -2195,13 +2192,29 @@ FUNC_NAME(enc_blk1_32):
STORE_OUTPUT(ymm9, 14);
STORE_OUTPUT(ymm8, 15);

+.align 8
2:
+.Lenc_blk32_done:
vzeroall;

leave;
CFI_LEAVE();
ret_spec_stop;
CFI_ENDPROC();
+
+.align 8
+.Lenc_blk32:
+ inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx, (key_table)(CTX));
+
+ call FUNC_NAME(enc_blk32);
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+ jmp .Lenc_blk32_done;
+ CFI_ENDPROC();
ELF(.size FUNC_NAME(enc_blk1_32),.-FUNC_NAME(enc_blk1_32);)

.align 16
@@ -2235,12 +2248,9 @@ FUNC_NAME(dec_blk1_32):

cmpl $31, %ecx;
vpxor %xmm0, %xmm0, %xmm0;
- ja 1f;
+ ja .Ldec_blk32;
jb 2f;
vmovdqu 15 * 32(%rdx), %xmm0;
- jmp 2f;
- 1:
- vmovdqu 15 * 32(%rdx), %ymm0;
2:
vmovdqu %ymm0, (%rax);

@@ -2284,12 +2294,27 @@ FUNC_NAME(dec_blk1_32):
STORE_OUTPUT(ymm9, 14);
STORE_OUTPUT(ymm8, 15);

+.align 8
2:
+.Ldec_blk32_done:
vzeroall;

leave;
CFI_LEAVE();
ret_spec_stop;
+
+.align 8
+.Ldec_blk32:
+ inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx, (key_table)(CTX, %r8, 8));
+
+ call FUNC_NAME(dec_blk32);
+
+ write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+ %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+ %ymm8, %rsi);
+ jmp .Ldec_blk32_done;
CFI_ENDPROC();
ELF(.size FUNC_NAME(dec_blk1_32),.-FUNC_NAME(dec_blk1_32);)

--
2.37.2


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
https://lists.gnupg.org/mailman/listinfo/gcrypt-devel