Mailing List Archive

[PATCH 5/8] mpi/ec: small optimization for ec_mulm_25519
* mpi/ec.c (ec_addm_25519): Remove one addition.
(ec_subm_25519): Change order of add_n and set_cond to remove
the need to clear 'n'.
(ec_mulm_25519): Avoid extra memory copies; Use _gcry_mpih_addmul_1
for multiplying by 19 and adding; Remove one addition at end.
--

Benchmarks on AMD Ryzen 7 5800X:

Before:
Ed25519 | nanosecs/iter cycles/iter auto Mhz
keygen | 304980 1478913 4849
sign | 328657 1589657 4837
verify | 625133 3032355 4851

After (~22% faster):
Ed25519 | nanosecs/iter cycles/iter auto Mhz
keygen | 244288 1184862 4850
sign | 267831 1298934 4850
verify | 504745 2449106 4852

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
mpi/ec.c | 41 ++++++++++++-----------------------------
1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/mpi/ec.c b/mpi/ec.c
index 0b6ae9a9..e1d4b32c 100644
--- a/mpi/ec.c
+++ b/mpi/ec.c
@@ -369,15 +369,13 @@ ec_addm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
log_bug ("addm_25519: different sizes\n");

- memset (n, 0, sizeof n);
up = u->d;
vp = v->d;
wp = w->d;

_gcry_mpih_add_n (wp, up, vp, wsize);
- borrow = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
- mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
- _gcry_mpih_add_n (wp, wp, n, wsize);
+ borrow = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+ mpih_set_cond (wp, n, wsize, (borrow == 0UL));
wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
}

@@ -392,14 +390,13 @@ ec_subm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
log_bug ("subm_25519: different sizes\n");

- memset (n, 0, sizeof n);
up = u->d;
vp = v->d;
wp = w->d;

borrow = _gcry_mpih_sub_n (wp, up, vp, wsize);
- mpih_set_cond (n, ctx->p->d, wsize, (borrow != 0UL));
- _gcry_mpih_add_n (wp, wp, n, wsize);
+ _gcry_mpih_add_n (n, wp, ctx->p->d, wsize);
+ mpih_set_cond (wp, n, wsize, (borrow != 0UL));
wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
}

@@ -409,7 +406,6 @@ ec_mulm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
mpi_ptr_t wp, up, vp;
mpi_size_t wsize = LIMB_SIZE_25519;
mpi_limb_t n[LIMB_SIZE_25519*2];
- mpi_limb_t m[LIMB_SIZE_25519+1];
mpi_limb_t cy;
int msb;

@@ -425,32 +421,19 @@ ec_mulm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
memcpy (wp, n, wsize * BYTES_PER_MPI_LIMB);
wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));

- memcpy (m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB);
- _gcry_mpih_rshift (m, m, LIMB_SIZE_25519+1, (255 % BITS_PER_MPI_LIMB));
-
- memcpy (n, m, wsize * BYTES_PER_MPI_LIMB);
- cy = _gcry_mpih_lshift (m, m, LIMB_SIZE_25519, 4);
- m[LIMB_SIZE_25519] = cy;
- cy = _gcry_mpih_add_n (m, m, n, wsize);
- m[LIMB_SIZE_25519] += cy;
- cy = _gcry_mpih_add_n (m, m, n, wsize);
- m[LIMB_SIZE_25519] += cy;
- cy = _gcry_mpih_add_n (m, m, n, wsize);
- m[LIMB_SIZE_25519] += cy;
+ _gcry_mpih_rshift (n, n+LIMB_SIZE_25519-1, LIMB_SIZE_25519+1,
+ (255 % BITS_PER_MPI_LIMB));

- cy = _gcry_mpih_add_n (wp, wp, m, wsize);
- m[LIMB_SIZE_25519] += cy;
+ cy = _gcry_mpih_addmul_1 (wp, n, wsize, 19);

- memset (m, 0, wsize * BYTES_PER_MPI_LIMB);
+ memset (n, 0, wsize * BYTES_PER_MPI_LIMB);
msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB));
- m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19;
+ n[0] = (cy * 2 + msb) * 19;
wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
- _gcry_mpih_add_n (wp, wp, m, wsize);
+ _gcry_mpih_add_n (wp, wp, n, wsize);

- m[0] = 0;
- cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
- mpih_set_cond (m, ctx->p->d, wsize, (cy != 0UL));
- _gcry_mpih_add_n (wp, wp, m, wsize);
+ cy = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+ mpih_set_cond (wp, n, wsize, (cy == 0UL));
}

static void
--
2.30.2


_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@gnupg.org
http://lists.gnupg.org/mailman/listinfo/gcrypt-devel