
[PATCH] aarch64-asm: use ADR for getting pointers for local labels
* cipher/asm-common-aarch64.h (GET_DATA_POINTER): Remove.
(GET_LOCAL_POINTER): New.
* cipher/camellia-aarch64.S: Use GET_LOCAL_POINTER instead of the
ADR instruction directly.
* cipher/chacha20-aarch64.S: Use GET_LOCAL_POINTER instead of
GET_DATA_POINTER.
* cipher/cipher-gcm-armv8-aarch64-ce.S: Likewise.
* cipher/crc-armv8-aarch64-ce.S: Likewise.
* cipher/sha1-armv8-aarch64-ce.S: Likewise.
* cipher/sha256-armv8-aarch64-ce.S: Likewise.
* cipher/sm3-aarch64.S: Likewise.
* cipher/sm3-armv8-aarch64-ce.S: Likewise.
* cipher/sm4-aarch64.S: Likewise.
---

Switch to using ADR instead of ADRP/LDR or ADRP/ADD for getting
data pointers within assembly files. ADR is more portable across
targets and does not require the labels to be declared in the GOT.
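
For illustration, here is what the change amounts to on ELF targets
(a sketch; x5 and .Lrconst are example operands taken from the GHASH
code below). The old macro expanded to a GOT indirection:

    /* GET_DATA_POINTER(x5, .Lrconst), old ELF expansion */
    adrp x5, :got:.Lrconst
    ldr  x5, [x5, #:got_lo12:.Lrconst]

while the new macro emits a single PC-relative instruction:

    /* GET_LOCAL_POINTER(x5, .Lrconst) */
    adr  x5, .Lrconst

Note that ADR can only reach labels within +-1 MiB of the program
counter, so the new macro is only suitable for labels defined next
to the code in the same object file; hence the rename from
GET_DATA_POINTER to GET_LOCAL_POINTER.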

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 cipher/asm-common-aarch64.h          | 15 ++-------------
 cipher/camellia-aarch64.S            |  4 ++--
 cipher/chacha20-aarch64.S            |  8 ++++----
 cipher/cipher-gcm-armv8-aarch64-ce.S |  6 +++---
 cipher/crc-armv8-aarch64-ce.S        |  4 ++--
 cipher/sha1-armv8-aarch64-ce.S       |  2 +-
 cipher/sha256-armv8-aarch64-ce.S     |  2 +-
 cipher/sm3-aarch64.S                 |  2 +-
 cipher/sm3-armv8-aarch64-ce.S        |  2 +-
 cipher/sm4-aarch64.S                 |  2 +-
 10 files changed, 18 insertions(+), 29 deletions(-)

diff --git a/cipher/asm-common-aarch64.h b/cipher/asm-common-aarch64.h
index d3f7801c..b38b17a6 100644
--- a/cipher/asm-common-aarch64.h
+++ b/cipher/asm-common-aarch64.h
@@ -29,19 +29,8 @@
# define ELF(...) /*_*/
#endif

-#ifdef __APPLE__
-#define GET_DATA_POINTER(reg, name) \
- adrp reg, name@GOTPAGE ; \
- add reg, reg, name@GOTPAGEOFF ;
-#elif defined(_WIN32)
-#define GET_DATA_POINTER(reg, name) \
- adrp reg, name ; \
- add reg, reg, #:lo12:name ;
-#else
-#define GET_DATA_POINTER(reg, name) \
- adrp reg, :got:name ; \
- ldr reg, [reg, #:got_lo12:name] ;
-#endif
+#define GET_LOCAL_POINTER(reg, label) \
+ adr reg, label;

#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
/* CFI directives to emit DWARF stack unwinding information. */
diff --git a/cipher/camellia-aarch64.S b/cipher/camellia-aarch64.S
index 30b568d3..c019c168 100644
--- a/cipher/camellia-aarch64.S
+++ b/cipher/camellia-aarch64.S
@@ -214,7 +214,7 @@ _gcry_camellia_arm_encrypt_block:
* w3: keybitlen
*/

- adr RTAB1, _gcry_camellia_arm_tables;
+ GET_LOCAL_POINTER(RTAB1, _gcry_camellia_arm_tables);
mov RMASK, #(0xff<<4); /* byte mask */
add RTAB2, RTAB1, #(1 * 4);
add RTAB3, RTAB1, #(2 * 4);
@@ -274,7 +274,7 @@ _gcry_camellia_arm_decrypt_block:
* w3: keybitlen
*/

- adr RTAB1, _gcry_camellia_arm_tables;
+ GET_LOCAL_POINTER(RTAB1, _gcry_camellia_arm_tables);
mov RMASK, #(0xff<<4); /* byte mask */
add RTAB2, RTAB1, #(1 * 4);
add RTAB3, RTAB1, #(2 * 4);
diff --git a/cipher/chacha20-aarch64.S b/cipher/chacha20-aarch64.S
index 2a980b95..540f892b 100644
--- a/cipher/chacha20-aarch64.S
+++ b/cipher/chacha20-aarch64.S
@@ -206,10 +206,10 @@ _gcry_chacha20_aarch64_blocks4:
*/
CFI_STARTPROC()

- GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
add INPUT_CTR, INPUT, #(12*4);
ld1 {ROT8.16b}, [CTR];
- GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
mov INPUT_POS, INPUT;
ld1 {VCTR.16b}, [CTR];

@@ -383,10 +383,10 @@ _gcry_chacha20_poly1305_aarch64_blocks4:
mov POLY_RSTATE, x4;
mov POLY_RSRC, x5;

- GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+ GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
add INPUT_CTR, INPUT, #(12*4);
ld1 {ROT8.16b}, [CTR];
- GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+ GET_LOCAL_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
mov INPUT_POS, INPUT;
ld1 {VCTR.16b}, [CTR];

diff --git a/cipher/cipher-gcm-armv8-aarch64-ce.S b/cipher/cipher-gcm-armv8-aarch64-ce.S
index 687fabe3..78f3ad2d 100644
--- a/cipher/cipher-gcm-armv8-aarch64-ce.S
+++ b/cipher/cipher-gcm-armv8-aarch64-ce.S
@@ -169,7 +169,7 @@ _gcry_ghash_armv8_ce_pmull:

cbz x3, .Ldo_nothing;

- GET_DATA_POINTER(x5, .Lrconst)
+ GET_LOCAL_POINTER(x5, .Lrconst)

eor vZZ.16b, vZZ.16b, vZZ.16b
ld1 {rhash.16b}, [x1]
@@ -368,7 +368,7 @@ _gcry_polyval_armv8_ce_pmull:

cbz x3, .Lpolyval_do_nothing;

- GET_DATA_POINTER(x5, .Lrconst)
+ GET_LOCAL_POINTER(x5, .Lrconst)

eor vZZ.16b, vZZ.16b, vZZ.16b
ld1 {rhash.16b}, [x1]
@@ -589,7 +589,7 @@ _gcry_ghash_setup_armv8_ce_pmull:
*/
CFI_STARTPROC()

- GET_DATA_POINTER(x2, .Lrconst)
+ GET_LOCAL_POINTER(x2, .Lrconst)

eor vZZ.16b, vZZ.16b, vZZ.16b

diff --git a/cipher/crc-armv8-aarch64-ce.S b/cipher/crc-armv8-aarch64-ce.S
index 7ac884af..b6cdbb3d 100644
--- a/cipher/crc-armv8-aarch64-ce.S
+++ b/cipher/crc-armv8-aarch64-ce.S
@@ -71,7 +71,7 @@ _gcry_crc32r_armv8_ce_bulk:
*/
CFI_STARTPROC()

- GET_DATA_POINTER(x7, .Lcrc32_constants)
+ GET_LOCAL_POINTER(x7, .Lcrc32_constants)
add x9, x3, #consts_k(5 - 1)
cmp x2, #128

@@ -280,7 +280,7 @@ _gcry_crc32_armv8_ce_bulk:
*/
CFI_STARTPROC()

- GET_DATA_POINTER(x7, .Lcrc32_constants)
+ GET_LOCAL_POINTER(x7, .Lcrc32_constants)
add x4, x7, #.Lcrc32_bswap_shuf - .Lcrc32_constants
cmp x2, #128
ld1 {v7.16b}, [x4]
diff --git a/cipher/sha1-armv8-aarch64-ce.S b/cipher/sha1-armv8-aarch64-ce.S
index ea26564b..f95717ee 100644
--- a/cipher/sha1-armv8-aarch64-ce.S
+++ b/cipher/sha1-armv8-aarch64-ce.S
@@ -109,7 +109,7 @@ _gcry_sha1_transform_armv8_ce:

cbz x2, .Ldo_nothing;

- GET_DATA_POINTER(x4, .LK_VEC);
+ GET_LOCAL_POINTER(x4, .LK_VEC);

ld1 {vH0123.4s}, [x0] /* load h0,h1,h2,h3 */
ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */
diff --git a/cipher/sha256-armv8-aarch64-ce.S b/cipher/sha256-armv8-aarch64-ce.S
index d0fa6285..5616eada 100644
--- a/cipher/sha256-armv8-aarch64-ce.S
+++ b/cipher/sha256-armv8-aarch64-ce.S
@@ -119,7 +119,7 @@ _gcry_sha256_transform_armv8_ce:

cbz x2, .Ldo_nothing;

- GET_DATA_POINTER(x3, .LK);
+ GET_LOCAL_POINTER(x3, .LK);
mov x4, x3

ld1 {vH0123.4s-vH4567.4s}, [x0] /* load state */
diff --git a/cipher/sm3-aarch64.S b/cipher/sm3-aarch64.S
index 3fb89006..0e58254b 100644
--- a/cipher/sm3-aarch64.S
+++ b/cipher/sm3-aarch64.S
@@ -425,7 +425,7 @@ _gcry_sm3_transform_aarch64:
CFI_DEF_CFA_REGISTER(RFRAME);

sub addr0, sp, #STACK_SIZE;
- GET_DATA_POINTER(RKPTR, .LKtable);
+ GET_LOCAL_POINTER(RKPTR, .LKtable);
and sp, addr0, #(~63);

/* Preload first block. */
diff --git a/cipher/sm3-armv8-aarch64-ce.S b/cipher/sm3-armv8-aarch64-ce.S
index 0900b84f..d592d08a 100644
--- a/cipher/sm3-armv8-aarch64-ce.S
+++ b/cipher/sm3-armv8-aarch64-ce.S
@@ -170,7 +170,7 @@ _gcry_sm3_transform_armv8_ce:
ext CTX2.16b, CTX2.16b, CTX2.16b, #8;

.Lloop:
- GET_DATA_POINTER(x3, .Lsm3_Ktable);
+ GET_LOCAL_POINTER(x3, .Lsm3_Ktable);
ld1 {v0.16b-v3.16b}, [x1], #64;
sub x2, x2, #1;

diff --git a/cipher/sm4-aarch64.S b/cipher/sm4-aarch64.S
index 306b425e..8d06991b 100644
--- a/cipher/sm4-aarch64.S
+++ b/cipher/sm4-aarch64.S
@@ -84,7 +84,7 @@ ELF(.size _gcry_sm4_aarch64_consts,.-_gcry_sm4_aarch64_consts)
/* Helper macros. */

#define preload_sbox(ptr) \
- GET_DATA_POINTER(ptr, .Lsm4_sbox); \
+ GET_LOCAL_POINTER(ptr, .Lsm4_sbox); \
ld1 {v16.16b-v19.16b}, [ptr], #64; \
ld1 {v20.16b-v23.16b}, [ptr], #64; \
ld1 {v24.16b-v27.16b}, [ptr], #64; \
--
2.34.1


Re: [PATCH] aarch64-asm: use ADR for getting pointers for local labels
Hi Jussi,

On 5/12/22 2:11 PM, Jussi Kivilinna wrote:
> * cipher/asm-common-aarch64.h (GET_DATA_POINTER): Remove.
> (GET_LOCAL_POINTER): New.
> * cipher/camellia-aarch64.S: Use GET_LOCAL_POINTER instead of the
> ADR instruction directly.
> * cipher/chacha20-aarch64.S: Use GET_LOCAL_POINTER instead of
> GET_DATA_POINTER.
> * cipher/cipher-gcm-armv8-aarch64-ce.S: Likewise.
> * cipher/crc-armv8-aarch64-ce.S: Likewise.
> * cipher/sha1-armv8-aarch64-ce.S: Likewise.
> * cipher/sha256-armv8-aarch64-ce.S: Likewise.
> * cipher/sm3-aarch64.S: Likewise.
> * cipher/sm3-armv8-aarch64-ce.S: Likewise.
> * cipher/sm4-aarch64.S: Likewise.
> ---
>
> Switch to using ADR instead of ADRP/LDR or ADRP/ADD for getting
> data pointers within assembly files. ADR is more portable across
> targets and does not require the labels to be declared in the GOT.
>
> Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
> ---

Looks good to me. I don't have an Apple M1 machine; I have only
tested on arm64 Linux.

Reviewed-and-tested-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>

Best regards,
Tianjia

_______________________________________________
Gcrypt-devel mailing list
Gcrypt-devel@lists.gnupg.org
https://lists.gnupg.org/mailman/listinfo/gcrypt-devel