Mailing List Archive

[PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3
After selecting ARCH_SUPPORTS_INT128, when optimizing for size the
compiler generates calls to __lshrti3, __ashrti3, and __ashrti3 for
shifting __int128 values, causing a link failure:

loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in
function `mul_u64_u32_shr':
<PATH>/include/linux/math64.h:161:(.text+0x5e4): undefined
reference to `__lshrti3'

Provide the implementation of these functions if ARCH_SUPPORTS_INT128.

Reported-by: Huacai Chen <chenhuacai@kernel.org>
Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
---
arch/loongarch/include/asm/asm-prototypes.h | 6 +++
arch/loongarch/lib/Makefile | 2 +
arch/loongarch/lib/tishift.S | 56 +++++++++++++++++++++
3 files changed, 64 insertions(+)
create mode 100644 arch/loongarch/lib/tishift.S

diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index cf8e1a4e7c19..51f224bcfc65 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -6,3 +6,9 @@
#include <asm/page.h>
#include <asm/ftrace.h>
#include <asm-generic/asm-prototypes.h>
+
+#ifdef CONFIG_ARCH_SUPPORTS_INT128
+__int128_t __ashlti3(__int128_t a, int b);
+__int128_t __ashrti3(__int128_t a, int b);
+__int128_t __lshrti3(__int128_t a, int b);
+#endif
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index a77bf160bfc4..f61af161f16e 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -9,3 +9,5 @@ lib-y += delay.o memset.o memcpy.o memmove.o \
obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o

obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
diff --git a/arch/loongarch/lib/tishift.S b/arch/loongarch/lib/tishift.S
new file mode 100644
index 000000000000..eb43f29f4d0b
--- /dev/null
+++ b/arch/loongarch/lib/tishift.S
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/asmmacro.h>
+#include <linux/linkage.h>
+#include <linux/export.h>
+
+SYM_FUNC_START(__lshrti3)
+ slli.d t2, a1, 1
+ nor t3, zero, a2
+ srl.d t1, a0, a2
+ sll.d t2, t2, t3
+ andi t0, a2, 64
+ srl.d a1, a1, a2
+ or t1, t2, t1
+ maskeqz a0, a1, t0
+ masknez a1, a1, t0
+ masknez t0, t1, t0
+ or a0, t0, a0
+ jr ra
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
+
+SYM_FUNC_START(__ashrti3)
+ nor t3, zero, a2
+ slli.d t2, a1, 1
+ srl.d t1, a0, a2
+ sll.d t2, t2, t3
+ andi t0, a2, 64
+ or t1, t2, t1
+ sra.d a2, a1, a2
+ srai.d a1, a1, 63
+ maskeqz a0, a2, t0
+ maskeqz a1, a1, t0
+ masknez a2, a2, t0
+ masknez t0, t1, t0
+ or a1, a1, a2
+ or a0, t0, a0
+ jr ra
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__ashlti3)
+ srli.d t2, a0, 1
+ nor t3, zero, a2
+ sll.d t1, a1, a2
+ srl.d t2, t2, t3
+ andi t0, a2, 64
+ sll.d a0, a0, a2
+ or t1, t2, t1
+ maskeqz a1, a0, t0
+ masknez a0, a0, t0
+ masknez t0, t1, t0
+ or a1, t0, a1
+ jr ra
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
--
2.44.0
Re: [PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3 [ In reply to ]
Hi, Ruoyao,

I don't think #ifdef CONFIG_ARCH_SUPPORTS_INT128 is needed here.
S390/ARM64/RISCV all built it unconditionally. And I think this patch
can be squashed to the one enable ARCH_SUPPORTS_INT128, as S390 does.

Huacai

On Fri, Apr 26, 2024 at 8:14?PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> After selecting ARCH_SUPPORTS_INT128, when optimizing for size the
> compiler generates calls to __lshrti3, __ashrti3, and __ashrti3 for
> shifting __int128 values, causing a link failure:
>
> loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in
> function `mul_u64_u32_shr':
> <PATH>/include/linux/math64.h:161:(.text+0x5e4): undefined
> reference to `__lshrti3'
>
> Provide the implementation of these functions if ARCH_SUPPORTS_INT128.
>
> Reported-by: Huacai Chen <chenhuacai@kernel.org>
> Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/
> Signed-off-by: Xi Ruoyao <xry111@xry111.site>
> ---
> arch/loongarch/include/asm/asm-prototypes.h | 6 +++
> arch/loongarch/lib/Makefile | 2 +
> arch/loongarch/lib/tishift.S | 56 +++++++++++++++++++++
> 3 files changed, 64 insertions(+)
> create mode 100644 arch/loongarch/lib/tishift.S
>
> diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> index cf8e1a4e7c19..51f224bcfc65 100644
> --- a/arch/loongarch/include/asm/asm-prototypes.h
> +++ b/arch/loongarch/include/asm/asm-prototypes.h
> @@ -6,3 +6,9 @@
> #include <asm/page.h>
> #include <asm/ftrace.h>
> #include <asm-generic/asm-prototypes.h>
> +
> +#ifdef CONFIG_ARCH_SUPPORTS_INT128
> +__int128_t __ashlti3(__int128_t a, int b);
> +__int128_t __ashrti3(__int128_t a, int b);
> +__int128_t __lshrti3(__int128_t a, int b);
> +#endif
> diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
> index a77bf160bfc4..f61af161f16e 100644
> --- a/arch/loongarch/lib/Makefile
> +++ b/arch/loongarch/lib/Makefile
> @@ -9,3 +9,5 @@ lib-y += delay.o memset.o memcpy.o memmove.o \
> obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
>
> obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
> +
> +obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
> diff --git a/arch/loongarch/lib/tishift.S b/arch/loongarch/lib/tishift.S
> new file mode 100644
> index 000000000000..eb43f29f4d0b
> --- /dev/null
> +++ b/arch/loongarch/lib/tishift.S
> @@ -0,0 +1,56 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <asm/asmmacro.h>
> +#include <linux/linkage.h>
> +#include <linux/export.h>
> +
> +SYM_FUNC_START(__lshrti3)
> + slli.d t2, a1, 1
> + nor t3, zero, a2
> + srl.d t1, a0, a2
> + sll.d t2, t2, t3
> + andi t0, a2, 64
> + srl.d a1, a1, a2
> + or t1, t2, t1
> + maskeqz a0, a1, t0
> + masknez a1, a1, t0
> + masknez t0, t1, t0
> + or a0, t0, a0
> + jr ra
> +SYM_FUNC_END(__lshrti3)
> +EXPORT_SYMBOL(__lshrti3)
> +
> +SYM_FUNC_START(__ashrti3)
> + nor t3, zero, a2
> + slli.d t2, a1, 1
> + srl.d t1, a0, a2
> + sll.d t2, t2, t3
> + andi t0, a2, 64
> + or t1, t2, t1
> + sra.d a2, a1, a2
> + srai.d a1, a1, 63
> + maskeqz a0, a2, t0
> + maskeqz a1, a1, t0
> + masknez a2, a2, t0
> + masknez t0, t1, t0
> + or a1, a1, a2
> + or a0, t0, a0
> + jr ra
> +SYM_FUNC_END(__ashrti3)
> +EXPORT_SYMBOL(__ashrti3)
> +
> +SYM_FUNC_START(__ashlti3)
> + srli.d t2, a0, 1
> + nor t3, zero, a2
> + sll.d t1, a1, a2
> + srl.d t2, t2, t3
> + andi t0, a2, 64
> + sll.d a0, a0, a2
> + or t1, t2, t1
> + maskeqz a1, a0, t0
> + masknez a0, a0, t0
> + masknez t0, t1, t0
> + or a1, t0, a1
> + jr ra
> +SYM_FUNC_END(__ashlti3)
> +EXPORT_SYMBOL(__ashlti3)
> --
> 2.44.0
>
Re: [PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3 [ In reply to ]
On Sat, 2024-04-27 at 10:50 +0800, Huacai Chen wrote:
> Hi, Ruoyao,
>
> I don't think #ifdef CONFIG_ARCH_SUPPORTS_INT128 is needed here.
> S390/ARM64/RISCV all built it unconditionally.

The problem here is RISCV and ARM64 are using an incorrect prototype for
these functions in asm-prototypes.h:

long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);

where "long long" is not 128-bit. Despite this seems working for RISC-V
and ARM64 I really dislike it.

S390 seems assuming CONFIG_ARCH_SUPPORTS_INT128 is always true, but I
don't think we can assume it too (at least it'll likely to be false for
LA32, so doing so will cause trouble when we add LA32 support).

So if we don't want to check CONFIG_ARCH_SUPPORTS_INT128 and still use a
correct prototype, we'll do:

diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
index 51f224bcfc65..0a57db01116d 100644
--- a/arch/loongarch/include/asm/asm-prototypes.h
+++ b/arch/loongarch/include/asm/asm-prototypes.h
@@ -7,8 +7,6 @@
#include <asm/ftrace.h>
#include <asm-generic/asm-prototypes.h>

-#ifdef CONFIG_ARCH_SUPPORTS_INT128
-__int128_t __ashlti3(__int128_t a, int b);
-__int128_t __ashrti3(__int128_t a, int b);
-__int128_t __lshrti3(__int128_t a, int b);
-#endif
+struct { u64 lo, hi; } __ashlti3(u64 lo, u64 hi, int b);
+struct { u64 lo, hi; } __ashrti3(u64 lo, u64 hi, int b);
+struct { u64 lo, hi; } __lshrti3(u64 lo, u64 hi, int b);
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index f61af161f16e..23600c128e05 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -10,4 +10,4 @@ obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o

obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o

-obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
+obj-$(CONFIG_64BIT) += tishift.o

Is this really better than checking CONFIG_ARCH_SUPPORTS_INT128? I
don't know...

--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
Re: [PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3 [ In reply to ]
On Sat, 2024-04-27 at 12:00 +0800, Xi Ruoyao wrote:
> On Sat, 2024-04-27 at 10:50 +0800, Huacai Chen wrote:
> > Hi, Ruoyao,
> >
> > I don't think #ifdef CONFIG_ARCH_SUPPORTS_INT128 is needed here.
> > S390/ARM64/RISCV all built it unconditionally.
>
> The problem here is RISCV and ARM64 are using an incorrect prototype for
> these functions in asm-prototypes.h:
>
> long long __lshrti3(long long a, int b);
> long long __ashrti3(long long a, int b);
> long long __ashlti3(long long a, int b);
>
> where "long long" is not 128-bit.  Despite this seems working for RISC-V
> and ARM64 I really dislike it.
>
> S390 seems assuming CONFIG_ARCH_SUPPORTS_INT128 is always true, but I
> don't think we can assume it too (at least it'll likely to be false for
> LA32, so doing so will cause trouble when we add LA32 support).
>
> So if we don't want to check CONFIG_ARCH_SUPPORTS_INT128 and still use a
> correct prototype, we'll do:
>
> diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> index 51f224bcfc65..0a57db01116d 100644
> --- a/arch/loongarch/include/asm/asm-prototypes.h
> +++ b/arch/loongarch/include/asm/asm-prototypes.h
> @@ -7,8 +7,6 @@
>  #include <asm/ftrace.h>
>  #include <asm-generic/asm-prototypes.h>
>  
> -#ifdef CONFIG_ARCH_SUPPORTS_INT128
> -__int128_t __ashlti3(__int128_t a, int b);
> -__int128_t __ashrti3(__int128_t a, int b);
> -__int128_t __lshrti3(__int128_t a, int b);
> -#endif
> +struct { u64 lo, hi; } __ashlti3(u64 lo, u64 hi, int b);
> +struct { u64 lo, hi; } __ashrti3(u64 lo, u64 hi, int b);
> +struct { u64 lo, hi; } __lshrti3(u64 lo, u64 hi, int b);

Whoops. This is still incorrect for LA32. On LA32 an "int128" (if it
ever exists) should be passed as a pointer, but this is passing it in 4
GPRs. So if we want to keep the prototype correct we need to either use
"struct { u64 lo, hi; }" in the parameter list too, or guard it with
#ifdef CONFIG_64BIT.

So to me checking CONFIG_ARCH_SUPPORTS_INT128 is just easier.

If you insists on not checking CONFIG_ARCH_SUPPORTS_INT128 I'll just use
an incorrect prototype like RISC-V but put a comment here, like:

/* The prototypes are incorrect but this file is only used by
modpost which does not care. */
long long __ashlti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __lshrti3(long long a, int b);

How do you think?

--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
Re: [PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3 [ In reply to ]
On Sat, Apr 27, 2024 at 12:13?PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Sat, 2024-04-27 at 12:00 +0800, Xi Ruoyao wrote:
> > On Sat, 2024-04-27 at 10:50 +0800, Huacai Chen wrote:
> > > Hi, Ruoyao,
> > >
> > > I don't think #ifdef CONFIG_ARCH_SUPPORTS_INT128 is needed here.
> > > S390/ARM64/RISCV all built it unconditionally.
> >
> > The problem here is RISCV and ARM64 are using an incorrect prototype for
> > these functions in asm-prototypes.h:
> >
> > long long __lshrti3(long long a, int b);
> > long long __ashrti3(long long a, int b);
> > long long __ashlti3(long long a, int b);
> >
> > where "long long" is not 128-bit. Despite this seems working for RISC-V
> > and ARM64 I really dislike it.
> >
> > S390 seems assuming CONFIG_ARCH_SUPPORTS_INT128 is always true, but I
> > don't think we can assume it too (at least it'll likely to be false for
> > LA32, so doing so will cause trouble when we add LA32 support).
> >
> > So if we don't want to check CONFIG_ARCH_SUPPORTS_INT128 and still use a
> > correct prototype, we'll do:
> >
> > diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h
> > index 51f224bcfc65..0a57db01116d 100644
> > --- a/arch/loongarch/include/asm/asm-prototypes.h
> > +++ b/arch/loongarch/include/asm/asm-prototypes.h
> > @@ -7,8 +7,6 @@
> > #include <asm/ftrace.h>
> > #include <asm-generic/asm-prototypes.h>
> >
> > -#ifdef CONFIG_ARCH_SUPPORTS_INT128
> > -__int128_t __ashlti3(__int128_t a, int b);
> > -__int128_t __ashrti3(__int128_t a, int b);
> > -__int128_t __lshrti3(__int128_t a, int b);
> > -#endif
> > +struct { u64 lo, hi; } __ashlti3(u64 lo, u64 hi, int b);
> > +struct { u64 lo, hi; } __ashrti3(u64 lo, u64 hi, int b);
> > +struct { u64 lo, hi; } __lshrti3(u64 lo, u64 hi, int b);
>
> Whoops. This is still incorrect for LA32. On LA32 an "int128" (if it
> ever exists) should be passed as a pointer, but this is passing it in 4
> GPRs. So if we want to keep the prototype correct we need to either use
> "struct { u64 lo, hi; }" in the parameter list too, or guard it with
> #ifdef CONFIG_64BIT.
>
> So to me checking CONFIG_ARCH_SUPPORTS_INT128 is just easier.
>
> If you insists on not checking CONFIG_ARCH_SUPPORTS_INT128 I'll just use
> an incorrect prototype like RISC-V but put a comment here, like:
>
> /* The prototypes are incorrect but this file is only used by
> modpost which does not care. */
> long long __ashlti3(long long a, int b);
> long long __ashrti3(long long a, int b);
> long long __lshrti3(long long a, int b);
>
> How do you think?
OK, then just keep the original status.


Huacai
>
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
>
Re: [PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3 [ In reply to ]
On Sat, 2024-04-27 at 15:20 +0800, Huacai Chen wrote:
> > /* The prototypes are incorrect but this file is only used by
> >     modpost which does not care.  */
> > long long __ashlti3(long long a, int b);
> > long long __ashrti3(long long a, int b);
> > long long __lshrti3(long long a, int b);
> >
> > How do you think?
> OK, then just keep the original status.

Then I'll just send a patch squashing the origin version of this patch
and the patch selecting ARCH_SUPPORTS_INT128 in a day if there are no
other review comments.

--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
Re: [PATCH] LoongArch: Provide __lshrti3, __ashrti3, and __ashrti3 [ In reply to ]
On Sat, Apr 27, 2024 at 3:49?PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Sat, 2024-04-27 at 15:20 +0800, Huacai Chen wrote:
> > > /* The prototypes are incorrect but this file is only used by
> > > modpost which does not care. */
> > > long long __ashlti3(long long a, int b);
> > > long long __ashrti3(long long a, int b);
> > > long long __lshrti3(long long a, int b);
> > >
> > > How do you think?
> > OK, then just keep the original status.
>
> Then I'll just send a patch squashing the origin version of this patch
> and the patch selecting ARCH_SUPPORTS_INT128 in a day if there are no
> other review comments.
Needn't, I will squash it when I apply.

Huacai
>
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University