Mailing List Archive

[RESEND RFC PATCH v2 1/6] riscv/cmpxchg: Deduplicate cmpxchg() asm functions
In this header every cmpxchg define (_relaxed, _acquire, _release,
vanilla) contain it's own asm file, both for 4-byte variables an 8-byte
variables, on a total of 8 versions of mostly the same asm.

This is usually bad, as it means any change may be done in up to 8
different places.

Unify those versions by creating a new define with enough parameters to
generate any version of the previous 8.

(This did not cause any change in generated asm)

Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
arch/riscv/include/asm/cmpxchg.h | 102 ++++++++-----------------------
1 file changed, 24 insertions(+), 78 deletions(-)

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 12debce235e52..21984d24cbfe7 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -163,6 +163,22 @@
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
+
+#define ___cmpxchg(lr_sfx, sc_sfx, prepend, append) \
+{ \
+ __asm__ __volatile__ ( \
+ prepend \
+ "0: lr" lr_sfx " %0, %2\n" \
+ " bne %0, %z3, 1f\n" \
+ " sc" sc_sfx " %1, %z4, %2\n" \
+ " bnez %1, 0b\n" \
+ append \
+ "1:\n" \
+ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
+ : "rJ" ((long)__old), "rJ" (__new) \
+ : "memory"); \
+}
+
#define __cmpxchg_relaxed(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
@@ -172,26 +188,10 @@
register unsigned int __rc; \
switch (size) { \
case 4: \
- __asm__ __volatile__ ( \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".w", ".w", "", ""); \
break; \
case 8: \
- __asm__ __volatile__ ( \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".d", ".d", "", ""); \
break; \
default: \
BUILD_BUG(); \
@@ -216,28 +216,10 @@
register unsigned int __rc; \
switch (size) { \
case 4: \
- __asm__ __volatile__ ( \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- RISCV_ACQUIRE_BARRIER \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".w", ".w", "", RISCV_ACQUIRE_BARRIER); \
break; \
case 8: \
- __asm__ __volatile__ ( \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- RISCV_ACQUIRE_BARRIER \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".d", ".d", "", RISCV_ACQUIRE_BARRIER); \
break; \
default: \
BUILD_BUG(); \
@@ -262,28 +244,10 @@
register unsigned int __rc; \
switch (size) { \
case 4: \
- __asm__ __volatile__ ( \
- RISCV_RELEASE_BARRIER \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".w", ".w", RISCV_RELEASE_BARRIER, ""); \
break; \
case 8: \
- __asm__ __volatile__ ( \
- RISCV_RELEASE_BARRIER \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".d", ".d", RISCV_RELEASE_BARRIER, ""); \
break; \
default: \
BUILD_BUG(); \
@@ -308,28 +272,10 @@
register unsigned int __rc; \
switch (size) { \
case 4: \
- __asm__ __volatile__ ( \
- "0: lr.w %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.w.rl %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- " fence rw, rw\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" ((long)__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".w", ".w.rl", "", " fence rw, rw\n"); \
break; \
case 8: \
- __asm__ __volatile__ ( \
- "0: lr.d %0, %2\n" \
- " bne %0, %z3, 1f\n" \
- " sc.d.rl %1, %z4, %2\n" \
- " bnez %1, 0b\n" \
- " fence rw, rw\n" \
- "1:\n" \
- : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
- : "rJ" (__old), "rJ" (__new) \
- : "memory"); \
+ ___cmpxchg(".d", ".d.rl", "", " fence rw, rw\n"); \
break; \
default: \
BUILD_BUG(); \
--
2.40.0
Re: [RESEND RFC PATCH v2 1/6] riscv/cmpxchg: Deduplicate cmpxchg() asm functions [ In reply to ]
On Tue, Mar 21, 2023 at 04:42:44AM -0300, Leonardo Bras wrote:
> In this header every cmpxchg define (_relaxed, _acquire, _release,
> vanilla) contain it's own asm file, both for 4-byte variables an 8-byte
> variables, on a total of 8 versions of mostly the same asm.
>
> This is usually bad, as it means any change may be done in up to 8
> different places.
>
> Unify those versions by creating a new define with enough parameters to
> generate any version of the previous 8.
>
> (This did not cause any change in generated asm)
>
> Signed-off-by: Leonardo Bras <leobras@redhat.com>
> ---
> arch/riscv/include/asm/cmpxchg.h | 102 ++++++++-----------------------
> 1 file changed, 24 insertions(+), 78 deletions(-)

Diff looks nice,


> +#define ___cmpxchg(lr_sfx, sc_sfx, prepend, append) \
> +{ \
> + __asm__ __volatile__ ( \
> + prepend \
> + "0: lr" lr_sfx " %0, %2\n" \
> + " bne %0, %z3, 1f\n" \
> + " sc" sc_sfx " %1, %z4, %2\n" \
> + " bnez %1, 0b\n" \
> + append \
> + "1:\n" \
> + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
> + : "rJ" ((long)__old), "rJ" (__new) \
> + : "memory"); \
> +}

Though I'm not really a fan of macros depending on local variables with
"magic" names, can this be avoided?

Thanks,
Andrea
Re: [RESEND RFC PATCH v2 1/6] riscv/cmpxchg: Deduplicate cmpxchg() asm functions [ In reply to ]
On Tue, Mar 21, 2023 at 9:45?PM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> On Tue, Mar 21, 2023 at 04:42:44AM -0300, Leonardo Bras wrote:
> > In this header every cmpxchg define (_relaxed, _acquire, _release,
> > vanilla) contain it's own asm file, both for 4-byte variables an 8-byte
> > variables, on a total of 8 versions of mostly the same asm.
> >
> > This is usually bad, as it means any change may be done in up to 8
> > different places.
> >
> > Unify those versions by creating a new define with enough parameters to
> > generate any version of the previous 8.
> >
> > (This did not cause any change in generated asm)
> >
> > Signed-off-by: Leonardo Bras <leobras@redhat.com>
> > ---
> > arch/riscv/include/asm/cmpxchg.h | 102 ++++++++-----------------------
> > 1 file changed, 24 insertions(+), 78 deletions(-)
>
> Diff looks nice,

Hello Andrea, thanks for reviewing!


> > +#define ___cmpxchg(lr_sfx, sc_sfx, prepend, append) \
> > +{ \
> > + __asm__ __volatile__ ( \
> > + prepend \
> > + "0: lr" lr_sfx " %0, %2\n" \
> > + " bne %0, %z3, 1f\n" \
> > + " sc" sc_sfx " %1, %z4, %2\n" \
> > + " bnez %1, 0b\n" \
> > + append \
> > + "1:\n" \
> > + : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \
> > + : "rJ" ((long)__old), "rJ" (__new) \
> > + : "memory"); \
> > +}
>
> Though I'm not really a fan of macros depending on local variables with
> "magic" names, can this be avoided?

You mean __ret, __rc, __ptr, __old and __new ?
If so, yes, we could add them to the macro signature, but it would
become quite big with not much to gain. To reduce impact it could be
something like:

+#define ___cmpxchg(lr_sfx, sc_sfx, prepend, append, ret, r, p, o, n)

Is this a possible fix?
>
> Thanks,
> Andrea
>

Thank you!
Leo
Re: [RESEND RFC PATCH v2 1/6] riscv/cmpxchg: Deduplicate cmpxchg() asm functions [ In reply to ]
> > Though I'm not really a fan of macros depending on local variables with
> > "magic" names, can this be avoided?
>
> You mean __ret, __rc, __ptr, __old and __new ?

Indeed, the wording was from coding-style.rst


> If so, yes, we could add them to the macro signature, but it would
> become quite big with not much to gain. To reduce impact it could be
> something like:
>
> +#define ___cmpxchg(lr_sfx, sc_sfx, prepend, append, ret, r, p, o, n)
>
> Is this a possible fix?

I believe that'll do it, open to other approaches.

Andrea