Mailing List Archive

[XEN v4 10/11] xen/Arm: GICv3: Define macros to read/write 64 bit
On AArch32, ldrd/strd instructions are not atomic when used to access MMIO.
Furthermore, ldrd/strd instructions are not decoded by Arm when running as
a guest to access emulated MMIO region.
Thus, we have defined readq_relaxed_non_atomic()/writeq_relaxed_non_atomic()
which in turn calls readl_relaxed()/writel_relaxed() for the lower and upper
32 bits.
For AArch64, readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() invokes
readq_relaxed()/writeq_relaxed() respectively.
As GICv3 registers (GICD_IROUTER, GICR_TYPER) can be accessed in a non atomic
manner, so we have used readq_relaxed_non_atomic()/writeq_relaxed_non_atomic().

Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
---

Changes from :-
v1 - 1. Use ldrd/strd for readq_relaxed()/writeq_relaxed().
2. No need to use le64_to_cpu() as the returned byte order is already in cpu
endianess.

v2 - 1. Replace {read/write}q_relaxed with {read/write}q_relaxed_non_atomic().

v3 - 1. Use inline function definitions for {read/write}q_relaxed_non_atomic().
2. For AArch64, {read/write}q_relaxed_non_atomic() should invoke {read/write}q_relaxed().
Thus, we can avoid any ifdef in gic-v3.c.

xen/arch/arm/gic-v3.c | 6 +++---
xen/arch/arm/include/asm/arm32/io.h | 20 ++++++++++++++++++++
xen/arch/arm/include/asm/arm64/io.h | 2 ++
3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
index 6457e7033c..3c5b88148c 100644
--- a/xen/arch/arm/gic-v3.c
+++ b/xen/arch/arm/gic-v3.c
@@ -651,7 +651,7 @@ static void __init gicv3_dist_init(void)
affinity &= ~GICD_IROUTER_SPI_MODE_ANY;

for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i++ )
- writeq_relaxed(affinity, GICD + GICD_IROUTER + i * 8);
+ writeq_relaxed_non_atomic(affinity, GICD + GICD_IROUTER + i * 8);
}

static int gicv3_enable_redist(void)
@@ -745,7 +745,7 @@ static int __init gicv3_populate_rdist(void)
}

do {
- typer = readq_relaxed(ptr + GICR_TYPER);
+ typer = readq_relaxed_non_atomic(ptr + GICR_TYPER);

if ( (typer >> 32) == aff )
{
@@ -1265,7 +1265,7 @@ static void gicv3_irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
affinity &= ~GICD_IROUTER_SPI_MODE_ANY;

if ( desc->irq >= NR_GIC_LOCAL_IRQS )
- writeq_relaxed(affinity, (GICD + GICD_IROUTER + desc->irq * 8));
+ writeq_relaxed_non_atomic(affinity, (GICD + GICD_IROUTER + desc->irq * 8));

spin_unlock(&gicv3.lock);
}
diff --git a/xen/arch/arm/include/asm/arm32/io.h b/xen/arch/arm/include/asm/arm32/io.h
index 73a879e9fb..782b564809 100644
--- a/xen/arch/arm/include/asm/arm32/io.h
+++ b/xen/arch/arm/include/asm/arm32/io.h
@@ -80,10 +80,30 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
__raw_readw(c)); __r; })
#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32) \
__raw_readl(c)); __r; })
+/*
+ * ldrd instructions are not decoded by Arm when running as a guest to access
+ * emulated MMIO region. Thus, readq_relaxed_non_atomic() invokes readl_relaxed()
+ * twice to read the lower and upper 32 bits.
+ */
+static inline u64 readq_relaxed_non_atomic(const volatile void __iomem *addr)
+{
+ u64 val = (((u64)readl_relaxed(addr + 4)) << 32) | readl_relaxed(addr);
+ return val;
+}

#define writeb_relaxed(v,c) __raw_writeb(v,c)
#define writew_relaxed(v,c) __raw_writew((__force u16) cpu_to_le16(v),c)
#define writel_relaxed(v,c) __raw_writel((__force u32) cpu_to_le32(v),c)
+/*
+ * strd instructions are not decoded by Arm when running as a guest to access
+ * emulated MMIO region. Thus, writeq_relaxed_non_atomic() invokes writel_relaxed()
+ * twice to write the lower and upper 32 bits.
+ */
+static inline void writeq_relaxed_non_atomic(u64 val, volatile void __iomem *addr)
+{
+ writel_relaxed((u32)val, addr);
+ writel_relaxed((u32)(val >> 32), addr + 4);
+}

#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
diff --git a/xen/arch/arm/include/asm/arm64/io.h b/xen/arch/arm/include/asm/arm64/io.h
index 30bfc78d9e..2e2ab24f78 100644
--- a/xen/arch/arm/include/asm/arm64/io.h
+++ b/xen/arch/arm/include/asm/arm64/io.h
@@ -102,11 +102,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; })
#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; })
#define readq_relaxed(c) ({ u64 __v = le64_to_cpu((__force __le64)__raw_readq(c)); __v; })
+#define readq_relaxed_non_atomic(c) readq_relaxed((c))

#define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c)))
#define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define writeq_relaxed_non_atomic(v,c) writeq_relaxed((v),(c))

/*
* I/O memory access primitives. Reads are ordered relative to any
--
2.17.1
Re: [XEN v4 10/11] xen/Arm: GICv3: Define macros to read/write 64 bit [ In reply to ]
Hi,

On 28/11/2022 15:56, Ayan Kumar Halder wrote:
> On AArch32, ldrd/strd instructions are not atomic when used to access MMIO.
> Furthermore, ldrd/strd instructions are not decoded by Arm when running as
> a guest to access emulated MMIO region.
> Thus, we have defined readq_relaxed_non_atomic()/writeq_relaxed_non_atomic()
> which in turn calls readl_relaxed()/writel_relaxed() for the lower and upper
> 32 bits.
> For AArch64, readq_relaxed_non_atomic()/writeq_relaxed_non_atomic() invokes
> readq_relaxed()/writeq_relaxed() respectively.
> As GICv3 registers (GICD_IROUTER, GICR_TYPER) can be accessed in a non atomic
> manner, so we have used readq_relaxed_non_atomic()/writeq_relaxed_non_atomic().

I had another look at the code and I think there needs some
clarification necessary because the accesses to IROUTER is non-obvious.

>
> Signed-off-by: Ayan Kumar Halder <ayan.kumar.halder@amd.com>
> ---
>
> Changes from :-
> v1 - 1. Use ldrd/strd for readq_relaxed()/writeq_relaxed().
> 2. No need to use le64_to_cpu() as the returned byte order is already in cpu
> endianess.
>
> v2 - 1. Replace {read/write}q_relaxed with {read/write}q_relaxed_non_atomic().
>
> v3 - 1. Use inline function definitions for {read/write}q_relaxed_non_atomic().
> 2. For AArch64, {read/write}q_relaxed_non_atomic() should invoke {read/write}q_relaxed().
> Thus, we can avoid any ifdef in gic-v3.c.
>
> xen/arch/arm/gic-v3.c | 6 +++---
> xen/arch/arm/include/asm/arm32/io.h | 20 ++++++++++++++++++++
> xen/arch/arm/include/asm/arm64/io.h | 2 ++
> 3 files changed, 25 insertions(+), 3 deletions(-)
>
> diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c
> index 6457e7033c..3c5b88148c 100644
> --- a/xen/arch/arm/gic-v3.c
> +++ b/xen/arch/arm/gic-v3.c
> @@ -651,7 +651,7 @@ static void __init gicv3_dist_init(void)
> affinity &= ~GICD_IROUTER_SPI_MODE_ANY;
>
> for ( i = NR_GIC_LOCAL_IRQS; i < nr_lines; i++ )
> - writeq_relaxed(affinity, GICD + GICD_IROUTER + i * 8);
> + writeq_relaxed_non_atomic(affinity, GICD + GICD_IROUTER + i * 8);

Using non atomic here makes sense because AFAIU the interrupt will be
disabled. Therefore the GIC should not use the register.

> }
>
> static int gicv3_enable_redist(void)
> @@ -745,7 +745,7 @@ static int __init gicv3_populate_rdist(void)
> }
>
> do {
> - typer = readq_relaxed(ptr + GICR_TYPER);
> + typer = readq_relaxed_non_atomic(ptr + GICR_TYPER);

This non-atomic read is OK because GICR_TYPER is read-only.

>
> if ( (typer >> 32) == aff )
> {
> @@ -1265,7 +1265,7 @@ static void gicv3_irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
> affinity &= ~GICD_IROUTER_SPI_MODE_ANY;
>
> if ( desc->irq >= NR_GIC_LOCAL_IRQS )
> - writeq_relaxed(affinity, (GICD + GICD_IROUTER + desc->irq * 8));
> + writeq_relaxed_non_atomic(affinity, (GICD + GICD_IROUTER + desc->irq * 8));

This can be called with interrupt enabled. So a non-atomic access means
the GIC will see a transient value when only one of two 32-bit will be
updated.

In practice this is fine because only Aff3 is so far defined in the top
32-bits. So effectively, they will be RESS0 and never change.

Cheers,

--
Julien Grall