
[xen master] locking: attempt to ensure lock wrappers are always inline
commit 197ecd838a2aaf959a469df3696d4559c4f8b762
Author: Roger Pau Monné <roger.pau@citrix.com>
AuthorDate: Mon Mar 4 14:29:36 2024 +0100
Commit: Andrew Cooper <andrew.cooper3@citrix.com>
CommitDate: Tue Mar 12 15:50:04 2024 +0000

locking: attempt to ensure lock wrappers are always inline

This is done in order to prevent the locking speculation barriers from ending
up inside `call`ed functions that could be speculatively bypassed.

While there, also add an extra speculation barrier to _mm_write_lock() in the
branch taken when the lock is already held.

Note that some functions are switched to use the unsafe variants (without a
speculation barrier) of the locking primitives, but a speculation barrier is
always added to the exposed public lock wrapping helper. That is the case
with sched_spin_lock_double() and pcidevs_lock(), for example.

This is part of XSA-453 / CVE-2024-2193

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
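As a rough illustration of the pattern applied throughout this patch (a sketch
only, not code taken from the tree; "my_subsys" and my_subsys_lock() are
made-up names), the exposed lock wrapper is forced inline and carries the
speculation barrier itself, while the inner lock take uses the
underscore-prefixed primitive that omits the barrier:

    /* Assumes the Xen locking/nospec helpers are in scope. */
    struct my_subsys {
        spinlock_t lock;
        /* ... */
    };

    static always_inline void my_subsys_lock(struct my_subsys *s)
    {
        /* Speculation-unsafe variant: acquires the lock, no barrier. */
        _spin_lock(&s->lock);
        /* One barrier, emitted in the caller's body thanks to inlining. */
        block_lock_speculation();
    }

Because the wrapper is always_inline, the barrier cannot end up behind a
`call` instruction that could itself be speculatively bypassed.
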
xen/arch/x86/hvm/vpt.c | 10 +++++++---
xen/arch/x86/include/asm/irq.h | 1 +
xen/arch/x86/mm/mm-locks.h | 28 +++++++++++++++-------------
xen/arch/x86/mm/p2m-pod.c | 2 +-
xen/common/event_channel.c | 5 +++--
xen/common/grant_table.c | 6 +++---
xen/common/sched/core.c | 19 ++++++++++++-------
xen/common/sched/private.h | 26 ++++++++++++++++++++++++--
xen/common/timer.c | 8 +++++---
xen/drivers/passthrough/pci.c | 5 +++--
xen/include/xen/event.h | 4 ++--
xen/include/xen/pci.h | 8 ++++++--
12 files changed, 82 insertions(+), 40 deletions(-)

diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
index 8f53e88d67..e1d6845a28 100644
--- a/xen/arch/x86/hvm/vpt.c
+++ b/xen/arch/x86/hvm/vpt.c
@@ -150,7 +150,7 @@ static int pt_irq_masked(struct periodic_time *pt)
* pt->vcpu field, because another thread holding the pt_migrate lock
* may already be spinning waiting for your vcpu lock.
*/
-static void pt_vcpu_lock(struct vcpu *v)
+static always_inline void pt_vcpu_lock(struct vcpu *v)
{
spin_lock(&v->arch.hvm.tm_lock);
}
@@ -169,9 +169,13 @@ static void pt_vcpu_unlock(struct vcpu *v)
* need to take an additional lock that protects against pt->vcpu
* changing.
*/
-static void pt_lock(struct periodic_time *pt)
+static always_inline void pt_lock(struct periodic_time *pt)
{
- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
+ /*
+ * Use the speculation unsafe variant for the first lock, as the following
+ * lock taking helper already includes a speculation barrier.
+ */
+ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate);
spin_lock(&pt->vcpu->arch.hvm.tm_lock);
}

diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h
index 7d49f3c190..413994d213 100644
--- a/xen/arch/x86/include/asm/irq.h
+++ b/xen/arch/x86/include/asm/irq.h
@@ -151,6 +151,7 @@ void cf_check irq_complete_move(struct irq_desc *desc);

extern struct irq_desc *irq_desc;

+/* Not speculation safe, only used for AP bringup. */
void lock_vector_lock(void);
void unlock_vector_lock(void);

diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
index 00b1bc402d..273fff86ba 100644
--- a/xen/arch/x86/mm/mm-locks.h
+++ b/xen/arch/x86/mm/mm-locks.h
@@ -74,8 +74,8 @@ static inline void _set_lock_level(int l)
this_cpu(mm_lock_level) = l;
}

-static inline void _mm_lock(const struct domain *d, mm_lock_t *l,
- const char *func, int level, int rec)
+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l,
+ const char *func, int level, int rec)
{
if ( !((mm_locked_by_me(l)) && rec) )
_check_lock_level(d, level);
@@ -125,8 +125,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l)
return (l->locker == smp_processor_id());
}

-static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
- const char *func, int level)
+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
+ const char *func, int level)
{
if ( !mm_write_locked_by_me(l) )
{
@@ -137,6 +137,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l,
l->unlock_level = _get_lock_level();
_set_lock_level(_lock_level(d, level));
}
+ else
+ block_speculation();
l->recurse_count++;
}

@@ -150,8 +152,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l)
percpu_write_unlock(p2m_percpu_rwlock, &l->lock);
}

-static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
- int level)
+static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l,
+ int level)
{
_check_lock_level(d, level);
percpu_read_lock(p2m_percpu_rwlock, &l->lock);
@@ -166,15 +168,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l)

/* This wrapper uses the line number to express the locking order below */
#define declare_mm_lock(name) \
- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \
- const char *func, int rec) \
+ static always_inline void mm_lock_##name( \
+ const struct domain *d, mm_lock_t *l, const char *func, int rec) \
{ _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); }
#define declare_mm_rwlock(name) \
- static inline void mm_write_lock_##name(const struct domain *d, \
- mm_rwlock_t *l, const char *func) \
+ static always_inline void mm_write_lock_##name( \
+ const struct domain *d, mm_rwlock_t *l, const char *func) \
{ _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \
- static inline void mm_read_lock_##name(const struct domain *d, \
- mm_rwlock_t *l) \
+ static always_inline void mm_read_lock_##name(const struct domain *d, \
+ mm_rwlock_t *l) \
{ _mm_read_lock(d, l, MM_LOCK_ORDER_##name); }
/* These capture the name of the calling function */
#define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0)
@@ -309,7 +311,7 @@ declare_mm_lock(altp2mlist)
#define MM_LOCK_ORDER_altp2m 40
declare_mm_rwlock(altp2m);

-static inline void p2m_lock(struct p2m_domain *p)
+static always_inline void p2m_lock(struct p2m_domain *p)
{
if ( p2m_is_altp2m(p) )
mm_write_lock(altp2m, p->domain, &p->lock);
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index bc78c61062..65d31e5523 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -24,7 +24,7 @@
#define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0)

/* Enforce lock ordering when grabbing the "external" page_alloc lock */
-static inline void lock_page_alloc(struct p2m_domain *p2m)
+static always_inline void lock_page_alloc(struct p2m_domain *p2m)
{
page_alloc_mm_pre_lock(p2m->domain);
spin_lock(&(p2m->domain->page_alloc_lock));
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 15aec5dcbb..bbfda2538e 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -45,7 +45,7 @@
* just assume the event channel is free or unbound at the moment when the
* evtchn_read_trylock() returns false.
*/
-static inline void evtchn_write_lock(struct evtchn *evtchn)
+static always_inline void evtchn_write_lock(struct evtchn *evtchn)
{
write_lock(&evtchn->lock);

@@ -349,7 +349,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port)
return rc;
}

-static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn)
+static always_inline void double_evtchn_lock(struct evtchn *lchn,
+ struct evtchn *rchn)
{
ASSERT(lchn != rchn);

diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 37b178a67b..7708930882 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -403,7 +403,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)

static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);

-static inline void grant_read_lock(struct grant_table *gt)
+static always_inline void grant_read_lock(struct grant_table *gt)
{
percpu_read_lock(grant_rwlock, &gt->lock);
}
@@ -413,7 +413,7 @@ static inline void grant_read_unlock(struct grant_table *gt)
percpu_read_unlock(grant_rwlock, &gt->lock);
}

-static inline void grant_write_lock(struct grant_table *gt)
+static always_inline void grant_write_lock(struct grant_table *gt)
{
percpu_write_lock(grant_rwlock, &gt->lock);
}
@@ -450,7 +450,7 @@ nr_active_grant_frames(struct grant_table *gt)
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}

-static inline struct active_grant_entry *
+static always_inline struct active_grant_entry *
active_entry_acquire(struct grant_table *t, grant_ref_t e)
{
struct active_grant_entry *act;
diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index c5db373972..3e48da1cdd 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu)
* This avoids dead- or live-locks when this code is running on both
* cpus at the same time.
*/
-static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2,
- unsigned long *flags)
+static always_inline void sched_spin_lock_double(
+ spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags)
{
+ /*
+ * In order to avoid extra overhead, use the locking primitives without the
+ * speculation barrier, and introduce a single barrier here.
+ */
if ( lock1 == lock2 )
{
- spin_lock_irqsave(lock1, *flags);
+ *flags = _spin_lock_irqsave(lock1);
}
else if ( lock1 < lock2 )
{
- spin_lock_irqsave(lock1, *flags);
- spin_lock(lock2);
+ *flags = _spin_lock_irqsave(lock1);
+ _spin_lock(lock2);
}
else
{
- spin_lock_irqsave(lock2, *flags);
- spin_lock(lock1);
+ *flags = _spin_lock_irqsave(lock2);
+ _spin_lock(lock1);
}
+ block_lock_speculation();
}

static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2,
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 26a196f428..459d1dfb11 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -208,8 +208,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
#define cpumask_scratch (&this_cpu(cpumask_scratch))
#define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))

+/*
+ * Deal with _spin_lock_irqsave() returning the flags value instead of storing
+ * it in a passed parameter.
+ */
+#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
+#define _sched_spinlock1(lock, irq, arg) ({ \
+ BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
+ (arg) = _spin_lock##irq(lock); \
+})
+
+#define _sched_spinlock__(nr) _sched_spinlock ## nr
+#define _sched_spinlock_(nr) _sched_spinlock__(nr)
+#define _sched_spinlock(lock, irq, args...) \
+ _sched_spinlock_(count_args(args))(lock, irq, ## args)
+
#define sched_lock(kind, param, cpu, irq, arg...) \
-static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
+static always_inline spinlock_t \
+*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
{ \
for ( ; ; ) \
{ \
@@ -221,10 +237,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
* \
* It may also be the case that v->processor may change but the \
* lock may be the same; this will succeed in that case. \
+ * \
+ * Use the speculation unsafe locking helper, there's a speculation \
+ * barrier before returning to the caller. \
*/ \
- spin_lock##irq(lock, ## arg); \
+ _sched_spinlock(lock, irq, ## arg); \
if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
+ { \
+ block_lock_speculation(); \
return lock; \
+ } \
spin_unlock##irq(lock, ## arg); \
} \
}
diff --git a/xen/common/timer.c b/xen/common/timer.c
index 785177e7fa..a21798b76f 100644
--- a/xen/common/timer.c
+++ b/xen/common/timer.c
@@ -239,7 +239,7 @@ static inline void deactivate_timer(struct timer *timer)
list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive);
}

-static inline bool timer_lock(struct timer *timer)
+static inline bool timer_lock_unsafe(struct timer *timer)
{
unsigned int cpu;

@@ -253,7 +253,8 @@ static inline bool timer_lock(struct timer *timer)
rcu_read_unlock(&timer_cpu_read_lock);
return 0;
}
- spin_lock(&per_cpu(timers, cpu).lock);
+ /* Use the speculation unsafe variant, the wrapper has the barrier. */
+ _spin_lock(&per_cpu(timers, cpu).lock);
if ( likely(timer->cpu == cpu) )
break;
spin_unlock(&per_cpu(timers, cpu).lock);
@@ -266,8 +267,9 @@ static inline bool timer_lock(struct timer *timer)
#define timer_lock_irqsave(t, flags) ({ \
bool __x; \
local_irq_save(flags); \
- if ( !(__x = timer_lock(t)) ) \
+ if ( !(__x = timer_lock_unsafe(t)) ) \
local_irq_restore(flags); \
+ block_lock_speculation(); \
__x; \
})

diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 194701c913..6a1eda675d 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -52,9 +52,10 @@ struct pci_seg {

static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;

-void pcidevs_lock(void)
+/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */
+void pcidevs_lock_unsafe(void)
{
- spin_lock_recursive(&_pcidevs_lock);
+ _spin_lock_recursive(&_pcidevs_lock);
}

void pcidevs_unlock(void)
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 8e509e0784..f1472ea1eb 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
#define bucket_from_port(d, p) \
((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET])

-static inline void evtchn_read_lock(struct evtchn *evtchn)
+static always_inline void evtchn_read_lock(struct evtchn *evtchn)
{
read_lock(&evtchn->lock);
}

-static inline bool evtchn_read_trylock(struct evtchn *evtchn)
+static always_inline bool evtchn_read_trylock(struct evtchn *evtchn)
{
return read_trylock(&evtchn->lock);
}
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 1df1863b13..63e49f0117 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -166,8 +166,12 @@ struct pci_dev {
* devices, it also sync the access to the msi capability that is not
* interrupt handling related (the mask bit register).
*/
-
-void pcidevs_lock(void);
+void pcidevs_lock_unsafe(void);
+static always_inline void pcidevs_lock(void)
+{
+ pcidevs_lock_unsafe();
+ block_lock_speculation();
+}
void pcidevs_unlock(void);
bool __must_check pcidevs_locked(void);

--
generated by git-patchbot for /home/xen/git/xen.git#master