Mailing List Archive

[xen master] xen/locks: add dynamic lock recursion checks
commit 6b09ca19d9e5270189fea419c2dc27254e405119
Author: Juergen Gross <jgross@suse.com>
AuthorDate: Fri Dec 2 10:26:55 2022 +0100
Commit: Jan Beulich <jbeulich@suse.com>
CommitDate: Fri Dec 2 10:26:55 2022 +0100

xen/locks: add dynamic lock recursion checks

Add checking of lock recursion to the hypervisor. This is done by using
a percpu data array for storing the address of each taken lock. Any
attempt to take a lock twice (with the exception of recursive
spinlocks) will result in a crash. This is especially meant for
detecting attempts to take a rwlock multiple times as a reader, which
will only result in a deadlock in case of another cpu trying to get the
lock as a writer in between.

The additional checks are not performance neutral, so they are enabled
only in debug builds per default, as the checks are active only with
CONFIG_DEBUG_LOCKS enabled. The size of the percpu data array can be
selected via a boot parameter.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
---
docs/misc/xen-command-line.pandoc | 12 ++++
xen/common/rwlock.c | 6 ++
xen/common/spinlock.c | 145 ++++++++++++++++++++++++++++++++++++++
xen/include/xen/rwlock.h | 20 ++++++
xen/include/xen/spinlock.h | 4 ++
5 files changed, 187 insertions(+)

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 424b12cfb2..b7ee97be76 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1645,6 +1645,18 @@ This option is intended for debugging purposes only. Enable MSR_DEBUGCTL.LBR
in hypervisor context to be able to dump the Last Interrupt/Exception To/From
record with other registers.

+### lock-depth-size
+> `= <integer>`
+
+> Default: `lock-depth-size=64`
+
+Specifies the maximum number of nested locks tested for illegal recursions.
+Higher nesting levels still work, but recursion testing is omitted for those
+levels. In case an illegal recursion is detected the system will crash
+immediately. Specifying `0` will disable all testing of illegal lock nesting.
+
+This option is available for hypervisors built with CONFIG_DEBUG_LOCKS only.
+
### loglvl
> `= <level>[/<rate-limited level>]` where level is `none | error | warning | info | debug | all`

diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c
index aa15529bbe..18224a4bb5 100644
--- a/xen/common/rwlock.c
+++ b/xen/common/rwlock.c
@@ -54,6 +54,8 @@ void queue_read_lock_slowpath(rwlock_t *lock)
* Signal the next one in queue to become queue head.
*/
spin_unlock(&lock->lock);
+
+ lock_enter(&lock->lock.debug);
}

/*
@@ -100,6 +102,8 @@ void queue_write_lock_slowpath(rwlock_t *lock)
}
unlock:
spin_unlock(&lock->lock);
+
+ lock_enter(&lock->lock.debug);
}


@@ -146,4 +150,6 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata,
/* Give the coherency fabric a break. */
cpu_relax();
};
+
+ lock_enter(&percpu_rwlock->rwlock.lock.debug);
}
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 62c83aaa6a..84996c3fbc 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -1,5 +1,8 @@
+#include <xen/cpu.h>
#include <xen/lib.h>
#include <xen/irq.h>
+#include <xen/notifier.h>
+#include <xen/param.h>
#include <xen/smp.h>
#include <xen/time.h>
#include <xen/spinlock.h>
@@ -11,11 +14,77 @@

#ifdef CONFIG_DEBUG_LOCKS

+/* Max. number of entries in locks_taken array. */
+static unsigned int __ro_after_init lock_depth_size = 64;
+integer_param("lock-depth-size", lock_depth_size);
+
+/*
+ * Array of addresses of taken locks.
+ * nr_locks_taken is the index after the last entry. As locks tend to be
+ * nested cleanly, when freeing a lock it will probably be the one before
+ * nr_locks_taken, and new entries can be entered at that index. It is fine
+ * for a lock to be released out of order, though.
+ */
+static DEFINE_PER_CPU(const union lock_debug **, locks_taken);
+static DEFINE_PER_CPU(unsigned int, nr_locks_taken);
+static bool __read_mostly max_depth_reached;
+
static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);

+static int cf_check cpu_lockdebug_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ if ( !per_cpu(locks_taken, cpu) )
+ per_cpu(locks_taken, cpu) = xzalloc_array(const union lock_debug *,
+ lock_depth_size);
+ if ( !per_cpu(locks_taken, cpu) )
+ printk(XENLOG_WARNING
+ "cpu %u: failed to allocate lock recursion check area\n",
+ cpu);
+ break;
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ XFREE(per_cpu(locks_taken, cpu));
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static struct notifier_block cpu_lockdebug_nfb = {
+ .notifier_call = cpu_lockdebug_callback,
+};
+
+static int __init cf_check lockdebug_init(void)
+{
+ if ( lock_depth_size )
+ {
+ register_cpu_notifier(&cpu_lockdebug_nfb);
+ cpu_lockdebug_callback(&cpu_lockdebug_nfb, CPU_UP_PREPARE,
+ (void *)(unsigned long)smp_processor_id());
+ }
+
+ return 0;
+}
+presmp_initcall(lockdebug_init);
+
void check_lock(union lock_debug *debug, bool try)
{
bool irq_safe = !local_irq_is_enabled();
+ unsigned int cpu = smp_processor_id();
+ const union lock_debug *const *taken = per_cpu(locks_taken, cpu);
+ unsigned int nr_taken = per_cpu(nr_locks_taken, cpu);
+ unsigned int i;

BUILD_BUG_ON(LOCK_DEBUG_PAD_BITS <= 0);

@@ -63,6 +132,16 @@ void check_lock(union lock_debug *debug, bool try)
BUG();
}
}
+
+ if ( try )
+ return;
+
+ for ( i = 0; i < nr_taken; i++ )
+ if ( taken[i] == debug )
+ {
+ printk("CHECKLOCK FAILURE: lock at %p taken recursively\n", debug);
+ BUG();
+ }
}

static void check_barrier(union lock_debug *debug)
@@ -84,15 +163,81 @@ static void check_barrier(union lock_debug *debug)
BUG_ON(!local_irq_is_enabled() && !debug->irq_safe);
}

+void lock_enter(const union lock_debug *debug)
+{
+ unsigned int cpu = smp_processor_id();
+ const union lock_debug **taken = per_cpu(locks_taken, cpu);
+ unsigned int *nr_taken = &per_cpu(nr_locks_taken, cpu);
+ unsigned long flags;
+
+ if ( !taken )
+ return;
+
+ local_irq_save(flags);
+
+ if ( *nr_taken < lock_depth_size )
+ taken[(*nr_taken)++] = debug;
+ else if ( !max_depth_reached )
+ {
+ max_depth_reached = true;
+ printk("CHECKLOCK max lock depth %u reached!\n", lock_depth_size);
+ WARN();
+ }
+
+ local_irq_restore(flags);
+}
+
+void lock_exit(const union lock_debug *debug)
+{
+ unsigned int cpu = smp_processor_id();
+ const union lock_debug **taken = per_cpu(locks_taken, cpu);
+ unsigned int *nr_taken = &per_cpu(nr_locks_taken, cpu);
+ unsigned int i;
+ unsigned long flags;
+
+ if ( !taken )
+ return;
+
+ local_irq_save(flags);
+
+ for ( i = *nr_taken; i > 0; i-- )
+ {
+ if ( taken[i - 1] == debug )
+ {
+ memmove(taken + i - 1, taken + i,
+ (*nr_taken - i) * sizeof(*taken));
+ (*nr_taken)--;
+ taken[*nr_taken] = NULL;
+
+ local_irq_restore(flags);
+
+ return;
+ }
+ }
+
+ if ( !max_depth_reached )
+ {
+ printk("CHECKLOCK released lock at %p not recorded!\n", debug);
+ WARN();
+ }
+
+ local_irq_restore(flags);
+}
+
static void got_lock(union lock_debug *debug)
{
debug->cpu = smp_processor_id();
+
+ lock_enter(debug);
}

static void rel_lock(union lock_debug *debug)
{
if ( atomic_read(&spin_debug) > 0 )
BUG_ON(debug->cpu != smp_processor_id());
+
+ lock_exit(debug);
+
debug->cpu = SPINLOCK_NO_CPU;
}

diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h
index 0cc9167715..b8d52a5aa9 100644
--- a/xen/include/xen/rwlock.h
+++ b/xen/include/xen/rwlock.h
@@ -66,7 +66,10 @@ static inline int _read_trylock(rwlock_t *lock)
* arch_lock_acquire_barrier().
*/
if ( likely(_can_read_lock(cnts)) )
+ {
+ lock_enter(&lock->lock.debug);
return 1;
+ }
atomic_sub(_QR_BIAS, &lock->cnts);
}
preempt_enable();
@@ -91,6 +94,7 @@ static inline void _read_lock(rwlock_t *lock)
{
/* The slow path calls check_lock() via spin_lock(). */
check_lock(&lock->lock.debug, false);
+ lock_enter(&lock->lock.debug);
return;
}

@@ -123,6 +127,8 @@ static inline unsigned long _read_lock_irqsave(rwlock_t *lock)
*/
static inline void _read_unlock(rwlock_t *lock)
{
+ lock_exit(&lock->lock.debug);
+
arch_lock_release_barrier();
/*
* Atomically decrement the reader count
@@ -170,6 +176,7 @@ static inline void _write_lock(rwlock_t *lock)
{
/* The slow path calls check_lock() via spin_lock(). */
check_lock(&lock->lock.debug, false);
+ lock_enter(&lock->lock.debug);
return;
}

@@ -215,6 +222,8 @@ static inline int _write_trylock(rwlock_t *lock)
return 0;
}

+ lock_enter(&lock->lock.debug);
+
/*
* atomic_cmpxchg() is a full barrier so no need for an
* arch_lock_acquire_barrier().
@@ -225,6 +234,9 @@ static inline int _write_trylock(rwlock_t *lock)
static inline void _write_unlock(rwlock_t *lock)
{
ASSERT(_is_write_locked_by_me(atomic_read(&lock->cnts)));
+
+ lock_exit(&lock->lock.debug);
+
arch_lock_release_barrier();
atomic_and(~(_QW_CPUMASK | _QW_WMASK), &lock->cnts);
preempt_enable();
@@ -343,6 +355,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata,
/* All other paths have implicit check_lock() calls via read_lock(). */
check_lock(&percpu_rwlock->rwlock.lock.debug, false);
}
+
+ lock_enter(&percpu_rwlock->rwlock.lock.debug);
}

static inline void _percpu_read_unlock(percpu_rwlock_t **per_cpudata,
@@ -353,6 +367,9 @@ static inline void _percpu_read_unlock(percpu_rwlock_t **per_cpudata,

/* Verify the read lock was taken for this lock */
ASSERT(this_cpu_ptr(per_cpudata) != NULL);
+
+ lock_exit(&percpu_rwlock->rwlock.lock.debug);
+
/*
* Detect using a second percpu_rwlock_t simulatenously and fallback
* to standard read_unlock.
@@ -379,6 +396,9 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata,

ASSERT(percpu_rwlock->writer_activating);
percpu_rwlock->writer_activating = 0;
+
+ lock_exit(&percpu_rwlock->rwlock.lock.debug);
+
write_unlock(&percpu_rwlock->rwlock);
}

diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index 961891bea4..2fa6ba3654 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -22,12 +22,16 @@ union lock_debug {
};
#define _LOCK_DEBUG { LOCK_DEBUG_INITVAL }
void check_lock(union lock_debug *debug, bool try);
+void lock_enter(const union lock_debug *debug);
+void lock_exit(const union lock_debug *debug);
void spin_debug_enable(void);
void spin_debug_disable(void);
#else
union lock_debug { };
#define _LOCK_DEBUG { }
#define check_lock(l, t) ((void)0)
+#define lock_enter(l) ((void)0)
+#define lock_exit(l) ((void)0)
#define spin_debug_enable() ((void)0)
#define spin_debug_disable() ((void)0)
#endif
--
generated by git-patchbot for /home/xen/git/xen.git#master