Mailing List Archive

[PATCH 08/10] rcu: Implement PCPU_RCU_PREEMPT_COUNT framework
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
corresponding functions, rcu_preempt core uses the functions to
implement rcu_read_[un]lock, rcu_preempt_depth(), special bits,
switching and so on.

Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
include/linux/rcupdate.h | 33 +++++++++++++++++++++++++++++++++
kernel/rcu/Kconfig | 8 ++++++++
kernel/rcu/rcu.h | 4 ++++
kernel/rcu/tree_plugin.h | 8 ++++++++
4 files changed, 53 insertions(+)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index cc77d76a870b..bf369741ef93 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -70,6 +70,8 @@ static inline bool same_state_synchronize_rcu(unsigned long oldstate1, unsigned

void rcu_read_unlock_special(void);

+#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
void __rcu_read_lock(void);
void __rcu_read_unlock(void);

@@ -81,6 +83,37 @@ void __rcu_read_unlock(void);
*/
#define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting)
#define rcu_preempt_depth_set(val) WRITE_ONCE(current->rcu_read_lock_nesting, (val))
+#define pcpu_rcu_preempt_special_set() do { } while (0)
+#define pcpu_rcu_preempt_special_clear() do { } while (0)
+
+#else /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
+
+#include <asm/rcu_preempt.h>
+
+static __always_inline void __rcu_read_lock(void)
+{
+ pcpu_rcu_preempt_count_add(1);
+ barrier();
+}
+
+static __always_inline void __rcu_read_unlock(void)
+{
+ barrier();
+ if (unlikely(pcpu_rcu_preempt_count_dec_and_test()))
+ pcpu_rcu_read_unlock_special();
+}
+
+static inline int rcu_preempt_depth(void)
+{
+ return pcpu_rcu_preempt_count();
+}
+
+static inline void rcu_preempt_depth_set(int val)
+{
+ pcpu_rcu_preempt_count_set(val);
+}
+
+#endif /* #else #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */

#else /* #ifdef CONFIG_PREEMPT_RCU */

diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index e7d2dd267593..5d91147bc9a3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -345,4 +345,12 @@ config RCU_DOUBLE_CHECK_CB_TIME
Say Y here if you need tighter callback-limit enforcement.
Say N here if you are unsure.

+config HAVE_PCPU_RCU_PREEMPT_COUNT
+ bool
+
+config PCPU_RCU_PREEMPT_COUNT
+ def_bool y
+ depends on PREEMPT_RCU && HAVE_PCPU_RCU_PREEMPT_COUNT
+ depends on !PROVE_LOCKING && !RCU_STRICT_GRACE_PERIOD
+
endmenu # "RCU Subsystem"
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index ea5ae957c687..2322b040c5cd 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -671,6 +671,10 @@ static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { re
static inline void
rcu_preempt_switch(struct task_struct *prev, struct task_struct *next)
{
+#ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
+ prev->rcu_read_lock_nesting = rcu_preempt_depth();
+ pcpu_rcu_preempt_switch(next->rcu_read_lock_nesting, next->rcu_read_unlock_special.s);
+#endif // #ifdef CONFIG_PCPU_RCU_PREEMPT_COUNT
}

#endif /* __KERNEL_RCU_H */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 880b3fef1158..db68d0c1c1f2 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -107,10 +107,13 @@ static bool sync_rcu_exp_done(struct rcu_node *rnp);

#define set_rcu_preempt_special(reason) do { \
WRITE_ONCE(current->rcu_read_unlock_special.b.reason, true); \
+ pcpu_rcu_preempt_special_set(); \
} while (0)

#define clear_rcu_preempt_special(reason) do { \
WRITE_ONCE(current->rcu_read_unlock_special.b.reason, false); \
+ if (!current->rcu_read_unlock_special.s) \
+ pcpu_rcu_preempt_special_clear(); \
} while (0)

/*
@@ -379,6 +382,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
return READ_ONCE(rnp->gp_tasks) != NULL;
}

+#ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT
+
/* limit value for ->rcu_read_lock_nesting. */
#define RCU_NEST_PMAX (INT_MAX / 2)

@@ -436,6 +441,8 @@ void __rcu_read_unlock(void)
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

+#endif /* #ifndef CONFIG_PCPU_RCU_PREEMPT_COUNT */
+
/*
* Advance a ->blkd_tasks-list pointer to the next entry, instead
* returning NULL if at the end of the list.
@@ -489,6 +496,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return;
}
t->rcu_read_unlock_special.s = 0;
+ pcpu_rcu_preempt_special_clear();
if (special.b.need_qs) {
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
rdp->cpu_no_qs.b.norm = false;
--
2.19.1.6.gb485710b
Re: [PATCH 08/10] rcu: Implement PCPU_RCU_PREEMPT_COUNT framework [ In reply to ]
Le Thu, Mar 28, 2024 at 03:53:16PM +0800, Lai Jiangshan a ?crit :
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>
> When the arch code provides HAVE_PCPU_RCU_PREEMPT_COUNT and the
> corresponding functions, rcu_preempt core uses the functions to
> implement rcu_read_[un]lock, rcu_preempt_depth(), special bits,
> switching and so on.

The changelogs don't explain the reason for all of that. I'm guessing
from the cover letter that the purpose is to save some instructions on
calls to rcu_read_unlock() due to inline calls and per CPU optimized
instructions but it's hard to guess from the actual individual patches,
which are meants to be the actual git records.

Thanks.