Mailing List Archive

[PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow
The limbo and overflow code picks a CPU to use from the domain's list
of online CPUs. Work is then scheduled on these CPUs to maintain
the limbo list and any counters that may overflow.

cpumask_any() may pick a CPU that is marked nohz_full, which will
either penalise the work that CPU was dedicated to, or delay the
processing of limbo list or counters that may overflow. Perhaps
indefinitely. Delaying the overflow handling will skew the bandwidth
values calculated by mba_sc, which expects to be called once a second.

Add cpumask_any_housekeeping() as a replacement for cpumask_any()
that prefers housekeeping CPUs. This helper will still return
a nohz_full CPU if that is the only option. The CPU to use is
re-evaluated each time the limbo/overflow work runs. This ensures
the work will move off a nohz_full CPU once a housekeeping CPU is
available.

Signed-off-by: James Morse <james.morse@arm.com>
---
arch/x86/kernel/cpu/resctrl/internal.h | 23 +++++++++++++++++++++++
arch/x86/kernel/cpu/resctrl/monitor.c | 17 ++++++++++++-----
include/linux/tick.h | 3 ++-
3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 87545e4beb70..0b5fd5a0cda2 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -7,6 +7,7 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h>
+#include <linux/tick.h>
#include <asm/resctrl.h>

#define L3_QOS_CDP_ENABLE 0x01ULL
@@ -55,6 +56,28 @@
/* Max event bits supported */
#define MAX_EVT_CONFIG_BITS GENMASK(6, 0)

+/**
+ * cpumask_any_housekeeping() - Choose any CPU in @mask, preferring those that
+ * aren't marked nohz_full
+ * @mask: The mask to pick a CPU from.
+ *
+ * Returns a CPU in @mask. If there are housekeeping CPUs that don't use
+ * nohz_full, these are preferred.
+ */
+static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
+{
+ int cpu, hk_cpu;
+
+ cpu = cpumask_any(mask);
+ if (tick_nohz_full_cpu(cpu)) {
+ hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
+ if (hk_cpu < nr_cpu_ids)
+ cpu = hk_cpu;
+ }
+
+ return cpu;
+}
+
struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index a2ae4be4b2ba..3bec5c59ca0e 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -745,9 +745,9 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
void cqm_handle_limbo(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- int cpu = smp_processor_id();
struct rdt_resource *r;
struct rdt_domain *d;
+ int cpu;

mutex_lock(&rdtgroup_mutex);

@@ -756,8 +756,10 @@ void cqm_handle_limbo(struct work_struct *work)

__check_limbo(d, false);

- if (has_busy_rmid(r, d))
+ if (has_busy_rmid(r, d)) {
+ cpu = cpumask_any_housekeeping(&d->cpu_mask);
schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
+ }

mutex_unlock(&rdtgroup_mutex);
}
@@ -767,7 +769,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
unsigned long delay = msecs_to_jiffies(delay_ms);
int cpu;

- cpu = cpumask_any(&dom->cpu_mask);
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask);
dom->cqm_work_cpu = cpu;

schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
@@ -777,10 +779,10 @@ void mbm_handle_overflow(struct work_struct *work)
{
unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
struct rdtgroup *prgrp, *crgrp;
- int cpu = smp_processor_id();
struct list_head *head;
struct rdt_resource *r;
struct rdt_domain *d;
+ int cpu;

mutex_lock(&rdtgroup_mutex);

@@ -801,6 +803,11 @@ void mbm_handle_overflow(struct work_struct *work)
update_mba_bw(prgrp, d);
}

+ /*
+ * Re-check for housekeeping CPUs. This allows the overflow handler to
+ * move off a nohz_full CPU quickly.
+ */
+ cpu = cpumask_any_housekeeping(&d->cpu_mask);
schedule_delayed_work_on(cpu, &d->mbm_over, delay);

out_unlock:
@@ -814,7 +821,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)

if (!static_branch_likely(&rdt_mon_enable_key))
return;
- cpu = cpumask_any(&dom->cpu_mask);
+ cpu = cpumask_any_housekeeping(&dom->cpu_mask);
dom->mbm_work_cpu = cpu;
schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
}
diff --git a/include/linux/tick.h b/include/linux/tick.h
index bfd571f18cfd..ae2e9019fc18 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
static inline void tick_nohz_idle_stop_tick_protected(void) { }
#endif /* !CONFIG_NO_HZ_COMMON */

+extern cpumask_var_t tick_nohz_full_mask;
+
#ifdef CONFIG_NO_HZ_FULL
extern bool tick_nohz_full_running;
-extern cpumask_var_t tick_nohz_full_mask;

static inline bool tick_nohz_full_enabled(void)
{
--
2.39.2
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
On Mon, 20 Mar 2023, James Morse wrote:

> The limbo and overflow code picks a CPU to use from the domain's list
> of online CPUs. Work is then scheduled on these CPUs to maintain
> the limbo list and any counters that may overflow.
>
> cpumask_any() may pick a CPU that is marked nohz_full, which will
> either penalise the work that CPU was dedicated to, or delay the
> processing of limbo list or counters that may overflow. Perhaps
> indefinitely. Delaying the overflow handling will skew the bandwidth
> values calculated by mba_sc, which expects to be called once a second.
>
> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
> that prefers housekeeping CPUs. This helper will still return
> a nohz_full CPU if that is the only option. The CPU to use is
> re-evaluated each time the limbo/overflow work runs. This ensures
> the work will move off a nohz_full CPU once a houskeeping CPU is
> available.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 23 +++++++++++++++++++++++
> arch/x86/kernel/cpu/resctrl/monitor.c | 17 ++++++++++++-----
> include/linux/tick.h | 3 ++-
> 3 files changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 87545e4beb70..0b5fd5a0cda2 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -7,6 +7,7 @@
> #include <linux/kernfs.h>
> #include <linux/fs_context.h>
> #include <linux/jump_label.h>
> +#include <linux/tick.h>
> #include <asm/resctrl.h>
>
> #define L3_QOS_CDP_ENABLE 0x01ULL
> @@ -55,6 +56,28 @@
> /* Max event bits supported */
> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>
> +/**
> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that

Choose

> + * aren't marked nohz_full
> + * @mask: The mask to pick a CPU from.
> + *
> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use

housekeeping

> + * nohz_full, these are preferred.
> + */
> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> +{
> + int cpu, hk_cpu;
> +
> + cpu = cpumask_any(mask);
> + if (tick_nohz_full_cpu(cpu)) {
> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> + if (hk_cpu < nr_cpu_ids)
> + cpu = hk_cpu;
> + }
> +
> + return cpu;
> +}
> +
> struct rdt_fs_context {
> struct kernfs_fs_context kfc;
> bool enable_cdpl2;
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index a2ae4be4b2ba..3bec5c59ca0e 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -745,9 +745,9 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
> void cqm_handle_limbo(struct work_struct *work)
> {
> unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
> - int cpu = smp_processor_id();
> struct rdt_resource *r;
> struct rdt_domain *d;
> + int cpu;
>
> mutex_lock(&rdtgroup_mutex);
>
> @@ -756,8 +756,10 @@ void cqm_handle_limbo(struct work_struct *work)
>
> __check_limbo(d, false);
>
> - if (has_busy_rmid(r, d))
> + if (has_busy_rmid(r, d)) {
> + cpu = cpumask_any_housekeeping(&d->cpu_mask);
> schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
> + }
>
> mutex_unlock(&rdtgroup_mutex);
> }
> @@ -767,7 +769,7 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms)
> unsigned long delay = msecs_to_jiffies(delay_ms);
> int cpu;
>
> - cpu = cpumask_any(&dom->cpu_mask);
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> dom->cqm_work_cpu = cpu;
>
> schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
> @@ -777,10 +779,10 @@ void mbm_handle_overflow(struct work_struct *work)
> {
> unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
> struct rdtgroup *prgrp, *crgrp;
> - int cpu = smp_processor_id();
> struct list_head *head;
> struct rdt_resource *r;
> struct rdt_domain *d;
> + int cpu;
>
> mutex_lock(&rdtgroup_mutex);
>
> @@ -801,6 +803,11 @@ void mbm_handle_overflow(struct work_struct *work)
> update_mba_bw(prgrp, d);
> }
>
> + /*
> + * Re-check for housekeeping CPUs. This allows the overflow handler to
> + * move off a nohz_full CPU quickly.
> + */
> + cpu = cpumask_any_housekeeping(&d->cpu_mask);
> schedule_delayed_work_on(cpu, &d->mbm_over, delay);
>
> out_unlock:
> @@ -814,7 +821,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>
> if (!static_branch_likely(&rdt_mon_enable_key))
> return;
> - cpu = cpumask_any(&dom->cpu_mask);
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> dom->mbm_work_cpu = cpu;
> schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> }
> diff --git a/include/linux/tick.h b/include/linux/tick.h
> index bfd571f18cfd..ae2e9019fc18 100644
> --- a/include/linux/tick.h
> +++ b/include/linux/tick.h
> @@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
> static inline void tick_nohz_idle_stop_tick_protected(void) { }
> #endif /* !CONFIG_NO_HZ_COMMON */
>
> +extern cpumask_var_t tick_nohz_full_mask;
> +
> #ifdef CONFIG_NO_HZ_FULL
> extern bool tick_nohz_full_running;
> -extern cpumask_var_t tick_nohz_full_mask;

Its definition seems to also be inside #ifdef:

kernel/time/tick-sched.c-#ifdef CONFIG_NO_HZ_FULL
kernel/time/tick-sched.c:cpumask_var_t tick_nohz_full_mask;
kernel/time/tick-sched.c:EXPORT_SYMBOL_GPL(tick_nohz_full_mask);


--
i.
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
On Mon, 20 Mar 2023, James Morse wrote:

> The limbo and overflow code picks a CPU to use from the domain's list
> of online CPUs. Work is then scheduled on these CPUs to maintain
> the limbo list and any counters that may overflow.
>
> cpumask_any() may pick a CPU that is marked nohz_full, which will
> either penalise the work that CPU was dedicated to, or delay the
> processing of limbo list or counters that may overflow. Perhaps
> indefinitely. Delaying the overflow handling will skew the bandwidth
> values calculated by mba_sc, which expects to be called once a second.
>
> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
> that prefers housekeeping CPUs. This helper will still return
> a nohz_full CPU if that is the only option. The CPU to use is
> re-evaluated each time the limbo/overflow work runs. This ensures
> the work will move off a nohz_full CPU once a houskeeping CPU is

housekeeping

> available.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 23 +++++++++++++++++++++++
> arch/x86/kernel/cpu/resctrl/monitor.c | 17 ++++++++++++-----
> include/linux/tick.h | 3 ++-
> 3 files changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 87545e4beb70..0b5fd5a0cda2 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -7,6 +7,7 @@
> #include <linux/kernfs.h>
> #include <linux/fs_context.h>
> #include <linux/jump_label.h>
> +#include <linux/tick.h>
> #include <asm/resctrl.h>
>
> #define L3_QOS_CDP_ENABLE 0x01ULL
> @@ -55,6 +56,28 @@
> /* Max event bits supported */
> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>
> +/**
> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
> + * aren't marked nohz_full
> + * @mask: The mask to pick a CPU from.
> + *
> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
> + * nohz_full, these are preferred.
> + */
> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> +{
> + int cpu, hk_cpu;
> +
> + cpu = cpumask_any(mask);
> + if (tick_nohz_full_cpu(cpu)) {
> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);

Why cpumask_nth_and() is not enough here? ..._andnot() seems to alter
tick_nohz_full_mask which doesn't seem desirable?


--
i.
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi James,

On 3/20/2023 10:26 AM, James Morse wrote:
> The limbo and overflow code picks a CPU to use from the domain's list
> of online CPUs. Work is then scheduled on these CPUs to maintain
> the limbo list and any counters that may overflow.
>
> cpumask_any() may pick a CPU that is marked nohz_full, which will
> either penalise the work that CPU was dedicated to, or delay the

penalise -> penalize

> processing of limbo list or counters that may overflow. Perhaps
> indefinitely. Delaying the overflow handling will skew the bandwidth
> values calculated by mba_sc, which expects to be called once a second.
>
> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
> that prefers housekeeping CPUs. This helper will still return
> a nohz_full CPU if that is the only option. The CPU to use is
> re-evaluated each time the limbo/overflow work runs. This ensures
> the work will move off a nohz_full CPU once a houskeeping CPU is
> available.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 23 +++++++++++++++++++++++
> arch/x86/kernel/cpu/resctrl/monitor.c | 17 ++++++++++++-----
> include/linux/tick.h | 3 ++-
> 3 files changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 87545e4beb70..0b5fd5a0cda2 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -7,6 +7,7 @@
> #include <linux/kernfs.h>
> #include <linux/fs_context.h>
> #include <linux/jump_label.h>
> +#include <linux/tick.h>
> #include <asm/resctrl.h>
>
> #define L3_QOS_CDP_ENABLE 0x01ULL
> @@ -55,6 +56,28 @@
> /* Max event bits supported */
> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>
> +/**
> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
> + * aren't marked nohz_full

"Chose any cpu" -> "Choose any CPU"

> + * @mask: The mask to pick a CPU from.
> + *
> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
> + * nohz_full, these are preferred.
> + */
> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> +{
> + int cpu, hk_cpu;
> +
> + cpu = cpumask_any(mask);
> + if (tick_nohz_full_cpu(cpu)) {
> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> + if (hk_cpu < nr_cpu_ids)
> + cpu = hk_cpu;
> + }
> +

I think as a start this could perhaps be a #if defined(CONFIG_NO_HZ_FULL). There
appears to be a precedent for this in kernel/rcu/tree_nocb.h.

Apart from the issue that Ilpo pointed out I would prefer that any changes outside
resctrl are submitted separately to that subsystem.

...

> @@ -801,6 +803,11 @@ void mbm_handle_overflow(struct work_struct *work)
> update_mba_bw(prgrp, d);
> }
>
> + /*
> + * Re-check for housekeeping CPUs. This allows the overflow handler to
> + * move off a nohz_full CPU quickly.
> + */
> + cpu = cpumask_any_housekeeping(&d->cpu_mask);
> schedule_delayed_work_on(cpu, &d->mbm_over, delay);
>
> out_unlock:

From what I can tell the nohz_full CPUs are set during boot and do not change.


> @@ -814,7 +821,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>
> if (!static_branch_likely(&rdt_mon_enable_key))
> return;
> - cpu = cpumask_any(&dom->cpu_mask);
> + cpu = cpumask_any_housekeeping(&dom->cpu_mask);
> dom->mbm_work_cpu = cpu;
> schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
> }
> diff --git a/include/linux/tick.h b/include/linux/tick.h
> index bfd571f18cfd..ae2e9019fc18 100644
> --- a/include/linux/tick.h
> +++ b/include/linux/tick.h
> @@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
> static inline void tick_nohz_idle_stop_tick_protected(void) { }
> #endif /* !CONFIG_NO_HZ_COMMON */
>
> +extern cpumask_var_t tick_nohz_full_mask;
> +
> #ifdef CONFIG_NO_HZ_FULL
> extern bool tick_nohz_full_running;
> -extern cpumask_var_t tick_nohz_full_mask;
>
> static inline bool tick_nohz_full_enabled(void)
> {

In addition to what Ilpo pointed out, be careful here.
cpumask_var_t is a pointer (or array) and needs to be
allocated before use. Moving its declaration but not the
allocation code seems risky.

Reinette
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi Ilpo,

On 21/03/2023 13:21, Ilpo Järvinen wrote:
> On Mon, 20 Mar 2023, James Morse wrote:
>
>> The limbo and overflow code picks a CPU to use from the domain's list
>> of online CPUs. Work is then scheduled on these CPUs to maintain
>> the limbo list and any counters that may overflow.
>>
>> cpumask_any() may pick a CPU that is marked nohz_full, which will
>> either penalise the work that CPU was dedicated to, or delay the
>> processing of limbo list or counters that may overflow. Perhaps
>> indefinitely. Delaying the overflow handling will skew the bandwidth
>> values calculated by mba_sc, which expects to be called once a second.
>>
>> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
>> that prefers housekeeping CPUs. This helper will still return
>> a nohz_full CPU if that is the only option. The CPU to use is
>> re-evaluated each time the limbo/overflow work runs. This ensures
>> the work will move off a nohz_full CPU once a houskeeping CPU is
>> available.

>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 87545e4beb70..0b5fd5a0cda2 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -55,6 +56,28 @@
>> /* Max event bits supported */
>> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>>
>> +/**
>> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
>> + * aren't marked nohz_full
>> + * @mask: The mask to pick a CPU from.
>> + *
>> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
>> + * nohz_full, these are preferred.
>> + */
>> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>> +{
>> + int cpu, hk_cpu;
>> +
>> + cpu = cpumask_any(mask);
>> + if (tick_nohz_full_cpu(cpu)) {
>> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>> + if (hk_cpu < nr_cpu_ids)
>> + cpu = hk_cpu;
>> + }
>> +
>> + return cpu;
>> +}
>> diff --git a/include/linux/tick.h b/include/linux/tick.h
>> index bfd571f18cfd..ae2e9019fc18 100644
>> --- a/include/linux/tick.h
>> +++ b/include/linux/tick.h
>> @@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
>> static inline void tick_nohz_idle_stop_tick_protected(void) { }
>> #endif /* !CONFIG_NO_HZ_COMMON */
>>
>> +extern cpumask_var_t tick_nohz_full_mask;
>> +
>> #ifdef CONFIG_NO_HZ_FULL
>> extern bool tick_nohz_full_running;
>> -extern cpumask_var_t tick_nohz_full_mask;
>
> Its definition seems to also be inside #ifdef:
>
> kernel/time/tick-sched.c-#ifdef CONFIG_NO_HZ_FULL
> kernel/time/tick-sched.c:cpumask_var_t tick_nohz_full_mask;
> kernel/time/tick-sched.c:EXPORT_SYMBOL_GPL(tick_nohz_full_mask);

Indeed, but all the uses are guarded by tick_nohz_full_cpu(), which the compiler knows is
false if CONFIG_NO_HZ_FULL is not selected.

Moving the prototype is enough to let the compiler parse the code to check its correct,
before dead-code-eliminating it. There is no need to carry around the cpumask if its never
going to be used. This would only cause a problem if someone adds a user of
tick_nohz_full_mask which isn't guarded by IS_ENABLED(). I argue that would be a bug.

(All this is being done to avoid more #ifdeffery!)


Thanks,

James
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi Ilpo,

On 21/03/2023 15:14, Ilpo Järvinen wrote:
> On Mon, 20 Mar 2023, James Morse wrote:
>
>> The limbo and overflow code picks a CPU to use from the domain's list
>> of online CPUs. Work is then scheduled on these CPUs to maintain
>> the limbo list and any counters that may overflow.
>>
>> cpumask_any() may pick a CPU that is marked nohz_full, which will
>> either penalise the work that CPU was dedicated to, or delay the
>> processing of limbo list or counters that may overflow. Perhaps
>> indefinitely. Delaying the overflow handling will skew the bandwidth
>> values calculated by mba_sc, which expects to be called once a second.
>>
>> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
>> that prefers housekeeping CPUs. This helper will still return
>> a nohz_full CPU if that is the only option. The CPU to use is
>> re-evaluated each time the limbo/overflow work runs. This ensures
>> the work will move off a nohz_full CPU once a houskeeping CPU is
>
> housekeeping
>
>> available.

>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 87545e4beb70..0b5fd5a0cda2 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h

>> +/**
>> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
>> + * aren't marked nohz_full
>> + * @mask: The mask to pick a CPU from.
>> + *
>> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
>> + * nohz_full, these are preferred.
>> + */
>> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>> +{
>> + int cpu, hk_cpu;
>> +
>> + cpu = cpumask_any(mask);
>> + if (tick_nohz_full_cpu(cpu)) {
>> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>
> Why cpumask_nth_and() is not enough here? ..._andnot() seems to alter
> tick_nohz_full_mask which doesn't seem desirable?

tick_nohz_full_mask is the list of CPUs we should avoid. This wants to find the first cpu
set in the domain mask, and clear in tick_nohz_full_mask.

Where does cpumask_nth_andnot() modify its arguments? Its arguments are const.


Thanks,

James
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi Reinette,

On 01/04/2023 00:24, Reinette Chatre wrote:
> On 3/20/2023 10:26 AM, James Morse wrote:
>> The limbo and overflow code picks a CPU to use from the domain's list
>> of online CPUs. Work is then scheduled on these CPUs to maintain
>> the limbo list and any counters that may overflow.
>>
>> cpumask_any() may pick a CPU that is marked nohz_full, which will
>> either penalise the work that CPU was dedicated to, or delay the
>
> penalise -> penalize

(s->z is the difference between British English and American English)


>> processing of limbo list or counters that may overflow. Perhaps
>> indefinitely. Delaying the overflow handling will skew the bandwidth
>> values calculated by mba_sc, which expects to be called once a second.
>>
>> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
>> that prefers housekeeping CPUs. This helper will still return
>> a nohz_full CPU if that is the only option. The CPU to use is
>> re-evaluated each time the limbo/overflow work runs. This ensures
>> the work will move off a nohz_full CPU once a houskeeping CPU is
>> available.

>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index 87545e4beb70..0b5fd5a0cda2 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -55,6 +56,28 @@
>> /* Max event bits supported */
>> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>>
>> +/**
>> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
>> + * aren't marked nohz_full
>> + * @mask: The mask to pick a CPU from.
>> + *
>> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
>> + * nohz_full, these are preferred.
>> + */
>> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>> +{
>> + int cpu, hk_cpu;
>> +
>> + cpu = cpumask_any(mask);
>> + if (tick_nohz_full_cpu(cpu)) {
>> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>> + if (hk_cpu < nr_cpu_ids)
>> + cpu = hk_cpu;
>> + }
>> +

> I think as a start this could perhaps be a #if defined(CONFIG_NO_HZ_FULL). There
> appears to be a precedent for this in kernel/rcu/tree_nocb.h.

This harms readability, and prevents the compiler from testing that this is valid C code
for any compile of this code.

With if-def's here you'd be reliant on come CI system to build with the required
combination of Kconfig symbols to expose any warnings.

It's much better to use IS_ENABLED() in the helpers and rely on the compiler's
dead-code-elimination to remove paths that have been configured out.

(See the section on Conditional Compilation in coding-style for a much better summary!)


> Apart from the issue that Ilpo pointed out I would prefer that any changes outside
> resctrl are submitted separately to that subsystem.

Sure, I'll pull those three lines out as a separate patch.


>> @@ -801,6 +803,11 @@ void mbm_handle_overflow(struct work_struct *work)
>> update_mba_bw(prgrp, d);
>> }
>>
>> + /*
>> + * Re-check for housekeeping CPUs. This allows the overflow handler to
>> + * move off a nohz_full CPU quickly.
>> + */
>> + cpu = cpumask_any_housekeeping(&d->cpu_mask);
>> schedule_delayed_work_on(cpu, &d->mbm_over, delay);
>>
>> out_unlock:
>
> From what I can tell the nohz_full CPUs are set during boot and do not change.

But the housekeeping CPUs can be taken offline, and brought back.

With this change the work moves off the nohz_full CPU and back to the housekeeping CPU the
next time this runs. Without it, you're stuck on a nohz_full CPU until you take that CPU
offline too.


>> diff --git a/include/linux/tick.h b/include/linux/tick.h
>> index bfd571f18cfd..ae2e9019fc18 100644
>> --- a/include/linux/tick.h
>> +++ b/include/linux/tick.h
>> @@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
>> static inline void tick_nohz_idle_stop_tick_protected(void) { }
>> #endif /* !CONFIG_NO_HZ_COMMON */
>>
>> +extern cpumask_var_t tick_nohz_full_mask;
>> +
>> #ifdef CONFIG_NO_HZ_FULL
>> extern bool tick_nohz_full_running;
>> -extern cpumask_var_t tick_nohz_full_mask;
>>
>> static inline bool tick_nohz_full_enabled(void)
>> {
>
> In addition to what Ilpo pointed out, be careful here.
> cpumask_var_t is a pointer (or array) and needs to be
> allocated before use. Moving its declaration but not the
> allocation code seems risky.

Risky how? Any use of tick_nohz_full_mask that isn't guarded by something like
tick_nohz_full_cpu() will lead to a link error regardless of the type.


Thanks,

James
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
On Thu, 27 Apr 2023, James Morse wrote:

> Hi Ilpo,
>
> On 21/03/2023 15:14, Ilpo Järvinen wrote:
> > On Mon, 20 Mar 2023, James Morse wrote:
> >
> >> The limbo and overflow code picks a CPU to use from the domain's list
> >> of online CPUs. Work is then scheduled on these CPUs to maintain
> >> the limbo list and any counters that may overflow.
> >>
> >> cpumask_any() may pick a CPU that is marked nohz_full, which will
> >> either penalise the work that CPU was dedicated to, or delay the
> >> processing of limbo list or counters that may overflow. Perhaps
> >> indefinitely. Delaying the overflow handling will skew the bandwidth
> >> values calculated by mba_sc, which expects to be called once a second.
> >>
> >> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
> >> that prefers housekeeping CPUs. This helper will still return
> >> a nohz_full CPU if that is the only option. The CPU to use is
> >> re-evaluated each time the limbo/overflow work runs. This ensures
> >> the work will move off a nohz_full CPU once a houskeeping CPU is
> >
> > housekeeping
> >
> >> available.
>
> >> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> >> index 87545e4beb70..0b5fd5a0cda2 100644
> >> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> >> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>
> >> +/**
> >> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
> >> + * aren't marked nohz_full
> >> + * @mask: The mask to pick a CPU from.
> >> + *
> >> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
> >> + * nohz_full, these are preferred.
> >> + */
> >> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
> >> +{
> >> + int cpu, hk_cpu;
> >> +
> >> + cpu = cpumask_any(mask);
> >> + if (tick_nohz_full_cpu(cpu)) {
> >> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
> >
> > Why cpumask_nth_and() is not enough here? ..._andnot() seems to alter
> > tick_nohz_full_mask which doesn't seem desirable?
>
> tick_nohz_full_mask is the list of CPUs we should avoid. This wants to find the first cpu
> set in the domain mask, and clear in tick_nohz_full_mask.
>
> Where does cpumask_nth_andnot() modify its arguments? Its arguments are const.

Ah, it doesn't, I'm sorry about that.

I think I was trapped by ambiguous English:
* cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd.
...which can be understood as it clearing it in 2nd.


--
i.
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi James,

On 4/27/2023 7:10 AM, James Morse wrote:
> Hi Reinette,
>
> On 01/04/2023 00:24, Reinette Chatre wrote:
>> On 3/20/2023 10:26 AM, James Morse wrote:
>>> The limbo and overflow code picks a CPU to use from the domain's list
>>> of online CPUs. Work is then scheduled on these CPUs to maintain
>>> the limbo list and any counters that may overflow.
>>>
>>> cpumask_any() may pick a CPU that is marked nohz_full, which will
>>> either penalise the work that CPU was dedicated to, or delay the
>>
>> penalise -> penalize
>
> (s->z is the difference between British English and American English)

My apologies.

>>> processing of limbo list or counters that may overflow. Perhaps
>>> indefinitely. Delaying the overflow handling will skew the bandwidth
>>> values calculated by mba_sc, which expects to be called once a second.
>>>
>>> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
>>> that prefers housekeeping CPUs. This helper will still return
>>> a nohz_full CPU if that is the only option. The CPU to use is
>>> re-evaluated each time the limbo/overflow work runs. This ensures
>>> the work will move off a nohz_full CPU once a houskeeping CPU is
>>> available.
>
>>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>>> index 87545e4beb70..0b5fd5a0cda2 100644
>>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>>> @@ -55,6 +56,28 @@
>>> /* Max event bits supported */
>>> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>>>
>>> +/**
>>> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
>>> + * aren't marked nohz_full
>>> + * @mask: The mask to pick a CPU from.
>>> + *
>>> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
>>> + * nohz_full, these are preferred.
>>> + */
>>> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>>> +{
>>> + int cpu, hk_cpu;
>>> +
>>> + cpu = cpumask_any(mask);
>>> + if (tick_nohz_full_cpu(cpu)) {
>>> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>>> + if (hk_cpu < nr_cpu_ids)
>>> + cpu = hk_cpu;
>>> + }
>>> +
>
>> I think as a start this could perhaps be a #if defined(CONFIG_NO_HZ_FULL). There
>> appears to be a precedent for this in kernel/rcu/tree_nocb.h.
>
> This harms readability, and prevents the compiler from testing that this is valid C code
> for any compile of this code.
>
> With if-def's here you'd be reliant on some CI system to build with the required
> combination of Kconfig symbols to expose any warnings.
>
> It's much better to use IS_ENABLED() in the helpers and rely on the compiler's
> dead-code-elimination to remove paths that have been configured out.
>
> (See the section on Conditional Compilation in coding-style for a much better summary!)

My assumption was that you intended to implement what is described first in
the document you point to. That is, providing no-stub versions for all
and then calling everything unconditionally. Since I did not see universal stubs
for the code you are using I was looking at how other areas in the kernel handled
the same.

Reading your response to Ilpo and what you write later I now see that you are using
a combination of no-op stubs and conditional compilation. That is, you use a no-op stub,
instead of "IS_ENABLED()" or "#if" to conditionally compile some code. I am not familiar
with how compilers handle these scenarios.

>> Apart from the issue that Ilpo pointed out I would prefer that any changes outside
>> resctrl are submitted separately to that subsystem.
>
> Sure, I'll pull those three lines out as a separate patch.
>
>
>>> @@ -801,6 +803,11 @@ void mbm_handle_overflow(struct work_struct *work)
>>> update_mba_bw(prgrp, d);
>>> }
>>>
>>> + /*
>>> + * Re-check for housekeeping CPUs. This allows the overflow handler to
>>> + * move off a nohz_full CPU quickly.
>>> + */
>>> + cpu = cpumask_any_housekeeping(&d->cpu_mask);
>>> schedule_delayed_work_on(cpu, &d->mbm_over, delay);
>>>
>>> out_unlock:
>>
>> From what I can tell the nohz_full CPUs are set during boot and do not change.
>
> But the house keeping CPUs can be taken offline, and brought back.
>
> With this change the work moves off the nohz_full CPU and back to the housekeeping CPU the
> next time this runs. Without it, you're stuck on a nohz_full CPU until you take that CPU
> offline too.

Good point, thanks.

>>> diff --git a/include/linux/tick.h b/include/linux/tick.h
>>> index bfd571f18cfd..ae2e9019fc18 100644
>>> --- a/include/linux/tick.h
>>> +++ b/include/linux/tick.h
>>> @@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
>>> static inline void tick_nohz_idle_stop_tick_protected(void) { }
>>> #endif /* !CONFIG_NO_HZ_COMMON */
>>>
>>> +extern cpumask_var_t tick_nohz_full_mask;
>>> +
>>> #ifdef CONFIG_NO_HZ_FULL
>>> extern bool tick_nohz_full_running;
>>> -extern cpumask_var_t tick_nohz_full_mask;
>>>
>>> static inline bool tick_nohz_full_enabled(void)
>>> {
>>
>> In addition to what Ilpo pointed out, be careful here.
>> cpumask_var_t is a pointer (or array) and needs to be
>> allocated before use. Moving its declaration but not the
>> allocation code seems risky.
>
> Risky how? Any use of tick_nohz_full_mask that isn't guarded by something like
> tick_nohz_full_cpu() will lead to a link error regardless of the type.

I assumed that the intention was to create an actual "no-op" stub for this
mask, enabling it to be used unconditionally. That the intention is for it
to be guarded and how the compiler deals with this was not obvious to me. I think
it would be good to call out this usage when submitting this to the appropriate
maintainers. A comment near the declaration may help users to know how it is
intended to be used.

Reinette
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi Ilpo,

On 27/04/2023 15:25, Ilpo Järvinen wrote:
> On Thu, 27 Apr 2023, James Morse wrote:
>> On 21/03/2023 15:14, Ilpo Järvinen wrote:
>>> On Mon, 20 Mar 2023, James Morse wrote:
>>>
>>>> The limbo and overflow code picks a CPU to use from the domain's list
>>>> of online CPUs. Work is then scheduled on these CPUs to maintain
>>>> the limbo list and any counters that may overflow.
>>>>
>>>> cpumask_any() may pick a CPU that is marked nohz_full, which will
>>>> either penalise the work that CPU was dedicated to, or delay the
>>>> processing of limbo list or counters that may overflow. Perhaps
>>>> indefinitely. Delaying the overflow handling will skew the bandwidth
>>>> values calculated by mba_sc, which expects to be called once a second.
>>>>
>>>> Add cpumask_any_housekeeping() as a replacement for cpumask_any()
>>>> that prefers housekeeping CPUs. This helper will still return
>>>> a nohz_full CPU if that is the only option. The CPU to use is
>>>> re-evaluated each time the limbo/overflow work runs. This ensures
>>>> the work will move off a nohz_full CPU once a houskeeping CPU is
>>>
>>> housekeeping
>>>
>>>> available.
>>
>>>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>>>> index 87545e4beb70..0b5fd5a0cda2 100644
>>>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>>>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>>
>>>> +/**
>>>> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
>>>> + * aren't marked nohz_full
>>>> + * @mask: The mask to pick a CPU from.
>>>> + *
>>>> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
>>>> + * nohz_full, these are preferred.
>>>> + */
>>>> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>>>> +{
>>>> + int cpu, hk_cpu;
>>>> +
>>>> + cpu = cpumask_any(mask);
>>>> + if (tick_nohz_full_cpu(cpu)) {
>>>> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>>>
>>> Why cpumask_nth_and() is not enough here? ..._andnot() seems to alter
>>> tick_nohz_full_mask which doesn't seem desirable?
>>
>> tick_nohz_full_mask is the list of CPUs we should avoid. This wants to find the first cpu
>> set in the domain mask, and clear in tick_nohz_full_mask.
>>
>> Where does cpumask_nth_andnot() modify its arguments? Its arguments are const.
>
> Ah, it doesn't, I'm sorry about that.
>
> I think I was trapped by ambiguous English:
> * cpumask_nth_andnot - get the first cpu set in 1st cpumask, and clear in 2nd.
> ...which can be understood as it clearing it in 2nd.
Great, I'm not going mad!

How could the English there be clearer?
"get the first cpu that is set in 1st cpumask, and not set in 2nd." ?


Thanks,

James
Re: [PATCH v3 08/19] x86/resctrl: Add cpumask_any_housekeeping() for limbo/overflow [ In reply to ]
Hi Reinette,

On 28/04/2023 00:36, Reinette Chatre wrote:
> On 4/27/2023 7:10 AM, James Morse wrote:
>> On 01/04/2023 00:24, Reinette Chatre wrote:
>>> On 3/20/2023 10:26 AM, James Morse wrote:

>>>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>>>> index 87545e4beb70..0b5fd5a0cda2 100644
>>>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>>>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>>>> @@ -55,6 +56,28 @@
>>>> /* Max event bits supported */
>>>> #define MAX_EVT_CONFIG_BITS GENMASK(6, 0)
>>>>
>>>> +/**
>>>> + * cpumask_any_housekeeping() - Chose any cpu in @mask, preferring those that
>>>> + * aren't marked nohz_full
>>>> + * @mask: The mask to pick a CPU from.
>>>> + *
>>>> + * Returns a CPU in @mask. If there are houskeeping CPUs that don't use
>>>> + * nohz_full, these are preferred.
>>>> + */
>>>> +static inline unsigned int cpumask_any_housekeeping(const struct cpumask *mask)
>>>> +{
>>>> + int cpu, hk_cpu;
>>>> +
>>>> + cpu = cpumask_any(mask);
>>>> + if (tick_nohz_full_cpu(cpu)) {
>>>> + hk_cpu = cpumask_nth_andnot(0, mask, tick_nohz_full_mask);
>>>> + if (hk_cpu < nr_cpu_ids)
>>>> + cpu = hk_cpu;
>>>> + }
>>>> +
>>
>>> I think as a start this could perhaps be a #if defined(CONFIG_NO_HZ_FULL). There
>>> appears to be a precedent for this in kernel/rcu/tree_nocb.h.
>>
>> This harms readability, and prevents the compiler from testing that this is valid C code
>> for any compile of this code.
>>
>> With if-def's here you'd be reliant on some CI system to build with the required
>> combination of Kconfig symbols to expose any warnings.
>>
>> It's much better to use IS_ENABLED() in the helpers and rely on the compiler's
>> dead-code-elimination to remove paths that have been configured out.
>>
>> (See the section on Conditional Compilation in coding-style for a much better summary!)
>
> My assumption was that you intended to implement what is described first in
> the document you point to. That is, providing no-op stub versions for all
> and then calling everything unconditionally. Since I did not see universal stubs
> for the code you are using I was looking at how other areas in the kernel handled
> the same.
>
> Reading your response to Ilpo and what you write later I now see that you are using
> a combination of no-op stubs and conditional compilation. That is, you use a no-op stub,
> instead of "IS_ENABLED()" or "#if" to conditionally compile some code. I am not familiar
> with how compilers handle these scenarios.
>

>>>> diff --git a/include/linux/tick.h b/include/linux/tick.h
>>>> index bfd571f18cfd..ae2e9019fc18 100644
>>>> --- a/include/linux/tick.h
>>>> +++ b/include/linux/tick.h
>>>> @@ -174,9 +174,10 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
>>>> static inline void tick_nohz_idle_stop_tick_protected(void) { }
>>>> #endif /* !CONFIG_NO_HZ_COMMON */
>>>>
>>>> +extern cpumask_var_t tick_nohz_full_mask;
>>>> +
>>>> #ifdef CONFIG_NO_HZ_FULL
>>>> extern bool tick_nohz_full_running;
>>>> -extern cpumask_var_t tick_nohz_full_mask;
>>>>
>>>> static inline bool tick_nohz_full_enabled(void)
>>>> {
>>>
>>> In addition to what Ilpo pointed out, be careful here.
>>> cpumask_var_t is a pointer (or array) and needs to be
>>> allocated before use. Moving its declaration but not the
>>> allocation code seems risky.
>>
>> Risky how? Any use of tick_nohz_full_mask that isn't guarded by something like
>> tick_nohz_full_cpu() will lead to a link error regardless of the type.
>
> I assumed that the intention was to create an actual "no-op" stub for this
> mask, enabling it to be used unconditionally. That the intention is for it
> to be guarded and how the compiler deals with this was not obvious to me. I think
> it would be good to call out this usage when submitting this to the appropriate
> maintainers. A comment near the declaration may help users to know how it is
> intended to be used.

Right, I'll add a comment:
/*
* Mask of CPUs that are nohz_full.
*
* Users should be guarded by CONFIG_NO_HZ_FULL or a tick_nohz_full_cpu()
* check.
*/




Thanks,

James