Mailing List Archive

[PATCH] x86/smp: use APIC ALLBUT destination shorthand when possible
If the IPI destination mask matches the mask of online CPUs use the
APIC ALLBUT destination shorthand in order to send an IPI to all CPUs
on the system except the current one. This can only be safely used
when no CPU hotplug or unplug operations are taking place, no offline
CPUs or those have been onlined and parked and finally when all CPUs
in the system have been accounted for (ie: the number of CPUs doesn't
exceed NR_CPUS and APIC IDs are below MAX_APICS).

This is especially beneficial when using the PV shim, since using the
shorthand avoids performing an APIC register write (or multiple ones
if using xAPIC mode) for each destination when doing a global TLB
flush.

The lock time of flush_lock on a 32 vCPU guest using the shim without
the shorthand is:

Global lock flush_lock: addr=ffff82d0804b21c0, lockval=f602f602, not locked
lock:228455938(79406065573135), block:205908580(556416605761539)

Average lock time: 347577ns

While the same guest using the shorthand:

Global lock flush_lock: addr=ffff82d0804b41c0, lockval=d9c4d9bc, cpu=12
lock:1890775(416719148054), block:1663958(2500161282949)

Average lock time: 220395ns

Approximately a 1/3 improvement in the lock time.

Note that this requires locking the CPU maps (get_cpu_maps) which uses
a trylock. This is currently safe as all users of cpu_add_remove_lock
do a trylock, but will need reevaluating if non-trylock users appear.

Also there's some code movement of __prepare_ICR and
__default_send_IPI_shortcut, which is a non-functional change but I
didn't feel like it should be split to a separate patch.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v1:
- Move the shorthand logic to send_IPI_mask.
- Check interrupts are enabled before trying to get the cpu maps
lock.
- Move __prepare_ICR and __default_send_IPI_shortcut.
---
xen/arch/x86/acpi/boot.c | 1 +
xen/arch/x86/mpparse.c | 5 +++
xen/arch/x86/smp.c | 86 +++++++++++++++++++++++++++------------
xen/include/asm-x86/smp.h | 2 +
4 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index 15542a9bdf..88e1a89ff0 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -103,6 +103,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
processor->lapic_flags & ACPI_MADT_ENABLED
? KERN_WARNING "WARNING: " : KERN_INFO,
processor->local_apic_id, processor->uid);
+ cpu_overflow = true;
/*
* Must not return an error here, to prevent
* acpi_table_parse_entries() from terminating early.
diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
index f057d9162f..8d7739fbf4 100644
--- a/xen/arch/x86/mpparse.c
+++ b/xen/arch/x86/mpparse.c
@@ -66,6 +66,9 @@ static unsigned int __initdata disabled_cpus;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;

+/* Record whether CPUs haven't been added due to overflows. */
+bool __read_mostly cpu_overflow;
+
void __init set_nr_cpu_ids(unsigned int max_cpus)
{
unsigned int tot_cpus = num_processors + disabled_cpus;
@@ -160,6 +163,7 @@ static int MP_processor_info_x(struct mpc_config_processor *m,
printk_once(XENLOG_WARNING
"WARNING: NR_CPUS limit of %u reached - ignoring further processors\n",
nr_cpu_ids);
+ cpu_overflow = true;
return -ENOSPC;
}

@@ -167,6 +171,7 @@ static int MP_processor_info_x(struct mpc_config_processor *m,
&& genapic.name == apic_default.name) {
printk_once(XENLOG_WARNING
"WARNING: CPUs limit of 8 reached - ignoring futher processors\n");
+ cpu_overflow = true;
return -ENOSPC;
}

diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index c8e5913e47..6510dd84ab 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -8,6 +8,7 @@
* later.
*/

+#include <xen/cpu.h>
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/delay.h>
@@ -23,6 +24,31 @@
#include <irq_vectors.h>
#include <mach_apic.h>

+static inline int __prepare_ICR(unsigned int shortcut, int vector)
+{
+ return APIC_DM_FIXED | shortcut | vector;
+}
+
+static void __default_send_IPI_shortcut(unsigned int shortcut, int vector,
+ unsigned int dest)
+{
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector) | dest;
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write(APIC_ICR, cfg);
+}
+
/*
* send_IPI_mask(cpumask, vector): sends @vector IPI to CPUs in @cpumask,
* excluding the local CPU. @cpumask may be empty.
@@ -30,7 +56,40 @@

void send_IPI_mask(const cpumask_t *mask, int vector)
{
- alternative_vcall(genapic.send_IPI_mask, mask, vector);
+ bool cpus_locked = false;
+
+ /*
+ * Prevent any CPU hot{un}plug while sending the IPIs if we are to use
+ * a shorthand, also refuse to use a shorthand if not all CPUs are
+ * online or have been parked.
+ */
+ if ( system_state > SYS_STATE_smp_boot && !cpu_overflow &&
+ /* NB: get_cpu_maps lock requires enabled interrupts. */
+ local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
+ (park_offline_cpus ||
+ cpumask_equal(&cpu_online_map, &cpu_present_map)) )
+ {
+ cpumask_copy(this_cpu(scratch_cpumask), &cpu_online_map);
+ cpumask_clear_cpu(smp_processor_id(), this_cpu(scratch_cpumask));
+ }
+ else
+ {
+ if ( cpus_locked )
+ {
+ put_cpu_maps();
+ cpus_locked = false;
+ }
+ cpumask_clear(this_cpu(scratch_cpumask));
+ }
+
+ if ( cpumask_equal(mask, this_cpu(scratch_cpumask)) )
+ __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
+ APIC_DEST_PHYSICAL);
+ else
+ alternative_vcall(genapic.send_IPI_mask, mask, vector);
+
+ if ( cpus_locked )
+ put_cpu_maps();
}

void send_IPI_self(int vector)
@@ -80,11 +139,6 @@ void send_IPI_self(int vector)
* The following functions deal with sending IPIs between CPUs.
*/

-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
- return APIC_DM_FIXED | shortcut | vector;
-}
-
static inline int __prepare_ICR2 (unsigned int mask)
{
return SET_xAPIC_DEST_FIELD(mask);
@@ -99,26 +153,6 @@ void apic_wait_icr_idle(void)
cpu_relax();
}

-static void __default_send_IPI_shortcut(unsigned int shortcut, int vector,
- unsigned int dest)
-{
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR(shortcut, vector) | dest;
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write(APIC_ICR, cfg);
-}
-
void send_IPI_self_legacy(uint8_t vector)
{
__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index dbeed2fd41..3df4185744 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -84,6 +84,8 @@ extern cpumask_t **socket_cpumask;
#define get_cpu_current(cpu) \
(get_cpu_info_from_stack((unsigned long)stack_base[cpu])->current_vcpu)

+extern bool cpu_overflow;
+
#endif /* !__ASSEMBLY__ */

#endif
--
2.24.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [PATCH] x86/smp: use APIC ALLBUT destination shorthand when possible [ In reply to ]
On 09.01.2020 17:22, Roger Pau Monne wrote:
> If the IPI destination mask matches the mask of online CPUs use the
> APIC ALLBUT destination shorthand in order to send an IPI to all CPUs
> on the system except the current one. This can only be safely used
> when no CPU hotplug or unplug operations are taking place, no offline
> CPUs or those have been onlined and parked and finally when all CPUs
> in the system have been accounted for (ie: the number of CPUs doesn't
> exceed NR_CPUS and APIC IDs are below MAX_APICS).
>
> This is specially beneficial when using the PV shim, since using the
> shorthand avoids performing an APIC register write (or multiple ones
> if using xAPIC mode) for each destination when doing a global TLB
> flush.
>
> The lock time of flush_lock on a 32 vCPU guest using the shim without
> the shorthand is:
>
> Global lock flush_lock: addr=ffff82d0804b21c0, lockval=f602f602, not locked
> lock:228455938(79406065573135), block:205908580(556416605761539)
>
> Average lock time: 347577ns
>
> While the same guest using the shorthand:
>
> Global lock flush_lock: addr=ffff82d0804b41c0, lockval=d9c4d9bc, cpu=12
> lock:1890775(416719148054), block:1663958(2500161282949)
>
> Average lock time: 220395ns
>
> Approximately a 1/3 improvement in the lock time.
>
> Note that this requires locking the CPU maps (get_cpu_maps) which uses
> a trylock. This is currently safe as all users of cpu_add_remove_lock
> do a trylock, but will need reevaluating if non-trylock users appear.

All of this looks okay to me, but I have a number of mechanical
(hopefully not too nitpicky) comments.

> Also there's some code movement of __prepare_ICR and
> __default_send_IPI_shortcut, which is a non-functional change but I
> didn't feel like it should be split to a separate patch.

This may better be split out - see below for why.

> --- a/xen/arch/x86/acpi/boot.c
> +++ b/xen/arch/x86/acpi/boot.c
> @@ -103,6 +103,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
> processor->lapic_flags & ACPI_MADT_ENABLED
> ? KERN_WARNING "WARNING: " : KERN_INFO,
> processor->local_apic_id, processor->uid);
> + cpu_overflow = true;

I don't think this is a good name. Seeing that we have "disabled_cpus",
how about "unaccounted_cpus" or some such? (This could still be a boolean
for the time being, if preferred to not be a true count.)

Thinking about it, what about the period of time between a CPU having
got physically hot-added (and hence known at the hardware level) and
it actually getting brought up for the first time? IOW - do you
perhaps need to exclude use of the shortcut also when disabled_cpus
is non-zero?

> @@ -23,6 +24,31 @@
> #include <irq_vectors.h>
> #include <mach_apic.h>
>
> +static inline int __prepare_ICR(unsigned int shortcut, int vector)
> +{
> + return APIC_DM_FIXED | shortcut | vector;
> +}

I think __prepare_ICR2() should be moved together with it, and I also
think movement like this should include correcting the name (by
dropping at least one of the underscores). As per recent comments
elsewhere "inline" may also want dropping at this occasion.

> +static void __default_send_IPI_shortcut(unsigned int shortcut, int vector,
> + unsigned int dest)

The renaming should go even farther here: There's nothing "default"
about this function - there's not second, non-default implementation.
Just like other similar functions it should just be
send_IPI_shortcut().

> @@ -30,7 +56,40 @@
>
> void send_IPI_mask(const cpumask_t *mask, int vector)
> {
> - alternative_vcall(genapic.send_IPI_mask, mask, vector);
> + bool cpus_locked = false;
> +
> + /*
> + * Prevent any CPU hot{un}plug while sending the IPIs if we are to use
> + * a shorthand, also refuse to use a shorthand if not all CPUs are
> + * online or have been parked.
> + */
> + if ( system_state > SYS_STATE_smp_boot && !cpu_overflow &&
> + /* NB: get_cpu_maps lock requires enabled interrupts. */
> + local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
> + (park_offline_cpus ||
> + cpumask_equal(&cpu_online_map, &cpu_present_map)) )

On the first and second pass I thought this contradicts the description.
To disambiguate (and assuming I understand it correctly), how about
saying something like "This can only be safely used when no CPU hotplug
or unplug operations are taking place, there are no offline CPUs (unless
those have been onlined and parked) and finally ..."?

> + {
> + cpumask_copy(this_cpu(scratch_cpumask), &cpu_online_map);
> + cpumask_clear_cpu(smp_processor_id(), this_cpu(scratch_cpumask));

cpumask_andnot(this_cpu(scratch_cpumask), &cpu_online_map,
cpumask_of(smp_processor_id()));

?

> + }
> + else
> + {
> + if ( cpus_locked )
> + {
> + put_cpu_maps();
> + cpus_locked = false;
> + }
> + cpumask_clear(this_cpu(scratch_cpumask));

Is there a guarantee that the function won't be called with an
empty mask? All backing functions look to gracefully deal with
this case, yet ...

> + }
> +
> + if ( cpumask_equal(mask, this_cpu(scratch_cpumask)) )
> + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
> + APIC_DEST_PHYSICAL);

... you'd make this an "all-but" message then. Adding a
!cpumask_empty() check would seem a little expensive, so how
about you copy cpumask_of(smp_processor_id()) above and add
!cpumask_test_cpu(smp_processor_id(), ...) here?

> + else
> + alternative_vcall(genapic.send_IPI_mask, mask, vector);

Is there no reason at all to also check here whether APIC_DEST_ALL
could be used? Oh, I see - the backing functions all exclude the
local CPU. I wonder why e.g. flush_area_mask() then clears the
CPU off the mask it passes on. And with this behavior the
single cpumask_equal() check above is too restrictive - you'll
want to check whether mask matches the calculated all-but one or
cpu_online_map. I.e. perhaps you want

cpumask_or(this_cpu(scratch_cpumask), mask,
cpumask_of(smp_processor_id()));

and then

if ( cpumask_equal(this_cpu(scratch_cpumask), &cpu_online_map) )

?

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [PATCH] x86/smp: use APIC ALLBUT destination shorthand when possible [ In reply to ]
On Thu, Jan 16, 2020 at 11:44:51AM +0100, Jan Beulich wrote:
> On 09.01.2020 17:22, Roger Pau Monne wrote:
> > If the IPI destination mask matches the mask of online CPUs use the
> > APIC ALLBUT destination shorthand in order to send an IPI to all CPUs
> > on the system except the current one. This can only be safely used
> > when no CPU hotplug or unplug operations are taking place, no offline
> > CPUs or those have been onlined and parked and finally when all CPUs
> > in the system have been accounted for (ie: the number of CPUs doesn't
> > exceed NR_CPUS and APIC IDs are below MAX_APICS).
> >
> > This is specially beneficial when using the PV shim, since using the
> > shorthand avoids performing an APIC register write (or multiple ones
> > if using xAPIC mode) for each destination when doing a global TLB
> > flush.
> >
> > The lock time of flush_lock on a 32 vCPU guest using the shim without
> > the shorthand is:
> >
> > Global lock flush_lock: addr=ffff82d0804b21c0, lockval=f602f602, not locked
> > lock:228455938(79406065573135), block:205908580(556416605761539)
> >
> > Average lock time: 347577ns
> >
> > While the same guest using the shorthand:
> >
> > Global lock flush_lock: addr=ffff82d0804b41c0, lockval=d9c4d9bc, cpu=12
> > lock:1890775(416719148054), block:1663958(2500161282949)
> >
> > Average lock time: 220395ns
> >
> > Approximately a 1/3 improvement in the lock time.
> >
> > Note that this requires locking the CPU maps (get_cpu_maps) which uses
> > a trylock. This is currently safe as all users of cpu_add_remove_lock
> > do a trylock, but will need reevaluating if non-trylock users appear.
>
> All of this looks okay to me, but I have a number of mechanical
> (hopefully not too nitpicky) comments.
>
> > Also there's some code movement of __prepare_ICR and
> > __default_send_IPI_shortcut, which is a non-functional change but I
> > didn't feel like it should be split to a separate patch.
>
> This may better be split out - see below for why.

Done in a pre-patch.

>
> > --- a/xen/arch/x86/acpi/boot.c
> > +++ b/xen/arch/x86/acpi/boot.c
> > @@ -103,6 +103,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
> > processor->lapic_flags & ACPI_MADT_ENABLED
> > ? KERN_WARNING "WARNING: " : KERN_INFO,
> > processor->local_apic_id, processor->uid);
> > + cpu_overflow = true;
>
> I don't think this is a good name. Seeing that we have "disabled_cpus",
> how about "unaccounted_cpus" or some such? (This could still be a boolean
> for the time being, if preferred to not be a true count.)

Done, left it as a boolean though.

> Thinking about it, what about the period of time between a CPU having
> got physically hot-added (and hence known at the hardware level) and
> it actually getting brought up for the first time? IOW - do you
> perhaps need to exclude use of the shortcut also when disabled_cpus
> is non-zero?

Yes, I guess there's no signal to Xen that a new CPU is going to be
physically hot-added, and hence as long as there are empty slots the
scenario you describe above is possible.

I don't think there's also anyway to figure out if a system is capable
of physical CPU hotplug, so as long as there are disabled_cpus we
shouldn't use the shortcut (that's kind of a shame, because I think
there are many systems reporting disabled CPUs in MADT).

> > @@ -23,6 +24,31 @@
> > #include <irq_vectors.h>
> > #include <mach_apic.h>
> >
> > +static inline int __prepare_ICR(unsigned int shortcut, int vector)
> > +{
> > + return APIC_DM_FIXED | shortcut | vector;
> > +}
>
> I think __prepare_ICR2() should be moved together with it, and I also
> think movement like this should include correcting the name (by
> dropping at least one of the underscores). As per recent comments
> elsewhere "inline" may also want dropping at this occasion.

I've dropped both underscores and the inline attribute in a pre-patch.

> > +static void __default_send_IPI_shortcut(unsigned int shortcut, int vector,
> > + unsigned int dest)
>
> The renaming should go even farther here: There's nothing "default"
> about this function - there's not second, non-default implementation.
> Just like other similar functions it should just be
> send_IPI_shortcut().

Ack, done in the pre-patch.

> > @@ -30,7 +56,40 @@
> >
> > void send_IPI_mask(const cpumask_t *mask, int vector)
> > {
> > - alternative_vcall(genapic.send_IPI_mask, mask, vector);
> > + bool cpus_locked = false;
> > +
> > + /*
> > + * Prevent any CPU hot{un}plug while sending the IPIs if we are to use
> > + * a shorthand, also refuse to use a shorthand if not all CPUs are
> > + * online or have been parked.
> > + */
> > + if ( system_state > SYS_STATE_smp_boot && !cpu_overflow &&
> > + /* NB: get_cpu_maps lock requires enabled interrupts. */
> > + local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
> > + (park_offline_cpus ||
> > + cpumask_equal(&cpu_online_map, &cpu_present_map)) )
>
> On the first and second pass I thought this contradicts the description.
> To disambiguate (and assuming I understand it correctly), how about
> saying something like "This can only be safely used when no CPU hotplug
> or unplug operations are taking place, there are no offline CPUs (unless
> those have been onlined and parked) and finally ..."?

I'm not sure I understand what should come after the finally ...
Taking your suggestion I would write:

This can only be safely used when no CPU hotplug or unplug operations
are taking place, there are no offline CPUs (unless those have been
onlined and parked), there are no disabled CPUs and all possible CPUs
in the system have been accounted for.

>
> > + {
> > + cpumask_copy(this_cpu(scratch_cpumask), &cpu_online_map);
> > + cpumask_clear_cpu(smp_processor_id(), this_cpu(scratch_cpumask));
>
> cpumask_andnot(this_cpu(scratch_cpumask), &cpu_online_map,
> cpumask_of(smp_processor_id()));
>
> ?
>
> > + }
> > + else
> > + {
> > + if ( cpus_locked )
> > + {
> > + put_cpu_maps();
> > + cpus_locked = false;
> > + }
> > + cpumask_clear(this_cpu(scratch_cpumask));
>
> Is there a guarantee that the function won't be called with an
> empty mask? All backing functions look to gracefully deal with
> this case, yet ...
>
> > + }
> > +
> > + if ( cpumask_equal(mask, this_cpu(scratch_cpumask)) )
> > + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
> > + APIC_DEST_PHYSICAL);
>
> ... you'd make this an "all-but" message then. Adding a
> !cpumask_empty() check would seem a little expensive, so how
> about you copy cpumask_of(smp_processor_id()) above and add
> !cpumask_test_cpu(smp_processor_id(), ...) here?

A cpumask_empty check at the top of the function would be easier to
parse, but it could incur in more overhead, I've implemented what you
describe.

> > + else
> > + alternative_vcall(genapic.send_IPI_mask, mask, vector);
>
> Is there no reason at all to also check here whether APIC_DEST_ALL
> could be used? Oh, I see - the backing functions all exclude the
> local CPU. I wonder why e.g. flush_area_mask() then clears the
> CPU off the mask it passes on. And with this behavior the
> single cpumask_equal() check above is too restrictive - you'll
> want to check whether mask matches the calculated all-but one or
> cpu_online_map. I.e. perhaps you want
>
> cpumask_or(this_cpu(scratch_cpumask), mask,
> cpumask_of(smp_processor_id()));
>
> and then
>
> if ( cpumask_equal(this_cpu(scratch_cpumask), &cpu_online_map) )
>
> ?

Oh, OK, in which case most of the comments above are moot if we go
this route. What I have now if I properly parsed your comments is:

bool cpus_locked = false;
cpumask_t *scratch = this_cpu(scratch_cpumask);

/*
* This can only be safely used when no CPU hotplug or unplug operations
* are taking place, there are no offline CPUs (unless those have been
* onlined and parked), there are no disabled CPUs and all possible CPUs in
* the system have been accounted for.
*/
if ( system_state > SYS_STATE_smp_boot &&
!unaccounted_cpus && !disabled_cpus &&
/* NB: get_cpu_maps lock requires enabled interrupts. */
local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
(park_offline_cpus ||
cpumask_equal(&cpu_online_map, &cpu_present_map)) )
cpumask_or(scratch, mask, cpumask_of(smp_processor_id()));
else
{
if ( cpus_locked )
{
put_cpu_maps();
cpus_locked = false;
}
cpumask_clear(scratch);
}

if ( cpumask_equal(scratch, &cpu_online_map) )
send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_PHYSICAL);
else
alternative_vcall(genapic.send_IPI_mask, mask, vector);

if ( cpus_locked )
put_cpu_maps();

Can you confirm this matches your expectations? I think it fixes your
comments about empty masks and a mask containing the current pCPU ID.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
Re: [PATCH] x86/smp: use APIC ALLBUT destination shorthand when possible [ In reply to ]
On 16.01.2020 16:05, Roger Pau Monné wrote:
> On Thu, Jan 16, 2020 at 11:44:51AM +0100, Jan Beulich wrote:
>> Thinking about it, what about the period of time between a CPU having
>> got physically hot-added (and hence known at the hardware level) and
>> it actually getting brought up for the first time? IOW - do you
>> perhaps need to exclude use of the shortcut also when disabled_cpus
>> is non-zero?
>
> Yes, I guess there's no signal to Xen that a new CPU is going to be
> physically hot-added, and hence as long as there are empty slots the
> scenario you describe above is possible.
>
> I don't think there's also anyway to figure out if a system is capable
> of physical CPU hotplug, so as long as there are disabled_cpus we
> shouldn't use the shortcut (that's kind of a shame, because I think
> there are many systems reporting disabled CPUs in MADT).

We may want to provide a command line control to assert "I'm not going
to".

>>> @@ -30,7 +56,40 @@
>>>
>>> void send_IPI_mask(const cpumask_t *mask, int vector)
>>> {
>>> - alternative_vcall(genapic.send_IPI_mask, mask, vector);
>>> + bool cpus_locked = false;
>>> +
>>> + /*
>>> + * Prevent any CPU hot{un}plug while sending the IPIs if we are to use
>>> + * a shorthand, also refuse to use a shorthand if not all CPUs are
>>> + * online or have been parked.
>>> + */
>>> + if ( system_state > SYS_STATE_smp_boot && !cpu_overflow &&
>>> + /* NB: get_cpu_maps lock requires enabled interrupts. */
>>> + local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
>>> + (park_offline_cpus ||
>>> + cpumask_equal(&cpu_online_map, &cpu_present_map)) )
>>
>> On the first and second pass I thought this contradicts the description.
>> To disambiguate (and assuming I understand it correctly), how about
>> saying something like "This can only be safely used when no CPU hotplug
>> or unplug operations are taking place, there are no offline CPUs (unless
>> those have been onlined and parked) and finally ..."?
>
> I'm not sure I understand what should come after the finally ...

That was to continue with what you had in your text.

>>> + else
>>> + {
>>> + if ( cpus_locked )
>>> + {
>>> + put_cpu_maps();
>>> + cpus_locked = false;
>>> + }
>>> + cpumask_clear(this_cpu(scratch_cpumask));
>>
>> Is there a guarantee that the function won't be called with an
>> empty mask? All backing functions look to gracefully deal with
>> this case, yet ...
>>
>>> + }
>>> +
>>> + if ( cpumask_equal(mask, this_cpu(scratch_cpumask)) )
>>> + __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
>>> + APIC_DEST_PHYSICAL);
>>
>> ... you'd make this an "all-but" message then. Adding a
>> !cpumask_empty() check would seem a little expensive, so how
>> about you copy cpumask_of(smp_processor_id()) above and add
>> !cpumask_test_cpu(smp_processor_id(), ...) here?
>
> A cpumask_empty check at the top of the function would be easier to
> parse, but it could incur in more overhead, I've implemented what you
> describe.
>
>>> + else
>>> + alternative_vcall(genapic.send_IPI_mask, mask, vector);
>>
>> Is there no reason at all to also check here whether APIC_DEST_ALL
>> could be used? Oh, I see - the backing functions all exclude the
>> local CPU. I wonder why e.g. flush_area_mask() then clears the
>> CPU off the mask it passes on. And with this behavior the
>> single cpumask_equal() check above is too restrictive - you'll
>> want to check whether mask matches the calculated all-but one or
>> cpu_online_map. I.e. perhaps you want
>>
>> cpumask_or(this_cpu(scratch_cpumask), mask,
>> cpumask_of(smp_processor_id()));
>>
>> and then
>>
>> if ( cpumask_equal(this_cpu(scratch_cpumask), &cpu_online_map) )
>>
>> ?
>
> Oh, OK, in which case most of the comments above are moot if we go
> this route. What I have now if I properly parsed your comments is:
>
> bool cpus_locked = false;
> cpumask_t *scratch = this_cpu(scratch_cpumask);
>
> /*
> * This can only be safely used when no CPU hotplug or unplug operations
> * are taking place, there are no offline CPUs (unless those have been
> * onlined and parked), there are no disabled CPUs and all possible CPUs in
> * the system have been accounted for.
> */
> if ( system_state > SYS_STATE_smp_boot &&
> !unaccounted_cpus && !disabled_cpus &&
> /* NB: get_cpu_maps lock requires enabled interrupts. */
> local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
> (park_offline_cpus ||
> cpumask_equal(&cpu_online_map, &cpu_present_map)) )
> cpumask_or(scratch, mask, cpumask_of(smp_processor_id()));
> else
> {
> if ( cpus_locked )
> {
> put_cpu_maps();
> cpus_locked = false;
> }
> cpumask_clear(scratch);
> }
>
> if ( cpumask_equal(scratch, &cpu_online_map) )
> send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_PHYSICAL);
> else
> alternative_vcall(genapic.send_IPI_mask, mask, vector);
>
> if ( cpus_locked )
> put_cpu_maps();
>
> Can assert this matches your expectations? I think it fixes your
> comments about empty masks and a mask containing the current pCPU ID.

Looks like so.

Jan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel