Mailing List Archive

[xen-unstable] x86: IRQ affinity should track vCPU affinity
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1276755726 -3600
# Node ID 0695a5cdcb42d98dcd4bbda35614753787aa7983
# Parent b9c541d9c13822e92719ccfe77fbd0241410f2c5
x86: IRQ affinity should track vCPU affinity

When an IRQ is bound, its affinity is set to the pCPU that the binding
vCPU is running on at that moment. As soon as that vCPU moves to a
different pCPU, this can cause quite a bit of extra cross-CPU traffic,
so the IRQ's affinity should follow the vCPU. Likewise, when a domain
re-binds an event channel associated with a pIRQ to a different vCPU,
that IRQ's affinity should also be adjusted.

An open issue is how to break ties for interrupts shared by multiple
domains; currently, whichever request was made most recently is
honored.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
---
xen/arch/ia64/xen/irq.c | 5 +++
xen/arch/x86/hvm/hvm.c | 2 -
xen/arch/x86/irq.c | 22 +++++++++++----
xen/common/event_channel.c | 65 ++++++++++++++++++++++++++++++++++++++++-----
xen/common/sched_credit.c | 8 ++++-
xen/common/sched_credit2.c | 9 +++++-
xen/common/sched_sedf.c | 2 +
xen/common/schedule.c | 7 ++++
xen/include/asm-x86/irq.h | 2 -
xen/include/xen/cpumask.h | 2 -
xen/include/xen/event.h | 3 ++
xen/include/xen/irq.h | 1
xen/include/xen/sched-if.h | 1
xen/include/xen/sched.h | 9 +++++-
14 files changed, 120 insertions(+), 18 deletions(-)

diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/ia64/xen/irq.c
--- a/xen/arch/ia64/xen/irq.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/ia64/xen/irq.c Thu Jun 17 07:22:06 2010 +0100
@@ -612,6 +612,11 @@ xen_debug_irq(unsigned long vector, stru
}
}

+void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *mask)
+{
+ /* FIXME */
+}
+
/*
* Exit an interrupt context. Process softirqs if needed and possible:
*/
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/x86/hvm/hvm.c Thu Jun 17 07:22:06 2010 +0100
@@ -271,7 +271,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
continue;
irq = desc - irq_desc;
ASSERT(MSI_IRQ(irq));
- irq_set_affinity(irq, *cpumask_of(v->processor));
+ irq_set_affinity(desc, cpumask_of(v->processor));
spin_unlock_irq(&desc->lock);
}
spin_unlock(&d->event_lock);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/arch/x86/irq.c Thu Jun 17 07:22:06 2010 +0100
@@ -501,16 +501,28 @@ void move_native_irq(int irq)
}

/* For re-setting irq interrupt affinity for specific irq */
-void irq_set_affinity(int irq, cpumask_t mask)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
+void irq_set_affinity(struct irq_desc *desc, const cpumask_t *mask)
+{
if (!desc->handler->set_affinity)
return;

ASSERT(spin_is_locked(&desc->lock));
+ desc->status &= ~IRQ_MOVE_PENDING;
+ wmb();
+ cpus_copy(desc->pending_mask, *mask);
+ wmb();
desc->status |= IRQ_MOVE_PENDING;
- cpus_copy(desc->pending_mask, mask);
+}
+
+void pirq_set_affinity(struct domain *d, int pirq, const cpumask_t *mask)
+{
+ unsigned long flags;
+ struct irq_desc *desc = domain_spin_lock_irq_desc(d, pirq, &flags);
+
+ if ( !desc )
+ return;
+ irq_set_affinity(desc, mask);
+ spin_unlock_irqrestore(&desc->lock, flags);
}

DEFINE_PER_CPU(unsigned int, irq_count);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/event_channel.c
--- a/xen/common/event_channel.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/event_channel.c Thu Jun 17 07:22:06 2010 +0100
@@ -295,10 +295,36 @@ static long evtchn_bind_ipi(evtchn_bind_
}


+static void link_pirq_port(int port, struct evtchn *chn, struct vcpu *v)
+{
+ chn->u.pirq.prev_port = 0;
+ chn->u.pirq.next_port = v->pirq_evtchn_head;
+ if ( v->pirq_evtchn_head )
+ evtchn_from_port(v->domain, v->pirq_evtchn_head)
+ ->u.pirq.prev_port = port;
+ v->pirq_evtchn_head = port;
+}
+
+static void unlink_pirq_port(struct evtchn *chn, struct vcpu *v)
+{
+ struct domain *d = v->domain;
+
+ if ( chn->u.pirq.prev_port )
+ evtchn_from_port(d, chn->u.pirq.prev_port)->u.pirq.next_port =
+ chn->u.pirq.next_port;
+ else
+ v->pirq_evtchn_head = chn->u.pirq.next_port;
+ if ( chn->u.pirq.next_port )
+ evtchn_from_port(d, chn->u.pirq.next_port)->u.pirq.prev_port =
+ chn->u.pirq.prev_port;
+}
+
+
static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
{
struct evtchn *chn;
struct domain *d = current->domain;
+ struct vcpu *v = d->vcpu[0];
int port, pirq = bind->pirq;
long rc;

@@ -319,7 +345,7 @@ static long evtchn_bind_pirq(evtchn_bind
chn = evtchn_from_port(d, port);

d->pirq_to_evtchn[pirq] = port;
- rc = pirq_guest_bind(d->vcpu[0], pirq,
+ rc = pirq_guest_bind(v, pirq,
!!(bind->flags & BIND_PIRQ__WILL_SHARE));
if ( rc != 0 )
{
@@ -328,7 +354,8 @@ static long evtchn_bind_pirq(evtchn_bind
}

chn->state = ECS_PIRQ;
- chn->u.pirq = pirq;
+ chn->u.pirq.irq = pirq;
+ link_pirq_port(port, chn, v);

bind->port = port;

@@ -376,8 +403,9 @@ static long __evtchn_close(struct domain
break;

case ECS_PIRQ:
- pirq_guest_unbind(d1, chn1->u.pirq);
- d1->pirq_to_evtchn[chn1->u.pirq] = 0;
+ pirq_guest_unbind(d1, chn1->u.pirq.irq);
+ d1->pirq_to_evtchn[chn1->u.pirq.irq] = 0;
+ unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]);
break;

case ECS_VIRQ:
@@ -688,7 +716,7 @@ static long evtchn_status(evtchn_status_
break;
case ECS_PIRQ:
status->status = EVTCHNSTAT_pirq;
- status->u.pirq = chn->u.pirq;
+ status->u.pirq = chn->u.pirq.irq;
break;
case ECS_VIRQ:
status->status = EVTCHNSTAT_virq;
@@ -747,8 +775,16 @@ long evtchn_bind_vcpu(unsigned int port,
break;
case ECS_UNBOUND:
case ECS_INTERDOMAIN:
+ chn->notify_vcpu_id = vcpu_id;
+ break;
case ECS_PIRQ:
+ if ( chn->notify_vcpu_id == vcpu_id )
+ break;
+ unlink_pirq_port(chn, d->vcpu[chn->notify_vcpu_id]);
chn->notify_vcpu_id = vcpu_id;
+ pirq_set_affinity(d, chn->u.pirq.irq,
+ cpumask_of(d->vcpu[vcpu_id]->processor));
+ link_pirq_port(port, chn, d->vcpu[vcpu_id]);
break;
default:
rc = -EINVAL;
@@ -1061,6 +1097,23 @@ void evtchn_destroy_final(struct domain
xfree(d->poll_mask);
d->poll_mask = NULL;
#endif
+}
+
+
+void evtchn_move_pirqs(struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ const cpumask_t *mask = cpumask_of(v->processor);
+ unsigned int port;
+ struct evtchn *chn;
+
+ spin_lock(&d->event_lock);
+ for ( port = v->pirq_evtchn_head; port; port = chn->u.pirq.next_port )
+ {
+ chn = evtchn_from_port(d, port);
+ pirq_set_affinity(d, chn->u.pirq.irq, mask);
+ }
+ spin_unlock(&d->event_lock);
}


@@ -1105,7 +1158,7 @@ static void domain_dump_evtchn_info(stru
chn->u.interdomain.remote_port);
break;
case ECS_PIRQ:
- printk(" p=%d", chn->u.pirq);
+ printk(" p=%d", chn->u.pirq.irq);
break;
case ECS_VIRQ:
printk(" v=%d", chn->u.virq);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_credit.c Thu Jun 17 07:22:06 2010 +0100
@@ -1168,7 +1168,7 @@ csched_runq_steal(int peer_cpu, int cpu,

static struct csched_vcpu *
csched_load_balance(struct csched_private *prv, int cpu,
- struct csched_vcpu *snext)
+ struct csched_vcpu *snext, bool_t *stolen)
{
struct csched_vcpu *speer;
cpumask_t workers;
@@ -1221,7 +1221,10 @@ csched_load_balance(struct csched_privat
speer = csched_runq_steal(peer_cpu, cpu, snext->pri);
spin_unlock(per_cpu(schedule_data, peer_cpu).schedule_lock);
if ( speer != NULL )
+ {
+ *stolen = 1;
return speer;
+ }
}

out:
@@ -1269,6 +1272,7 @@ csched_schedule(
BUG_ON( is_idle_vcpu(current) || list_empty(runq) );

snext = __runq_elem(runq->next);
+ ret.migrated = 0;

/* Tasklet work (which runs in idle VCPU context) overrides all else. */
if ( tasklet_work_scheduled )
@@ -1288,7 +1292,7 @@ csched_schedule(
if ( snext->pri > CSCHED_PRI_TS_OVER )
__runq_remove(snext);
else
- snext = csched_load_balance(prv, cpu, snext);
+ snext = csched_load_balance(prv, cpu, snext, &ret.migrated);

/*
* Update idlers mask if necessary. When we're idling, other CPUs
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_credit2.c Thu Jun 17 07:22:06 2010 +0100
@@ -991,10 +991,17 @@ csched_schedule(
}
#endif

+ ret.migrated = 0;
+
if ( !is_idle_vcpu(snext->vcpu) )
{
snext->start_time = now;
- snext->vcpu->processor = cpu; /* Safe because lock for old processor is held */
+ /* Safe because lock for old processor is held */
+ if ( snext->vcpu->processor != cpu )
+ {
+ snext->vcpu->processor = cpu;
+ ret.migrated = 1;
+ }
}

/*
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/sched_sedf.c Thu Jun 17 07:22:06 2010 +0100
@@ -874,6 +874,8 @@ static struct task_slice sedf_do_schedul
ret.time);
ret.time = EXTRA_QUANTUM;
}
+
+ ret.migrated = 0;

EDOM_INFO(ret.task)->sched_start_abs = now;
CHECK(ret.time > 0);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/common/schedule.c
--- a/xen/common/schedule.c Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/common/schedule.c Thu Jun 17 07:22:06 2010 +0100
@@ -272,6 +272,7 @@ int sched_move_domain(struct domain *d,
cpus_setall(v->cpu_affinity);
v->processor = new_p;
v->sched_priv = vcpu_priv[v->vcpu_id];
+ evtchn_move_pirqs(v);

new_p = cycle_cpu(new_p, c->cpu_valid);
}
@@ -418,6 +419,9 @@ static void vcpu_migrate(struct vcpu *v)
v->processor = new_cpu;
spin_unlock_irqrestore(
per_cpu(schedule_data, old_cpu).schedule_lock, flags);
+
+ if ( old_cpu != new_cpu )
+ evtchn_move_pirqs(v);

/* Wake on new CPU. */
vcpu_wake(v);
@@ -1094,6 +1098,9 @@ static void schedule(void)

stop_timer(&prev->periodic_timer);

+ if ( next_slice.migrated )
+ evtchn_move_pirqs(next);
+
/* Ensure that the domain has an up-to-date time base. */
update_vcpu_system_time(next);
vcpu_periodic_timer_work(next);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/asm-x86/irq.h Thu Jun 17 07:22:06 2010 +0100
@@ -143,7 +143,7 @@ void move_native_irq(int irq);

void move_masked_irq(int irq);

-void irq_set_affinity(int irq, cpumask_t mask);
+void irq_set_affinity(struct irq_desc *, const cpumask_t *mask);

#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/cpumask.h Thu Jun 17 07:22:06 2010 +0100
@@ -206,7 +206,7 @@ static inline int __cpus_weight(const cp
}

#define cpus_copy(dest, src) __cpus_copy(&(dest), &(src))
-static inline void __cpus_copy(cpumask_t *dstp, cpumask_t *srcp)
+static inline void __cpus_copy(cpumask_t *dstp, const cpumask_t *srcp)
{
bitmap_copy(dstp->bits, srcp->bits, NR_CPUS);
}
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/event.h
--- a/xen/include/xen/event.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/event.h Thu Jun 17 07:22:06 2010 +0100
@@ -47,6 +47,9 @@ long evtchn_bind_vcpu(unsigned int port,
/* Unmask a local event-channel port. */
int evtchn_unmask(unsigned int port);

+/* Move all PIRQs after a vCPU was moved to another pCPU. */
+void evtchn_move_pirqs(struct vcpu *v);
+
/* Allocate/free a Xen-attached event channel port. */
int alloc_unbound_xen_event_channel(
struct vcpu *local_vcpu, domid_t remote_domid);
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/irq.h Thu Jun 17 07:22:06 2010 +0100
@@ -139,6 +139,7 @@ extern int pirq_guest_unmask(struct doma
extern int pirq_guest_unmask(struct domain *d);
extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share);
extern void pirq_guest_unbind(struct domain *d, int irq);
+extern void pirq_set_affinity(struct domain *d, int irq, const cpumask_t *);
extern irq_desc_t *domain_spin_lock_irq_desc(
struct domain *d, int irq, unsigned long *pflags);

diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/sched-if.h Thu Jun 17 07:22:06 2010 +0100
@@ -79,6 +79,7 @@ struct task_slice {
struct task_slice {
struct vcpu *task;
s_time_t time;
+ bool_t migrated;
};

struct scheduler {
diff -r b9c541d9c138 -r 0695a5cdcb42 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Tue Jun 15 13:27:14 2010 +0100
+++ b/xen/include/xen/sched.h Thu Jun 17 07:22:06 2010 +0100
@@ -61,7 +61,11 @@ struct evtchn
u16 remote_port;
struct domain *remote_dom;
} interdomain; /* state == ECS_INTERDOMAIN */
- u16 pirq; /* state == ECS_PIRQ */
+ struct {
+ u16 irq;
+ u16 next_port;
+ u16 prev_port;
+ } pirq; /* state == ECS_PIRQ */
u16 virq; /* state == ECS_VIRQ */
} u;
#ifdef FLASK_ENABLE
@@ -141,6 +145,9 @@ struct vcpu
* < 0: multiple ports may be being polled.
*/
int poll_evtchn;
+
+ /* (over-)protected by ->domain->event_lock */
+ int pirq_evtchn_head;

unsigned long pause_flags;
atomic_t pause_count;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog