Pluggable schedulers know better than the generic code what (and when)
to lock, so let's delegate all the concurrency-related issues to them.
Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
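For reference, the lock nesting all of this relies on is summarized below
(a sketch assembled from the comments in the hunks that follow, not code
from any one file):

    /*
     * Lock hierarchy assumed throughout (outermost first):
     *
     *   cpupool_lock
     *     -> prv->lock    (scheduler-private lock, one per pluggable scheduler)
     *       -> runqueue / per-VCPU schedule locks
     *
     * Consequently, prv->lock must never be acquired while a runqueue
     * lock is already held.
     */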
diff -r 84b3e46aa7d2 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/sched_credit.c Wed Nov 23 15:09:14 2011 +0100
@@ -161,6 +161,7 @@ struct csched_dom {
* System-wide private data
*/
struct csched_private {
+ /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
spinlock_t lock;
struct list_head active_sdom;
uint32_t ncpus;
@@ -800,6 +801,10 @@ csched_dom_cntl(
struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
+ /* Protect both the get and the put branch with the pluggable
+ * scheduler lock; the runq lock is not needed anywhere in here. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
op->u.credit.weight = sdom->weight;
@@ -809,8 +814,6 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
- spin_lock_irqsave(&prv->lock, flags);
-
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
@@ -824,9 +827,10 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;
- spin_unlock_irqrestore(&prv->lock, flags);
}
+ spin_unlock_irqrestore(&prv->lock, flags);
+
return 0;
}
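The resulting structure of csched_dom_cntl() is sketched below (simplified,
with the active_sdom bookkeeping elided):

    /* Sketch: a single prv->lock critical section now covers both branches. */
    spin_lock_irqsave(&prv->lock, flags);

    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
    {
        op->u.credit.weight = sdom->weight;
        op->u.credit.cap = sdom->cap;
    }
    else /* XEN_DOMCTL_SCHEDOP_putinfo */
    {
        if ( op->u.credit.weight != 0 )
            sdom->weight = op->u.credit.weight;
        if ( op->u.credit.cap != (uint16_t)~0U )
            sdom->cap = op->u.credit.cap;
    }

    spin_unlock_irqrestore(&prv->lock, flags);
    return 0;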
diff -r 84b3e46aa7d2 xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/sched_credit2.c Wed Nov 23 15:09:14 2011 +0100
@@ -1384,6 +1384,10 @@ csched_dom_cntl(
struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
+ /* Must hold csched_priv lock to read and update sdom,
+ * runq lock to update csvcs. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
op->u.credit2.weight = sdom->weight;
@@ -1397,10 +1401,6 @@ csched_dom_cntl(
struct list_head *iter;
int old_weight;
- /* Must hold csched_priv lock to update sdom, runq lock to
- * update csvcs. */
- spin_lock_irqsave(&prv->lock, flags);
-
old_weight = sdom->weight;
sdom->weight = op->u.credit2.weight;
@@ -1411,22 +1411,23 @@ csched_dom_cntl(
struct csched_vcpu *svc = list_entry(iter, struct csched_vcpu, sdom_elem);
/* NB: Locking order is important here. Because we grab this lock here, we
- * must never lock csched_priv.lock if we're holding a runqueue
- * lock. */
- vcpu_schedule_lock_irq(svc->vcpu);
+ * must never lock csched_priv.lock if we're holding a runqueue lock.
+ * Also, calling vcpu_schedule_lock() is enough, since IRQs have already
+ * been disabled. */
+ vcpu_schedule_lock(svc->vcpu);
BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor));
svc->weight = sdom->weight;
update_max_weight(svc->rqd, svc->weight, old_weight);
- vcpu_schedule_unlock_irq(svc->vcpu);
+ vcpu_schedule_unlock(svc->vcpu);
}
-
- spin_unlock_irqrestore(&prv->lock, flags);
}
}
+ spin_unlock_irqrestore(&prv->lock, flags);
+
return 0;
}
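In other words, the pattern inside the weight-update loop is now the
following (sketch, condensed from the hunk above):

    spin_lock_irqsave(&prv->lock, flags);      /* disables IRQs */
    /* ... */
    vcpu_schedule_lock(svc->vcpu);             /* plain variant is enough:
                                                * IRQs are already off, and
                                                * prv->lock is always taken
                                                * before any runqueue lock */
    svc->weight = sdom->weight;
    update_max_weight(svc->rqd, svc->weight, old_weight);
    vcpu_schedule_unlock(svc->vcpu);
    /* ... */
    spin_unlock_irqrestore(&prv->lock, flags); /* restores the IRQ state */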
diff -r 84b3e46aa7d2 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/sched_sedf.c Wed Nov 23 15:09:14 2011 +0100
@@ -1397,18 +1397,28 @@ static int sedf_adjust_weights(struct cp
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct vcpu *v;
- int rc;
+ int rc = 0;
PRINT(2,"sedf_adjust was called, domain-id %i new period %"PRIu64" "
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
p->domain_id, op->u.sedf.period, op->u.sedf.slice,
op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");
+ /* Serialize everything against the runq lock to prevent concurrent
+ * updates (note that all non-current VCPUs of the domain have been
+ * paused in the caller). */
+ if ( p == current->domain )
+ vcpu_schedule_lock_irq(current);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
/* Check for sane parameters. */
if ( !op->u.sedf.period && !op->u.sedf.weight )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
if ( op->u.sedf.weight )
{
if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
@@ -1443,7 +1453,11 @@ static int sedf_adjust(const struct sche
(op->u.sedf.period < PERIOD_MIN) ||
(op->u.sedf.slice > op->u.sedf.period) ||
(op->u.sedf.slice < SLICE_MIN) )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->extraweight = 0;
EDOM_INFO(v)->period_orig =
@@ -1455,7 +1469,7 @@ static int sedf_adjust(const struct sche
rc = sedf_adjust_weights(p->cpupool, op);
if ( rc )
- return rc;
+ goto out;
for_each_vcpu ( p, v )
{
@@ -1469,7 +1483,11 @@ static int sedf_adjust(const struct sche
else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
if ( p->vcpu[0] == NULL )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
@@ -1477,8 +1495,12 @@ static int sedf_adjust(const struct sche
op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
}
- PRINT(2,"sedf_adjust_finished\n");
- return 0;
+out:
+ if ( p == current->domain )
+ vcpu_schedule_unlock_irq(current);
+
+ PRINT(2,"sedf_adjust_finished with return code %d\n", rc);
+ return rc;
}
const struct scheduler sched_sedf_def = {
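The goto-based single exit keeps the lock/unlock pairing easy to verify;
the overall shape of sedf_adjust() is now the following (sketch, with the
parameter handling elided):

    static int sedf_adjust_sketch(struct domain *p,
                                  struct xen_domctl_scheduler_op *op)
    {
        int rc = 0;

        /* Serialize against our own runq lock only when adjusting the
         * current domain; all other VCPUs of the domain have already
         * been paused by the caller. */
        if ( p == current->domain )
            vcpu_schedule_lock_irq(current);

        if ( 0 /* parameter sanity checks fail */ )
        {
            rc = -EINVAL;
            goto out;
        }

        /* ... apply (putinfo) or report (getinfo) the parameters ... */

     out:
        if ( p == current->domain )
            vcpu_schedule_unlock_irq(current);
        return rc;
    }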
diff -r 84b3e46aa7d2 xen/common/schedule.c
--- a/xen/common/schedule.c Wed Nov 23 12:03:37 2011 +0000
+++ b/xen/common/schedule.c Wed Nov 23 15:09:14 2011 +0100
@@ -1015,15 +1015,8 @@ long sched_adjust(struct domain *d, stru
/*
* Most VCPUs we can simply pause. If we are adjusting this VCPU then
- * we acquire the local schedule_lock to guard against concurrent updates.
- *
- * We only acquire the local schedule lock after we have paused all other
- * VCPUs in this domain. There are two reasons for this:
- * 1- We don't want to hold up interrupts as pausing a VCPU can
- * trigger a tlb shootdown.
- * 2- Pausing other VCPUs involves briefly locking the schedule
- * lock of the CPU they are running on. This CPU could be the
- * same as ours.
+ * it is up to the actual pluggable scheduler to guard against
+ * concurrent updates.
*/
for_each_vcpu ( d, v )
@@ -1032,15 +1025,9 @@ long sched_adjust(struct domain *d, stru
vcpu_pause(v);
}
- if ( d == current->domain )
- vcpu_schedule_lock_irq(current);
-
if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
- if ( d == current->domain )
- vcpu_schedule_unlock_irq(current);
-
for_each_vcpu ( d, v )
{
if ( v != current )
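With the generic locking gone, the core of sched_adjust() reduces to the
following (sketch):

    /* Pause every other VCPU of the domain... */
    for_each_vcpu ( d, v )
        if ( v != current )
            vcpu_pause(v);

    /* ...let the scheduler's adjust hook serialize however it sees fit... */
    if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
        TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);

    /* ...and then resume them all. */
    for_each_vcpu ( d, v )
        if ( v != current )
            vcpu_unpause(v);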
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-------------------------------------------------------------------
Dario Faggioli, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
PhD Candidate, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)