[PATCHv2 1 of 1] Rework locking for sched_adjust.
The main idea is to move (as much as possible) locking logic
from generic code to the various pluggable schedulers.

While at it, the following is also accomplished:
- pausing all the non-current VCPUs of a domain while changing its
scheduling parameters is ineffective at preventing races and is
prone to deadlock, so that is removed.
- sedf needs a global lock to prevent races while adjusting
domains' scheduling parameters (as credit and credit2 already
have), so one is added.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
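
For illustration only, and not part of the patch itself: after this change,
every dom_cntl/adjust implementation follows roughly the pattern sketched
below. The scheduler name "example" and the branch bodies are made up; only
the locking shape is the point.

    static int
    example_dom_cntl(const struct scheduler *ops, struct domain *d,
                     struct xen_domctl_scheduler_op *op)
    {
        struct example_private *prv = ops->sched_data; /* hypothetical */
        unsigned long flags;
        int rc = 0;

        /* Scheduler-wide lock: nests inside cpupool_lock, outside any
         * runqueue lock, and covers both the get and the put branch. */
        spin_lock_irqsave(&prv->lock, flags);

        if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
        {
            /* read the domain's parameters under the lock */
        }
        else
        {
            /* write them, taking runq locks (plain, non-_irq variant,
             * since IRQs are already off) around any field the
             * scheduling hot path also reads */
        }

        spin_unlock_irqrestore(&prv->lock, flags);
        return rc;
    }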

diff -r 38eb74c01d9d xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/sched_credit.c Wed Dec 07 15:45:55 2011 +0100
@@ -161,6 +161,7 @@ struct csched_dom {
* System-wide private data
*/
struct csched_private {
+ /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
spinlock_t lock;
struct list_head active_sdom;
uint32_t ncpus;
@@ -800,6 +801,10 @@ csched_dom_cntl(
struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;

+ /* Protect both get and put branches with the pluggable scheduler
+ * lock. Runq lock not needed anywhere in here. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
op->u.credit.weight = sdom->weight;
@@ -809,8 +814,6 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);

- spin_lock_irqsave(&prv->lock, flags);
-
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
@@ -824,9 +827,10 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;

- spin_unlock_irqrestore(&prv->lock, flags);
}

+ spin_unlock_irqrestore(&prv->lock, flags);
+
return 0;
}
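
Why the getinfo branch is now inside the critical section too (an editorial
sketch, not patch text): without prv->lock, the two reads below can
interleave with a concurrent putinfo and return a mixed snapshot of the
parameters.

    /* unlocked reader racing with putinfo: */
    op->u.credit.weight = sdom->weight;  /* value from before the update */
    op->u.credit.cap    = sdom->cap;     /* value from after it          */
    /* holding prv->lock across both reads rules this out */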

diff -r 38eb74c01d9d xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/sched_credit2.c Wed Dec 07 15:45:55 2011 +0100
@@ -1384,6 +1384,10 @@ csched_dom_cntl(
struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;

+ /* Must hold csched_priv lock to read and update sdom,
+ * runq lock to update csvcs. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
op->u.credit2.weight = sdom->weight;
@@ -1397,10 +1401,6 @@ csched_dom_cntl(
struct list_head *iter;
int old_weight;

- /* Must hold csched_priv lock to update sdom, runq lock to
- * update csvcs. */
- spin_lock_irqsave(&prv->lock, flags);
-
old_weight = sdom->weight;

sdom->weight = op->u.credit2.weight;
@@ -1411,22 +1411,23 @@ csched_dom_cntl(
struct csched_vcpu *svc = list_entry(iter, struct csched_vcpu, sdom_elem);

/* NB: Locking order is important here. Because we grab this lock here, we
- * must never lock csched_priv.lock if we're holding a runqueue
- * lock. */
- vcpu_schedule_lock_irq(svc->vcpu);
+ * must never lock csched_priv.lock if we're holding a runqueue lock.
+ * Also, calling vcpu_schedule_lock() is enough, since IRQs have already
+ * been disabled. */
+ vcpu_schedule_lock(svc->vcpu);

BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor));

svc->weight = sdom->weight;
update_max_weight(svc->rqd, svc->weight, old_weight);

- vcpu_schedule_unlock_irq(svc->vcpu);
+ vcpu_schedule_unlock(svc->vcpu);
}
-
- spin_unlock_irqrestore(&prv->lock, flags);
}
}

+ spin_unlock_irqrestore(&prv->lock, flags);
+
return 0;
}
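
To make the ordering in the hunk above explicit (a summary, not patch
text): prv->lock is the outer lock and is taken with spin_lock_irqsave(),
so IRQs are already disabled and the plain runqueue-lock variant suffices.

    spin_lock_irqsave(&prv->lock, flags);  /* outer lock, disables IRQs */
    vcpu_schedule_lock(svc->vcpu);         /* inner runq lock; the _irq
                                            * variant would be redundant */
    svc->weight = sdom->weight;            /* fields the scheduling hot
                                            * path reads under this lock */
    vcpu_schedule_unlock(svc->vcpu);
    spin_unlock_irqrestore(&prv->lock, flags);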

diff -r 38eb74c01d9d xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/sched_sedf.c Wed Dec 07 15:45:55 2011 +0100
@@ -61,6 +61,11 @@ struct sedf_dom_info {
struct domain *domain;
};

+struct sedf_priv_info {
+ /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
+ spinlock_t lock;
+};
+
struct sedf_vcpu_info {
struct vcpu *vcpu;
struct list_head list;
@@ -115,6 +120,8 @@ struct sedf_cpu_info {
s_time_t current_slice_expires;
};

+#define SEDF_PRIV(_ops) \
+ ((struct sedf_priv_info *)((_ops)->sched_data))
#define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv))
#define CPU_INFO(cpu) \
((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
@@ -772,6 +779,31 @@ static struct task_slice sedf_do_extra_s
}


+static int sedf_init(struct scheduler *ops)
+{
+ struct sedf_priv_info *prv;
+
+ prv = xzalloc(struct sedf_priv_info);
+ if ( prv == NULL )
+ return -ENOMEM;
+
+ ops->sched_data = prv;
+ spin_lock_init(&prv->lock);
+
+ return 0;
+}
+
+
+static void sedf_deinit(const struct scheduler *ops)
+{
+ struct sedf_priv_info *prv;
+
+ prv = SEDF_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+
/* Main scheduling function
Reasons for calling this function are:
-timeslice for the current period used up
@@ -1320,22 +1352,15 @@ static void sedf_dump_cpu_state(const st


/* Adjusts periods and slices of the domains according to their weights. */
-static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, int nr_cpus, int *sumw, s_time_t *sumt)
{
struct vcpu *p;
struct domain *d;
- unsigned int cpu, nr_cpus = cpumask_last(&cpu_online_map) + 1;
- int *sumw = xzalloc_array(int, nr_cpus);
- s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
+ unsigned int cpu;

- if ( !sumw || !sumt )
- {
- xfree(sumt);
- xfree(sumw);
- return -ENOMEM;
- }
-
- /* Sum across all weights. */
+ /* Sum across all weights. Notice that no runq locking is needed
+ * here: the caller holds sedf_priv_info.lock and we're not changing
+ * anything that is accessed during scheduling. */
rcu_read_lock(&domlist_read_lock);
for_each_domain_in_cpupool( d, c )
{
@@ -1365,7 +1390,9 @@ static int sedf_adjust_weights(struct cp
}
rcu_read_unlock(&domlist_read_lock);

- /* Adjust all slices (and periods) to the new weight. */
+ /* Adjust all slices (and periods) to the new weight. Unlike above, we
+ need to take the runq lock for the various VCPUs: we're modifying
+ slice and period, which are referenced during scheduling. */
rcu_read_lock(&domlist_read_lock);
for_each_domain_in_cpupool( d, c )
{
@@ -1375,20 +1402,20 @@ static int sedf_adjust_weights(struct cp
continue;
if ( EDOM_INFO(p)->weight )
{
+ /* Interrupts already off */
+ vcpu_schedule_lock(p);
EDOM_INFO(p)->period_orig =
EDOM_INFO(p)->period = WEIGHT_PERIOD;
EDOM_INFO(p)->slice_orig =
EDOM_INFO(p)->slice =
(EDOM_INFO(p)->weight *
(WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
+ vcpu_schedule_unlock(p);
}
}
}
rcu_read_unlock(&domlist_read_lock);

- xfree(sumt);
- xfree(sumw);
-
return 0;
}

@@ -1396,19 +1423,45 @@ static int sedf_adjust_weights(struct cp
/* set or fetch domain scheduling parameters */
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
+ struct sedf_priv_info *prv = SEDF_PRIV(ops);
+ unsigned long flags;
+ unsigned int nr_cpus = cpumask_last(&cpu_online_map) + 1;
+ int *sumw = xzalloc_array(int, nr_cpus);
+ s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
struct vcpu *v;
- int rc;
+ int rc = 0;

PRINT(2,"sedf_adjust was called, domain-id %i new period %"PRIu64" "
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
p->domain_id, op->u.sedf.period, op->u.sedf.slice,
op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");

+ /* Serialize against the pluggable scheduler lock to protect from
+ * concurrent updates. We need to take the runq lock for the VCPUs
+ * as well, since we are touching extraweight, weight, slice and
+ * period. As in sched_credit2.c, runq locks nest inside the
+ * pluggable scheduler lock. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
+ /* These are used in sedf_adjust_weights() but have to be allocated in
+ * this function, as we need to avoid nesting xmem_pool_alloc's lock
+ * within our prv->lock. */
+ if ( !sumw || !sumt )
+ {
+ /* Check for allocation errors here; the _getinfo branch doesn't care. */
+ rc = -ENOMEM;
+ goto out;
+ }
+
/* Check for sane parameters. */
if ( !op->u.sedf.period && !op->u.sedf.weight )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
if ( op->u.sedf.weight )
{
if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
@@ -1417,59 +1470,78 @@ static int sedf_adjust(const struct sche
/* Weight-driven domains with extratime only. */
for_each_vcpu ( p, v )
{
+ /* (Here and everywhere in the following) IRQs are already off,
+ * hence vcpu_schedule_lock() is the one to use. */
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->extraweight = op->u.sedf.weight;
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->slice = 0;
EDOM_INFO(v)->period = WEIGHT_PERIOD;
+ vcpu_schedule_unlock(v);
}
}
else
{
/* Weight-driven domains with real-time execution. */
- for_each_vcpu ( p, v )
+ for_each_vcpu ( p, v ) {
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->weight = op->u.sedf.weight;
+ vcpu_schedule_unlock(v);
+ }
}
}
else
{
+ /*
+ * Sanity checking: note that disabling extra weight requires
+ * that we set a non-zero slice.
+ */
+ if ( (op->u.sedf.period > PERIOD_MAX) ||
+ (op->u.sedf.period < PERIOD_MIN) ||
+ (op->u.sedf.slice > op->u.sedf.period) ||
+ (op->u.sedf.slice < SLICE_MIN) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
/* Time-driven domains. */
for_each_vcpu ( p, v )
{
- /*
- * Sanity checking: note that disabling extra weight requires
- * that we set a non-zero slice.
- */
- if ( (op->u.sedf.period > PERIOD_MAX) ||
- (op->u.sedf.period < PERIOD_MIN) ||
- (op->u.sedf.slice > op->u.sedf.period) ||
- (op->u.sedf.slice < SLICE_MIN) )
- return -EINVAL;
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->extraweight = 0;
EDOM_INFO(v)->period_orig =
EDOM_INFO(v)->period = op->u.sedf.period;
EDOM_INFO(v)->slice_orig =
EDOM_INFO(v)->slice = op->u.sedf.slice;
+ vcpu_schedule_unlock(v);
}
}

- rc = sedf_adjust_weights(p->cpupool, op);
+ rc = sedf_adjust_weights(p->cpupool, nr_cpus, sumw, sumt);
if ( rc )
- return rc;
+ goto out;

for_each_vcpu ( p, v )
{
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->status =
(EDOM_INFO(v)->status &
~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
EDOM_INFO(v)->latency = op->u.sedf.latency;
extraq_check(v);
+ vcpu_schedule_unlock(v);
}
}
else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
if ( p->vcpu[0] == NULL )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
@@ -1477,14 +1549,23 @@ static int sedf_adjust(const struct sche
op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
}

- PRINT(2,"sedf_adjust_finished\n");
- return 0;
+out:
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ xfree(sumt);
+ xfree(sumw);
+
+ PRINT(2,"sedf_adjust_finished with return code %d\n", rc);
+ return rc;
}

+static struct sedf_priv_info _sedf_priv;
+
const struct scheduler sched_sedf_def = {
- .name = "Simple EDF Scheduler",
- .opt_name = "sedf",
- .sched_id = XEN_SCHEDULER_SEDF,
+ .name = "Simple EDF Scheduler",
+ .opt_name = "sedf",
+ .sched_id = XEN_SCHEDULER_SEDF,
+ .sched_data = &_sedf_priv,

.init_domain = sedf_init_domain,
.destroy_domain = sedf_destroy_domain,
@@ -1498,6 +1579,9 @@ const struct scheduler sched_sedf_def =
.alloc_domdata = sedf_alloc_domdata,
.free_domdata = sedf_free_domdata,

+ .init = sedf_init,
+ .deinit = sedf_deinit,
+
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
.dump_cpu_state = sedf_dump_cpu_state,
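
One non-obvious detail in the sedf_adjust() rework above, restated as a
sketch (not patch text): the sumw/sumt arrays are allocated and freed
outside the critical section, because the allocator may take the xmem
pool's own lock, which must never nest inside prv->lock.

    int *sumw = xzalloc_array(int, nr_cpus);            /* may take the  */
    s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);  /* allocator's   */
                                                        /* internal lock */
    spin_lock_irqsave(&prv->lock, flags);
    /* ... use sumw/sumt; no xzalloc()/xfree() in here ... */
    spin_unlock_irqrestore(&prv->lock, flags);

    xfree(sumt);
    xfree(sumw);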
diff -r 38eb74c01d9d xen/common/schedule.c
--- a/xen/common/schedule.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/schedule.c Wed Dec 07 15:45:55 2011 +0100
@@ -1005,7 +1005,6 @@ int sched_id(void)
/* Adjust scheduling parameter for a given domain. */
long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
{
- struct vcpu *v;
long ret;

if ( (op->sched_id != DOM2OP(d)->sched_id) ||
@@ -1013,40 +1012,11 @@ long sched_adjust(struct domain *d, stru
(op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
return -EINVAL;

- /*
- * Most VCPUs we can simply pause. If we are adjusting this VCPU then
- * we acquire the local schedule_lock to guard against concurrent updates.
- *
- * We only acquire the local schedule lock after we have paused all other
- * VCPUs in this domain. There are two reasons for this:
- * 1- We don't want to hold up interrupts as pausing a VCPU can
- * trigger a tlb shootdown.
- * 2- Pausing other VCPUs involves briefly locking the schedule
- * lock of the CPU they are running on. This CPU could be the
- * same as ours.
- */
-
- for_each_vcpu ( d, v )
- {
- if ( v != current )
- vcpu_pause(v);
- }
-
- if ( d == current->domain )
- vcpu_schedule_lock_irq(current);
-
+ /* NB: the pluggable scheduler code needs to take care
+ * of locking by itself. */
if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);

- if ( d == current->domain )
- vcpu_schedule_unlock_irq(current);
-
- for_each_vcpu ( d, v )
- {
- if ( v != current )
- vcpu_unpause(v);
- }
-
return ret;
}
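
Summing up, the lock ordering this patch establishes (outermost first),
written out as a comment for reference:

    /*
     * cpupool_lock
     *  -> scheduler-private lock (csched_private.lock in credit and
     *     credit2, sedf_priv_info.lock in sedf), taken with IRQs off
     *   -> per-CPU schedule/runqueue lock, taken with the plain
     *      vcpu_schedule_lock() since IRQs are already disabled
     */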


--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-------------------------------------------------------------------
Dario Faggioli, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
PhD Candidate, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)
Re: [PATCHv2 1 of 1] Rework locking for sched_adjust.
Dario, this patch won't apply due to wordwrap damage. Try using "hg
email", or sending as an attachment.

Thanks,
-George
