[PATCHv2 1 of 1] Rework locking for sched_adjust.
The main idea is to move (as much as possible) locking logic
from generic code to the various pluggable schedulers.

While at it, the following is also accomplished:
- pausing all the non-current VCPUs of a domain while changing its
scheduling parameters is ineffective at preventing races and is
prone to deadlock, so that is removed.
- sedf needs a global lock to prevent races while adjusting
domains' scheduling parameters (as credit and credit2 already
have), so one is added.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>
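
For illustration only, and not part of the patch itself: after this change,
every dom_cntl/adjust implementation follows roughly the pattern sketched
below. The scheduler name "example" and the branch bodies are made up; only
the locking shape is the point.

    static int
    example_dom_cntl(const struct scheduler *ops, struct domain *d,
                     struct xen_domctl_scheduler_op *op)
    {
        struct example_private *prv = ops->sched_data; /* hypothetical */
        unsigned long flags;
        int rc = 0;

        /* Scheduler-wide lock: nests inside cpupool_lock, outside any
         * runqueue lock, and covers both the get and the put branch. */
        spin_lock_irqsave(&prv->lock, flags);

        if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
        {
            /* read the domain's parameters under the lock */
        }
        else
        {
            /* write them, taking runq locks (plain, non-_irq variant,
             * since IRQs are already off) around any field the
             * scheduling hot path also reads */
        }

        spin_unlock_irqrestore(&prv->lock, flags);
        return rc;
    }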

diff -r 38eb74c01d9d xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/sched_credit.c Wed Dec 07 15:45:55 2011 +0100
@@ -161,6 +161,7 @@ struct csched_dom {
* System-wide private data
*/
struct csched_private {
+ /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
spinlock_t lock;
struct list_head active_sdom;
uint32_t ncpus;
@@ -800,6 +801,10 @@ csched_dom_cntl(
struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;

+ /* Protect both get and put branches with the pluggable scheduler
+ * lock. Runq lock not needed anywhere in here. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
op->u.credit.weight = sdom->weight;
@@ -809,8 +814,6 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);

- spin_lock_irqsave(&prv->lock, flags);
-
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
@@ -824,9 +827,10 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;

- spin_unlock_irqrestore(&prv->lock, flags);
}

+ spin_unlock_irqrestore(&prv->lock, flags);
+
return 0;
}
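
Why the getinfo branch is now inside the critical section too (an editorial
sketch, not patch text): without prv->lock, the two reads below can
interleave with a concurrent putinfo and return a mixed snapshot of the
parameters.

    /* unlocked reader racing with putinfo: */
    op->u.credit.weight = sdom->weight;  /* value from before the update */
    op->u.credit.cap    = sdom->cap;     /* value from after it          */
    /* holding prv->lock across both reads rules this out */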

diff -r 38eb74c01d9d xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/sched_credit2.c Wed Dec 07 15:45:55 2011 +0100
@@ -1384,6 +1384,10 @@ csched_dom_cntl(
struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;

+ /* Must hold csched_priv lock to read and update sdom,
+ * runq lock to update csvcs. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
op->u.credit2.weight = sdom->weight;
@@ -1397,10 +1401,6 @@ csched_dom_cntl(
struct list_head *iter;
int old_weight;

- /* Must hold csched_priv lock to update sdom, runq lock to
- * update csvcs. */
- spin_lock_irqsave(&prv->lock, flags);
-
old_weight = sdom->weight;

sdom->weight = op->u.credit2.weight;
@@ -1411,22 +1411,23 @@ csched_dom_cntl(
struct csched_vcpu *svc = list_entry(iter, struct csched_vcpu, sdom_elem);

/* NB: Locking order is important here. Because we grab this lock here, we
- * must never lock csched_priv.lock if we're holding a runqueue
- * lock. */
- vcpu_schedule_lock_irq(svc->vcpu);
+ * must never lock csched_priv.lock if we're holding a runqueue lock.
+ * Also, calling vcpu_schedule_lock() is enough, since IRQs have already
+ * been disabled. */
+ vcpu_schedule_lock(svc->vcpu);

BUG_ON(svc->rqd != RQD(ops, svc->vcpu->processor));

svc->weight = sdom->weight;
update_max_weight(svc->rqd, svc->weight, old_weight);

- vcpu_schedule_unlock_irq(svc->vcpu);
+ vcpu_schedule_unlock(svc->vcpu);
}
-
- spin_unlock_irqrestore(&prv->lock, flags);
}
}

+ spin_unlock_irqrestore(&prv->lock, flags);
+
return 0;
}
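
To make the ordering in the hunk above explicit (a summary, not patch
text): prv->lock is the outer lock and is taken with spin_lock_irqsave(),
so IRQs are already disabled and the plain runqueue-lock variant suffices.

    spin_lock_irqsave(&prv->lock, flags);  /* outer lock, disables IRQs */
    vcpu_schedule_lock(svc->vcpu);         /* inner runq lock; the _irq
                                            * variant would be redundant */
    svc->weight = sdom->weight;            /* fields the scheduling hot
                                            * path reads under this lock */
    vcpu_schedule_unlock(svc->vcpu);
    spin_unlock_irqrestore(&prv->lock, flags);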

diff -r 38eb74c01d9d xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/sched_sedf.c Wed Dec 07 15:45:55 2011 +0100
@@ -61,6 +61,11 @@ struct sedf_dom_info {
struct domain *domain;
};

+struct sedf_priv_info {
+ /* lock for the whole pluggable scheduler, nests inside cpupool_lock */
+ spinlock_t lock;
+};
+
struct sedf_vcpu_info {
struct vcpu *vcpu;
struct list_head list;
@@ -115,6 +120,8 @@ struct sedf_cpu_info {
s_time_t current_slice_expires;
};

+#define SEDF_PRIV(_ops) \
+ ((struct sedf_priv_info *)((_ops)->sched_data))
#define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv))
#define CPU_INFO(cpu) \
((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
@@ -772,6 +779,31 @@ static struct task_slice sedf_do_extra_s
}


+static int sedf_init(struct scheduler *ops)
+{
+ struct sedf_priv_info *prv;
+
+ prv = xzalloc(struct sedf_priv_info);
+ if ( prv == NULL )
+ return -ENOMEM;
+
+ ops->sched_data = prv;
+ spin_lock_init(&prv->lock);
+
+ return 0;
+}
+
+
+static void sedf_deinit(const struct scheduler *ops)
+{
+ struct sedf_priv_info *prv;
+
+ prv = SEDF_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+
/* Main scheduling function
Reasons for calling this function are:
-timeslice for the current period used up
@@ -1320,22 +1352,15 @@ static void sedf_dump_cpu_state(const st


/* Adjusts periods and slices of the domains according to their weights. */
-static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, int nr_cpus, int *sumw, s_time_t *sumt)
{
struct vcpu *p;
struct domain *d;
- unsigned int cpu, nr_cpus = cpumask_last(&cpu_online_map) + 1;
- int *sumw = xzalloc_array(int, nr_cpus);
- s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
+ unsigned int cpu;

- if ( !sumw || !sumt )
- {
- xfree(sumt);
- xfree(sumw);
- return -ENOMEM;
- }
-
- /* Sum across all weights. */
+ /* Sum across all weights. Notice that no runq locking is needed
+ * here: the caller holds sedf_priv_info.lock and we're not changing
+ * anything that is accessed during scheduling. */
rcu_read_lock(&domlist_read_lock);
for_each_domain_in_cpupool( d, c )
{
@@ -1365,7 +1390,9 @@ static int sedf_adjust_weights(struct cp
}
rcu_read_unlock(&domlist_read_lock);

- /* Adjust all slices (and periods) to the new weight. */
+ /* Adjust all slices (and periods) to the new weight. Unlike above, we
+ need to take the runq lock for the various VCPUs: we're modifying
+ slice and period, which are referenced during scheduling. */
rcu_read_lock(&domlist_read_lock);
for_each_domain_in_cpupool( d, c )
{
@@ -1375,20 +1402,20 @@ static int sedf_adjust_weights(struct cp
continue;
if ( EDOM_INFO(p)->weight )
{
+ /* Interrupts already off */
+ vcpu_schedule_lock(p);
EDOM_INFO(p)->period_orig =
EDOM_INFO(p)->period = WEIGHT_PERIOD;
EDOM_INFO(p)->slice_orig =
EDOM_INFO(p)->slice =
(EDOM_INFO(p)->weight *
(WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[cpu])) / sumw[cpu];
+ vcpu_schedule_unlock(p);
}
}
}
rcu_read_unlock(&domlist_read_lock);

- xfree(sumt);
- xfree(sumw);
-
return 0;
}

@@ -1396,19 +1423,45 @@ static int sedf_adjust_weights(struct cp
/* set or fetch domain scheduling parameters */
static int sedf_adjust(const struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
+ struct sedf_priv_info *prv = SEDF_PRIV(ops);
+ unsigned long flags;
+ unsigned int nr_cpus = cpumask_last(&cpu_online_map) + 1;
+ int *sumw = xzalloc_array(int, nr_cpus);
+ s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);
struct vcpu *v;
- int rc;
+ int rc = 0;

PRINT(2,"sedf_adjust was called, domain-id %i new period %"PRIu64" "
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
p->domain_id, op->u.sedf.period, op->u.sedf.slice,
op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");

+ /* Serialize against the pluggable scheduler lock to protect from
+ * concurrent updates. We need to take the runq lock for the VCPUs
+ * as well, since we are touching extraweight, weight, slice and
+ * period. As in sched_credit2.c, runq locks nest inside the
+ * pluggable scheduler lock. */
+ spin_lock_irqsave(&prv->lock, flags);
+
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
+ /* These are used in sedf_adjust_weights() but have to be allocated in
+ * this function, as we need to avoid nesting xmem_pool_alloc's lock
+ * within our prv->lock. */
+ if ( !sumw || !sumt )
+ {
+ /* Check for allocation errors here; the _getinfo branch doesn't care. */
+ rc = -ENOMEM;
+ goto out;
+ }
+
/* Check for sane parameters. */
if ( !op->u.sedf.period && !op->u.sedf.weight )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
if ( op->u.sedf.weight )
{
if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
@@ -1417,59 +1470,78 @@ static int sedf_adjust(const struct sche
/* Weight-driven domains with extratime only. */
for_each_vcpu ( p, v )
{
+ /* (Here and everywhere in the following) IRQs are already off,
+ * hence vcpu_schedule_lock() is the one to use. */
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->extraweight = op->u.sedf.weight;
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->slice = 0;
EDOM_INFO(v)->period = WEIGHT_PERIOD;
+ vcpu_schedule_unlock(v);
}
}
else
{
/* Weight-driven domains with real-time execution. */
- for_each_vcpu ( p, v )
+ for_each_vcpu ( p, v ) {
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->weight = op->u.sedf.weight;
+ vcpu_schedule_unlock(v);
+ }
}
}
else
{
+ /*
+ * Sanity checking: note that disabling extra weight requires
+ * that we set a non-zero slice.
+ */
+ if ( (op->u.sedf.period > PERIOD_MAX) ||
+ (op->u.sedf.period < PERIOD_MIN) ||
+ (op->u.sedf.slice > op->u.sedf.period) ||
+ (op->u.sedf.slice < SLICE_MIN) )
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
/* Time-driven domains. */
for_each_vcpu ( p, v )
{
- /*
- * Sanity checking: note that disabling extra weight requires
- * that we set a non-zero slice.
- */
- if ( (op->u.sedf.period > PERIOD_MAX) ||
- (op->u.sedf.period < PERIOD_MIN) ||
- (op->u.sedf.slice > op->u.sedf.period) ||
- (op->u.sedf.slice < SLICE_MIN) )
- return -EINVAL;
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->weight = 0;
EDOM_INFO(v)->extraweight = 0;
EDOM_INFO(v)->period_orig =
EDOM_INFO(v)->period = op->u.sedf.period;
EDOM_INFO(v)->slice_orig =
EDOM_INFO(v)->slice = op->u.sedf.slice;
+ vcpu_schedule_unlock(v);
}
}

- rc = sedf_adjust_weights(p->cpupool, op);
+ rc = sedf_adjust_weights(p->cpupool, nr_cpus, sumw, sumt);
if ( rc )
- return rc;
+ goto out;

for_each_vcpu ( p, v )
{
+ vcpu_schedule_lock(v);
EDOM_INFO(v)->status =
(EDOM_INFO(v)->status &
~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
EDOM_INFO(v)->latency = op->u.sedf.latency;
extraq_check(v);
+ vcpu_schedule_unlock(v);
}
}
else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
{
if ( p->vcpu[0] == NULL )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
+
op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
@@ -1477,14 +1549,23 @@ static int sedf_adjust(const struct sche
op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
}

- PRINT(2,"sedf_adjust_finished\n");
- return 0;
+out:
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ xfree(sumt);
+ xfree(sumw);
+
+ PRINT(2,"sedf_adjust_finished with return code %d\n", rc);
+ return rc;
}

+static struct sedf_priv_info _sedf_priv;
+
const struct scheduler sched_sedf_def = {
- .name = "Simple EDF Scheduler",
- .opt_name = "sedf",
- .sched_id = XEN_SCHEDULER_SEDF,
+ .name = "Simple EDF Scheduler",
+ .opt_name = "sedf",
+ .sched_id = XEN_SCHEDULER_SEDF,
+ .sched_data = &_sedf_priv,

.init_domain = sedf_init_domain,
.destroy_domain = sedf_destroy_domain,
@@ -1498,6 +1579,9 @@ const struct scheduler sched_sedf_def =
.alloc_domdata = sedf_alloc_domdata,
.free_domdata = sedf_free_domdata,

+ .init = sedf_init,
+ .deinit = sedf_deinit,
+
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
.dump_cpu_state = sedf_dump_cpu_state,
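
One non-obvious detail in the sedf_adjust() rework above, restated as a
sketch (not patch text): the sumw/sumt arrays are allocated and freed
outside the critical section, because the allocator may take the xmem
pool's own lock, which must never nest inside prv->lock.

    int *sumw = xzalloc_array(int, nr_cpus);            /* may take the  */
    s_time_t *sumt = xzalloc_array(s_time_t, nr_cpus);  /* allocator's   */
                                                        /* internal lock */
    spin_lock_irqsave(&prv->lock, flags);
    /* ... use sumw/sumt; no xzalloc()/xfree() in here ... */
    spin_unlock_irqrestore(&prv->lock, flags);

    xfree(sumt);
    xfree(sumw);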
diff -r 38eb74c01d9d xen/common/schedule.c
--- a/xen/common/schedule.c Tue Dec 06 21:16:56 2011 +0000
+++ b/xen/common/schedule.c Wed Dec 07 15:45:55 2011 +0100
@@ -1005,7 +1005,6 @@ int sched_id(void)
/* Adjust scheduling parameter for a given domain. */
long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
{
- struct vcpu *v;
long ret;

if ( (op->sched_id != DOM2OP(d)->sched_id) ||
@@ -1013,40 +1012,11 @@ long sched_adjust(struct domain *d, stru
(op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
return -EINVAL;

- /*
- * Most VCPUs we can simply pause. If we are adjusting this VCPU then
- * we acquire the local schedule_lock to guard against concurrent updates.
- *
- * We only acquire the local schedule lock after we have paused all other
- * VCPUs in this domain. There are two reasons for this:
- * 1- We don't want to hold up interrupts as pausing a VCPU can
- * trigger a tlb shootdown.
- * 2- Pausing other VCPUs involves briefly locking the schedule
- * lock of the CPU they are running on. This CPU could be the
- * same as ours.
- */
-
- for_each_vcpu ( d, v )
- {
- if ( v != current )
- vcpu_pause(v);
- }
-
- if ( d == current->domain )
- vcpu_schedule_lock_irq(current);
-
+ /* NB: the pluggable scheduler code needs to take care
+ * of locking by itself. */
if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);

- if ( d == current->domain )
- vcpu_schedule_unlock_irq(current);
-
- for_each_vcpu ( d, v )
- {
- if ( v != current )
- vcpu_unpause(v);
- }
-
return ret;
}
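
Summing up, the lock ordering this patch establishes (outermost first),
written out as a comment for reference:

    /*
     * cpupool_lock
     *  -> scheduler-private lock (csched_private.lock in credit and
     *     credit2, sedf_priv_info.lock in sedf), taken with IRQs off
     *   -> per-CPU schedule/runqueue lock, taken with the plain
     *      vcpu_schedule_lock() since IRQs are already disabled
     */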


--
<<This happens because I choose it to happen!>> (Raistlin Majere)
-------------------------------------------------------------------
Dario Faggioli, http://retis.sssup.it/people/faggioli
Senior Software Engineer, Citrix Systems R&D Ltd., Cambridge (UK)
PhD Candidate, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)
Re: [PATCHv2 1 of 1] Rework locking for sched_adjust.
Dario, this patch won't apply due to wordwrap damage. Try using "hg
email", or sending as an attachment.

Thanks,
-George
