Mailing List Archive

[PATCH 7 of 8] libxl: automatic placement deals with node-affinity
Which basically means the following two things:
1) during domain creation, it is the node-affinity of
the domain --rather than the vcpu-affinities of its
vcpus-- that is affected by automatic placement;
2) during automatic placement, when counting how many
vcpus are already "bound" to a placement candidate
(as part of the process of choosing the best
candidate), node-affinity is also considered,
together with vcpu-affinity.

Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc *
{
int found;
libxl__numa_candidate candidate;
- libxl_bitmap candidate_nodemap;
+ libxl_bitmap cpupool_nodemap;
libxl_cpupoolinfo cpupool_info;
int i, cpupool, rc = 0;
uint32_t memkb;

libxl__numa_candidate_init(&candidate);
- libxl_bitmap_init(&candidate_nodemap);
+ libxl_bitmap_init(&cpupool_nodemap);

/*
* Extract the cpumap from the cpupool the domain belong to. In fact,
@@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc *
rc = libxl_domain_need_memory(CTX, info, &memkb);
if (rc)
goto out;
- if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) {
+ if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
rc = ERROR_FAIL;
goto out;
}
@@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc *
if (found == 0)
goto out;

- /* Map the candidate's node map to the domain's info->cpumap */
- libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap);
- rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap);
+ /* Map the candidate's node map to the domain's info->nodemap */
+ libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
+
+ /* Avoid trying to set the affinity to nodes that might be in the
+ * candidate's nodemap but out of our cpupool. */
+ rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
+ &cpupool_nodemap);
if (rc)
goto out;

- /* Avoid trying to set the affinity to cpus that might be in the
- * nodemap but not in our cpupool. */
- libxl_for_each_set_bit(i, info->cpumap) {
- if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
- libxl_bitmap_reset(&info->cpumap, i);
+ libxl_for_each_set_bit(i, info->nodemap) {
+ if (!libxl_bitmap_test(&cpupool_nodemap, i))
+ libxl_bitmap_reset(&info->nodemap, i);
}

LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
@@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc *

out:
libxl__numa_candidate_dispose(&candidate);
- libxl_bitmap_dispose(&candidate_nodemap);
+ libxl_bitmap_dispose(&cpupool_nodemap);
libxl_cpupoolinfo_dispose(&cpupool_info);
return rc;
}
@@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint
/*
* Check if the domain has any CPU affinity. If not, try to build
* up one. In case numa_place_domain() find at least a suitable
- * candidate, it will affect info->cpumap accordingly; if it
+ * candidate, it will affect info->nodemap accordingly; if it
* does not, it just leaves it as it is. This means (unless
* some weird error manifests) the subsequent call to
- * libxl_set_vcpuaffinity_all() will do the actual placement,
+ * libxl_domain_set_nodeaffinity() will do the actual placement,
* whatever that turns out to be.
*/
if (libxl_defbool_val(info->numa_placement)) {
diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
--- a/tools/libxl/libxl_numa.c
+++ b/tools/libxl/libxl_numa.c
@@ -171,7 +171,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
const libxl_bitmap *nodemap)
{
libxl_dominfo *dinfo = NULL;
- libxl_bitmap vcpu_nodemap;
+ libxl_bitmap vcpu_nodemap, dom_nodemap;
int nr_doms, nr_cpus;
int nr_vcpus = 0;
int i, j, k;
@@ -185,6 +185,12 @@ static int nodemap_to_nr_vcpus(libxl__gc
return ERROR_FAIL;
}

+ if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) {
+ libxl_dominfo_list_free(dinfo, nr_doms);
+ libxl_bitmap_dispose(&vcpu_nodemap);
+ return ERROR_FAIL;
+ }
+
for (i = 0; i < nr_doms; i++) {
libxl_vcpuinfo *vinfo;
int nr_dom_vcpus;
@@ -193,6 +199,9 @@ static int nodemap_to_nr_vcpus(libxl__gc
if (vinfo == NULL)
continue;

+ /* Retrieve the domain's node-affinity map (see below) */
+ libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap);
+
/* For each vcpu of each domain ... */
for (j = 0; j < nr_dom_vcpus; j++) {

@@ -201,9 +210,17 @@ static int nodemap_to_nr_vcpus(libxl__gc
libxl_for_each_set_bit(k, vinfo[j].cpumap)
libxl_bitmap_set(&vcpu_nodemap, tinfo[k].node);

- /* And check if that map has any intersection with our nodemap */
+ /*
+ * We now check whether the && of the vcpu's nodemap and the
+ * domain's nodemap has any intersection with the nodemap of our
+ * candidate.
+ * Using both (vcpu's and domain's) nodemaps allows us to take
+ * both vcpu-affinity and node-affinity into account when counting
+ * the number of vcpus bound to the candidate.
+ */
libxl_for_each_set_bit(k, vcpu_nodemap) {
- if (libxl_bitmap_test(nodemap, k)) {
+ if (libxl_bitmap_test(&dom_nodemap, k) &&
+ libxl_bitmap_test(nodemap, k)) {
nr_vcpus++;
break;
}
@@ -213,6 +230,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus);
}

+ libxl_bitmap_dispose(&dom_nodemap);
libxl_bitmap_dispose(&vcpu_nodemap);
libxl_dominfo_list_free(dinfo, nr_doms);
return nr_vcpus;

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [PATCH 7 of 8] libxl: automatic placement deals with node-affinity [ In reply to ]
On Fri, Oct 5, 2012 at 3:08 PM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> Which basically means the following two things:
> 1) during domain creation, it is the node-affinity of
> the domain --rather than the vcpu-affinities of its
> vcpus-- that is affected by automatic placement;
> 2) during automatic placement, when counting how many
> vcpus are already "bound" to a placement candidate
> (as part of the process of choosing the best
> candidate), node-affinity is also considered,
> together with vcpu-affinity.
>
> Signed-off-by: Dario Faggioli <dario.faggioli@citrix.com>

Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

>
> diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c
> +++ b/tools/libxl/libxl_dom.c
> @@ -133,13 +133,13 @@ static int numa_place_domain(libxl__gc *
> {
> int found;
> libxl__numa_candidate candidate;
> - libxl_bitmap candidate_nodemap;
> + libxl_bitmap cpupool_nodemap;
> libxl_cpupoolinfo cpupool_info;
> int i, cpupool, rc = 0;
> uint32_t memkb;
>
> libxl__numa_candidate_init(&candidate);
> - libxl_bitmap_init(&candidate_nodemap);
> + libxl_bitmap_init(&cpupool_nodemap);
>
> /*
> * Extract the cpumap from the cpupool the domain belong to. In fact,
> @@ -156,7 +156,7 @@ static int numa_place_domain(libxl__gc *
> rc = libxl_domain_need_memory(CTX, info, &memkb);
> if (rc)
> goto out;
> - if (libxl_node_bitmap_alloc(CTX, &candidate_nodemap, 0)) {
> + if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
> rc = ERROR_FAIL;
> goto out;
> }
> @@ -174,17 +174,19 @@ static int numa_place_domain(libxl__gc *
> if (found == 0)
> goto out;
>
> - /* Map the candidate's node map to the domain's info->cpumap */
> - libxl__numa_candidate_get_nodemap(gc, &candidate, &candidate_nodemap);
> - rc = libxl_nodemap_to_cpumap(CTX, &candidate_nodemap, &info->cpumap);
> + /* Map the candidate's node map to the domain's info->nodemap */
> + libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
> +
> + /* Avoid trying to set the affinity to nodes that might be in the
> + * candidate's nodemap but out of our cpupool. */
> + rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
> + &cpupool_nodemap);
> if (rc)
> goto out;
>
> - /* Avoid trying to set the affinity to cpus that might be in the
> - * nodemap but not in our cpupool. */
> - libxl_for_each_set_bit(i, info->cpumap) {
> - if (!libxl_bitmap_test(&cpupool_info.cpumap, i))
> - libxl_bitmap_reset(&info->cpumap, i);
> + libxl_for_each_set_bit(i, info->nodemap) {
> + if (!libxl_bitmap_test(&cpupool_nodemap, i))
> + libxl_bitmap_reset(&info->nodemap, i);
> }
>
> LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
> @@ -193,7 +195,7 @@ static int numa_place_domain(libxl__gc *
>
> out:
> libxl__numa_candidate_dispose(&candidate);
> - libxl_bitmap_dispose(&candidate_nodemap);
> + libxl_bitmap_dispose(&cpupool_nodemap);
> libxl_cpupoolinfo_dispose(&cpupool_info);
> return rc;
> }
> @@ -211,10 +213,10 @@ int libxl__build_pre(libxl__gc *gc, uint
> /*
> * Check if the domain has any CPU affinity. If not, try to build
> * up one. In case numa_place_domain() find at least a suitable
> - * candidate, it will affect info->cpumap accordingly; if it
> + * candidate, it will affect info->nodemap accordingly; if it
> * does not, it just leaves it as it is. This means (unless
> * some weird error manifests) the subsequent call to
> - * libxl_set_vcpuaffinity_all() will do the actual placement,
> + * libxl_domain_set_nodeaffinity() will do the actual placement,
> * whatever that turns out to be.
> */
> if (libxl_defbool_val(info->numa_placement)) {
> diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c
> --- a/tools/libxl/libxl_numa.c
> +++ b/tools/libxl/libxl_numa.c
> @@ -171,7 +171,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
> const libxl_bitmap *nodemap)
> {
> libxl_dominfo *dinfo = NULL;
> - libxl_bitmap vcpu_nodemap;
> + libxl_bitmap vcpu_nodemap, dom_nodemap;
> int nr_doms, nr_cpus;
> int nr_vcpus = 0;
> int i, j, k;
> @@ -185,6 +185,12 @@ static int nodemap_to_nr_vcpus(libxl__gc
> return ERROR_FAIL;
> }
>
> + if (libxl_node_bitmap_alloc(CTX, &dom_nodemap, 0) < 0) {
> + libxl_dominfo_list_free(dinfo, nr_doms);
> + libxl_bitmap_dispose(&vcpu_nodemap);
> + return ERROR_FAIL;
> + }
> +
> for (i = 0; i < nr_doms; i++) {
> libxl_vcpuinfo *vinfo;
> int nr_dom_vcpus;
> @@ -193,6 +199,9 @@ static int nodemap_to_nr_vcpus(libxl__gc
> if (vinfo == NULL)
> continue;
>
> + /* Retrieve the domain's node-affinity map (see below) */
> + libxl_domain_get_nodeaffinity(CTX, dinfo[i].domid, &dom_nodemap);
> +
> /* For each vcpu of each domain ... */
> for (j = 0; j < nr_dom_vcpus; j++) {
>
> @@ -201,9 +210,17 @@ static int nodemap_to_nr_vcpus(libxl__gc
> libxl_for_each_set_bit(k, vinfo[j].cpumap)
> libxl_bitmap_set(&vcpu_nodemap, tinfo[k].node);
>
> - /* And check if that map has any intersection with our nodemap */
> + /*
> + * We now check whether the && of the vcpu's nodemap and the
> + * domain's nodemap has any intersection with the nodemap of our
> + * candidate.
> + * Using both (vcpu's and domain's) nodemaps allows us to take
> + * both vcpu-affinity and node-affinity into account when counting
> + * the number of vcpus bound to the candidate.
> + */
> libxl_for_each_set_bit(k, vcpu_nodemap) {
> - if (libxl_bitmap_test(nodemap, k)) {
> + if (libxl_bitmap_test(&dom_nodemap, k) &&
> + libxl_bitmap_test(nodemap, k)) {
> nr_vcpus++;
> break;
> }
> @@ -213,6 +230,7 @@ static int nodemap_to_nr_vcpus(libxl__gc
> libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus);
> }
>
> + libxl_bitmap_dispose(&dom_nodemap);
> libxl_bitmap_dispose(&vcpu_nodemap);
> libxl_dominfo_list_free(dinfo, nr_doms);
> return nr_vcpus;
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel