Mailing List Archive

[PATCH 2/3] xen: allow extra memory to be two regions
Allow the extra memory (used by the balloon driver) to be in two
regions (typically low and high memory). This allows the balloon
driver to increase the number of available low pages (if the initial
number if pages is small).

As a side effect, the algorithm for building the e820 memory map is
simpler and more obviously correct as the map supplied by the
hypervisor is (almost) used as is.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
arch/x86/xen/setup.c | 167 +++++++++++++++++++++++++-------------------------
include/xen/page.h | 2 +-
2 files changed, 84 insertions(+), 85 deletions(-)

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 30d0015..3421c9e 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -51,23 +51,29 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS];
*/
#define EXTRA_MEM_RATIO (10)

-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 extra_start, u64 size)
{
unsigned long pfn;
+ int i;

- u64 size = (u64)pages * PAGE_SIZE;
- u64 extra_start = xen_extra_mem[0].start + xen_extra_mem[0].size;
-
- if (!pages)
- return;
-
- e820_add_region(extra_start, size, E820_RAM);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+ /* Add new region. */
+ if (xen_extra_mem[i].size == 0) {
+ xen_extra_mem[i].start = extra_start;
+ xen_extra_mem[i].size = size;
+ break;
+ }
+ /* Append to existing region. */
+ if (xen_extra_mem[i].start + xen_extra_mem[i].size == extra_start) {
+ xen_extra_mem[i].size += size;
+ break;
+ }
+ }
+ if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+ printk(KERN_WARNING "Warning: not enough extra memory regions\n");

memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");

- xen_extra_mem[0].size += size;
-
xen_max_p2m_pfn = PFN_DOWN(extra_start + size);

for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
@@ -118,7 +124,8 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
}

static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
- const struct e820map *e820)
+ const struct e820entry *map,
+ int nr_map)
{
phys_addr_t max_addr = PFN_PHYS(max_pfn);
phys_addr_t last_end = ISA_END_ADDRESS;
@@ -126,13 +133,13 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
int i;

/* Free any unused memory above the low 1Mbyte. */
- for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
- phys_addr_t end = e820->map[i].addr;
+ for (i = 0; i < nr_map && last_end < max_addr; i++) {
+ phys_addr_t end = map[i].addr;
end = min(max_addr, end);

if (last_end < end)
released += xen_release_chunk(last_end, end);
- last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
+ last_end = max(last_end, map[i].addr + map[i].size);
}

if (last_end < max_addr)
@@ -184,20 +191,31 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
PFN_UP(start_pci), PFN_DOWN(last));
return identity;
}
+
+static unsigned long __init xen_get_max_pages(void)
+{
+ unsigned long max_pages = MAX_DOMAIN_PAGES; /* Limited by memory map. */
+
+ if (xen_initial_domain()) {
+ /* FIXME: ask hypervisor for max pages. */
+ }
+
+ return min(max_pages, MAX_DOMAIN_PAGES);
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
char * __init xen_memory_setup(void)
{
static struct e820entry map[E820MAX] __initdata;
- static struct e820entry map_raw[E820MAX] __initdata;

unsigned long max_pfn = xen_start_info->nr_pages;
unsigned long long mem_end;
int rc;
struct xen_memory_map memmap;
+ unsigned long max_pages;
unsigned long extra_pages = 0;
- unsigned long extra_limit;
unsigned long identity_pages = 0;
int i;
int op;
@@ -224,49 +242,54 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);

- memcpy(map_raw, map, sizeof(map));
- e820.nr_map = 0;
- xen_extra_mem[0].start = mem_end;
- for (i = 0; i < memmap.nr_entries; i++) {
- unsigned long long end;
-
- /* Guard against non-page aligned E820 entries. */
- if (map[i].type == E820_RAM)
- map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
-
- end = map[i].addr + map[i].size;
- if (map[i].type == E820_RAM && end > mem_end) {
- /* RAM off the end - may be partially included */
- u64 delta = min(map[i].size, end - mem_end);
-
- map[i].size -= delta;
- end -= delta;
-
- extra_pages += PFN_DOWN(delta);
- /*
- * Set RAM below 4GB that is not for us to be unusable.
- * This prevents "System RAM" address space from being
- * used as potential resource for I/O address (happens
- * when 'allocate_resource' is called).
- */
- if (delta &&
- (xen_initial_domain() && end < 0x100000000ULL))
- e820_add_region(end, delta, E820_UNUSABLE);
- }
+ /* Make sure the Xen-supplied memory map is well-ordered. */
+ /* FIXME: is this necessary? Does Xen provide any guarantees
+ on this? */
+ sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);

- if (map[i].size > 0 && end > xen_extra_mem[0].start)
- xen_extra_mem[0].start = end;
+ max_pages = xen_get_max_pages();
+ if (max_pages > max_pfn)
+ extra_pages += max_pages - max_pfn;

- /* Add region if any remains */
- if (map[i].size > 0)
- e820_add_region(map[i].addr, map[i].size, map[i].type);
- }
- /* Align the balloon area so that max_low_pfn does not get set
- * to be at the _end_ of the PCI gap at the far end (fee01000).
- * Note that the start of balloon area gets set in the loop above
- * to be past the last E820 region. */
- if (xen_initial_domain() && (xen_extra_mem[0].start < (1ULL<<32)))
- xen_extra_mem[0].start = (1ULL<<32);
+ extra_pages += xen_return_unused_memory(max_pfn, map, memmap.nr_entries);
+
+ /*
+ * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+ * factor the base size. On non-highmem systems, the base
+ * size is the full initial memory allocation; on highmem it
+ * is limited to the max size of lowmem, so that it doesn't
+ * get completely filled.
+ *
+ * In principle there could be a problem in lowmem systems if
+ * the initial memory is also very large with respect to
+ * lowmem, but we won't try to deal with that here.
+ */
+ extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages);
+
+ i = 0;
+ while (i < memmap.nr_entries) {
+ u64 addr = map[i].addr;
+ u64 size = map[i].size;
+ u32 type = map[i].type;
+
+ if (type == E820_RAM) {
+ if (addr < mem_end) {
+ size = min(size, mem_end - addr);
+ } else if (extra_pages) {
+ size = min(size, (u64)extra_pages * PAGE_SIZE);
+ extra_pages -= size / PAGE_SIZE;
+ xen_add_extra_mem(addr, size);
+ } else
+ type = E820_UNUSABLE;
+ }
+
+ e820_add_region(addr, size, type);
+
+ map[i].addr += size;
+ map[i].size -= size;
+ if (map[i].size == 0)
+ i++;
+ }

/*
* In domU, the ISA region is normal, usable memory, but we
@@ -292,35 +315,11 @@ char * __init xen_memory_setup(void)

sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

- extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
- /*
- * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
- * factor the base size. On non-highmem systems, the base
- * size is the full initial memory allocation; on highmem it
- * is limited to the max size of lowmem, so that it doesn't
- * get completely filled.
- *
- * In principle there could be a problem in lowmem systems if
- * the initial memory is also very large with respect to
- * lowmem, but we won't try to deal with that here.
- */
- extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
- max_pfn + extra_pages);
-
- if (extra_limit >= max_pfn)
- extra_pages = extra_limit - max_pfn;
- else
- extra_pages = 0;
-
- xen_add_extra_mem(extra_pages);
-
/*
* Set P2M for all non-RAM pages and E820 gaps to be identity
- * type PFNs. We supply it with the non-sanitized version
- * of the E820.
+ * type PFNs.
*/
- identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
+ identity_pages = xen_set_identity(e820.map, e820.nr_map);
printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
return "Xen";
}
diff --git a/include/xen/page.h b/include/xen/page.h
index b01f6ac..5a13bb3 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -8,7 +8,7 @@ struct xen_memory_region {
phys_addr_t size;
};

-#define XEN_EXTRA_MEM_MAX_REGIONS 1
+#define XEN_EXTRA_MEM_MAX_REGIONS 2

extern struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS];

--
1.7.4.1


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
On Tue, Aug 16, 2011 at 11:00:37AM +0100, David Vrabel wrote:
> Allow the extra memory (used by the balloon driver) to be in two
> regions (typically low and high memory). This allows the balloon
> driver to increase the number of available low pages (if the initial
> number if pages is small).
>
> As a side effect, the algorithm for building the e820 memory map is
> simpler and more obviously correct as the map supplied by the
> hypervisor is (almost) used as is.

Hm, which is not always good. The setting of 'E820_RESERVED' and 'E820_UNUSABLE',
and realigning of start of balloon space at 4GB (if necessary) changes
need to be preserved. You can look up the why if you run 'git annotate'
and look at those lines - we had lots of time getting those right.


You also need to provide the virgin copy of the E820 to the xen_set_identity
and not use the same version that is modified - which with the above setting
won't work.

I am curious - with the patch to the hypervisor - and with just a newly
implemented xen_get_max_pages() code path added to query the new
truncated amount of how many pages we need - wont that solve the problem?

>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> ---
> arch/x86/xen/setup.c | 167 +++++++++++++++++++++++++-------------------------
> include/xen/page.h | 2 +-
> 2 files changed, 84 insertions(+), 85 deletions(-)
>
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> index 30d0015..3421c9e 100644
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -51,23 +51,29 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS];
> */
> #define EXTRA_MEM_RATIO (10)
>
> -static void __init xen_add_extra_mem(unsigned long pages)
> +static void __init xen_add_extra_mem(u64 extra_start, u64 size)
> {
> unsigned long pfn;
> + int i;
>
> - u64 size = (u64)pages * PAGE_SIZE;
> - u64 extra_start = xen_extra_mem[0].start + xen_extra_mem[0].size;
> -
> - if (!pages)
> - return;
> -
> - e820_add_region(extra_start, size, E820_RAM);
> - sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
> + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
> + /* Add new region. */
> + if (xen_extra_mem[i].size == 0) {
> + xen_extra_mem[i].start = extra_start;
> + xen_extra_mem[i].size = size;
> + break;
> + }
> + /* Append to existing region. */
> + if (xen_extra_mem[i].start + xen_extra_mem[i].size == extra_start) {
> + xen_extra_mem[i].size += size;
> + break;
> + }
> + }
> + if (i == XEN_EXTRA_MEM_MAX_REGIONS)
> + printk(KERN_WARNING "Warning: not enough extra memory regions\n");
>
> memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
>
> - xen_extra_mem[0].size += size;
> -
> xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
>
> for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
> @@ -118,7 +124,8 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
> }
>
> static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
> - const struct e820map *e820)
> + const struct e820entry *map,
> + int nr_map)
> {
> phys_addr_t max_addr = PFN_PHYS(max_pfn);
> phys_addr_t last_end = ISA_END_ADDRESS;
> @@ -126,13 +133,13 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
> int i;
>
> /* Free any unused memory above the low 1Mbyte. */
> - for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
> - phys_addr_t end = e820->map[i].addr;
> + for (i = 0; i < nr_map && last_end < max_addr; i++) {
> + phys_addr_t end = map[i].addr;
> end = min(max_addr, end);
>
> if (last_end < end)
> released += xen_release_chunk(last_end, end);
> - last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
> + last_end = max(last_end, map[i].addr + map[i].size);
> }
>
> if (last_end < max_addr)
> @@ -184,20 +191,31 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
> PFN_UP(start_pci), PFN_DOWN(last));
> return identity;
> }
> +
> +static unsigned long __init xen_get_max_pages(void)
> +{
> + unsigned long max_pages = MAX_DOMAIN_PAGES; /* Limited by memory map. */
> +
> + if (xen_initial_domain()) {
> + /* FIXME: ask hypervisor for max pages. */
> + }
> +
> + return min(max_pages, MAX_DOMAIN_PAGES);
> +}
> +
> /**
> * machine_specific_memory_setup - Hook for machine specific memory setup.
> **/
> char * __init xen_memory_setup(void)
> {
> static struct e820entry map[E820MAX] __initdata;
> - static struct e820entry map_raw[E820MAX] __initdata;
>
> unsigned long max_pfn = xen_start_info->nr_pages;
> unsigned long long mem_end;
> int rc;
> struct xen_memory_map memmap;
> + unsigned long max_pages;
> unsigned long extra_pages = 0;
> - unsigned long extra_limit;
> unsigned long identity_pages = 0;
> int i;
> int op;
> @@ -224,49 +242,54 @@ char * __init xen_memory_setup(void)
> }
> BUG_ON(rc);
>
> - memcpy(map_raw, map, sizeof(map));
> - e820.nr_map = 0;
> - xen_extra_mem[0].start = mem_end;
> - for (i = 0; i < memmap.nr_entries; i++) {
> - unsigned long long end;
> -
> - /* Guard against non-page aligned E820 entries. */
> - if (map[i].type == E820_RAM)
> - map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
> -
> - end = map[i].addr + map[i].size;
> - if (map[i].type == E820_RAM && end > mem_end) {
> - /* RAM off the end - may be partially included */
> - u64 delta = min(map[i].size, end - mem_end);
> -
> - map[i].size -= delta;
> - end -= delta;
> -
> - extra_pages += PFN_DOWN(delta);
> - /*
> - * Set RAM below 4GB that is not for us to be unusable.
> - * This prevents "System RAM" address space from being
> - * used as potential resource for I/O address (happens
> - * when 'allocate_resource' is called).
> - */
> - if (delta &&
> - (xen_initial_domain() && end < 0x100000000ULL))
> - e820_add_region(end, delta, E820_UNUSABLE);
> - }
> + /* Make sure the Xen-supplied memory map is well-ordered. */
> + /* FIXME: is this necessary? Does Xen provide any guarantees
> + on this? */
> + sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
>
> - if (map[i].size > 0 && end > xen_extra_mem[0].start)
> - xen_extra_mem[0].start = end;
> + max_pages = xen_get_max_pages();
> + if (max_pages > max_pfn)
> + extra_pages += max_pages - max_pfn;
>
> - /* Add region if any remains */
> - if (map[i].size > 0)
> - e820_add_region(map[i].addr, map[i].size, map[i].type);
> - }
> - /* Align the balloon area so that max_low_pfn does not get set
> - * to be at the _end_ of the PCI gap at the far end (fee01000).
> - * Note that the start of balloon area gets set in the loop above
> - * to be past the last E820 region. */
> - if (xen_initial_domain() && (xen_extra_mem[0].start < (1ULL<<32)))
> - xen_extra_mem[0].start = (1ULL<<32);
> + extra_pages += xen_return_unused_memory(max_pfn, map, memmap.nr_entries);
> +
> + /*
> + * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
> + * factor the base size. On non-highmem systems, the base
> + * size is the full initial memory allocation; on highmem it
> + * is limited to the max size of lowmem, so that it doesn't
> + * get completely filled.
> + *
> + * In principle there could be a problem in lowmem systems if
> + * the initial memory is also very large with respect to
> + * lowmem, but we won't try to deal with that here.
> + */
> + extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages);
> +
> + i = 0;
> + while (i < memmap.nr_entries) {
> + u64 addr = map[i].addr;
> + u64 size = map[i].size;
> + u32 type = map[i].type;
> +
> + if (type == E820_RAM) {
> + if (addr < mem_end) {
> + size = min(size, mem_end - addr);
> + } else if (extra_pages) {
> + size = min(size, (u64)extra_pages * PAGE_SIZE);
> + extra_pages -= size / PAGE_SIZE;
> + xen_add_extra_mem(addr, size);
> + } else
> + type = E820_UNUSABLE;
> + }
> +
> + e820_add_region(addr, size, type);
> +
> + map[i].addr += size;
> + map[i].size -= size;
> + if (map[i].size == 0)
> + i++;
> + }
>
> /*
> * In domU, the ISA region is normal, usable memory, but we
> @@ -292,35 +315,11 @@ char * __init xen_memory_setup(void)
>
> sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
>
> - extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
> -
> - /*
> - * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
> - * factor the base size. On non-highmem systems, the base
> - * size is the full initial memory allocation; on highmem it
> - * is limited to the max size of lowmem, so that it doesn't
> - * get completely filled.
> - *
> - * In principle there could be a problem in lowmem systems if
> - * the initial memory is also very large with respect to
> - * lowmem, but we won't try to deal with that here.
> - */
> - extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
> - max_pfn + extra_pages);
> -
> - if (extra_limit >= max_pfn)
> - extra_pages = extra_limit - max_pfn;
> - else
> - extra_pages = 0;
> -
> - xen_add_extra_mem(extra_pages);
> -
> /*
> * Set P2M for all non-RAM pages and E820 gaps to be identity
> - * type PFNs. We supply it with the non-sanitized version
> - * of the E820.
> + * type PFNs.
> */
> - identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
> + identity_pages = xen_set_identity(e820.map, e820.nr_map);
> printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
> return "Xen";
> }
> diff --git a/include/xen/page.h b/include/xen/page.h
> index b01f6ac..5a13bb3 100644
> --- a/include/xen/page.h
> +++ b/include/xen/page.h
> @@ -8,7 +8,7 @@ struct xen_memory_region {
> phys_addr_t size;
> };
>
> -#define XEN_EXTRA_MEM_MAX_REGIONS 1
> +#define XEN_EXTRA_MEM_MAX_REGIONS 2
>
> extern struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS];
>
> --
> 1.7.4.1
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
On 16/08/11 14:48, Konrad Rzeszutek Wilk wrote:
> On Tue, Aug 16, 2011 at 11:00:37AM +0100, David Vrabel wrote:
>> Allow the extra memory (used by the balloon driver) to be in two
>> regions (typically low and high memory). This allows the balloon
>> driver to increase the number of available low pages (if the initial
>> number if pages is small).
>>
>> As a side effect, the algorithm for building the e820 memory map is
>> simpler and more obviously correct as the map supplied by the
>> hypervisor is (almost) used as is.
>
> Hm, which is not always good. The setting of 'E820_RESERVED' and 'E820_UNUSABLE',
> and realigning of start of balloon space at 4GB (if necessary) changes
> need to be preserved. You can look up the why if you run 'git annotate'
> and look at those lines - we had lots of time getting those right.

My understanding of the history is that the problems were caused by not
paying attention to the reserved regions reported in the machine memory
map. This proposed algorithm is careful to only alter RAM regions --
all reserved regions and gaps are preserved as-is. I should add some
comments explaining this.

For example, should a BIOS reserve memory above 4 GiB then the current
code will place the balloon memory over the top of it but the proposed
code will not.

> You also need to provide the virgin copy of the E820 to the xen_set_identity
> and not use the same version that is modified - which with the above setting
> won't work.

Because we don't alter any reserved regions the resulting map is fine
for this.

> I am curious - with the patch to the hypervisor - and with just a newly
> implemented xen_get_max_pages() code path added to query the new
> truncated amount of how many pages we need - wont that solve the problem?

I'll address this in another email.

David

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
On Tue, Aug 16, 2011 at 03:33:19PM +0100, David Vrabel wrote:
> On 16/08/11 14:48, Konrad Rzeszutek Wilk wrote:
> > On Tue, Aug 16, 2011 at 11:00:37AM +0100, David Vrabel wrote:
> >> Allow the extra memory (used by the balloon driver) to be in two
> >> regions (typically low and high memory). This allows the balloon
> >> driver to increase the number of available low pages (if the initial
> >> number if pages is small).
> >>
> >> As a side effect, the algorithm for building the e820 memory map is
> >> simpler and more obviously correct as the map supplied by the
> >> hypervisor is (almost) used as is.
> >
> > Hm, which is not always good. The setting of 'E820_RESERVED' and 'E820_UNUSABLE',
> > and realigning of start of balloon space at 4GB (if necessary) changes
> > need to be preserved. You can look up the why if you run 'git annotate'
> > and look at those lines - we had lots of time getting those right.
>
> My understanding of the history is that the problems were caused by not
> paying attention to the reserved regions reported in the machine memory

That might have been a problem too, but this is specific to RAM regions.
> map. This proposed algorithm is careful to only alter RAM regions --
> all reserved regions and gaps are preserved as-is. I should add some
> comments explaining this.

We cut RAM regions down and the Linux code thought that they were "gap" spaces
and used it as PCI I/O space. Hence we marked them as unusable. We need
that behavior.
>
> For example, should a BIOS reserve memory above 4 GiB then the current
> code will place the balloon memory over the top of it but the proposed
> code will not.

The problem with the 4GB was that the Local IOAPIC was not enumerated
in the E820 but was in the ACPI boot table. This meant that E820 had
a "gap" right before the 4GB limit and we thought it was the start of RAM
and marked it to be used as balloon space.

>
> > You also need to provide the virgin copy of the E820 to the xen_set_identity
> > and not use the same version that is modified - which with the above setting
> > won't work.
>
> Because we don't alter any reserved regions the resulting map is fine
> for this.
>
> > I am curious - with the patch to the hypervisor - and with just a newly
> > implemented xen_get_max_pages() code path added to query the new
> > truncated amount of how many pages we need - wont that solve the problem?
>
> I'll address this in another email.
>
> David

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
On 16/08/11 15:48, Konrad Rzeszutek Wilk wrote:
> On Tue, Aug 16, 2011 at 03:33:19PM +0100, David Vrabel wrote:
>> On 16/08/11 14:48, Konrad Rzeszutek Wilk wrote:
>>> On Tue, Aug 16, 2011 at 11:00:37AM +0100, David Vrabel wrote:
>>>> Allow the extra memory (used by the balloon driver) to be in two
>>>> regions (typically low and high memory). This allows the balloon
>>>> driver to increase the number of available low pages (if the initial
>>>> number if pages is small).
>>>>
>>>> As a side effect, the algorithm for building the e820 memory map is
>>>> simpler and more obviously correct as the map supplied by the
>>>> hypervisor is (almost) used as is.
>>>
>>> Hm, which is not always good. The setting of 'E820_RESERVED' and 'E820_UNUSABLE',
>>> and realigning of start of balloon space at 4GB (if necessary) changes
>>> need to be preserved. You can look up the why if you run 'git annotate'
>>> and look at those lines - we had lots of time getting those right.
>>
>> My understanding of the history is that the problems were caused by not
>> paying attention to the reserved regions reported in the machine memory
>
> That might have been a problem too, but this is specific to RAM regions.
>> map. This proposed algorithm is careful to only alter RAM regions --
>> all reserved regions and gaps are preserved as-is. I should add some
>> comments explaining this.
>
> We cut RAM regions down and the Linux code thought that they were "gap" spaces
> and used it as PCI I/O space. Hence we marked them as unusable. We need
> that behavior.

Okay. This behaviour is kept as well.

For example, on my test box with 8 GiB RAM with dom0 started with 752
MiB of initial pages and limited to 4 GiB.

[ 0.000000] Xen: 0000000000000000 - 000000000009e000 (usable)
[ 0.000000] Xen: 00000000000a0000 - 0000000000100000 (reserved)
[ 0.000000] Xen: 0000000000100000 - 00000000bf699000 (usable)
[ 0.000000] Xen: 00000000bf699000 - 00000000bf6af000 (reserved)
[ 0.000000] Xen: 00000000bf6af000 - 00000000bf6ce000 (ACPI data)
[ 0.000000] Xen: 00000000bf6ce000 - 00000000c0000000 (reserved)
[ 0.000000] Xen: 00000000e0000000 - 00000000f0000000 (reserved)
[ 0.000000] Xen: 00000000fe000000 - 0000000100000000 (reserved)
[ 0.000000] Xen: 0000000100000000 - 0000000140967000 (usable)
[ 0.000000] Xen: 0000000140967000 - 0000000240000000 (unusable)

David

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
On Tue, Aug 16, 2011 at 04:03:01PM +0100, David Vrabel wrote:
> On 16/08/11 15:48, Konrad Rzeszutek Wilk wrote:
> > On Tue, Aug 16, 2011 at 03:33:19PM +0100, David Vrabel wrote:
> >> On 16/08/11 14:48, Konrad Rzeszutek Wilk wrote:
> >>> On Tue, Aug 16, 2011 at 11:00:37AM +0100, David Vrabel wrote:
> >>>> Allow the extra memory (used by the balloon driver) to be in two
> >>>> regions (typically low and high memory). This allows the balloon
> >>>> driver to increase the number of available low pages (if the initial
> >>>> number if pages is small).
> >>>>
> >>>> As a side effect, the algorithm for building the e820 memory map is
> >>>> simpler and more obviously correct as the map supplied by the
> >>>> hypervisor is (almost) used as is.
> >>>
> >>> Hm, which is not always good. The setting of 'E820_RESERVED' and 'E820_UNUSABLE',
> >>> and realigning of start of balloon space at 4GB (if necessary) changes
> >>> need to be preserved. You can look up the why if you run 'git annotate'
> >>> and look at those lines - we had lots of time getting those right.
> >>
> >> My understanding of the history is that the problems were caused by not
> >> paying attention to the reserved regions reported in the machine memory
> >
> > That might have been a problem too, but this is specific to RAM regions.
> >> map. This proposed algorithm is careful to only alter RAM regions --
> >> all reserved regions and gaps are preserved as-is. I should add some
> >> comments explaining this.
> >
> > We cut RAM regions down and the Linux code thought that they were "gap" spaces
> > and used it as PCI I/O space. Hence we marked them as unusable. We need
> > that behavior.
>
> Okay. This behaviour is kept as well.
>
> For example, on my test box with 8 GiB RAM with dom0 started with 752
> MiB of initial pages and limited to 4 GiB.

So dom0_mem=max:4GB,725MB ?

>
> [ 0.000000] Xen: 0000000000000000 - 000000000009e000 (usable)
> [ 0.000000] Xen: 00000000000a0000 - 0000000000100000 (reserved)
> [ 0.000000] Xen: 0000000000100000 - 00000000bf699000 (usable)
> [ 0.000000] Xen: 00000000bf699000 - 00000000bf6af000 (reserved)
> [ 0.000000] Xen: 00000000bf6af000 - 00000000bf6ce000 (ACPI data)
> [ 0.000000] Xen: 00000000bf6ce000 - 00000000c0000000 (reserved)
> [ 0.000000] Xen: 00000000e0000000 - 00000000f0000000 (reserved)
> [ 0.000000] Xen: 00000000fe000000 - 0000000100000000 (reserved)
> [ 0.000000] Xen: 0000000100000000 - 0000000140967000 (usable)
> [ 0.000000] Xen: 0000000140967000 - 0000000240000000 (unusable)
>
What did it look before?

What does "Memory:" look before and after?

> David

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
> > Okay. This behaviour is kept as well.
> >
> > For example, on my test box with 8 GiB RAM with dom0 started with 752
> > MiB of initial pages and limited to 4 GiB.
>
> So dom0_mem=max:4GB,725MB ?
>
> >
> > [ 0.000000] Xen: 0000000000000000 - 000000000009e000 (usable)
> > [ 0.000000] Xen: 00000000000a0000 - 0000000000100000 (reserved)
> > [ 0.000000] Xen: 0000000000100000 - 00000000bf699000 (usable)
> > [ 0.000000] Xen: 00000000bf699000 - 00000000bf6af000 (reserved)
> > [ 0.000000] Xen: 00000000bf6af000 - 00000000bf6ce000 (ACPI data)
> > [ 0.000000] Xen: 00000000bf6ce000 - 00000000c0000000 (reserved)
> > [ 0.000000] Xen: 00000000e0000000 - 00000000f0000000 (reserved)
> > [ 0.000000] Xen: 00000000fe000000 - 0000000100000000 (reserved)
> > [ 0.000000] Xen: 0000000100000000 - 0000000140967000 (usable)
> > [ 0.000000] Xen: 0000000140967000 - 0000000240000000 (unusable)
> >
> What did it look before?
>
> What does "Memory:" look before and after?

ping? Did I miss some patches from you? Are you waiting for Keir to merge
the Xen hypervisor patches - if so I think we can just work on this in parallel?
>
> > David
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 2/3] xen: allow extra memory to be two regions [ In reply to ]
On 22/08/11 14:55, Konrad Rzeszutek Wilk wrote:
>
> ping? Did I miss some patches from you? Are you waiting for Keir to merge
> the Xen hypervisor patches - if so I think we can just work on this in parallel?

I sent a new series with you Cc'd. Did you not receive them?

http://lists.xensource.com/archives/html/xen-devel/2011-08/msg00810.html

David

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel