Mailing List Archive

[PATCH] linux/i386: make kernel tolerate variable start of hypervisor hole
This is the kernel side of the needed change. The hypervisor side, as it's
useful only with 32-on-64, will come with the 32-on-64 patches. On an 8 GB
machine, I saw a lowmem increase of 160 MB.
I didn't go the capabilities route, but rather introduced a separate note
that specifies the lower boundary the kernel wants to tolerate for the
hypervisor hole. That way it may eventually be possible to support more
than 16 GB for x86-32/PAE (at the obvious price of further reducing lowmem
in the guest).

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: head-2006-11-06/arch/i386/kernel/head-xen.S
===================================================================
--- head-2006-11-06.orig/arch/i386/kernel/head-xen.S 2006-11-06 09:07:40.000000000 +0100
+++ head-2006-11-06/arch/i386/kernel/head-xen.S 2006-11-10 16:38:48.000000000 +0100
@@ -9,7 +9,7 @@
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
-#include <xen/interface/arch-x86_32.h>
+#include <xen/interface/xen.h>
#include <xen/interface/elfnote.h>

/*
@@ -192,6 +192,7 @@ ENTRY(cpu_gdt_table)
#endif /* !CONFIG_XEN_COMPAT_030002 */
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
+ ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz,
"writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
#ifdef CONFIG_X86_PAE
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
Index: head-2006-11-06/include/asm-i386/mach-xen/setup_arch_post.h
===================================================================
--- head-2006-11-06.orig/include/asm-i386/mach-xen/setup_arch_post.h 2006-10-30 18:05:24.000000000 +0100
+++ head-2006-11-06/include/asm-i386/mach-xen/setup_arch_post.h 2006-11-10 15:43:02.000000000 +0100
@@ -92,8 +92,10 @@ void __init machine_specific_arch_setup(
#endif

if (HYPERVISOR_xen_version(XENVER_platform_parameters,
- &pp) == 0)
- set_fixaddr_top(pp.virt_start - PAGE_SIZE);
+ &pp) == 0) {
+ hypervisor_virt_start = pp.virt_start;
+ set_fixaddr_top();
+ }

machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
Index: head-2006-11-06/arch/i386/mm/fault-xen.c
===================================================================
--- head-2006-11-06.orig/arch/i386/mm/fault-xen.c 2006-10-30 13:40:28.000000000 +0100
+++ head-2006-11-06/arch/i386/mm/fault-xen.c 2006-11-10 15:38:42.000000000 +0100
@@ -282,12 +282,6 @@ static int spurious_fault(struct pt_regs
pmd_t *pmd;
pte_t *pte;

-#ifdef CONFIG_XEN
- /* Faults in hypervisor area are never spurious. */
- if (address >= HYPERVISOR_VIRT_START)
- return 0;
-#endif
-
/* Reserved-bit violation or user access to kernel space? */
if (error_code & 0x0c)
return 0;
@@ -372,7 +366,7 @@ fastcall void __kprobes do_page_fault(st
if (unlikely(address >= TASK_SIZE)) {
#ifdef CONFIG_XEN
/* Faults in hypervisor area can never be patched up. */
- if (address >= HYPERVISOR_VIRT_START)
+ if (address >= hypervisor_virt_start)
goto bad_area_nosemaphore;
#endif
if (!(error_code & 5))
Index: head-2006-11-06/arch/i386/mm/init-xen.c
===================================================================
--- head-2006-11-06.orig/arch/i386/mm/init-xen.c 2006-10-04 10:25:54.000000000 +0200
+++ head-2006-11-06/arch/i386/mm/init-xen.c 2006-11-10 15:49:49.000000000 +0100
@@ -130,7 +130,7 @@ static void __init page_table_range_init
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd))
+ if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
one_page_table_init(pmd);

vaddr += PMD_SIZE;
@@ -187,7 +187,7 @@ static void __init kernel_physical_mappi
pmd += pmd_idx;
for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
- if (address >= HYPERVISOR_VIRT_START)
+ if (address >= hypervisor_virt_start)
continue;

/* Map with big pages if possible, otherwise create normal page tables. */
@@ -410,7 +410,7 @@ static void __init pagetable_init (void)
* created - mappings will be set by set_fixmap():
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- page_table_range_init(vaddr, 0, pgd_base);
+ page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);

permanent_kmaps_init(pgd_base);
}
Index: head-2006-11-06/arch/i386/mm/pgtable-xen.c
===================================================================
--- head-2006-11-06.orig/arch/i386/mm/pgtable-xen.c 2006-11-06 12:22:54.000000000 +0100
+++ head-2006-11-06/arch/i386/mm/pgtable-xen.c 2006-11-10 15:49:08.000000000 +0100
@@ -186,9 +186,16 @@ void set_pmd_pfn(unsigned long vaddr, un
}

static int nr_fixmaps = 0;
+unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
EXPORT_SYMBOL(__FIXADDR_TOP);

+void __init set_fixaddr_top()
+{
+ BUG_ON(nr_fixmaps > 0);
+ __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
+}
+
void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
@@ -211,12 +218,6 @@ void __set_fixmap (enum fixed_addresses
nr_fixmaps++;
}

-void set_fixaddr_top(unsigned long top)
-{
- BUG_ON(nr_fixmaps > 0);
- __FIXADDR_TOP = top - PAGE_SIZE;
-}
-
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
Index: head-2006-11-06/include/asm-i386/mach-xen/asm/fixmap.h
===================================================================
--- head-2006-11-06.orig/include/asm-i386/mach-xen/asm/fixmap.h 2006-11-03 10:30:39.000000000 +0100
+++ head-2006-11-06/include/asm-i386/mach-xen/asm/fixmap.h 2006-11-10 15:46:13.000000000 +0100
@@ -98,7 +98,7 @@ enum fixed_addresses {
extern void __set_fixmap(enum fixed_addresses idx,
maddr_t phys, pgprot_t flags);

-extern void set_fixaddr_top(unsigned long top);
+extern void set_fixaddr_top(void);

#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
Index: head-2006-11-06/include/asm-i386/mach-xen/asm/hypervisor.h
===================================================================
--- head-2006-11-06.orig/include/asm-i386/mach-xen/asm/hypervisor.h 2006-10-26 11:42:38.000000000 +0200
+++ head-2006-11-06/include/asm-i386/mach-xen/asm/hypervisor.h 2006-11-10 15:48:03.000000000 +0100
@@ -56,6 +56,10 @@

extern shared_info_t *HYPERVISOR_shared_info;

+#ifdef CONFIG_X86_32
+extern unsigned long hypervisor_virt_start;
+#endif
+
/* arch/xen/i386/kernel/setup.c */
extern start_info_t *xen_start_info;
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
Index: head-2006-11-06/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h
===================================================================
--- head-2006-11-06.orig/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h 2006-09-21 09:57:46.000000000 +0200
+++ head-2006-11-06/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h 2006-11-10 16:09:20.000000000 +0100
@@ -9,7 +9,6 @@

#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-#define PTRS_PER_PGD_NO_HV (HYPERVISOR_VIRT_START >> PGDIR_SHIFT)

/*
* the i386 is two-level, so we don't really have any
Index: head-2006-11-06/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h
===================================================================
--- head-2006-11-06.orig/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h 2006-09-21 09:57:46.000000000 +0200
+++ head-2006-11-06/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h 2006-11-10 16:09:25.000000000 +0100
@@ -8,7 +8,6 @@
*/
#define PGDIR_SHIFT 30
#define PTRS_PER_PGD 4
-#define PTRS_PER_PGD_NO_HV 4

/*
* PMD_SHIFT determines the size of the area a middle-level
Index: ./../xen/include/public/elfnote.h
===================================================================
--- ./../xen/include/public/elfnote.h 2006-08-25 15:36:10.000000000 +0200
+++ ./../xen/include/public/elfnote.h 2006-11-10 16:27:45.000000000 +0100
@@ -138,6 +138,15 @@
*/
#define XEN_ELFNOTE_BSD_SYMTAB 11

+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW 12
+
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */

/*


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel