Clean up, fix, and rationalise RAM mapping in Xen.
ChangeSet 1.1462, 2005/05/19 13:36:18+01:00, kaf24@firebug.cl.cam.ac.uk

Clean up, fix, and rationalise RAM mapping in Xen.

First, x86/64 must take care to map only registered RAM areas and not
adjacent I/O holes -- otherwise a CPU may cache I/O space and cause
coherency conflicts on the memory bus.
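
(Editor's sketch, not part of the changeset: the x86/64 fix amounts to walking
the e820 map and handing only E820_RAM ranges to the new map_pages_to_xen()
helper, so reserved regions and I/O holes never enter the direct mapping. The
loop below only illustrates that idea using the e820 structures Xen already
carries -- it is not the actual hunk from arch/x86/x86_64/mm.c, and the
virtual base and alignment handling are simplified.)

    /* Illustration only: map registered RAM regions, skip everything else. */
    for ( i = 0; i < e820.nr_map; i++ )
    {
        if ( e820.map[i].type != E820_RAM )
            continue;
        map_pages_to_xen(
            PAGE_OFFSET + e820.map[i].addr,   /* virtual start  */
            e820.map[i].addr,                 /* physical start */
            e820.map[i].size,                 /* length (bytes) */
            PAGE_HYPERVISOR);
    }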

Second, map_pages() and the memguard mechanisms are no longer sub-arch
specific (moved to arch/x86/mm.c:map_pages_to_xen()).
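
(For context: callers that previously passed Xen's idle pagetable to
map_pages() now call the common helper with the same byte-based arguments, and
memguard asks for 4kB mappings via the new MAP_SMALL_PAGES flag so individual
pages can later be remapped not-present. The virt/phys values below are
placeholders; the real call sites appear in the diff that follows.)

    /* Before (sub-arch code, explicit pagetable argument): */
    map_pages(idle_pg_table, virt, phys, 4UL << 20, PAGE_HYPERVISOR);

    /* After (common code, always Xen's own pagetables): */
    map_pages_to_xen(virt, phys, 4UL << 20, PAGE_HYPERVISOR);

    /* Memguard variant: force 4kB mappings rather than 2MB super-pages. */
    map_pages_to_xen(virt, phys, 4UL << 20,
                     __PAGE_HYPERVISOR | MAP_SMALL_PAGES);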

Signed-off-by: Keir Fraser <keir@xensource.com>



arch/x86/acpi/boot.c    |   11 +-
arch/x86/boot/x86_64.S  |    8 -
arch/x86/domain.c       |    4
arch/x86/domain_build.c |    4
arch/x86/mm.c           |  180 ++++++++++++++++++++++--------------------
arch/x86/setup.c        |   55 +++++++------
arch/x86/x86_32/mm.c    |  142 +++++----------------------------
arch/x86/x86_64/mm.c    |  203 ++++++++----------------------------------------
include/asm-x86/mm.h    |    8 -
include/asm-x86/page.h  |   18 +++-
10 files changed, 219 insertions(+), 414 deletions(-)


diff -Nru a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c 2005-05-19 09:05:16 -04:00
+++ b/xen/arch/x86/acpi/boot.c 2005-05-19 09:05:16 -04:00
@@ -89,15 +89,18 @@
*/
enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;

-#ifdef CONFIG_X86_64
+#if 0/*def CONFIG_X86_64*/

/* rely on all ACPI tables being in the direct mapping */
char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
{
        if (!phys_addr || !size)
-                return NULL;
-        /* XEN: We map all e820 areas which should include every ACPI table. */
-        return __va(phys_addr);
+                return NULL;
+
+        if (phys_addr < (end_pfn_map << PAGE_SHIFT))
+                return __va(phys_addr);
+
+        return NULL;
}

#else
diff -Nru a/xen/arch/x86/boot/x86_64.S b/xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S 2005-05-19 09:05:15 -04:00
+++ b/xen/arch/x86/boot/x86_64.S 2005-05-19 09:05:15 -04:00
@@ -230,7 +230,7 @@
.quad 0x0000000000000000 /* unused */
.fill 4*NR_CPUS,8,0 /* space for TSS and LDT per CPU */

-/* Initial PML4 -- level-4 page table */
+/* Initial PML4 -- level-4 page table. */
.org 0x2000
ENTRY(idle_pg_table)
ENTRY(idle_pg_table_4)
@@ -238,15 +238,15 @@
.fill 261,8,0
.quad idle_pg_table_l3 - __PAGE_OFFSET + 7 # PML4[262]

-/* Initial PDP -- level-3 page table */
+/* Initial PDP -- level-3 page table. */
.org 0x3000
ENTRY(idle_pg_table_l3)
.quad idle_pg_table_l2 - __PAGE_OFFSET + 7

-/* Initial PDE -- level-2 page table. */
+/* Initial PDE -- level-2 page table. Maps first 64MB physical memory. */
.org 0x4000
ENTRY(idle_pg_table_l2)
- .macro identmap from=0, count=512
+ .macro identmap from=0, count=32
.if \count-1
identmap "(\from+0)","(\count/2)"
identmap "(\from+(0x200000*(\count/2)))","(\count/2)"
diff -Nru a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c 2005-05-19 09:05:15 -04:00
+++ b/xen/arch/x86/domain.c 2005-05-19 09:05:15 -04:00
@@ -264,7 +264,7 @@
ed->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
l1e_create_pfn(page_to_pfn(virt_to_page(gdt_table)),
- __PAGE_HYPERVISOR);
+ PAGE_HYPERVISOR);

ed->arch.guest_vtable = __linear_l2_table;
ed->arch.shadow_vtable = __shadow_linear_l2_table;
@@ -303,7 +303,7 @@
d->arch.mm_perdomain_pt + (ed->vcpu_id << PDPT_VCPU_SHIFT);
ed->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
l1e_create_pfn(page_to_pfn(virt_to_page(gdt_table)),
- __PAGE_HYPERVISOR);
+ PAGE_HYPERVISOR);
}

#ifdef CONFIG_VMX
diff -Nru a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c 2005-05-19 09:05:15 -04:00
+++ b/xen/arch/x86/domain_build.c 2005-05-19 09:05:15 -04:00
@@ -574,8 +574,8 @@
//
ASSERT( root_get_value(idle_pg_table[1]) == 0 );
ASSERT( pagetable_val(d->arch.phys_table) );
- idle_pg_table[1] = root_create_phys(pagetable_val(d->arch.phys_table),
- __PAGE_HYPERVISOR);
+ idle_pg_table[1] = root_create_phys(
+ pagetable_val(d->arch.phys_table), __PAGE_HYPERVISOR);
translate_l2pgtable(d, (l1_pgentry_t *)(1u << L2_PAGETABLE_SHIFT),
pagetable_get_pfn(ed->arch.guest_table));
idle_pg_table[1] = root_empty();
diff -Nru a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c 2005-05-19 09:05:16 -04:00
+++ b/xen/arch/x86/mm.c 2005-05-19 09:05:16 -04:00
@@ -160,8 +160,8 @@
        p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20);
        if ( p == 0 )
            panic("Not enough memory for frame table\n");
-        map_pages(idle_pg_table, FRAMETABLE_VIRT_START + i, p,
-                  4UL << 20, PAGE_HYPERVISOR);
+        map_pages_to_xen(
+            FRAMETABLE_VIRT_START + i, p, 4UL << 20, PAGE_HYPERVISOR);
    }

    memset(frame_table, 0, frame_table_size);
@@ -2833,101 +2833,113 @@
free_xenheap_page((unsigned long)d->arch.ptwr[PTWR_PT_INACTIVE].page);
}

+/* Map physical byte range (@p, @p+@s) at virt address @v in Xen's pagetables. */
+int map_pages_to_xen(
+    unsigned long v,
+    unsigned long p,
+    unsigned long s,
+    unsigned long flags)
+{
+    l2_pgentry_t *pl2e, ol2e;
+    l1_pgentry_t *pl1e;
+    unsigned int  i;

+    unsigned int  map_small_pages = !!(flags & MAP_SMALL_PAGES);
+    flags &= ~MAP_SMALL_PAGES;

-/************************************************************************/
-/************************************************************************/
-/************************************************************************/
+    while ( s != 0 )
+    {
+        pl2e = virt_to_xen_l2e(v);

-/* Graveyard: stuff below may be useful in future. */
-#if 0
- case MMUEXT_TRANSFER_PAGE:
- domid = (domid_t)(val >> 16);
- gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF));
-
- if ( unlikely(IS_XEN_HEAP_FRAME(page)) ||
- unlikely(!pfn_valid(pfn)) ||
- unlikely((e = find_domain_by_id(domid)) == NULL) )
+        if ( (((v|p) & ((1 << L2_PAGETABLE_SHIFT) - 1)) == 0) &&
+             (s >= (1 << L2_PAGETABLE_SHIFT)) &&
+             !map_small_pages )
        {
- MEM_LOG("Bad frame (%p) or bad domid (%d).\n", pfn, domid);
- okay = 0;
- break;
+            /* Super-page mapping. */
+            ol2e  = *pl2e;
+            *pl2e = l2e_create_phys(p, flags|_PAGE_PSE);
+
+            if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
+            {
+                local_flush_tlb_pge();
+                if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
+                    free_xen_pagetable(l2e_get_page(*pl2e));
+            }
+
+            v += 1 << L2_PAGETABLE_SHIFT;
+            p += 1 << L2_PAGETABLE_SHIFT;
+            s -= 1 << L2_PAGETABLE_SHIFT;
        }
+        else
+        {
+            /* Normal page mapping. */
+            if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
+            {
+                pl1e = page_to_virt(alloc_xen_pagetable());
+                clear_page(pl1e);
+                *pl2e = l2e_create_phys(__pa(pl1e), __PAGE_HYPERVISOR);
+            }
+            else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
+            {
+                pl1e = page_to_virt(alloc_xen_pagetable());
+                for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+                    pl1e[i] = l1e_create_pfn(
+                        l2e_get_pfn(*pl2e) + i,
+                        l2e_get_flags(*pl2e) & ~_PAGE_PSE);
+                *pl2e = l2e_create_phys(__pa(pl1e), __PAGE_HYPERVISOR);
+                local_flush_tlb_pge();
+            }
+
+            pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v);
+            if ( (l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
+                local_flush_tlb_one(v);
+            *pl1e = l1e_create_phys(p, flags);
+
+            v += 1 << L1_PAGETABLE_SHIFT;
+            p += 1 << L1_PAGETABLE_SHIFT;
+            s -= 1 << L1_PAGETABLE_SHIFT;
+        }
+    }

- spin_lock(&d->page_alloc_lock);
+    return 0;
+}

- /*
- * The tricky bit: atomically release ownership while there is just one
- * benign reference to the page (PGC_allocated). If that reference
- * disappears then the deallocation routine will safely spin.
- */
- _d = pickle_domptr(d);
- _nd = page->u.inuse._domain;
- y = page->count_info;
- do {
- x = y;
- if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
- (1|PGC_allocated)) ||
- unlikely(_nd != _d) )
- {
- MEM_LOG("Bad page values %p: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%08x\n", page_to_pfn(page),
- d, d->domain_id, unpickle_domptr(_nd), x,
- page->u.inuse.type_info);
- spin_unlock(&d->page_alloc_lock);
- put_domain(e);
- return 0;
- }
- __asm__ __volatile__(
- LOCK_PREFIX "cmpxchg8b %2"
- : "=d" (_nd), "=a" (y),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
- }
- while ( unlikely(_nd != _d) || unlikely(y != x) );
+#ifdef MEMORY_GUARD

- /*
- * Unlink from 'd'. At least one reference remains (now anonymous), so
- * noone else is spinning to try to delete this page from 'd'.
- */
- d->tot_pages--;
- list_del(&page->list);
-
- spin_unlock(&d->page_alloc_lock);
+void memguard_init(void)
+{
+    map_pages_to_xen(
+        PAGE_OFFSET, 0, xenheap_phys_end, __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
+}

- spin_lock(&e->page_alloc_lock);
+static void __memguard_change_range(void *p, unsigned long l, int guard)
+{
+    unsigned long _p = (unsigned long)p;
+    unsigned long _l = (unsigned long)l;
+    unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;

- /*

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog