# HG changeset patch
# User sos22@douglas.cl.cam.ac.uk
# Node ID 990c009015e8582eebf04698d845ca2b926ea6bf
# Parent 05a1340bc1e7d47d38520fffc571d17399a0cf8c
# Parent 60beade30a0c01c998f8fdea4a280d329228ce74
Merge up and generally make shadow mode be nice.

Signed-off-by: Steven Smith, sos22@cam.ac.uk

diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c Fri Jan 27 20:23:06 2006
@@ -572,7 +572,7 @@
va < gdt_descr->address + gdt_descr->size;
va += PAGE_SIZE, f++) {
frames[f] = virt_to_mfn(va);
- make_lowmem_page_readonly((void *)va);
+ make_lowmem_mmu_page_readonly((void *)va);
}
if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
BUG();
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c Fri Jan 27 20:23:06 2006
@@ -119,7 +119,7 @@
if (mm->context.size) {
if (mm == current->active_mm)
clear_LDT();
- make_pages_writable(mm->context.ldt,
+ make_pages_writable(mm->context.ldt,
(mm->context.size * LDT_ENTRY_SIZE) /
PAGE_SIZE);
if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c Fri Jan 27 20:23:06 2006
@@ -56,6 +56,7 @@
#include <asm/hypervisor.h>
#include <asm-xen/xen-public/physdev.h>
#include <asm-xen/xen-public/memory.h>
+#include <asm-xen/features.h>
#include "setup_arch_pre.h"
#include <bios_ebda.h>

@@ -1591,6 +1592,9 @@
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
+
+ setup_xen_features();
+
ARCH_SETUP
if (efi_enabled)
efi_init();
@@ -1601,6 +1605,7 @@

copy_edd();

+ /* Make the PFNs in the Xen hole reserved. */
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
init_mm.start_code = (unsigned long) _text;
@@ -1763,6 +1768,8 @@
console_use_vt = 0;
#endif
}
+
+
}

static int
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Fri Jan 27 20:23:06 2006
@@ -113,6 +113,20 @@
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

+unsigned long xen_pfn_hole_start(void)
+{
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PFN_HOLE_BASE;
+ return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+}
+
+unsigned long xen_pfn_hole_size(void)
+{
+ struct mmuext_op op;
+ op.cmd = MMUEXT_PFN_HOLE_SIZE;
+ return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+}
+
void xen_new_user_pt(unsigned long ptr)
{
struct mmuext_op op;
@@ -265,6 +279,7 @@
*/
unsigned long *contiguous_bitmap;

+#ifndef CONFIG_XEN_SHADOW_MODE
static void contiguous_bitmap_set(
unsigned long first_page, unsigned long nr_pages)
{
@@ -315,9 +330,9 @@
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- unsigned long mfn, i, flags;
+ unsigned long frame, i, flags;
struct xen_memory_reservation reservation = {
- .extent_start = &mfn,
+ .extent_start = &frame,
.nr_extents = 1,
.extent_order = 0,
.domid = DOMID_SELF
@@ -333,7 +348,7 @@
pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- mfn = pte_mfn(*pte);
+ frame = pte_mfn(*pte);
BUG_ON(HYPERVISOR_update_va_mapping(
vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
@@ -345,7 +360,8 @@
/* 2. Get a new contiguous memory extent. */
reservation.extent_order = order;
reservation.address_bits = address_bits;
- if (HYPERVISOR_memory_op(XENMEM_increase_reservation,
+ frame = __pa(vstart) >> PAGE_SHIFT;
+ if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
&reservation) != 1)
goto fail;

@@ -353,9 +369,8 @@
for (i = 0; i < (1<<order); i++) {
BUG_ON(HYPERVISOR_update_va_mapping(
vstart + (i*PAGE_SIZE),
- pfn_pte_ma(mfn+i, PAGE_KERNEL), 0));
- xen_machphys_update(mfn+i, (__pa(vstart)>>PAGE_SHIFT)+i);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, mfn+i);
+ pfn_pte_ma(frame+i, PAGE_KERNEL), 0));
+ set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame+i);
}

flush_tlb_all();
@@ -371,13 +386,13 @@
reservation.address_bits = 0;

for (i = 0; i < (1<<order); i++) {
+ frame = (__pa(vstart) >> PAGE_SHIFT) + i;
BUG_ON(HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation) != 1);
+ XENMEM_populate_physmap, &reservation) != 1);
BUG_ON(HYPERVISOR_update_va_mapping(
vstart + (i*PAGE_SIZE),
- pfn_pte_ma(mfn, PAGE_KERNEL), 0));
- xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, mfn);
+ pfn_pte_ma(frame, PAGE_KERNEL), 0));
+ set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
}

flush_tlb_all();
@@ -393,9 +408,9 @@
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- unsigned long mfn, i, flags;
+ unsigned long frame, i, flags;
struct xen_memory_reservation reservation = {
- .extent_start = &mfn,
+ .extent_start = &frame,
.nr_extents = 1,
.extent_order = 0,
.domid = DOMID_SELF
@@ -413,7 +428,7 @@
pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE)));
pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE)));
pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
- mfn = pte_mfn(*pte);
+ frame = pte_mfn(*pte);
BUG_ON(HYPERVISOR_update_va_mapping(
vstart + (i*PAGE_SIZE), __pte_ma(0), 0));
set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i,
@@ -424,19 +439,32 @@

/* 2. Map new pages in place of old pages. */
for (i = 0; i < (1<<order); i++) {
+ frame = (__pa(vstart) >> PAGE_SHIFT) + i;
BUG_ON(HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation) != 1);
+ XENMEM_populate_physmap, &reservation) != 1);
BUG_ON(HYPERVISOR_update_va_mapping(
vstart + (i*PAGE_SIZE),
- pfn_pte_ma(mfn, PAGE_KERNEL), 0));
- xen_machphys_update(mfn, (__pa(vstart)>>PAGE_SHIFT)+i);
- set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, mfn);
+ pfn_pte_ma(frame, PAGE_KERNEL), 0));
+ set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame);
}

flush_tlb_all();

balloon_unlock(flags);
}
+#else
+int xen_create_contiguous_region(
+ unsigned long vstart, unsigned int order, unsigned int address_bits)
+{
+ if (order >= 1)
+ BUG();
+ return 0;
+}
+
+void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
+{
+}
+#endif

#ifdef __i386__
int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
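A usage sketch for the two new hole helpers (this condenses the mm/init.c hunk below, and assumes, as the wrappers above do, that the MMUEXT_PFN_HOLE_BASE/SIZE commands return the requested value directly rather than the usual 0/-errno status):

	unsigned long pfn, hole_start, hole_size;

	hole_start = xen_pfn_hole_start();	/* first PFN with no machine backing */
	hole_size = xen_pfn_hole_size();	/* number of PFNs in the hole */

	/* Keep the page allocator away from the unbacked range. */
	for (pfn = hole_start; pfn < hole_start + hole_size; pfn++)
		SetPageReserved(pfn_to_page(pfn));
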
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Jan 27 20:23:06 2006
@@ -68,7 +68,7 @@

#ifdef CONFIG_X86_PAE
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- make_lowmem_page_readonly(pmd_table);
+ make_lowmem_mmu_page_readonly(pmd_table);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
if (pmd_table != pmd_offset(pud, 0))
@@ -89,7 +89,7 @@
{
if (pmd_none(*pmd)) {
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- make_lowmem_page_readonly(page_table);
+ make_lowmem_mmu_page_readonly(page_table);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
if (page_table != pte_offset_kernel(pmd, 0))
BUG();
@@ -643,6 +643,17 @@
set_page_count(&mem_map[pfn], 1);
totalram_pages++;
}
+
+ /* Make the Xen hole reserved. */
+ unsigned long hole_start, hole_size;
+ hole_size = xen_pfn_hole_size();
+ hole_start = xen_pfn_hole_start();
+ for (pfn = hole_start; pfn < hole_start + hole_size; pfn++) {
+ printk("<0>Reserve %lx for hole.\n",
+ pfn);
+ SetPageReserved(pfn_to_page(pfn));
+ BUG_ON(!PageReserved(pfn_to_page(pfn)));
+ }

reservedpages = 0;
for (tmp = 0; tmp < max_low_pfn; tmp++)
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c Fri Jan 27 20:23:06 2006
@@ -24,6 +24,7 @@
#include <asm/io.h>
#include <asm/mmu_context.h>

+#include <asm-xen/features.h>
#include <asm-xen/foreign_page.h>
#include <asm/hypervisor.h>

@@ -198,7 +199,7 @@
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
if (pte)
- make_lowmem_page_readonly(pte);
+ make_lowmem_mmu_page_readonly(pte);
return pte;
}

@@ -316,6 +317,7 @@

pgd_test_and_unpin(pgd);

+ memset(pgd, 0, 10);
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;

@@ -344,7 +346,7 @@
pmd_t *kpmd = pmd_offset(kpud, v);
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
memcpy(pmd, kpmd, PAGE_SIZE);
- make_lowmem_page_readonly(pmd);
+ make_lowmem_mmu_page_readonly(pmd);
}
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
@@ -378,7 +380,7 @@
spin_unlock_irqrestore(&pgd_lock, flags);
for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
- make_lowmem_page_writable(pmd);
+ make_lowmem_mmu_page_writable(pmd);
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
kmem_cache_free(pmd_cache, pmd);
}
@@ -388,27 +390,37 @@
kmem_cache_free(pgd_cache, pgd);
}

-#ifndef CONFIG_XEN_SHADOW_MODE
+
void make_lowmem_page_readonly(void *va)
{
- pte_t *pte = virt_to_ptep(va);
- int rc = HYPERVISOR_update_va_mapping(
+ pte_t *pte;
+ int rc;
+
+ pte = virt_to_ptep(va);
+ rc = HYPERVISOR_update_va_mapping(
(unsigned long)va, pte_wrprotect(*pte), 0);
BUG_ON(rc);
}

void make_lowmem_page_writable(void *va)
{
- pte_t *pte = virt_to_ptep(va);
- int rc = HYPERVISOR_update_va_mapping(
+ pte_t *pte;
+ int rc;
+
+ pte = virt_to_ptep(va);
+ rc = HYPERVISOR_update_va_mapping(
(unsigned long)va, pte_mkwrite(*pte), 0);
BUG_ON(rc);
}

+
void make_page_readonly(void *va)
{
- pte_t *pte = virt_to_ptep(va);
- int rc = HYPERVISOR_update_va_mapping(
+ pte_t *pte;
+ int rc;
+
+ pte = virt_to_ptep(va);
+ rc = HYPERVISOR_update_va_mapping(
(unsigned long)va, pte_wrprotect(*pte), 0);
if (rc) /* fallback? */
xen_l1_entry_update(pte, pte_wrprotect(*pte));
@@ -426,8 +438,11 @@

void make_page_writable(void *va)
{
- pte_t *pte = virt_to_ptep(va);
- int rc = HYPERVISOR_update_va_mapping(
+ pte_t *pte;
+ int rc;
+
+ pte = virt_to_ptep(va);
+ rc = HYPERVISOR_update_va_mapping(
(unsigned long)va, pte_mkwrite(*pte), 0);
if (rc) /* fallback? */
xen_l1_entry_update(pte, pte_mkwrite(*pte));
@@ -441,7 +456,7 @@
}
}

-void make_pages_readonly(void *va, unsigned int nr)
+void make_pages_readonly(void *va, unsigned nr)
{
while (nr-- != 0) {
make_page_readonly(va);
@@ -449,14 +464,57 @@
}
}

-void make_pages_writable(void *va, unsigned int nr)
+void make_pages_writable(void *va, unsigned nr)
{
while (nr-- != 0) {
make_page_writable(va);
va = (void *)((unsigned long)va + PAGE_SIZE);
}
}
-#endif /* CONFIG_XEN_SHADOW_MODE */
+
+#ifndef CONFIG_XEN_SHADOW_MODE
+void make_lowmem_mmu_page_readonly(void *va)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+ make_lowmem_page_readonly(va);
+}
+
+void make_lowmem_mmu_page_writable(void *va)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+ make_lowmem_page_writable(va);
+}
+
+void make_mmu_page_readonly(void *va)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+ make_page_readonly(va);
+}
+
+void make_mmu_page_writable(void *va)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+ make_page_writable(va);
+}
+
+void make_mmu_pages_readonly(void *va, unsigned int nr)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+ make_pages_readonly(va, nr);
+}
+
+void make_mmu_pages_writable(void *va, unsigned int nr)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+ make_pages_writable(va, nr);
+}
+#endif

static inline void pgd_walk_set_prot(void *pt, pgprot_t flags)
{
@@ -507,7 +565,8 @@

static void __pgd_pin(pgd_t *pgd)
{
- pgd_walk(pgd, PAGE_KERNEL_RO);
+ if (!xen_feature(writable_mmu_structures))
+ pgd_walk(pgd, PAGE_KERNEL_RO);
xen_pgd_pin(__pa(pgd));
set_bit(PG_pinned, &virt_to_page(pgd)->flags);
}
@@ -515,7 +574,8 @@
static void __pgd_unpin(pgd_t *pgd)
{
xen_pgd_unpin(__pa(pgd));
- pgd_walk(pgd, PAGE_KERNEL);
+ if (!xen_feature(writable_mmu_structures))
+ pgd_walk(pgd, PAGE_KERNEL);
clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
}
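Every one of the new make_mmu_* entry points follows the same shape: if the hypervisor reports writable_mmu_structures (i.e. it shadows the guest's page tables and does not need them write-protected), the call is a no-op; otherwise it falls through to the unconditional helper. The idiom, condensed:

	void make_mmu_page_readonly(void *va)
	{
		/* Shadowed page tables stay writable in the guest. */
		if (xen_feature(writable_mmu_structures))
			return;
		make_page_readonly(va);
	}

Call sites split accordingly: memory that Xen interprets as MMU state (page tables, the GDT, the LDT) moves to the make_mmu_* variants, while the plain make_page_* helpers remain for everything else.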

diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/Makefile Fri Jan 27 20:23:06 2006
@@ -11,7 +11,7 @@

extra-y += vmlinux.lds

-obj-y := evtchn.o reboot.o gnttab.o
+obj-y := evtchn.o reboot.o gnttab.o features.o

obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_NET) += skbuff.o
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c Fri Jan 27 20:23:06 2006
@@ -201,25 +201,22 @@
}

int
-gnttab_grant_foreign_transfer(domid_t domid)
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
{
int ref;

if (unlikely((ref = get_free_entry()) == -1))
return -ENOSPC;
-
- shared[ref].frame = 0;
- shared[ref].domid = domid;
- wmb();
- shared[ref].flags = GTF_accept_transfer;
+ gnttab_grant_foreign_transfer_ref(ref, domid, pfn);

return ref;
}

void
-gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid)
-{
- shared[ref].frame = 0;
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
+{
+ shared[ref].frame = pfn;
shared[ref].domid = domid;
wmb();
shared[ref].flags = GTF_accept_transfer;
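With the extra pfn argument, the granting side now publishes in shared[ref].frame the guest frame that should receive the transferred page, instead of leaving it zero; presumably this is what lets a translated (shadow-mode) guest tell Xen where in its pseudophysical space the incoming page lands. A minimal caller, borrowing the names from the netfront hunk further down:

	int ref;

	/* Let the backend transfer a page into the frame backing skb->head. */
	ref = gnttab_grant_foreign_transfer(np->xbdev->otherend_id,
					    __pa(skb->head) >> PAGE_SHIFT);
	if (ref < 0)
		return ref;	/* -ENOSPC: no free grant entries */
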
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Jan 27 20:23:06 2006
@@ -74,6 +74,7 @@
#endif


+#ifndef CONFIG_XEN_SHADOW_MODE
static int __do_suspend(void *ignore)
{
int i, j, k, fpp;
@@ -216,6 +217,13 @@

return err;
}
+#else
+static int __do_suspend(void *ignore)
+{
+ printk(KERN_WARNING "Don't do suspend in shadow mode\n");
+ return -EOPNOTSUPP;
+}
+#endif

static int shutdown_process(void *__unused)
{
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c Fri Jan 27 20:23:06 2006
@@ -65,8 +65,8 @@

preempt_disable();
#endif
- make_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
- PAGE_SIZE);
+ make_mmu_pages_readonly(pc->ldt, (pc->size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
load_LDT(pc);
#ifdef CONFIG_SMP
mask = cpumask_of_cpu(smp_processor_id());
@@ -76,7 +76,7 @@
#endif
}
if (oldsize) {
- make_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) /
+ make_mmu_pages_writable(oldldt, (oldsize * LDT_ENTRY_SIZE) /
PAGE_SIZE);
if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
@@ -92,8 +92,8 @@
if (err < 0)
return err;
memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
- make_pages_readonly(new->ldt, (new->size * LDT_ENTRY_SIZE) /
- PAGE_SIZE);
+ make_mmu_pages_readonly(new->ldt, (new->size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
return 0;
}

@@ -131,9 +131,9 @@
if (mm->context.size) {
if (mm == current->active_mm)
clear_LDT();
- make_pages_writable(mm->context.ldt,
- (mm->context.size * LDT_ENTRY_SIZE) /
- PAGE_SIZE);
+ make_mmu_pages_writable(mm->context.ldt,
+ (mm->context.size * LDT_ENTRY_SIZE) /
+ PAGE_SIZE);
if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Fri Jan 27 20:23:06 2006
@@ -63,6 +63,7 @@
#include "setup_arch_pre.h"
#include <asm/hypervisor.h>
#include <asm-xen/xen-public/nmi.h>
+#include <asm-xen/features.h>
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
#define end_pfn_map end_pfn
@@ -587,6 +588,8 @@

#endif

+ setup_xen_features();
+
HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_writable_pagetables);

diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c Fri Jan 27 20:23:06 2006
@@ -141,7 +141,7 @@
va < gdt_descr->address + gdt_descr->size;
va += PAGE_SIZE, f++) {
frames[f] = virt_to_mfn(va);
- make_page_readonly((void *)va);
+ make_mmu_page_readonly((void *)va);
}
if (HYPERVISOR_set_gdt(frames, gdt_descr->size /
sizeof (struct desc_struct)))
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c Fri Jan 27 20:23:06 2006
@@ -40,6 +40,8 @@
#include <asm/proto.h>
#include <asm/smp.h>

+#include <asm-xen/features.h>
+
#ifndef Dprintk
#define Dprintk(x...)
#endif
@@ -66,12 +68,15 @@
(((mfn_to_pfn((addr) >> PAGE_SHIFT)) << PAGE_SHIFT) + \
__START_KERNEL_map)))

-static void early_make_page_readonly(void *va)
+static void early_make_mmu_page_readonly(void *va)
{
unsigned long addr, _va = (unsigned long)va;
pte_t pte, *ptep;
unsigned long *page = (unsigned long *) init_level4_pgt;

+ if (xen_feature(writable_mmu_structures))
+ return;
+
addr = (unsigned long) page[pgd_index(_va)];
addr_to_page(addr, page);

@@ -88,10 +93,13 @@
BUG();
}

-void make_page_readonly(void *va)
+void make_mmu_page_readonly(void *va)
{
pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
unsigned long addr = (unsigned long) va;
+
+ if (xen_feature(writable_mmu_structures))
+ return;

pgd = pgd_offset_k(addr);
pud = pud_offset(pgd, addr);
@@ -103,13 +111,16 @@
xen_l1_entry_update(ptep, pte); /* fallback */

if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
- make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT));
-}
-
-void make_page_writable(void *va)
+ make_mmu_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT));
+}
+
+void make_mmu_page_writable(void *va)
{
pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
unsigned long addr = (unsigned long) va;
+
+ if (xen_feature(writable_mmu_structures))
+ return;

pgd = pgd_offset_k(addr);
pud = pud_offset(pgd, addr);
@@ -121,21 +132,26 @@
xen_l1_entry_update(ptep, pte); /* fallback */

if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
- make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT));
-}
-
-void make_pages_readonly(void *va, unsigned nr)
-{
+ make_mmu_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT));
+}
+
+void make_mmu_pages_readonly(void *va, unsigned nr)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
+
while (nr-- != 0) {
- make_page_readonly(va);
+ make_mmu_page_readonly(va);
va = (void*)((unsigned long)va + PAGE_SIZE);
}
}

-void make_pages_writable(void *va, unsigned nr)
-{
+void make_mmu_pages_writable(void *va, unsigned nr)
+{
+ if (xen_feature(writable_mmu_structures))
+ return;
while (nr-- != 0) {
- make_page_writable(va);
+ make_mmu_page_writable(va);
va = (void*)((unsigned long)va + PAGE_SIZE);
}
}
@@ -223,7 +239,7 @@
pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
if (pud_none(*pud)) {
pmd = (pmd_t *) spp_getpage();
- make_page_readonly(pmd);
+ make_mmu_page_readonly(pmd);
xen_pmd_pin(__pa(pmd));
set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
if (pmd != pmd_offset(pud, 0)) {
@@ -234,7 +250,7 @@
pmd = pmd_offset(pud, vaddr);
if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
- make_page_readonly(pte);
+ make_mmu_page_readonly(pte);
xen_pte_pin(__pa(pte));
set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
if (pte != pte_offset_kernel(pmd, 0)) {
@@ -276,7 +292,7 @@
if (pud_none(*pud)) {

pmd = (pmd_t *) spp_getpage();
- make_page_readonly(pmd);
+ make_mmu_page_readonly(pmd);
xen_pmd_pin(__pa(pmd));

set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
@@ -290,7 +306,7 @@

if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
- make_page_readonly(pte);
+ make_mmu_page_readonly(pte);
xen_pte_pin(__pa(pte));

set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
@@ -384,6 +400,9 @@
static inline int make_readonly(unsigned long paddr)
{
int readonly = 0;
+
+ if (xen_feature(writable_mmu_structures))
+ return 0;

/* Make old and new page tables read-only. */
if ((paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
@@ -419,7 +438,7 @@
}

pmd = alloc_static_page(&pmd_phys);
- early_make_page_readonly(pmd);
+ early_make_mmu_page_readonly(pmd);
xen_pmd_pin(pmd_phys);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
@@ -448,7 +467,7 @@
__set_pte(pte, __pte(paddr | _KERNPG_TABLE));
}
pte = pte_save;
- early_make_page_readonly(pte);
+ early_make_mmu_page_readonly(pte);
xen_pte_pin(pte_phys);
set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
}
@@ -497,11 +516,11 @@
_KERNPG_TABLE | _PAGE_USER);
memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);

- early_make_page_readonly(init_level4_pgt);
- early_make_page_readonly(init_level4_user_pgt);
- early_make_page_readonly(level3_kernel_pgt);
- early_make_page_readonly(level3_user_pgt);
- early_make_page_readonly(level2_kernel_pgt);
+ early_make_mmu_page_readonly(init_level4_pgt);
+ early_make_mmu_page_readonly(init_level4_user_pgt);
+ early_make_mmu_page_readonly(level3_kernel_pgt);
+ early_make_mmu_page_readonly(level3_user_pgt);
+ early_make_mmu_page_readonly(level2_kernel_pgt);

xen_pgd_pin(__pa_symbol(init_level4_pgt));
xen_pgd_pin(__pa_symbol(init_level4_user_pgt));
@@ -539,7 +558,7 @@
pmd = (pmd_t *)&page[pmd_index(va)];
if (pmd_none(*pmd)) {
pte_page = alloc_static_page(&phys);
- early_make_page_readonly(pte_page);
+ early_make_mmu_page_readonly(pte_page);
xen_pte_pin(phys);
set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
} else {
@@ -586,7 +605,7 @@
for (; start < end; start = next) {
unsigned long pud_phys;
pud_t *pud = alloc_static_page(&pud_phys);
- early_make_page_readonly(pud);
+ early_make_mmu_page_readonly(pud);
xen_pud_pin(pud_phys);
next = start + PGDIR_SIZE;
if (next > end)
@@ -791,11 +810,11 @@
set_page_count(virt_to_page(addr), 1);
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
xen_pte_unpin(__pa(addr));
- make_page_writable(__va(__pa(addr)));
+ make_mmu_page_writable(__va(__pa(addr)));
/*
* Make pages from __PAGE_OFFSET address as well
*/
- make_page_writable((void *)addr);
+ make_mmu_page_writable((void *)addr);
free_page(addr);
totalram_pages++;
}
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Jan 27 20:23:06 2006
@@ -107,9 +107,27 @@
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "xen_mem: " fmt, ##args)

+static int page_is_xen_hole(unsigned long pfn)
+{
+ static unsigned long hole_start, hole_len = -1;
+ if (hole_len == -1) {
+ hole_start = xen_pfn_hole_start();
+ hole_len = xen_pfn_hole_size();
+ printk("<0>Xen hole at [.%lx,%lx).\n", hole_start,
+ hole_start + hole_len);
+ }
+ return pfn >= hole_start && pfn < hole_start + hole_len;
+}
+
/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
+ BUG_ON(PageReserved(page));
+ if (page_is_xen_hole(page_to_pfn(page))) {
+ printk("<0>Attempt to add reserved pfn %lx to balloon.\n",
+ page_to_pfn(page));
+ BUG();
+ }
/* Lowmem is re-populated first, so highmem pages go at list tail. */
if (PageHighMem(page)) {
list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
@@ -139,6 +157,21 @@
return page;
}

+static struct page *balloon_first_page(void)
+{
+ if (list_empty(&ballooned_pages))
+ return NULL;
+ return LIST_TO_PAGE(ballooned_pages.next);
+}
+
+static struct page *balloon_next_page(struct page *page)
+{
+ struct list_head *next = PAGE_TO_LIST(page)->next;
+ if (next == &ballooned_pages)
+ return NULL;
+ return LIST_TO_PAGE(next);
+}
+
static void balloon_alarm(unsigned long unused)
{
schedule_work(&balloon_worker);
@@ -154,7 +187,7 @@

static int increase_reservation(unsigned long nr_pages)
{
- unsigned long *mfn_list, pfn, i, flags;
+ unsigned long *frame_list, pfn, i, flags;
struct page *page;
long rc;
struct xen_memory_reservation reservation = {
@@ -166,20 +199,28 @@
if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
nr_pages = PAGE_SIZE / sizeof(unsigned long);

- mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (mfn_list == NULL)
+ frame_list = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (frame_list == NULL)
return -ENOMEM;

balloon_lock(flags);

- reservation.extent_start = mfn_list;
+ page = balloon_first_page();
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page == NULL);
+ frame_list[i] = page_to_pfn(page);
+ BUG_ON(page_is_xen_hole(frame_list[i]));
+ page = balloon_next_page(page);
+ }
+
+ reservation.extent_start = frame_list;
reservation.nr_extents = nr_pages;
rc = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
+ XENMEM_populate_physmap, &reservation);
if (rc < nr_pages) {
int ret;
/* We hit the Xen hard limit: reprobe. */
- reservation.extent_start = mfn_list;
+ reservation.extent_start = frame_list;
reservation.nr_extents = rc;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
@@ -193,20 +234,34 @@
BUG_ON(page == NULL);

pfn = page_to_pfn(page);
+#ifndef CONFIG_XEN_SHADOW_MODE
+ /* In shadow mode, Xen handles this part for us. */
BUG_ON(phys_to_machine_mapping_valid(pfn));

/* Update P->M and M->P tables. */
- set_phys_to_machine(pfn, mfn_list[i]);
- xen_machphys_update(mfn_list[i], pfn);
-
+ set_phys_to_machine(pfn, frame_list[i]);
+ xen_machphys_update(frame_list[i], pfn);
+#endif
+
+ printk("<0>Balloon allocated %lx.\n", pfn);
/* Link back into the page tables if not highmem. */
if (pfn < max_low_pfn) {
int ret;
+ pgd_t *pgd = pgd_offset_k((unsigned long)__va(pfn << PAGE_SHIFT));
+ printk("pgd is %lx.\n", *(unsigned long *)pgd);
+ (void)copy_from_user(&ret,
+ (unsigned long *)__va(pfn << PAGE_SHIFT),
+ 4);
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
- pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
+ pfn_pte_ma(frame_list[i], PAGE_KERNEL),
0);
BUG_ON(ret);
+ printk("<0>Rehooked va; pte now %lx.\n",
+ *(unsigned long *)virt_to_ptep(__va(pfn << PAGE_SHIFT)));
+ *(unsigned long *)__va(pfn << PAGE_SHIFT) =
+ 0xf001;
+ printk("<0>Touched va.\n");
}

/* Relinquish the page back to the allocator. */
@@ -221,14 +276,14 @@
out:
balloon_unlock(flags);

- free_page((unsigned long)mfn_list);
+ free_page((unsigned long)frame_list);

return 0;
}

static int decrease_reservation(unsigned long nr_pages)
{
- unsigned long *mfn_list, pfn, i, flags;
+ unsigned long *frame_list, pfn, i, flags;
struct page *page;
void *v;
int need_sleep = 0;
@@ -242,8 +297,8 @@
if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
nr_pages = PAGE_SIZE / sizeof(unsigned long);

- mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (mfn_list == NULL)
+ frame_list = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (frame_list == NULL)
return -ENOMEM;

for (i = 0; i < nr_pages; i++) {
@@ -254,7 +309,7 @@
}

pfn = page_to_pfn(page);
- mfn_list[i] = pfn_to_mfn(pfn);
+ frame_list[i] = pfn_to_mfn(pfn);

if (!PageHighMem(page)) {
v = phys_to_virt(pfn << PAGE_SHIFT);
@@ -280,12 +335,12 @@

/* No more mappings: invalidate P2M and add to balloon. */
for (i = 0; i < nr_pages; i++) {
- pfn = mfn_to_pfn(mfn_list[i]);
+ pfn = mfn_to_pfn(frame_list[i]);
set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
balloon_append(pfn_to_page(pfn));
}

- reservation.extent_start = mfn_list;
+ reservation.extent_start = frame_list;
reservation.nr_extents = nr_pages;
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
BUG_ON(ret != nr_pages);
@@ -295,7 +350,7 @@

balloon_unlock(flags);

- free_page((unsigned long)mfn_list);
+ free_page((unsigned long)frame_list);

return need_sleep;
}
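Condensing the increase_reservation() changes above into the new call shape (a sketch; the locking, the hard-limit retry path, and the shadow-mode #ifdefs are elided): the guest now passes the PFNs it wants backed as the IN parameter, and XENMEM_populate_physmap either hands back MFNs (non-translated guests) or installs the p2m entries itself.

	/* IN: the ballooned-out PFNs we want repopulated. */
	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	reservation.extent_start = frame_list;
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	/* rc is the number of extents actually populated; for
	 * non-translated guests frame_list[] now holds the MFNs. */
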
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Fri Jan 27 20:23:06 2006
@@ -586,7 +586,8 @@
BUG_ON((signed short)ref < 0);
np->grant_rx_ref[id] = ref;
gnttab_grant_foreign_transfer_ref(ref,
- np->xbdev->otherend_id);
+ np->xbdev->otherend_id,
+ __pa(skb->head) >> PAGE_SHIFT);
RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
rx_pfn_array[i] = virt_to_mfn(skb->head);

@@ -613,7 +614,7 @@
reservation.domid = DOMID_SELF;

/* Tell the ballon driver what is going on. */
- balloon_update_driver_allowance(i);
+//SOS22 balloon_update_driver_allowance(i);

/* Zap PTEs and give away pages in one big multicall. */
(void)HYPERVISOR_multicall(rx_mcl, i+1);
@@ -802,9 +803,11 @@
np->stats.rx_bytes += rx->status;

/* Remap the page. */
+#ifndef CONFIG_XEN_SHADOW_MODE
mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
mmu->val = __pa(skb->head) >> PAGE_SHIFT;
mmu++;
+#endif
MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
pfn_pte_ma(mfn, PAGE_KERNEL), 0);
mcl++;
@@ -815,7 +818,7 @@
}

/* Some pages are no longer absent... */
- balloon_update_driver_allowance(-work_done);
+//SOS22 balloon_update_driver_allowance(-work_done);

/* Do all the remapping work, and M2P updates, in one big hypercall. */
if (likely((mcl - rx_mcl) != 0)) {
@@ -999,7 +1002,8 @@
if ((unsigned long)np->rx_skbs[i] < __PAGE_OFFSET)
continue;
gnttab_grant_foreign_transfer_ref(
- np->grant_rx_ref[i], np->xbdev->otherend_id);
+ np->grant_rx_ref[i], np->xbdev->otherend_id,
+ __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT);
RING_GET_REQUEST(&np->rx, requeue_idx)->gref =
np->grant_rx_ref[i];
RING_GET_REQUEST(&np->rx, requeue_idx)->id = i;
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/drivers/xen/util.c
--- a/linux-2.6-xen-sparse/drivers/xen/util.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/drivers/xen/util.c Fri Jan 27 20:23:06 2006
@@ -1,5 +1,6 @@
#include <linux/config.h>
#include <linux/mm.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
@@ -31,7 +32,7 @@

return area;
}
-EXPORT_SYMBOL(alloc_vm_area);
+EXPORT_SYMBOL_GPL(alloc_vm_area);

void free_vm_area(struct vm_struct *area)
{
@@ -40,7 +41,7 @@
BUG_ON(ret != area);
kfree(area);
}
-EXPORT_SYMBOL(free_vm_area);
+EXPORT_SYMBOL_GPL(free_vm_area);

void lock_vm_area(struct vm_struct *area)
{
@@ -60,13 +61,13 @@
for (i = 0; i < area->size; i += PAGE_SIZE)
(void)__get_user(c, (char __user *)area->addr + i);
}
-EXPORT_SYMBOL(lock_vm_area);
+EXPORT_SYMBOL_GPL(lock_vm_area);

void unlock_vm_area(struct vm_struct *area)
{
preempt_enable();
}
-EXPORT_SYMBOL(unlock_vm_area);
+EXPORT_SYMBOL_GPL(unlock_vm_area);

/*
* Local variables:
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypervisor.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypervisor.h Fri Jan 27 20:23:06 2006
@@ -101,6 +101,9 @@
void xen_set_ldt(unsigned long ptr, unsigned long bytes);
void xen_machphys_update(unsigned long mfn, unsigned long pfn);

+unsigned long xen_pfn_hole_start(void);
+unsigned long xen_pfn_hole_size(void);
+
#ifdef CONFIG_SMP
#include <linux/cpumask.h>
void xen_tlb_flush_all(void);
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h Fri Jan 27 20:23:06 2006
@@ -59,8 +59,18 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)

+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
#define INVALID_P2M_ENTRY (~0UL)
+#ifndef CONFIG_XEN_SHADOW_MODE
#define FOREIGN_FRAME(m) ((m) | (1UL<<31))
extern unsigned long *phys_to_machine_mapping;
#define pfn_to_mfn(pfn) \
@@ -93,15 +103,6 @@
phys_to_machine_mapping[pfn] = mfn;
}

-/* Definitions for machine and pseudophysical addresses. */
-#ifdef CONFIG_X86_PAE
-typedef unsigned long long paddr_t;
-typedef unsigned long long maddr_t;
-#else
-typedef unsigned long paddr_t;
-typedef unsigned long maddr_t;
-#endif
-
static inline maddr_t phys_to_machine(paddr_t phys)
{
maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
@@ -114,7 +115,16 @@
phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
return phys;
}
-
+#else
+#define pfn_to_mfn(p) (p)
+#define mfn_to_pfn(m) (m)
+#define phys_to_machine(p) (p)
+#define machine_to_phys(m) (m)
+static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+}
+#endif
/*
* These are used to make use of C type-checking..
*/
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h Fri Jan 27 20:23:06 2006
@@ -42,7 +42,7 @@
static inline void pte_free_kernel(pte_t *pte)
{
free_page((unsigned long)pte);
- make_page_writable(pte);
+ make_mmu_page_writable(pte);
}

extern void pte_free(struct page *pte);
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h Fri Jan 27 20:23:06 2006
@@ -64,7 +64,7 @@
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
+ if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h Fri Jan 27 20:23:06 2006
@@ -149,7 +149,7 @@
({ \
unsigned long mfn = pte_mfn(_pte); \
unsigned long pfn = mfn_to_pfn(mfn); \
- if ((pfn >= max_mapnr) || (phys_to_machine_mapping[pfn] != mfn))\
+ if ((pfn >= max_mapnr) || (pfn_to_mfn(pfn) != mfn)) \
pfn = max_mapnr; /* special: force !pfn_valid() */ \
pfn; \
})
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Fri Jan 27 20:23:06 2006
@@ -412,20 +412,26 @@
ptep_set_access_flags(__vma, __address, __ptep, __entry, 1); \
} while (0)

-#ifndef CONFIG_XEN_SHADOW_MODE
void make_lowmem_page_readonly(void *va);
void make_lowmem_page_writable(void *va);
void make_page_readonly(void *va);
void make_page_writable(void *va);
void make_pages_readonly(void *va, unsigned int nr);
void make_pages_writable(void *va, unsigned int nr);
-#else
-#define make_lowmem_page_readonly(_va) ((void)0)
-#define make_lowmem_page_writable(_va) ((void)0)
-#define make_page_readonly(_va) ((void)0)
-#define make_page_writable(_va) ((void)0)
-#define make_pages_readonly(_va, _nr) ((void)0)
-#define make_pages_writable(_va, _nr) ((void)0)
+#ifndef CONFIG_XEN_SHADOW_MODE
+void make_lowmem_mmu_page_readonly(void *va);
+void make_lowmem_mmu_page_writable(void *va);
+void make_mmu_page_readonly(void *va);
+void make_mmu_page_writable(void *va);
+void make_mmu_pages_readonly(void *va, unsigned int nr);
+void make_mmu_pages_writable(void *va, unsigned int nr);
+#else
+#define make_lowmem_mmu_page_readonly(_va) ((void)0)
+#define make_lowmem_mmu_page_writable(_va) ((void)0)
+#define make_mmu_page_readonly(_va) ((void)0)
+#define make_mmu_page_writable(_va) ((void)0)
+#define make_mmu_pages_readonly(_va, _nr) ((void)0)
+#define make_mmu_pages_writable(_va, _nr) ((void)0)
#endif

#define virt_to_ptep(__va) \
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h Fri Jan 27 20:23:06 2006
@@ -7,10 +7,10 @@
#include <linux/mm.h>
#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */

-void make_page_readonly(void *va);
-void make_page_writable(void *va);
-void make_pages_readonly(void *va, unsigned int nr);
-void make_pages_writable(void *va, unsigned int nr);
+void make_mmu_page_readonly(void *va);
+void make_mmu_page_writable(void *va);
+void make_mmu_pages_readonly(void *va, unsigned int nr);
+void make_mmu_pages_writable(void *va, unsigned int nr);

#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)

@@ -161,7 +161,7 @@
{
pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
if (pte)
- make_page_readonly(pte);
+ make_mmu_page_readonly(pte);

return pte;
}
@@ -181,7 +181,7 @@
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
xen_pte_unpin(__pa(pte));
- make_page_writable(pte);
+ make_mmu_page_writable(pte);
free_page((unsigned long)pte);
}

diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/gnttab.h
--- a/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/gnttab.h Fri Jan 27 20:23:06 2006
@@ -50,7 +50,7 @@
void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
unsigned long page);

-int gnttab_grant_foreign_transfer(domid_t domid);
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn);

unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
@@ -77,7 +77,8 @@
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
unsigned long frame, int readonly);

-void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid);
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+ unsigned long pfn);

#ifdef __ia64__
#define gnttab_map_vaddr(map) __va(map.dev_bus_addr)
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/mm/memory.c
--- a/linux-2.6-xen-sparse/mm/memory.c Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/mm/memory.c Fri Jan 27 20:23:06 2006
@@ -291,6 +291,7 @@
spin_lock(&mm->page_table_lock);
if (!new)
return NULL;
+
/*
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
diff -r 05a1340bc1e7 -r 990c009015e8 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Jan 26 18:52:18 2006
+++ b/xen/arch/x86/mm.c Fri Jan 27 20:23:06 2006
@@ -1041,7 +1041,7 @@
if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
unlikely(o != l1e_get_intpte(ol1e)) )
{
- MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
+ printf("Failed to update %" PRIpte " -> %" PRIpte
": saw %" PRIpte,
l1e_get_intpte(ol1e),
l1e_get_intpte(nl1e),
@@ -1058,11 +1058,16 @@
l1_pgentry_t ol1e;
struct domain *d = current->domain;

- if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
+ shadow_sync_all(d);
+ if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) {
+ printf("copy_from_user1 failed %p, l2 %lx.\n", pl1e,
+ *(unsigned long *)&__linear_l2_table[l2_table_offset((unsigned long)pl1e)]);
return 0;
-
- if ( unlikely(shadow_mode_refcounts(d)) )
+ }
+
+ if ( unlikely(shadow_mode_refcounts(d)) ) {
return update_l1e(pl1e, ol1e, nl1e);
+ }

if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
@@ -2540,8 +2545,10 @@

perfc_incrc(calls_to_update_va);

- if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) )
+ if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) {
+ printf("Bad update_va_mapping.\n");
return -EINVAL;
+ }

LOCK_BIGLOCK(d);

@@ -2550,9 +2557,13 @@
if ( unlikely(shadow_mode_enabled(d)) )
check_pagetable(v, "pre-va"); /* debug */

+ shadow_sync_all(d);
+
if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
- val)) )
+ val)) ) {
+ printf("mod_l1_entry failed.\n");
rc = -EINVAL;
+ }

if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) )
{
@@ -2569,7 +2580,8 @@
}

rc = shadow_do_update_va_mapping(va, val, v);
-
+ if (rc)
+ printf("shadow_do_update_va_mapping says %d.\n", rc);
check_pagetable(v, "post-va"); /* debug */
}

diff -r 05a1340bc1e7 -r 990c009015e8 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c Thu Jan 26 18:52:18 2006
+++ b/xen/arch/x86/shadow32.c Fri Jan 27 20:23:06 2006
@@ -2494,7 +2494,9 @@
l2_pgentry_t *guest2 = guest;
l2_pgentry_t *snapshot2 = snapshot;
l1_pgentry_t *shadow2 = shadow;
-
+
+ printf("Update hl2 shadow.\n");
+
ASSERT(shadow_mode_write_all(d) || shadow_mode_wr_pt_pte(d));
BUG_ON(!shadow_mode_refcounts(d)); // not yet implemented

diff -r 05a1340bc1e7 -r 990c009015e8 xen/common/kernel.c
--- a/xen/common/kernel.c Thu Jan 26 18:52:18 2006
+++ b/xen/common/kernel.c Fri Jan 27 20:23:06 2006
@@ -13,6 +13,7 @@
#include <asm/current.h>
#include <public/nmi.h>
#include <public/version.h>
+#include <asm/shadow.h>

void cmdline_parse(char *cmdline)
{
@@ -144,6 +145,31 @@
return -EFAULT;
return 0;
}
+
+ case XENVER_get_features:
+ {
+ xen_feature_info_t fi;
+
+ if ( copy_from_user(&fi, arg, sizeof(fi)) )
+ return -EFAULT;
+
+ switch ( fi.submap_idx )
+ {
+ case 0:
+ if (shadow_mode_wr_pt_pte(current->domain))
+ fi.submap = XENFEAT_writable_mmu_structures;
+ else
+ fi.submap = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ( copy_to_user(arg, &fi, sizeof(fi)) )
+ return -EFAULT;
+ return 0;
+ }
+
}

return -ENOSYS;
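The guest half of this handshake is the new arch/xen/kernel/features.c at the end of this changeset; stripped to its core, the query looks like:

	xen_feature_info_t fi;
	int rc;

	fi.submap_idx = 0;	/* only submap 0 is defined so far */
	rc = HYPERVISOR_xen_version(XENVER_get_features, &fi);
	/* On success, fi.submap has XENFEAT_writable_mmu_structures set
	 * iff the domain runs on shadowed (wr_pt_pte) page tables. */
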
diff -r 05a1340bc1e7 -r 990c009015e8 xen/common/memory.c
--- a/xen/common/memory.c Thu Jan 26 18:52:18 2006
+++ b/xen/common/memory.c Fri Jan 27 20:23:06 2006
@@ -30,7 +30,7 @@
int *preempted)
{
struct pfn_info *page;
- unsigned int i;
+ unsigned long i;

if ( (extent_list != NULL) &&
!array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
@@ -52,7 +52,7 @@
d, extent_order, flags)) == NULL) )
{
DPRINTK("Could not allocate order=%d extent: "
- "id=%d flags=%x (%d of %d)\n",
+ "id=%d flags=%x (%ld of %d)\n",
extent_order, d->domain_id, flags, i, nr_extents);
return i;
}
@@ -64,6 +64,79 @@
}

return nr_extents;
+}
+
+static long
+populate_physmap(
+ struct domain *d,
+ unsigned long *extent_list,
+ unsigned int nr_extents,
+ unsigned int extent_order,
+ unsigned int flags,
+ int *preempted)
+{
+ struct pfn_info *page;
+ unsigned long i, j, pfn, mfn;
+ struct domain_mmap_cache cache1, cache2;
+
+ if ( !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
+ return 0;
+
+ if ( (extent_order != 0) &&
+ !multipage_allocation_permitted(current->domain) )
+ return 0;
+
+ if (shadow_mode_translate(d)) {
+ domain_mmap_cache_init(&cache1);
+ domain_mmap_cache_init(&cache2);
+ shadow_lock(d);
+ }
+
+ for ( i = 0; i < nr_extents; i++ )
+ {
+ if ( hypercall_preempt_check() )
+ {
+ *preempted = 1;
+ goto out;
+ }
+
+ if ( unlikely((page = alloc_domheap_pages(
+ d, extent_order, flags)) == NULL) )
+ {
+ DPRINTK("Could not allocate order=%d extent: "
+ "id=%d flags=%x (%ld of %d)\n",
+ extent_order, d->domain_id, flags, i, nr_extents);
+ goto out;
+ }
+
+ mfn = page_to_pfn(page);
+
+ if ( unlikely(__get_user(pfn, &extent_list[i]) != 0) )
+ goto out;
+
+ for ( j = 0; j < (1 << extent_order); j++ ) {
+ printf("Populating %lx with %lx.\n",
+ pfn + j, mfn + j);
+ if (shadow_mode_translate(d))
+ set_p2m_entry(d, pfn + j, mfn + j, &cache1, &cache2);
+ set_pfn_from_mfn(mfn + j, pfn + j);
+ }
+
+ if (!shadow_mode_translate(d)) {
+ /* Inform the domain of the new page's machine address. */
+ if ( __put_user(mfn, &extent_list[i]) != 0 )
+ goto out;
+ }
+ }
+
+ out:
+ if (shadow_mode_translate(d)) {
+ shadow_unlock(d);
+ domain_mmap_cache_destroy(&cache1);
+ domain_mmap_cache_destroy(&cache2);
+ }
+
+ return i;
}

static long
@@ -76,7 +149,7 @@
int *preempted)
{
struct pfn_info *page;
- unsigned long i, j, mpfn, mfn;
+ unsigned long i, j, gpfn, mfn;

if ( !array_access_ok(extent_list, nr_extents, sizeof(*extent_list)) )
return 0;
@@ -89,16 +162,16 @@
return i;
}

- if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) )
+ if ( unlikely(__get_user(gpfn, &extent_list[i]) != 0) )
return i;

for ( j = 0; j < (1 << extent_order); j++ )
{
- mfn = __gpfn_to_mfn(d, mpfn + j);
+ mfn = __gpfn_to_mfn(d, gpfn + j);
if ( unlikely(mfn >= max_page) )
{
- DPRINTK("Domain %u page number out of range (%lx >= %lx)\n",
- d->domain_id, mfn, max_page);
+ DPRINTK("Domain %u page number out of range (%lx(%lx) >= %lx)\n",
+ d->domain_id, mfn, gpfn, max_page);
return i;
}

@@ -115,8 +188,18 @@
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);

- shadow_sync_and_drop_references(d, page);
-
+ if (shadow_mode_translate(d)) {
+ struct domain_mmap_cache c1, c2;
+ domain_mmap_cache_init(&c1);
+ domain_mmap_cache_init(&c2);
+ shadow_lock(d);
+ shadow_sync_and_drop_references(d, page);
+ set_p2m_entry(d, gpfn + j, -1, &c1, &c2);
+ set_pfn_from_mfn(mfn + j, INVALID_M2P_ENTRY);
+ shadow_unlock(d);
+ domain_mmap_cache_destroy(&c1);
+ domain_mmap_cache_destroy(&c2);
+ }
put_page(page);
}
}
@@ -144,6 +227,7 @@
{
case XENMEM_increase_reservation:
case XENMEM_decrease_reservation:
+ case XENMEM_populate_physmap:
if ( copy_from_user(&reservation, arg, sizeof(reservation)) )
return -EFAULT;

@@ -171,14 +255,37 @@
else if ( (d = find_domain_by_id(reservation.domid)) == NULL )
return -ESRCH;

- rc = ((op == XENMEM_increase_reservation) ?
- increase_reservation : decrease_reservation)(
- d,
- reservation.extent_start,
- reservation.nr_extents,
- reservation.extent_order,
- flags,
- &preempted);
+ switch ( op )
+ {
+ case XENMEM_increase_reservation:
+ rc = increase_reservation(
+ d,
+ reservation.extent_start,
+ reservation.nr_extents,
+ reservation.extent_order,
+ flags,
+ &preempted);
+ break;
+ case XENMEM_decrease_reservation:
+ rc = decrease_reservation(
+ d,
+ reservation.extent_start,
+ reservation.nr_extents,
+ reservation.extent_order,
+ flags,
+ &preempted);
+ break;
+ case XENMEM_populate_physmap:
+ default:
+ rc = populate_physmap(
+ d,
+ reservation.extent_start,
+ reservation.nr_extents,
+ reservation.extent_order,
+ flags,
+ &preempted);
+ break;
+ }

if ( unlikely(reservation.domid != DOMID_SELF) )
put_domain(d);
diff -r 05a1340bc1e7 -r 990c009015e8 xen/include/asm-x86/shadow.h
--- a/xen/include/asm-x86/shadow.h Thu Jan 26 18:52:18 2006
+++ b/xen/include/asm-x86/shadow.h Fri Jan 27 20:23:06 2006
@@ -326,7 +326,7 @@
#define SHADOW_SNAPSHOT_ELSEWHERE (-1L)

/************************************************************************/
-#define SHADOW_DEBUG 0
+#define SHADOW_DEBUG 1
#define SHADOW_VERBOSE_DEBUG 0
#define SHADOW_VVERBOSE_DEBUG 0
#define SHADOW_VVVERBOSE_DEBUG 0
diff -r 05a1340bc1e7 -r 990c009015e8 xen/include/public/memory.h
--- a/xen/include/public/memory.h Thu Jan 26 18:52:18 2006
+++ b/xen/include/public/memory.h Fri Jan 27 20:23:06 2006
@@ -16,11 +16,18 @@
*/
#define XENMEM_increase_reservation 0
#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap 6
typedef struct xen_memory_reservation {

/*
- * MFN bases of extents to free (XENMEM_decrease_reservation).
- * MFN bases of extents that were allocated (XENMEM_increase_reservation).
+ * XENMEM_increase_reservation:
+ * OUT: MFN bases of extents that were allocated
+ * XENMEM_decrease_reservation:
+ * IN: MFN bases of extents to free
+ * XENMEM_populate_physmap:
+ * IN: PFN bases of extents to populate with memory
+ * OUT: MFN bases of extents that were allocated
+ * (NB. This command also updates the mach_to_phys translation table)
*/
unsigned long *extent_start;

@@ -29,11 +36,10 @@
unsigned int extent_order;

/*
- * XENMEM_increase_reservation: maximum # bits addressable by the user
- * of the allocated region (e.g., I/O devices often have a 32-bit
- * limitation even in 64-bit systems). If zero then the user has no
- * addressing restriction.
- * XENMEM_decrease_reservation: unused.
+ * Maximum # bits addressable by the user of the allocated region (e.g.,
+ * I/O devices often have a 32-bit limitation even in 64-bit systems). If
+ * zero then the user has no addressing restriction.
+ * This field is not used by XENMEM_decrease_reservation.
*/
unsigned int address_bits;

diff -r 05a1340bc1e7 -r 990c009015e8 xen/include/public/version.h
--- a/xen/include/public/version.h Thu Jan 26 18:52:18 2006
+++ b/xen/include/public/version.h Fri Jan 27 20:23:06 2006
@@ -39,6 +39,17 @@
unsigned long virt_start;
} xen_platform_parameters_t;

+#define XENVER_get_features 6
+typedef struct xen_feature_info {
+ unsigned int submap_idx; /* IN: which 32-bit submap to return */
+ uint32_t submap; /* OUT: 32-bit submap */
+} xen_feature_info_t;
+
+#define _XENFEAT_writable_mmu_structures 0
+#define XENFEAT_writable_mmu_structures (1UL<<_XENFEAT_writable_mmu_structures)
+
+#define XENFEAT_NR_SUBMAPS 1
+
#endif /* __XEN_PUBLIC_VERSION_H__ */

/*
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/arch/xen/kernel/features.c
--- /dev/null Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/features.c Fri Jan 27 20:23:06 2006
@@ -0,0 +1,29 @@
+/******************************************************************************
+ * features.c
+ *
+ * Xen feature flags.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <asm/hypervisor.h>
+#include <asm-xen/features.h>
+
+/* When we rebase to a more recent version of Linux we can use __read_mostly here. */
+unsigned long xen_features[XENFEAT_NR_SUBMAPS] __cacheline_aligned;
+
+void setup_xen_features(void)
+{
+ uint32_t *flags = (uint32_t *)&xen_features[0];
+ xen_feature_info_t fi;
+ int i;
+
+ for (i=0; i<XENFEAT_NR_SUBMAPS; i++) {
+ fi.submap_idx = i;
+ if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+ break;
+ flags[i] = fi.submap;
+ }
+}
+
diff -r 05a1340bc1e7 -r 990c009015e8 linux-2.6-xen-sparse/include/asm-xen/features.h
--- /dev/null Thu Jan 26 18:52:18 2006
+++ b/linux-2.6-xen-sparse/include/asm-xen/features.h Fri Jan 27 20:23:06 2006
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * features.h
+ *
+ * Query the features reported by Xen.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __ASM_XEN_FEATURES_H__
+#define __ASM_XEN_FEATURES_H__
+
+#include <asm-xen/xen-public/version.h>
+
+extern void setup_xen_features(void);
+
+extern unsigned long xen_features[XENFEAT_NR_SUBMAPS];
+
+#define xen_feature(flag) (test_bit(_XENFEAT_ ## flag, xen_features))
+
+#endif
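xen_feature() expands to a test_bit() on the cached map, so callers can gate work on it directly; the __pgd_pin() hunk earlier in this changeset is a representative consumer:

	static void __pgd_pin(pgd_t *pgd)
	{
		/* Only write-protect the tree when Xen actually needs it. */
		if (!xen_feature(writable_mmu_structures))
			pgd_walk(pgd, PAGE_KERNEL_RO);
		xen_pgd_pin(__pa(pgd));
		set_bit(PG_pinned, &virt_to_page(pgd)->flags);
	}
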
