Mailing List Archive

ChangeSet 1.1391, 2005/04/27 15:42:32+01:00, kaf24@firebug.cl.cam.ac.uk

Improve multi-processor XenLinux fork/exec/destroy times. We do this
by lazily pinning page tables on first pagetable use, and aggressively
unpinning them on last use, to put as little pressure as possible on
the batched writable-pagetable (wrpt) interface. In practice this
means that the fork copy loop and the exit destroy loop can usually
write pagetables directly, with no Xen intervention at all (implicit
or explicit).
Signed-off-by: Keir Fraser <keir@xensource.com>
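The per-mm "pinned" flag set by mm_pin() in the diff below is what makes the
laziness work: pagetables are handed to Xen for validation only when the mm is
first actually run. As a rough sketch of how the header side fits together --
the mmu.h and mmu_context.h hunks are listed in the diffstat but not quoted in
full, so the exact declarations and the switch_mm() hook shown here are
assumptions, not the archived code:

/* Sketch only: assumed shape of the mmu.h / mmu_context.h changes.
 * Only mm_pin(), mm_unpin() and context.pinned appear in the diff
 * below; the switch_mm() hook is an inferred example. */

/* include/asm-xen/asm-i386/mmu.h */
typedef struct {
	int size;               /* LDT entries (existing field) */
	struct semaphore sem;   /* LDT lock (existing field) */
	void *ldt;              /* LDT base (existing field) */
	unsigned pinned:1;      /* pagetables validated/pinned by Xen? */
} mm_context_t;

extern void mm_pin(struct mm_struct *mm);
extern void mm_unpin(struct mm_struct *mm);

/* include/asm-xen/asm-i386/mmu_context.h: pin lazily, on the first
 * switch into the mm, instead of eagerly at pagetable-allocation
 * time. The fork/exec copy loops therefore write ordinary writable
 * pagetable pages with no hypervisor involvement. */
static inline void switch_mm(struct mm_struct *prev,
                             struct mm_struct *next,
                             struct task_struct *tsk)
{
	if (unlikely(!next->context.pinned))
		mm_pin(next);   /* first real use: validate and pin */

	/* ... existing cr3 load and LDT switch elided ... */
}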



linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c             |    2
linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c                |    9
linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c             |  185
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu.h         |   22
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h |   26
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h     |   21
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h     |   12
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h    |    9
linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h  |    9
linux-2.6.11-xen-sparse/mm/mmap.c                              | 2108 ++++++++++
xen/arch/x86/mm.c                                              |   21
11 files changed, 2295 insertions(+), 129 deletions(-)


diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c 2005-04-27 11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c 2005-04-27 11:02:21 -04:00
@@ -100,8 +100,8 @@
struct mm_struct * old_mm;
int retval = 0;

+ memset(&mm->context, 0, sizeof(mm->context));
init_MUTEX(&mm->context.sem);
- mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
down(&old_mm->context.sem);
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c 2005-04-27 11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c 2005-04-27 11:02:21 -04:00
@@ -710,18 +710,9 @@

kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
-kmem_cache_t *pte_cache;

void __init pgtable_cache_init(void)
{
- pte_cache = kmem_cache_create("pte",
- PTRS_PER_PTE*sizeof(pte_t),
- PTRS_PER_PTE*sizeof(pte_t),
- 0,
- pte_ctor,
- pte_dtor);
- if (!pte_cache)
- panic("pgtable_cache_init(): Cannot create pte cache");
if (PTRS_PER_PMD > 1) {
pmd_cache = kmem_cache_create("pmd",
PTRS_PER_PMD*sizeof(pmd_t),
diff -Nru a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 2005-04-27 11:02:21 -04:00
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c 2005-04-27 11:02:21 -04:00
@@ -198,59 +198,35 @@
return pte;
}

-void pte_ctor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
- struct page *page = virt_to_page(pte);
- SetPageForeign(page, pte_free);
- set_page_count(page, 1);
-
- clear_page(pte);
- make_page_readonly(pte);
- xen_pte_pin(__pa(pte));
-}
-
-void pte_dtor(void *pte, kmem_cache_t *cache, unsigned long unused)
-{
- struct page *page = virt_to_page(pte);
- ClearPageForeign(page);
-
- xen_pte_unpin(__pa(pte));
- make_page_writable(pte);
-}
-
struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- pte_t *ptep;
-
-#ifdef CONFIG_HIGHPTE
struct page *pte;

+#ifdef CONFIG_HIGHPTE
pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
- if (pte == NULL)
- return pte;
- if (PageHighMem(pte))
- return pte;
- /* not a highmem page -- free page and grab one from the cache */
- __free_page(pte);
+#else
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ if (pte) {
+ SetPageForeign(pte, pte_free);
+ set_page_count(pte, 1);
+ }
#endif
- ptep = kmem_cache_alloc(pte_cache, GFP_KERNEL);
- if (ptep)
- return virt_to_page(ptep);
- return NULL;
+
+ return pte;
}

void pte_free(struct page *pte)
{
+ unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
+
+ if (!pte_write(*virt_to_ptep(va)))
+ HYPERVISOR_update_va_mapping(
+ va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0);
+
+ ClearPageForeign(pte);
set_page_count(pte, 1);
-#ifdef CONFIG_HIGHPTE
- if (!PageHighMem(pte))
-#endif
- kmem_cache_free(pte_cache,
- phys_to_virt(page_to_pseudophys(pte)));
-#ifdef CONFIG_HIGHPTE
- else
- __free_page(pte);
-#endif
+
+ __free_page(pte);
}

void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
@@ -305,14 +281,11 @@
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));

if (PTRS_PER_PMD > 1)
- goto out;
+ return;

pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- out:
- make_page_readonly(pgd);
- xen_pgd_pin(__pa(pgd));
}

/* never called when PTRS_PER_PMD > 1 */
@@ -320,9 +293,6 @@
{
unsigned long flags; /* can be called from interrupt context */

- xen_pgd_unpin(__pa(pgd));
- make_page_writable(pgd);
-
if (PTRS_PER_PMD > 1)
return;

@@ -357,6 +327,15 @@
void pgd_free(pgd_t *pgd)
{
int i;
+ pte_t *ptep = virt_to_ptep(pgd);
+
+ if (!pte_write(*ptep)) {
+ xen_pgd_unpin(__pa(pgd));
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)pgd,
+ pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+ 0);
+ }

/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1)
@@ -369,28 +348,19 @@
#ifndef CONFIG_XEN_SHADOW_MODE
void make_lowmem_page_readonly(void *va)
{
- pgd_t *pgd = pgd_offset_k((unsigned long)va);
- pud_t *pud = pud_offset(pgd, (unsigned long)va);
- pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
- pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ pte_t *pte = virt_to_ptep(va);
set_pte(pte, pte_wrprotect(*pte));
}

void make_lowmem_page_writable(void *va)
{
- pgd_t *pgd = pgd_offset_k((unsigned long)va);
- pud_t *pud = pud_offset(pgd, (unsigned long)va);
- pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
- pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ pte_t *pte = virt_to_ptep(va);
set_pte(pte, pte_mkwrite(*pte));
}

void make_page_readonly(void *va)
{
- pgd_t *pgd = pgd_offset_k((unsigned long)va);
- pud_t *pud = pud_offset(pgd, (unsigned long)va);
- pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
- pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ pte_t *pte = virt_to_ptep(va);
set_pte(pte, pte_wrprotect(*pte));
if ( (unsigned long)va >= (unsigned long)high_memory )
{
@@ -405,10 +375,7 @@

void make_page_writable(void *va)
{
- pgd_t *pgd = pgd_offset_k((unsigned long)va);
- pud_t *pud = pud_offset(pgd, (unsigned long)va);
- pmd_t *pmd = pmd_offset(pud, (unsigned long)va);
- pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
+ pte_t *pte = virt_to_ptep(va);
set_pte(pte, pte_mkwrite(*pte));
if ( (unsigned long)va >= (unsigned long)high_memory )
{
@@ -439,3 +406,91 @@
}
}
#endif /* CONFIG_XEN_SHADOW_MODE */
+
+void mm_pin(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+ struct page *page;
+ int i;
+
+ spin_lock(&mm->page_table_lock);
+
+ for ( i = 0, pgd = mm->pgd; i < USER_PTRS_PER_PGD; i++, pgd++ )
+ {
+ if ( *(unsigned long *)pgd == 0 )
+ continue;
+ page = pmd_page(*(pmd_t *)pgd);
+ if ( !PageHighMem(page) )
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(page_to_pfn(page)<<PAGE_SHIFT),
+ pfn_pte(page_to_pfn(page), PAGE_KERNEL_RO), 0);
+ }
+
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)mm->pgd,
+ pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO), 0);
+ xen_pgd_pin(__pa(mm->pgd));
+
+ mm->context.pinned = 1;
+
+ spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+ struct page *page;
+ int i;
+
+ spin_lock(&mm->page_table_lock);
+
+ xen_pgd_unpin(__pa(mm->pgd));
+ HYPERVISOR_update_va_mapping(
+ (unsigned long)mm->pgd,
+ pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0);
+
+ for ( i = 0, pgd = mm->pgd; i < USER_PTRS_PER_PGD; i++, pgd++ )
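(The archived diff is cut off at this point. As a hedged reconstruction --
inferred from the mm_pin() loop above, not taken from the archive -- the
remainder of mm_unpin() presumably mirrors mm_pin(), restoring writable
PAGE_KERNEL mappings and clearing the pinned flag:)

/* Hypothetical continuation, inferred from mm_pin() above -- not the
 * archived diff. Each low-memory pagetable page that mm_pin() made
 * read-only is remapped writable, then the mm is marked unpinned. */
	{
		if ( *(unsigned long *)pgd == 0 )
			continue;
		page = pmd_page(*(pmd_t *)pgd);
		if ( !PageHighMem(page) )
			HYPERVISOR_update_va_mapping(
				(unsigned long)__va(page_to_pfn(page)<<PAGE_SHIFT),
				pfn_pte(page_to_pfn(page), PAGE_KERNEL), 0);
	}

	mm->context.pinned = 0;

	spin_unlock(&mm->page_table_lock);
}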

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog