Mailing List Archive

[PATCH] linux/x86: Adjust page table handling
Ensure that all and only those page table entries that have their present
bit set undergo p2m/m2p translation in all relevant places. This should
fix migration issues with _PAGE_PROTNONE pages which previously could
retain MFNs in PTEs while having the present bit clear (and thus were not
getting (un)canonicalized during save/restore).

Many thanks to Keir Fraser, who set me straight on a number of aspects in
this area while working out the way things should work.

Signed-off-by: Jan Beulich <jbeulich@novell.com>

Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/maddr.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/maddr.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/maddr.h 2007-02-13 13:41:37.000000000 +0100
@@ -21,6 +21,7 @@ typedef unsigned long maddr_t;
#ifdef CONFIG_XEN

extern unsigned long *phys_to_machine_mapping;
+extern unsigned long max_mapnr;

#undef machine_to_phys_mapping
extern unsigned long *machine_to_phys_mapping;
@@ -30,20 +31,20 @@ static inline unsigned long pfn_to_mfn(u
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
- return phys_to_machine_mapping[(unsigned int)(pfn)] &
- ~FOREIGN_FRAME_BIT;
+ BUG_ON(max_mapnr && pfn >= max_mapnr);
+ return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT;
}

static inline int phys_to_machine_mapping_valid(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return 1;
+ BUG_ON(max_mapnr && pfn >= max_mapnr);
return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
}

static inline unsigned long mfn_to_pfn(unsigned long mfn)
{
- extern unsigned long max_mapnr;
unsigned long pfn;

if (xen_feature(XENFEAT_auto_translated_physmap))
@@ -92,7 +93,6 @@ static inline unsigned long mfn_to_pfn(u
*/
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
{
- extern unsigned long max_mapnr;
unsigned long pfn = mfn_to_pfn(mfn);
if ((pfn < max_mapnr)
&& !xen_feature(XENFEAT_auto_translated_physmap)
@@ -103,6 +103,7 @@ static inline unsigned long mfn_to_local

static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
+ BUG_ON(pfn >= max_mapnr);
if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return;
@@ -124,6 +125,20 @@ static inline paddr_t machine_to_phys(ma
return phys;
}

+#ifdef CONFIG_X86_PAE
+static inline paddr_t pte_phys_to_machine(paddr_t phys)
+{
+ /*
+ * In PAE mode, the NX bit needs to be dealt with in the value
+ * passed to pfn_to_mfn(). On x86_64, we need to mask it off,
+ * but for i386 the conversion to ulong for the argument will
+ * clip it off.
+ */
+ maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
+ machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK);
+ return machine;
+}
+
static inline paddr_t pte_machine_to_phys(maddr_t machine)
{
/*
@@ -136,6 +151,7 @@ static inline paddr_t pte_machine_to_phy
phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK);
return phys;
}
+#endif

#else /* !CONFIG_XEN */

@@ -146,7 +162,6 @@ static inline paddr_t pte_machine_to_phy
#define phys_to_machine_mapping_valid(pfn) (1)
#define phys_to_machine(phys) ((maddr_t)(phys))
#define machine_to_phys(mach) ((paddr_t)(mach))
-#define pte_machine_to_phys(mach) ((paddr_t)(mach))

#endif /* !CONFIG_XEN */

Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/page.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/page.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/page.h 2007-02-13 12:27:08.000000000 +0100
@@ -29,6 +29,13 @@
#include <xen/interface/xen.h>
#include <xen/features.h>

+/*
+ * Need to repeat this here in order to not include pgtable.h (which in turn
+ * depends on definitions made here), but to be able to use the symbolic name
+ * below. The preprocessor will warn if the two definitions aren't identical.
+ */
+#define _PAGE_PRESENT 0x001
+
#define arch_free_page(_page,_order) \
({ int foreign = PageForeign(_page); \
if (foreign) \
@@ -81,40 +88,38 @@ typedef struct { unsigned long long pgpr
#define pgprot_val(x) ((x).pgprot)
#include <asm/maddr.h>
#define __pte(x) ({ unsigned long long _x = (x); \
- if (_x & 1) _x = phys_to_machine(_x); \
+ if (_x & _PAGE_PRESENT) _x = pte_phys_to_machine(_x); \
((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
#define __pgd(x) ({ unsigned long long _x = (x); \
- (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+ (pgd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; })
#define __pmd(x) ({ unsigned long long _x = (x); \
- (((_x)&1) ? ((pmd_t) {phys_to_machine(_x)}) : ((pmd_t) {(_x)})); })
+ (pmd_t) {((_x) & _PAGE_PRESENT) ? pte_phys_to_machine(_x) : (_x)}; })
+static inline unsigned long long pte_val_ma(pte_t x)
+{
+ return ((unsigned long long)x.pte_high << 32) | x.pte_low;
+}
static inline unsigned long long pte_val(pte_t x)
{
- unsigned long long ret;
-
- if (x.pte_low) {
- ret = x.pte_low | (unsigned long long)x.pte_high << 32;
- ret = pte_machine_to_phys(ret) | 1;
- } else {
- ret = 0;
- }
+ unsigned long long ret = pte_val_ma(x);
+ if (x.pte_low & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
return ret;
}
static inline unsigned long long pmd_val(pmd_t x)
{
unsigned long long ret = x.pmd;
- if (ret) ret = pte_machine_to_phys(ret) | 1;
+#ifdef CONFIG_XEN_COMPAT_030002
+ if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT;
+#else
+ if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
+#endif
return ret;
}
static inline unsigned long long pgd_val(pgd_t x)
{
unsigned long long ret = x.pgd;
- if (ret) ret = pte_machine_to_phys(ret) | 1;
+ if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
return ret;
}
-static inline unsigned long long pte_val_ma(pte_t x)
-{
- return (unsigned long long)x.pte_high << 32 | x.pte_low;
-}
#define HPAGE_SHIFT 21
#else
typedef struct { unsigned long pte_low; } pte_t;
@@ -123,23 +128,23 @@ typedef struct { unsigned long pgprot; }
#define pgprot_val(x) ((x).pgprot)
#include <asm/maddr.h>
#define boot_pte_t pte_t /* or would you rather have a typedef */
-#define pte_val(x) (((x).pte_low & 1) ? \
- pte_machine_to_phys((x).pte_low) : \
+#define pte_val(x) (((x).pte_low & _PAGE_PRESENT) ? \
+ machine_to_phys((x).pte_low) : \
(x).pte_low)
#define pte_val_ma(x) ((x).pte_low)
#define __pte(x) ({ unsigned long _x = (x); \
- (((_x)&1) ? ((pte_t) {phys_to_machine(_x)}) : ((pte_t) {(_x)})); })
+ (pte_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; })
#define __pgd(x) ({ unsigned long _x = (x); \
- (((_x)&1) ? ((pgd_t) {phys_to_machine(_x)}) : ((pgd_t) {(_x)})); })
+ (pgd_t) {((_x) & _PAGE_PRESENT) ? phys_to_machine(_x) : (_x)}; })
static inline unsigned long pgd_val(pgd_t x)
{
unsigned long ret = x.pgd;
- if (ret) ret = pte_machine_to_phys(ret) | 1;
+ if (ret & _PAGE_PRESENT) ret = machine_to_phys(ret);
return ret;
}
#define HPAGE_SHIFT 22
#endif
-#define PTE_MASK PAGE_MASK
+#define PTE_MASK PHYSICAL_PAGE_MASK

#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-2level.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-02-13 12:27:08.000000000 +0100
@@ -39,7 +39,7 @@
#define ptep_get_and_clear(mm,addr,xp) __pte_ma(xchg(&(xp)->pte_low, 0))
#define pte_same(a, b) ((a).pte_low == (b).pte_low)
#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
-#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte))
+#define pte_pfn(_pte) (pte_val(_pte) >> PAGE_SHIFT)

#define pte_page(_pte) pfn_to_page(pte_pfn(_pte))

Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-3level.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-02-13 14:14:45.000000000 +0100
@@ -146,20 +146,20 @@ static inline int pte_none(pte_t pte)
}

#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) |\
- (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT)))
-#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte))
+ ((_pte).pte_high << (32-PAGE_SHIFT)))
+#define pte_pfn(_pte) ((pte_val(_pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)

extern unsigned long long __supported_pte_mask;

static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
- return pfn_pte_ma(pfn_to_mfn(page_nr), pgprot);
+ return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
}

static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
- BUG(); panic("needs review");
- return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \
+ return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
pgprot_val(pgprot)) & __supported_pte_mask);
}

Index: sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-i386/mach-xen/asm/pgtable.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-i386/mach-xen/asm/pgtable.h 2007-02-13 14:14:47.000000000 +0100
@@ -315,18 +315,19 @@ static inline void clone_pgd_range(pgd_t

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- pte.pte_low &= _PAGE_CHG_MASK;
- pte.pte_low |= pgprot_val(newprot);
-#ifdef CONFIG_X86_PAE
/*
- * Chop off the NX bit (if present), and add the NX portion of
- * the newprot (if present):
+ * Since this might change the present bit (which controls whether
+ * a pte_t object has undergone p2m translation), we must use
+ * pte_val() on the input pte and __pte() for the return value.
*/
- pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
- pte.pte_high |= (pgprot_val(newprot) >> 32) & \
- (__supported_pte_mask >> 32);
+ paddr_t pteval = pte_val(pte);
+
+ pteval &= _PAGE_CHG_MASK;
+ pteval |= pgprot_val(newprot);
+#ifdef CONFIG_X86_PAE
+ pteval &= __supported_pte_mask;
#endif
- return pte;
+ return __pte(pteval);
}

#define pmd_large(pmd) \
Index: sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/maddr.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-x86_64/mach-xen/asm/maddr.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/maddr.h 2007-02-13 13:35:31.000000000 +0100
@@ -25,14 +25,15 @@ static inline unsigned long pfn_to_mfn(u
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return pfn;
- return phys_to_machine_mapping[(unsigned int)(pfn)] &
- ~FOREIGN_FRAME_BIT;
+ BUG_ON(end_pfn && pfn >= end_pfn);
+ return phys_to_machine_mapping[pfn] & ~FOREIGN_FRAME_BIT;
}

static inline int phys_to_machine_mapping_valid(unsigned long pfn)
{
if (xen_feature(XENFEAT_auto_translated_physmap))
return 1;
+ BUG_ON(end_pfn && pfn >= end_pfn);
return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
}

@@ -96,6 +97,7 @@ static inline unsigned long mfn_to_local

static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
+ BUG_ON(pfn >= end_pfn);
if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return;
@@ -117,6 +119,14 @@ static inline paddr_t machine_to_phys(ma
return phys;
}

+static inline paddr_t pte_phys_to_machine(paddr_t phys)
+{
+ maddr_t machine;
+ machine = pfn_to_mfn((phys & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT);
+ machine = (machine << PAGE_SHIFT) | (phys & ~PHYSICAL_PAGE_MASK);
+ return machine;
+}
+
static inline paddr_t pte_machine_to_phys(maddr_t machine)
{
paddr_t phys;
@@ -134,7 +144,6 @@ static inline paddr_t pte_machine_to_phy
#define phys_to_machine_mapping_valid(pfn) (1)
#define phys_to_machine(phys) ((maddr_t)(phys))
#define machine_to_phys(mach) ((paddr_t)(mach))
-#define pte_machine_to_phys(mach) ((paddr_t)(mach))

#endif /* !CONFIG_XEN */

Index: sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/page.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-x86_64/mach-xen/asm/page.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/page.h 2007-02-13 12:27:08.000000000 +0100
@@ -9,6 +9,13 @@
#endif
#include <xen/interface/xen.h>

+/*
+ * Need to repeat this here in order to not include pgtable.h (which in turn
+ * depends on definitions made here), but to be able to use the symbolic name
+ * below. The preprocessor will warn if the two definitions aren't identical.
+ */
+#define _PAGE_PRESENT 0x001
+
#define arch_free_page(_page,_order) \
({ int foreign = PageForeign(_page); \
if (foreign) \
@@ -95,28 +102,33 @@ typedef struct { unsigned long pgd; } pg

typedef struct { unsigned long pgprot; } pgprot_t;

-#define pte_val(x) (((x).pte & 1) ? pte_machine_to_phys((x).pte) : \
+#define pte_val(x) (((x).pte & _PAGE_PRESENT) ? \
+ pte_machine_to_phys((x).pte) : \
(x).pte)
#define pte_val_ma(x) ((x).pte)

static inline unsigned long pmd_val(pmd_t x)
{
unsigned long ret = x.pmd;
- if (ret) ret = pte_machine_to_phys(ret);
+#ifdef CONFIG_XEN_COMPAT_030002
+ if (ret) ret = pte_machine_to_phys(ret) | _PAGE_PRESENT;
+#else
+ if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
+#endif
return ret;
}

static inline unsigned long pud_val(pud_t x)
{
unsigned long ret = x.pud;
- if (ret) ret = pte_machine_to_phys(ret);
+ if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
return ret;
}

static inline unsigned long pgd_val(pgd_t x)
{
unsigned long ret = x.pgd;
- if (ret) ret = pte_machine_to_phys(ret);
+ if (ret & _PAGE_PRESENT) ret = pte_machine_to_phys(ret);
return ret;
}

@@ -124,25 +136,25 @@ static inline unsigned long pgd_val(pgd_

static inline pte_t __pte(unsigned long x)
{
- if (x & 1) x = phys_to_machine(x);
+ if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x);
return ((pte_t) { (x) });
}

static inline pmd_t __pmd(unsigned long x)
{
- if ((x & 1)) x = phys_to_machine(x);
+ if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x);
return ((pmd_t) { (x) });
}

static inline pud_t __pud(unsigned long x)
{
- if ((x & 1)) x = phys_to_machine(x);
+ if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x);
return ((pud_t) { (x) });
}

static inline pgd_t __pgd(unsigned long x)
{
- if ((x & 1)) x = phys_to_machine(x);
+ if (x & _PAGE_PRESENT) x = pte_phys_to_machine(x);
return ((pgd_t) { (x) });
}

Index: sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/pgtable.h
===================================================================
--- sle10sp1-2007-01-31.orig/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-02-13 12:27:05.000000000 +0100
+++ sle10sp1-2007-01-31/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-02-13 14:14:47.000000000 +0100
@@ -303,18 +303,16 @@ static inline unsigned long pud_bad(pud_
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))

#define pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
-#define pte_pfn(_pte) mfn_to_local_pfn(pte_mfn(_pte))
+#define pte_pfn(_pte) ((pte_val(_pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)

#define pte_page(x) pfn_to_page(pte_pfn(x))

static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
- pte_t pte;
-
- (pte).pte = (pfn_to_mfn(page_nr) << PAGE_SHIFT);
- (pte).pte |= pgprot_val(pgprot);
- (pte).pte &= __supported_pte_mask;
- return pte;
+ unsigned long pte = page_nr << PAGE_SHIFT;
+ pte |= pgprot_val(pgprot);
+ pte &= __supported_pte_mask;
+ return __pte(pte);
}

/*
@@ -446,18 +444,25 @@ static inline pud_t *pud_offset_k(pgd_t
/* physical address -> PTE */
static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
- pte_t pte;
- (pte).pte = physpage | pgprot_val(pgprot);
- return pte;
+ unsigned long pteval;
+ pteval = physpage | pgprot_val(pgprot);
+ return __pte(pteval);
}

/* Change flags of a PTE */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- (pte).pte &= _PAGE_CHG_MASK;
- (pte).pte |= pgprot_val(newprot);
- (pte).pte &= __supported_pte_mask;
- return pte;
+ /*
+ * Since this might change the present bit (which controls whether
+ * a pte_t object has undergone p2m translation), we must use
+ * pte_val() on the input pte and __pte() for the return value.
+ */
+ unsigned long pteval = pte_val(pte);
+
+ pteval &= _PAGE_CHG_MASK;
+ pteval |= pgprot_val(newprot);
+ pteval &= __supported_pte_mask;
+ return __pte(pteval);
}

#define pte_index(address) \



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel