
[PATCH 6 of 9] x86/mm: Rework stale p2m auditing
 xen/arch/x86/domctl.c       |   30 +++++++++
 xen/arch/x86/mm/p2m-ept.c   |    1 +
 xen/arch/x86/mm/p2m-pod.c   |    5 -
 xen/arch/x86/mm/p2m-pt.c    |  137 +++++++------------------------------------
 xen/arch/x86/mm/p2m.c       |  124 ++++++++++++++++++++++++++++++++++++---
 xen/include/asm-x86/p2m.h   |   11 ++-
 xen/include/public/domctl.h |   12 +++
 7 files changed, 186 insertions(+), 134 deletions(-)


The p2m audit code doesn't even compile, let alone work. It also only
partially supports ept. Make it:

- compile
- lay groundwork for eventual ept support
- move it out of the path of all its callers and turn it into a domctl. It's
  obviously not being used by anybody at present.
- trigger it on demand via said domctl

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
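
(Illustrative only, not part of this series: a hypothetical libxc-style
wrapper showing how a toolstack might drive the new XEN_DOMCTL_audit_p2m
and read back the four OUT counters. The function name and the
DECLARE_DOMCTL/do_domctl plumbing are assumed from the usual
xc_private.h conventions.)

#include "xc_private.h"

int xc_domain_audit_p2m(xc_interface *xch, uint32_t domid,
                        uint64_t *orphans_debug,
                        uint64_t *orphans_invalid,
                        uint64_t *m2p_bad,
                        uint64_t *p2m_bad)
{
    int rc;
    DECLARE_DOMCTL;

    domctl.cmd = XEN_DOMCTL_audit_p2m;
    domctl.domain = (domid_t)domid;

    rc = do_domctl(xch, &domctl);
    if ( rc == 0 )
    {
        /* On success the hypervisor has filled in the OUT counters. */
        *orphans_debug   = domctl.u.audit_p2m.orphans_debug;
        *orphans_invalid = domctl.u.audit_p2m.orphans_invalid;
        *m2p_bad         = domctl.u.audit_p2m.m2p_bad;
        *p2m_bad         = domctl.u.audit_p2m.p2m_bad;
    }

    return rc;
}

(With P2M_AUDIT compiled out, the hypervisor side of this fails with
-ENOSYS; see the handler added to domctl.c below.)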

diff -r b2097819f2d9 -r 6626add0fef6 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1449,6 +1449,36 @@ long arch_do_domctl(
break;
#endif /* __x86_64__ */

+ case XEN_DOMCTL_audit_p2m:
+ {
+ struct domain *d;
+
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(domctl->domain);
+ if ( d != NULL )
+ {
+ ret = -EPERM;
+ if ( (domctl->domain != DOMID_SELF) &&
+ (d != current->domain) &&
+ (IS_PRIV_FOR(current->domain, d)) )
+ {
+#if P2M_AUDIT
+ audit_p2m(d,
+ &domctl->u.audit_p2m.orphans_debug,
+ &domctl->u.audit_p2m.orphans_invalid,
+ &domctl->u.audit_p2m.m2p_bad,
+ &domctl->u.audit_p2m.p2m_bad);
+ ret = (copy_to_guest(u_domctl, domctl, 1)) ?
+ -EFAULT : 0;
+#else
+ ret = -ENOSYS;
+#endif /* P2M_AUDIT */
+ }
+ rcu_unlock_domain(d);
+ }
+ }
+ break;
+
case XEN_DOMCTL_set_access_required:
{
struct domain *d;
diff -r b2097819f2d9 -r 6626add0fef6 xen/arch/x86/mm/p2m-ept.c
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -817,6 +817,7 @@ void ept_p2m_init(struct p2m_domain *p2m
p2m->set_entry = ept_set_entry;
p2m->get_entry = ept_get_entry;
p2m->change_entry_type_global = ept_change_entry_type_global;
+ p2m->audit_p2m = NULL;
}

static void ept_dump_p2m_table(unsigned char key)
diff -r b2097819f2d9 -r 6626add0fef6 xen/arch/x86/mm/p2m-pod.c
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -522,7 +522,6 @@ p2m_pod_decrease_reservation(struct doma
steal_for_cache = ( p2m->pod.entry_count > p2m->pod.count );

p2m_lock(p2m);
- audit_p2m(p2m, 1);

if ( unlikely(d->is_dying) )
goto out_unlock;
@@ -616,7 +615,6 @@ out_entry_check:
}

out_unlock:
- audit_p2m(p2m, 1);
p2m_unlock(p2m);

out:
@@ -986,7 +984,6 @@ p2m_pod_demand_populate(struct p2m_domai
*/
set_p2m_entry(p2m, gfn_aligned, _mfn(0), PAGE_ORDER_2M,
p2m_populate_on_demand, p2m->default_access);
- audit_p2m(p2m, 1);
return 0;
}

@@ -1108,7 +1105,6 @@ guest_physmap_mark_populate_on_demand(st
return rc;

p2m_lock(p2m);
- audit_p2m(p2m, 1);

P2M_DEBUG("mark pod gfn=%#lx\n", gfn);

@@ -1142,7 +1138,6 @@ guest_physmap_mark_populate_on_demand(st
BUG_ON(p2m->pod.entry_count < 0);
}

- audit_p2m(p2m, 1);
p2m_unlock(p2m);

out:
diff -r b2097819f2d9 -r 6626add0fef6 xen/arch/x86/mm/p2m-pt.c
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -483,7 +483,6 @@ static int p2m_pod_check_and_populate(st
/* This is called from the p2m lookups, which can happen with or
* without the lock held. */
p2m_lock_recursive(p2m);
- audit_p2m(p2m, 1);

/* Check to make sure this is still PoD */
if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
@@ -494,7 +493,6 @@ static int p2m_pod_check_and_populate(st

r = p2m_pod_demand_populate(p2m, gfn, order, q);

- audit_p2m(p2m, 1);
p2m_unlock(p2m);

return r;
@@ -975,118 +973,23 @@ static void p2m_change_type_global(struc

}

-/* Set up the p2m function pointers for pagetable format */
-void p2m_pt_init(struct p2m_domain *p2m)
+#if P2M_AUDIT
+long p2m_pt_audit_p2m(struct p2m_domain *p2m)
{
- p2m->set_entry = p2m_set_entry;
- p2m->get_entry = p2m_gfn_to_mfn;
- p2m->change_entry_type_global = p2m_change_type_global;
- p2m->write_p2m_entry = paging_write_p2m_entry;
-}
-
-
-#if P2M_AUDIT
-/* strict_m2p == 0 allows m2p mappings that don't match the p2m.
- * It's intended for add_to_physmap, when the domain has just been allocated
- * new mfns that might have stale m2p entries from previous owners */
-void audit_p2m(struct p2m_domain *p2m, int strict_m2p)
-{
- struct page_info *page;
- struct domain *od;
- unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
int entry_count = 0;
- mfn_t p2mfn;
- unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
+ unsigned long pmbad = 0;
+ unsigned long mfn, gfn, m2pfn;
int test_linear;
- p2m_type_t type;
struct domain *d = p2m->domain;

- if ( !paging_mode_translate(d) )
- return;
-
- //P2M_PRINTK("p2m audit starts\n");
+ ASSERT(p2m_locked_by_me(p2m));

test_linear = ( (d == current->domain)
&& !pagetable_is_null(current->arch.monitor_table) );
if ( test_linear )
flush_tlb_local();

- spin_lock(&d->page_alloc_lock);
-
- /* Audit part one: walk the domain's page allocation list, checking
- * the m2p entries. */
- page_list_for_each ( page, &d->page_list )
- {
- mfn = mfn_x(page_to_mfn(page));
-
- // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
-
- od = page_get_owner(page);
-
- if ( od != d )
- {
- P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
- mfn, od, (od?od->domain_id:-1), d, d->domain_id);
- continue;
- }
-
- gfn = get_gpfn_from_mfn(mfn);
- if ( gfn == INVALID_M2P_ENTRY )
- {
- orphans_i++;
- //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
- // mfn);
- continue;
- }
-
- if ( gfn == 0x55555555 || gfn == 0x5555555555555555 )
- {
- orphans_d++;
- //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
- // mfn);
- continue;
- }
-
- if ( gfn == SHARED_M2P_ENTRY )
- {
- P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
- mfn);
- continue;
- }
-
- p2mfn = gfn_to_mfn_type_p2m(p2m, gfn, &type, p2m_query);
- if ( strict_m2p && mfn_x(p2mfn) != mfn )
- {
- mpbad++;
- P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
- " (-> gfn %#lx)\n",
- mfn, gfn, mfn_x(p2mfn),
- (mfn_valid(p2mfn)
- ? get_gpfn_from_mfn(mfn_x(p2mfn))
- : -1u));
- /* This m2p entry is stale: the domain has another frame in
- * this physical slot. No great disaster, but for neatness,
- * blow away the m2p entry. */
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
- }
-
- if ( test_linear && (gfn <= p2m->max_mapped_pfn) )
- {
- lp2mfn = mfn_x(gfn_to_mfn_type_p2m(p2m, gfn, &type, p2m_query));
- if ( lp2mfn != mfn_x(p2mfn) )
- {
- P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
- "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn));
- }
- }
-
- // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
- // mfn, gfn, mfn_x(p2mfn), lp2mfn);
- }
-
- spin_unlock(&d->page_alloc_lock);
-
- /* Audit part two: walk the domain's p2m table, checking the entries. */
+ /* Audit part one: walk the domain's p2m table, checking the entries. */
if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
{
l2_pgentry_t *l2e;
@@ -1239,17 +1142,23 @@ void audit_p2m(struct p2m_domain *p2m, i
entry_count);
BUG();
}
-
- //P2M_PRINTK("p2m audit complete\n");
- //if ( orphans_i | orphans_d | mpbad | pmbad )
- // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
- // orphans_i + orphans_d, orphans_i, orphans_d);
- if ( mpbad | pmbad )
- {
- P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
- pmbad, mpbad);
- WARN();
- }
+
+ return pmbad;
}
#endif /* P2M_AUDIT */

+/* Set up the p2m function pointers for pagetable format */
+void p2m_pt_init(struct p2m_domain *p2m)
+{
+ p2m->set_entry = p2m_set_entry;
+ p2m->get_entry = p2m_gfn_to_mfn;
+ p2m->change_entry_type_global = p2m_change_type_global;
+ p2m->write_p2m_entry = paging_write_p2m_entry;
+#if P2M_AUDIT
+ p2m->audit_p2m = p2m_pt_audit_p2m;
+#else
+ p2m->audit_p2m = NULL;
+#endif
+}
+
+
diff -r b2097819f2d9 -r 6626add0fef6 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -439,9 +439,7 @@ guest_physmap_remove_page(struct domain
{
struct p2m_domain *p2m = p2m_get_hostp2m(d);
p2m_lock(p2m);
- audit_p2m(p2m, 1);
p2m_remove_page(p2m, gfn, mfn, page_order);
- audit_p2m(p2m, 1);
p2m_unlock(p2m);
}

@@ -482,7 +480,6 @@ guest_physmap_add_entry(struct domain *d
return rc;

p2m_lock(p2m);
- audit_p2m(p2m, 0);

P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);

@@ -566,7 +563,6 @@ guest_physmap_add_entry(struct domain *d
}
}

- audit_p2m(p2m, 1);
p2m_unlock(p2m);

return rc;
@@ -656,7 +652,6 @@ set_mmio_p2m_entry(struct domain *d, uns

P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn));
rc = set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_mmio_direct, p2m->default_access);
- audit_p2m(p2m, 1);
p2m_unlock(p2m);
if ( 0 == rc )
gdprintk(XENLOG_ERR,
@@ -688,7 +683,6 @@ clear_mmio_p2m_entry(struct domain *d, u
goto out;
}
rc = set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_invalid, p2m->default_access);
- audit_p2m(p2m, 1);

out:
p2m_unlock(p2m);
@@ -785,7 +779,6 @@ int p2m_mem_paging_nominate(struct domai

/* Fix p2m entry */
set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);
- audit_p2m(p2m, 1);
ret = 0;

out:
@@ -852,7 +845,6 @@ int p2m_mem_paging_evict(struct domain *

/* Remove mapping from p2m table */
set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), PAGE_ORDER_4K, p2m_ram_paged, a);
- audit_p2m(p2m, 1);

/* Clear content before returning the page to Xen */
scrub_one_page(page);
@@ -946,7 +938,6 @@ void p2m_mem_paging_populate(struct doma
req.flags |= MEM_EVENT_FLAG_EVICT_FAIL;

set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in_start, a);
- audit_p2m(p2m, 1);
}
p2m_unlock(p2m);

@@ -1014,7 +1005,6 @@ int p2m_mem_paging_prep(struct domain *d

/* Fix p2m mapping */
set_p2m_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
- audit_p2m(p2m, 1);

atomic_dec(&d->paged_pages);

@@ -1065,7 +1055,6 @@ void p2m_mem_paging_resume(struct domain
paging_mode_log_dirty(d) ? p2m_ram_logdirty : p2m_ram_rw,
a);
set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
- audit_p2m(p2m, 1);
}
p2m_unlock(p2m);
}
@@ -1427,6 +1416,119 @@ unsigned long paging_gva_to_gfn(struct v
return hostmode->gva_to_gfn(v, hostp2m, va, pfec);
}

+/*** Audit ***/
+
+#if P2M_AUDIT
+void audit_p2m(struct domain *d,
+ uint64_t *orphans_debug,
+ uint64_t *orphans_invalid,
+ uint64_t *m2p_bad,
+ uint64_t *p2m_bad)
+{
+ struct page_info *page;
+ struct domain *od;
+ unsigned long mfn, gfn;
+ mfn_t p2mfn;
+ unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
+ p2m_access_t p2ma;
+ p2m_type_t type;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+
+ if ( !paging_mode_translate(d) )
+ goto out_p2m_audit;
+
+ P2M_PRINTK("p2m audit starts\n");
+
+ p2m_lock(p2m);
+
+ if (p2m->audit_p2m)
+ pmbad = p2m->audit_p2m(p2m);
+
+ /* Audit part two: walk the domain's page allocation list, checking
+ * the m2p entries. */
+ spin_lock(&d->page_alloc_lock);
+ page_list_for_each ( page, &d->page_list )
+ {
+ mfn = mfn_x(page_to_mfn(page));
+
+ P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn);
+
+ od = page_get_owner(page);
+
+ if ( od != d )
+ {
+ P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n",
+ mfn, od, (od?od->domain_id:-1), d, d->domain_id);
+ continue;
+ }
+
+ gfn = get_gpfn_from_mfn(mfn);
+ if ( gfn == INVALID_M2P_ENTRY )
+ {
+ orphans_i++;
+ P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n",
+ mfn);
+ continue;
+ }
+
+ if ( gfn == 0x55555555 || gfn == 0x5555555555555555 )
+ {
+ orphans_d++;
+ P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n",
+ mfn);
+ continue;
+ }
+
+ if ( gfn == SHARED_M2P_ENTRY )
+ {
+ P2M_PRINTK("shared mfn (%lx) on domain page list!\n",
+ mfn);
+ continue;
+ }
+
+ p2mfn = get_gfn_type_access(p2m, gfn, &type, &p2ma, p2m_query, NULL);
+ if ( mfn_x(p2mfn) != mfn )
+ {
+ mpbad++;
+ P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx"
+ " (-> gfn %#lx)\n",
+ mfn, gfn, mfn_x(p2mfn),
+ (mfn_valid(p2mfn)
+ ? get_gpfn_from_mfn(mfn_x(p2mfn))
+ : -1u));
+ /* This m2p entry is stale: the domain has another frame in
+ * this physical slot. No great disaster, but for neatness,
+ * blow away the m2p entry. */
+ set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ }
+ __put_gfn(p2m, gfn);
+
+ P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n",
+ mfn, gfn, mfn_x(p2mfn), lp2mfn);
+ }
+ spin_unlock(&d->page_alloc_lock);
+
+ p2m_unlock(p2m);
+
+ P2M_PRINTK("p2m audit complete\n");
+ if ( orphans_i | orphans_d | mpbad | pmbad )
+ P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
+ orphans_i + orphans_d, orphans_i, orphans_d);
+ if ( mpbad | pmbad )
+ {
+ P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
+ pmbad, mpbad);
+ WARN();
+ }
+
+out_p2m_audit:
+ *orphans_debug = (uint64_t) orphans_d;
+ *orphans_invalid = (uint64_t) orphans_i;
+ *m2p_bad = (uint64_t) mpbad;
+ *p2m_bad = (uint64_t) pmbad;
+}
+#endif /* P2M_AUDIT */
+
/*
* Local variables:
* mode: C
diff -r b2097819f2d9 -r 6626add0fef6 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -245,6 +245,7 @@ struct p2m_domain {
unsigned long gfn, l1_pgentry_t *p,
mfn_t table_mfn, l1_pgentry_t new,
unsigned int level);
+ long (*audit_p2m)(struct p2m_domain *p2m);

/* Default P2M access type for each page in the domain: new pages,
* swapped in pages, cleared pages, and pages that are ambiguously
@@ -559,13 +560,15 @@ int set_p2m_entry(struct p2m_domain *p2m
extern void p2m_pt_init(struct p2m_domain *p2m);

/* Debugging and auditing of the P2M code? */
-#define P2M_AUDIT 0
+#define P2M_AUDIT 1
#define P2M_DEBUGGING 0

#if P2M_AUDIT
-extern void audit_p2m(struct p2m_domain *p2m, int strict_m2p);
-#else
-# define audit_p2m(_p2m, _m2p) do { (void)(_p2m),(_m2p); } while (0)
+extern void audit_p2m(struct domain *d,
+ uint64_t *orphans_debug,
+ uint64_t *orphans_invalid,
+ uint64_t *m2p_bad,
+ uint64_t *p2m_bad);
#endif /* P2M_AUDIT */

/* Printouts */
diff -r b2097819f2d9 -r 6626add0fef6 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -800,6 +800,16 @@ struct xen_domctl_mem_sharing_op {
typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);

+struct xen_domctl_audit_p2m {
+ /* OUT error counts */
+ uint64_t orphans_debug;
+ uint64_t orphans_invalid;
+ uint64_t m2p_bad;
+ uint64_t p2m_bad;
+};
+typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t);
+
#if defined(__i386__) || defined(__x86_64__)
/* XEN_DOMCTL_setvcpuextstate */
/* XEN_DOMCTL_getvcpuextstate */
@@ -898,6 +908,7 @@ struct xen_domctl {
#define XEN_DOMCTL_setvcpuextstate 62
#define XEN_DOMCTL_getvcpuextstate 63
#define XEN_DOMCTL_set_access_required 64
+#define XEN_DOMCTL_audit_p2m 65
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -951,6 +962,7 @@ struct xen_domctl {
struct xen_domctl_vcpuextstate vcpuextstate;
#endif
struct xen_domctl_set_access_required access_required;
+ struct xen_domctl_audit_p2m audit_p2m;
struct xen_domctl_gdbsx_memio gdbsx_guest_memio;
struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu;
struct xen_domctl_gdbsx_domstatus gdbsx_domstatus;

Re: [PATCH 6 of 9] x86/mm: Rework stale p2m auditing
>>> On 29.11.11 at 21:21, Andres Lagar-Cavilla <andres@lagarcavilla.org> wrote:
> @@ -559,13 +560,15 @@ int set_p2m_entry(struct p2m_domain *p2m
> extern void p2m_pt_init(struct p2m_domain *p2m);
>
> /* Debugging and auditing of the P2M code? */
> -#define P2M_AUDIT 0
> +#define P2M_AUDIT 1
> #define P2M_DEBUGGING 0
>
> #if P2M_AUDIT

Was this change of the default really intended? It uncovered a problem
in 22356:cbb6b4b17024 (the code meanwhile moved elsewhere): In a
32-bit build,

if ( gfn == 0x55555555 || gfn == 0x5555555555555555 )

causes (at least with some compiler versions; I didn't check tonight's
staging tester results yet) a compiler warning (which in turn fails the
build). And a comparison like this is wrong on 64-bit too - see other
(sort of proper; "sort of" because this isn't really appropriate anyway,
especially not without at least using a manifest constant that would
force these to be in sync with the memset()-s that actually establish
these values) examples of using these magic values e.g. in
p2m_alloc_table() or guest_physmap_add_entry().

Jan
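
(Purely an illustrative sketch, not code from this thread or from the
tree: one way to express the debug-pattern check via a manifest constant
shared with the memset()-s that establish the poison value, so the
comparison is correct on both 32-bit and 64-bit builds and produces no
truncation warning. All names below are made up.)

#define M2P_POISON_BYTE  0x55UL
#define M2P_POISON_GFN   (M2P_POISON_BYTE * (~0UL / 0xff))

static inline bool_t gfn_is_m2p_poison(unsigned long gfn)
{
    /* Expands to 0x55555555 on 32-bit builds and 0x5555555555555555 on
     * 64-bit builds, with no constant truncation in either case. */
    return gfn == M2P_POISON_GFN;
}

(The audit loop's check would then read "if ( gfn_is_m2p_poison(gfn) )"
instead of comparing against two open-coded literals, and the code that
poisons the m2p would pass M2P_POISON_BYTE to its memset().)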

