[xen-unstable] [XEN] Shadow: emulate a few extra instructions on PAE pagetable writes
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xensource.com>
# Date 1180704731 -3600
# Node ID 13eca4bf2c69aff4c9b689d0dff45929e44e4edb
# Parent b182bd560e47ba3497211603a840ffb3afa13dc8
[XEN] Shadow: emulate a few extra instructions on PAE pagetable writes
in the hope of catching the "other half" write without another enter/exit.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
---
xen/arch/x86/mm/shadow/common.c  |  32 ++++++++++++++
xen/arch/x86/mm/shadow/multi.c   |  86 ++++++++++++++++++++++++++++-----------
xen/arch/x86/mm/shadow/private.h |   7 ++-
xen/include/asm-x86/domain.h     |   2
xen/include/asm-x86/perfc_defn.h |   3 +
5 files changed, 103 insertions(+), 27 deletions(-)
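
[Note: for context on why one PAE pagetable update can arrive as two separate
faults: a PAE entry is 64 bits wide, but a 32-bit guest usually updates it with
two 32-bit stores (or a cmpxchg8b).  The first store faults into the shadow
code; this patch keeps emulating for a few more instructions so the second
store is handled without another enter/exit.  The snippet below is only a
hypothetical guest-side illustration, not code from the patch; the function
name and types are invented, and a little-endian split of the 64-bit entry is
assumed.]

#include <stdint.h>

typedef uint64_t pae_pte_t;

/* Hypothetical sketch: a 32-bit PAE guest writing one 64-bit pagetable
 * entry as two 32-bit stores.  Each store can fault separately into the
 * shadow emulator; the loop added by this patch tries to catch the
 * second ("other half") write while still in the hypervisor. */
static void guest_set_pae_pte(volatile pae_pte_t *ptep, pae_pte_t new)
{
    volatile uint32_t *half = (volatile uint32_t *)ptep;

    half[1] = (uint32_t)(new >> 32);   /* high word first ...              */
    half[0] = (uint32_t)new;           /* ... then the low word, which
                                        * carries the present bit          */
}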

diff -r b182bd560e47 -r 13eca4bf2c69 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c Thu May 31 16:09:11 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c Fri Jun 01 14:32:11 2007 +0100
@@ -248,7 +248,7 @@ hvm_emulate_insn_fetch(enum x86_segment
{
struct sh_emulate_ctxt *sh_ctxt =
container_of(ctxt, struct sh_emulate_ctxt, ctxt);
- unsigned int insn_off = offset - ctxt->regs->eip;
+ unsigned int insn_off = offset - sh_ctxt->insn_buf_eip;

/* Fall back if requested bytes are not in the prefetch cache. */
if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
@@ -450,6 +450,7 @@ struct x86_emulate_ops *shadow_init_emul
}

/* Attempt to prefetch whole instruction. */
+ sh_ctxt->insn_buf_eip = regs->eip;
sh_ctxt->insn_buf_bytes =
(!hvm_translate_linear_addr(
x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
@@ -459,6 +460,35 @@ struct x86_emulate_ops *shadow_init_emul
? sizeof(sh_ctxt->insn_buf) : 0;

return &hvm_shadow_emulator_ops;
+}
+
+/* Update an initialized emulation context to prepare for the next
+ * instruction */
+void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt,
+ struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+ unsigned long addr, diff;
+
+ /* We don't refetch the segment bases, because we don't emulate
+ * writes to segment registers */
+
+ if ( is_hvm_vcpu(v) )
+ {
+ diff = regs->eip - sh_ctxt->insn_buf_eip;
+ if ( diff > sh_ctxt->insn_buf_bytes )
+ {
+ /* Prefetch more bytes. */
+ sh_ctxt->insn_buf_bytes =
+ (!hvm_translate_linear_addr(
+ x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
+ hvm_access_insn_fetch, sh_ctxt, &addr) &&
+ !hvm_copy_from_guest_virt(
+ sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
+ ? sizeof(sh_ctxt->insn_buf) : 0;
+ sh_ctxt->insn_buf_eip = regs->eip;
+ }
+ }
}

/**************************************************************************/
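
[Note: the hunks above also change the fetch path: instruction bytes are now
looked up relative to insn_buf_eip, the EIP at which the buffer was last
filled, rather than the current regs->eip, which is what lets one prefetched
buffer serve several consecutive emulated instructions.  Below is a
standalone sketch of that caching scheme; apart from the 31-byte size and the
eip field it mirrors, the struct and function names are invented for
illustration.]

#include <stdint.h>
#include <string.h>

#define INSN_BUF_SIZE 31          /* matches the enlarged insn_buf below */

struct insn_cache {
    uint8_t       buf[INSN_BUF_SIZE];
    unsigned int  bytes;          /* number of valid bytes in buf        */
    unsigned long eip;            /* guest EIP the buffer starts at      */
};

/* Serve an instruction fetch from the cache; return -1 to tell the
 * caller to fall back to copying from guest memory (and, as in
 * shadow_continue_emulation() above, to refill buf and reset eip). */
static int cached_insn_fetch(const struct insn_cache *c,
                             unsigned long offset,
                             void *dst, unsigned int bytes)
{
    unsigned long insn_off = offset - c->eip;

    if ( insn_off + bytes > c->bytes )
        return -1;

    memcpy(dst, c->buf + insn_off, bytes);
    return 0;
}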
diff -r b182bd560e47 -r 13eca4bf2c69 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Thu May 31 16:09:11 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Fri Jun 01 14:32:11 2007 +0100
@@ -2871,6 +2871,20 @@ static int sh_page_fault(struct vcpu *v,
if ( !shadow_mode_refcounts(d) || !guest_mode(regs) )
goto not_a_shadow_fault;

+ /*
+ * We do not emulate user writes. Instead we use them as a hint that the
+ * page is no longer a page table. This behaviour differs from native, but
+ * it seems very unlikely that any OS grants user access to page tables.
+ */
+ if ( (regs->error_code & PFEC_user_mode) )
+ {
+ SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n",
+ mfn_x(gmfn));
+ perfc_incr(shadow_fault_emulate_failed);
+ sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
+ goto done;
+ }
+
if ( is_hvm_domain(d) )
{
/*
@@ -2897,14 +2911,7 @@ static int sh_page_fault(struct vcpu *v,

emul_ops = shadow_init_emulation(&emul_ctxt, regs);

- /*
- * We do not emulate user writes. Instead we use them as a hint that the
- * page is no longer a page table. This behaviour differs from native, but
- * it seems very unlikely that any OS grants user access to page tables.
- */
- r = X86EMUL_UNHANDLEABLE;
- if ( !(regs->error_code & PFEC_user_mode) )
- r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
+ r = x86_emulate(&emul_ctxt.ctxt, emul_ops);

/*
* NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it
@@ -2921,6 +2928,35 @@ static int sh_page_fault(struct vcpu *v,
* though, this is a hint that this page should not be shadowed. */
sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
}
+
+#if GUEST_PAGING_LEVELS == 3 /* PAE guest */
+ if ( r == X86EMUL_OKAY ) {
+ int i;
+ /* Emulate up to four extra instructions in the hope of catching
+ * the "second half" of a 64-bit pagetable write. */
+ for ( i = 0 ; i < 4 ; i++ )
+ {
+ shadow_continue_emulation(&emul_ctxt, regs);
+ v->arch.paging.last_write_was_pt = 0;
+ r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
+ if ( r == X86EMUL_OKAY )
+ {
+ if ( v->arch.paging.last_write_was_pt )
+ {
+ perfc_incr(shadow_em_ex_pt);
+ break; /* Don't emulate past the other half of the write */
+ }
+ else
+ perfc_incr(shadow_em_ex_non_pt);
+ }
+ else
+ {
+ perfc_incr(shadow_em_ex_fail);
+ break; /* Don't emulate again if we failed! */
+ }
+ }
+ }
+#endif /* PAE guest */

/* Emulator has changed the user registers: write back */
if ( is_hvm_domain(d) )
@@ -3878,6 +3914,11 @@ static inline void * emulate_map_dest(st
gfn_t gfn;
mfn_t mfn;

+ /* We don't emulate user-mode writes to page tables */
+ if ( ring_3(sh_ctxt->ctxt.regs) )
+ return NULL;
+
+ /* Walk the guest pagetables */
guest_walk_tables(v, vaddr, &gw, 1);
flags = accumulate_guest_flags(v, &gw);
gfn = guest_l1e_get_gfn(gw.eff_l1e);
@@ -3885,27 +3926,24 @@ static inline void * emulate_map_dest(st
sh_audit_gw(v, &gw);
unmap_walk(v, &gw);

- if ( !(flags & _PAGE_PRESENT) )
- {
- errcode = 0;
+ errcode = PFEC_write_access;
+ if ( !(flags & _PAGE_PRESENT) )
goto page_fault;
- }
-
- if ( !(flags & _PAGE_RW) ||
- (!(flags & _PAGE_USER) && ring_3(sh_ctxt->ctxt.regs)) )
- {
- errcode = PFEC_page_present;
+
+ errcode |= PFEC_page_present;
+ if ( !(flags & _PAGE_RW) )
goto page_fault;
- }
-
- if ( !mfn_valid(mfn) )
+
+ if ( mfn_valid(mfn) )
+ {
+ *mfnp = mfn;
+ v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
+ return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
+ }
+ else
return NULL;

- *mfnp = mfn;
- return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
-
page_fault:
- errcode |= PFEC_write_access;
if ( is_hvm_vcpu(v) )
hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
else
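
[Note: for reference, the errcode bits assembled in the emulate_map_dest()
hunk above are the architecture-defined x86 page-fault error code bits; the
macro values below match their hardware definitions.  The helper function is
only an illustration of the refactored logic, not code from the patch.]

/* x86 page-fault error code bits (same values as Xen's PFEC_* macros). */
#define PFEC_page_present  0x01   /* bit 0: faulting entry was present   */
#define PFEC_write_access  0x02   /* bit 1: fault was caused by a write  */
#define PFEC_user_mode     0x04   /* bit 2: fault occurred in ring 3     */

#define _PAGE_PRESENT      0x001  /* PTE present bit                     */

/* Illustrative only: the error code injected when an emulated write
 * cannot be completed, as restructured in the hunk above.              */
static unsigned int emulated_write_errcode(unsigned int pte_flags)
{
    unsigned int errcode = PFEC_write_access;      /* always a write     */

    if ( pte_flags & _PAGE_PRESENT )
        errcode |= PFEC_page_present;              /* present but e.g.
                                                    * read-only          */
    return errcode;
}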
diff -r b182bd560e47 -r 13eca4bf2c69 xen/arch/x86/mm/shadow/private.h
--- a/xen/arch/x86/mm/shadow/private.h Thu May 31 16:09:11 2007 +0100
+++ b/xen/arch/x86/mm/shadow/private.h Fri Jun 01 14:32:11 2007 +0100
@@ -634,9 +634,10 @@ struct sh_emulate_ctxt {
struct sh_emulate_ctxt {
struct x86_emulate_ctxt ctxt;

- /* [HVM] Cache of up to 15 bytes of instruction. */
- uint8_t insn_buf[15];
+ /* [HVM] Cache of up to 31 bytes of instruction. */
+ uint8_t insn_buf[31];
uint8_t insn_buf_bytes;
+ unsigned long insn_buf_eip;

/* [HVM] Cache of segment registers already gathered for this emulation. */
unsigned int valid_seg_regs;
@@ -644,6 +645,8 @@ struct sh_emulate_ctxt {
};

struct x86_emulate_ops *shadow_init_emulation(
+ struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
+void shadow_continue_emulation(
struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);

#endif /* _XEN_SHADOW_PRIVATE_H */
diff -r b182bd560e47 -r 13eca4bf2c69 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu May 31 16:09:11 2007 +0100
+++ b/xen/include/asm-x86/domain.h Fri Jun 01 14:32:11 2007 +0100
@@ -171,6 +171,8 @@ struct paging_vcpu {
struct paging_mode *mode;
/* HVM guest: paging enabled (CR0.PG)? */
unsigned int translate_enabled:1;
+ /* HVM guest: last emulate was to a pagetable */
+ unsigned int last_write_was_pt:1;

/* paging support extension */
struct shadow_vcpu shadow;
diff -r b182bd560e47 -r 13eca4bf2c69 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h Thu May 31 16:09:11 2007 +0100
+++ b/xen/include/asm-x86/perfc_defn.h Fri Jun 01 14:32:11 2007 +0100
@@ -90,5 +90,8 @@ PERFCOUNTER(shadow_invlpg, "shad
PERFCOUNTER(shadow_invlpg, "shadow emulates invlpg")
PERFCOUNTER(shadow_invlpg_fault, "shadow invlpg faults")

+PERFCOUNTER(shadow_em_ex_pt, "shadow extra pt write")
+PERFCOUNTER(shadow_em_ex_non_pt, "shadow extra non-pt-write op")
+PERFCOUNTER(shadow_em_ex_fail, "shadow extra emulation failed")

/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
