Mailing List Archive

[xen-unstable] hvm: Handle hw_cr[] array a bit more sanely.
# HG changeset patch
# User kfraser@localhost.localdomain
# Date 1186590493 -3600
# Node ID 9ef1c3e6c48e3591aa9301db0fe0afc28dd8b5c9
# Parent 25e5c1b9faad01e03bb3a35b709d79c00697bf30
hvm: Handle hw_cr[] array a bit more sanely.
SVM for the most part does not need to use it at all, and this makes
the code clearer.
Signed-off-by: Keir Fraser <keir@xensource.com>
---
xen/arch/x86/hvm/hvm.c | 7 --
xen/arch/x86/hvm/svm/svm.c | 96 ++++++++++++++---------------------------
xen/arch/x86/hvm/svm/vmcb.c | 19 ++------
xen/arch/x86/hvm/vmx/vmcs.c | 82 +++++++++++++++++++++++------------
xen/arch/x86/hvm/vmx/vmx.c | 9 +++
xen/include/asm-x86/hvm/vcpu.h | 7 ++
6 files changed, 106 insertions(+), 114 deletions(-)

diff -r 25e5c1b9faad -r 9ef1c3e6c48e xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Wed Aug 08 16:09:17 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c Wed Aug 08 17:28:13 2007 +0100
@@ -596,9 +596,6 @@ int hvm_set_cr0(unsigned long value)
}

v->arch.hvm_vcpu.guest_cr[0] = value;
- v->arch.hvm_vcpu.hw_cr[0] = value;
- if ( !paging_mode_hap(v->domain) )
- v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_PG | X86_CR0_WP;
hvm_update_guest_cr(v, 0);

if ( (value ^ old_value) & X86_CR0_PG )
@@ -672,10 +669,6 @@ int hvm_set_cr4(unsigned long value)

old_cr = v->arch.hvm_vcpu.guest_cr[4];
v->arch.hvm_vcpu.guest_cr[4] = value;
- v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
- if ( paging_mode_hap(v->domain) )
- v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
- v->arch.hvm_vcpu.hw_cr[4] |= value;
hvm_update_guest_cr(v, 4);

/* Modifying CR4.{PSE,PAE,PGE} invalidates all TLB entries, inc. Global. */
diff -r 25e5c1b9faad -r 9ef1c3e6c48e xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Wed Aug 08 16:09:17 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c Wed Aug 08 17:28:13 2007 +0100
@@ -59,8 +59,9 @@ int inst_copy_from_guest(unsigned char *
int inst_len);
asmlinkage void do_IRQ(struct cpu_user_regs *);

-static int svm_reset_to_realmode(struct vcpu *v,
- struct cpu_user_regs *regs);
+static int svm_reset_to_realmode(
+ struct vcpu *v, struct cpu_user_regs *regs);
+static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);

/* va of hardware host save area */
static void *hsa[NR_CPUS] __read_mostly;
@@ -343,48 +344,26 @@ int svm_vmcb_restore(struct vcpu *v, str
vmcb->rsp = c->rsp;
vmcb->rflags = c->rflags;

- v->arch.hvm_vcpu.guest_cr[0] = c->cr0;
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] =
- c->cr0 | X86_CR0_WP | X86_CR0_ET | X86_CR0_PG;
+ v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
+ svm_update_guest_cr(v, 0);

v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
-
+ svm_update_guest_cr(v, 2);
+
+ v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
+ svm_update_guest_cr(v, 4);
+
#ifdef HVM_DEBUG_SUSPEND
printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
- __func__,
- c->cr3,
- c->cr0,
- c->cr4);
+ __func__, c->cr3, c->cr0, c->cr4);
#endif

- if ( !hvm_paging_enabled(v) )
- {
- printk("%s: paging not enabled.\n", __func__);
- goto skip_cr3;
- }
-
- if ( c->cr3 == v->arch.hvm_vcpu.guest_cr[3] )
- {
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if ( mfn != pagetable_get_pfn(v->arch.guest_table) )
- goto bad_cr3;
- }
- else
- {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
+ if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) )
+ {
HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
goto bad_cr3;
-
old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
v->arch.guest_table = pagetable_from_pfn(mfn);
if (old_base_mfn)
@@ -392,10 +371,6 @@ int svm_vmcb_restore(struct vcpu *v, str
v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
}

- skip_cr3:
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = c->cr4 | HVM_CR4_HOST_MASK;
- v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-
vmcb->idtr.limit = c->idtr_limit;
vmcb->idtr.base = c->idtr_base;

@@ -449,10 +424,6 @@ int svm_vmcb_restore(struct vcpu *v, str

if ( paging_mode_hap(v->domain) )
{
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0];
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
- vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3] = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
@@ -586,17 +557,22 @@ static void svm_update_guest_cr(struct v
switch ( cr )
{
case 0:
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0];
+ vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
+ if ( !paging_mode_hap(v->domain) )
+ vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
break;
case 2:
- vmcb->cr2 = v->arch.hvm_vcpu.hw_cr[2];
+ vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
break;
case 3:
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
svm_asid_inv_asid(v);
break;
case 4:
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4];
+ vmcb->cr4 = HVM_CR4_HOST_MASK;
+ if ( paging_mode_hap(v->domain) )
+ vmcb->cr4 &= ~X86_CR4_PAE;
+ vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
break;
default:
BUG();
@@ -724,7 +700,7 @@ static void svm_stts(struct vcpu *v)
if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
{
v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_TS;
+ vmcb->cr0 |= X86_CR0_TS;
}
}

@@ -1045,7 +1021,7 @@ static void svm_do_no_device_fault(struc
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);

if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS;
+ vmcb->cr0 &= ~X86_CR0_TS;
}

/* Reserved bits ECX: [31:14], [12:4], [2:1]*/
@@ -1774,7 +1750,7 @@ static void svm_cr_access(
/* TS being cleared means that it's time to restore fpu state. */
setup_fpu(current);
vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] &= ~X86_CR0_TS; /* clear TS */
+ vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
break;

@@ -2085,22 +2061,16 @@ static int svm_reset_to_realmode(struct

memset(regs, 0, sizeof(struct cpu_user_regs));

- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] =
- X86_CR0_ET | X86_CR0_PG | X86_CR0_WP;
v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_ET;
-
- vmcb->cr2 = 0;
+ svm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[2] = 0;
+ svm_update_guest_cr(v, 2);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ svm_update_guest_cr(v, 4);
+
vmcb->efer = EFER_SVME;
-
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = HVM_CR4_HOST_MASK;
- v->arch.hvm_vcpu.guest_cr[4] = 0;
-
- if ( paging_mode_hap(v->domain) )
- {
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0];
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
- }

/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
@@ -2231,7 +2201,7 @@ asmlinkage void svm_vmexit_handler(struc
unsigned long va;
va = vmcb->exitinfo2;
regs->error_code = vmcb->exitinfo1;
- HVM_DBG_LOG(DBG_LEVEL_VMMU,
+ HVM_DBG_LOG(DBG_LEVEL_VMMU,
"eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
(unsigned long)regs->eax, (unsigned long)regs->ebx,
(unsigned long)regs->ecx, (unsigned long)regs->edx,
diff -r 25e5c1b9faad -r 9ef1c3e6c48e xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Wed Aug 08 16:09:17 2007 +0100
+++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Aug 08 17:28:13 2007 +0100
@@ -216,28 +216,19 @@ static int construct_vmcb(struct vcpu *v
vmcb->tr.base = 0;
vmcb->tr.limit = 0xff;

- /* Guest CR0. */
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = read_cr0();
- v->arch.hvm_vcpu.guest_cr[0] =
- v->arch.hvm_vcpu.hw_cr[0] & ~(X86_CR0_PG | X86_CR0_TS);
-
- /* Guest CR4. */
- v->arch.hvm_vcpu.guest_cr[4] =
- read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] =
- v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_TS;
+ hvm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);

paging_update_paging_modes(v);
- vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];

if ( paging_mode_hap(v->domain) )
{
- vmcb->cr0 = v->arch.hvm_vcpu.hw_cr[0] = v->arch.hvm_vcpu.guest_cr[0];
vmcb->np_enable = 1; /* enable nested paging */
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
- vmcb->cr4 = v->arch.hvm_vcpu.hw_cr[4] = v->arch.hvm_vcpu.guest_cr[4] =
- HVM_CR4_HOST_MASK & ~X86_CR4_PAE;
vmcb->exception_intercepts = HVM_TRAP_MASK;

/* No point in intercepting CR3/4 reads, because the hardware
diff -r 25e5c1b9faad -r 9ef1c3e6c48e xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Aug 08 16:09:17 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Aug 08 17:28:13 2007 +0100
@@ -315,34 +315,69 @@ void vmx_cpu_down(void)
local_irq_restore(flags);
}

+struct foreign_vmcs {
+ struct vcpu *v;
+ unsigned int count;
+};
+static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
+
void vmx_vmcs_enter(struct vcpu *v)
{
+ struct foreign_vmcs *fv;
+
/*
* NB. We must *always* run an HVM VCPU on its own VMCS, except for
* vmx_vmcs_enter/exit critical regions.
*/
- if ( v == current )
+ if ( likely(v == current) )
return;

- vcpu_pause(v);
- spin_lock(&v->arch.hvm_vmx.vmcs_lock);
-
- vmx_clear_vmcs(v);
- vmx_load_vmcs(v);
+ fv = &this_cpu(foreign_vmcs);
+
+ if ( fv->v == v )
+ {
+ BUG_ON(fv->count == 0);
+ }
+ else
+ {
+ BUG_ON(fv->v != NULL);
+ BUG_ON(fv->count != 0);
+
+ vcpu_pause(v);
+ spin_lock(&v->arch.hvm_vmx.vmcs_lock);
+
+ vmx_clear_vmcs(v);
+ vmx_load_vmcs(v);
+
+ fv->v = v;
+ }
+
+ fv->count++;
}

void vmx_vmcs_exit(struct vcpu *v)
{
- if ( v == current )
+ struct foreign_vmcs *fv;
+
+ if ( likely(v == current) )
return;

- /* Don't confuse vmx_do_resume (for @v or @current!) */
- vmx_clear_vmcs(v);
- if ( is_hvm_vcpu(current) )
- vmx_load_vmcs(current);
-
- spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
- vcpu_unpause(v);
+ fv = &this_cpu(foreign_vmcs);
+ BUG_ON(fv->v != v);
+ BUG_ON(fv->count == 0);
+
+ if ( --fv->count == 0 )
+ {
+ /* Don't confuse vmx_do_resume (for @v or @current!) */
+ vmx_clear_vmcs(v);
+ if ( is_hvm_vcpu(current) )
+ vmx_load_vmcs(current);
+
+ spin_unlock(&v->arch.hvm_vmx.vmcs_lock);
+ vcpu_unpause(v);
+
+ fv->v = NULL;
+ }
}

struct xgt_desc {
@@ -380,7 +415,6 @@ static void vmx_set_host_env(struct vcpu

static void construct_vmcs(struct vcpu *v)
{
- unsigned long cr0, cr4;
union vmcs_arbytes arbytes;

vmx_vmcs_enter(v);
@@ -504,19 +538,11 @@ static void construct_vmcs(struct vcpu *

__vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));

- /* Guest CR0. */
- cr0 = read_cr0();
- v->arch.hvm_vcpu.hw_cr[0] = cr0;
- __vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
- v->arch.hvm_vcpu.guest_cr[0] = cr0 & ~(X86_CR0_PG | X86_CR0_TS);
- __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
-
- /* Guest CR4. */
- cr4 = read_cr4();
- __vmwrite(GUEST_CR4, cr4 & ~X86_CR4_PSE);
- v->arch.hvm_vcpu.guest_cr[4] =
- cr4 & ~(X86_CR4_PGE | X86_CR4_VMXE | X86_CR4_PAE);
- __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
+ v->arch.hvm_vcpu.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
+ hvm_update_guest_cr(v, 0);
+
+ v->arch.hvm_vcpu.guest_cr[4] = 0;
+ hvm_update_guest_cr(v, 4);

if ( cpu_has_vmx_tpr_shadow )
{
diff -r 25e5c1b9faad -r 9ef1c3e6c48e xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Wed Aug 08 16:09:17 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Aug 08 17:28:13 2007 +0100
@@ -963,6 +963,9 @@ static void vmx_get_segment_register(str
}

reg->attr.bytes = (attr & 0xff) | ((attr >> 4) & 0xf00);
+ /* Unusable flag is folded into Present flag. */
+ if ( attr & (1u<<16) )
+ reg->attr.fields.p = 0;
}

/* Make sure that xen intercepts any FP accesses from current */
@@ -1062,7 +1065,9 @@ static void vmx_update_guest_cr(struct v
switch ( cr )
{
case 0:
- v->arch.hvm_vcpu.hw_cr[0] |= X86_CR0_PE | X86_CR0_NE;
+ v->arch.hvm_vcpu.hw_cr[0] =
+ v->arch.hvm_vcpu.guest_cr[0] |
+ X86_CR0_PE | X86_CR0_NE | X86_CR0_PG | X86_CR0_WP;
__vmwrite(GUEST_CR0, v->arch.hvm_vcpu.hw_cr[0]);
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[0]);
break;
@@ -1073,6 +1078,8 @@ static void vmx_update_guest_cr(struct v
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
break;
case 4:
+ v->arch.hvm_vcpu.hw_cr[4] =
+ v->arch.hvm_vcpu.guest_cr[4] | HVM_CR4_HOST_MASK;
__vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vcpu.guest_cr[4]);
break;
diff -r 25e5c1b9faad -r 9ef1c3e6c48e xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h Wed Aug 08 16:09:17 2007 +0100
+++ b/xen/include/asm-x86/hvm/vcpu.h Wed Aug 08 17:28:13 2007 +0100
@@ -33,7 +33,12 @@ struct hvm_vcpu {
unsigned long guest_cr[5];
unsigned long guest_efer;

- /* Processor-visible control-register values, while guest executes. */
+ /*
+ * Processor-visible control-register values, while guest executes.
+ * CR0, CR4: Used as a cache of VMCS contents by VMX only.
+ * CR1, CR2: Never used (guest_cr[2] is always processor-visible CR2).
+ * CR3: Always used and kept up to date by paging subsystem.
+ */
unsigned long hw_cr[5];

struct hvm_io_op io_op;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog