SVM patch to add 64bit hv support.
# HG changeset patch
# User kaf24@firebug.cl.cam.ac.uk
# Node ID a376bab39768941c46cb57f3a3ce2dca2edb4173
# Parent a66763eb86fee0a215712245be4f49e871d74ac4
SVM patch to add 64bit hv support.
This patch only modifies svm files.
Tested with c/s 9015 with a 32bit hv using UP Dom0, with unmodified UP
Linux and WinXP SP1 guests.
Tested with c/s 9015 with a 64bit hv using UP Dom0, with 32bit and 64bit
UP Linux and 32bit WinXP SP1 guests.

Signed-off-by: Tom Woller <thomas.woller@amd.com>
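
Note on the decode changes below: decode_dest_reg()/decode_src_reg() now take
the REX prefix byte (0 if none) so that r8-r15 can be selected by 64bit
guests. The DECODE_MODRM_REG/DECODE_MODRM_RM macros the patch switches to are
defined elsewhere in the tree; the following is only a minimal sketch of the
standard x86-64 ModRM/REX rule they are expected to implement, for register
operands (helper names here are illustrative, not from the tree):

#include <stdint.h>
typedef uint8_t u8;   /* stands in for the Xen typedef */

static inline int is_rex_prefix(u8 b)
{
    return (b & 0xF0) == 0x40;                /* REX prefixes are 0x40-0x4F */
}

static inline unsigned int modrm_reg_field(u8 rex, u8 modrm)
{
    unsigned int reg = (modrm >> 3) & 7;      /* ModRM.reg, bits 5:3 */
    if (is_rex_prefix(rex) && (rex & 0x04))   /* REX.R extends reg to r8-r15 */
        reg += 8;
    return reg;
}

static inline unsigned int modrm_rm_field(u8 rex, u8 modrm)
{
    unsigned int rm = modrm & 7;              /* ModRM.rm, bits 2:0 */
    if (is_rex_prefix(rex) && (rex & 0x01))   /* REX.B extends rm to r8-r15 */
        rm += 8;
    return rm;
}

A REX prefix, when present, must be the last prefix byte before the opcode,
which is why the skip_prefix_bytes() callers below only look at
buffer[index-1] to find it.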

diff -r a66763eb86fe -r a376bab39768 xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c Tue Feb 28 18:00:15 2006
+++ b/xen/arch/x86/hvm/svm/emulate.c Tue Feb 28 21:57:38 2006
@@ -86,7 +86,7 @@
case 0x7:
value = regs->edi;
break;
-#if X86_64
+#if __x86_64__
case 0x8:
value = regs->r8;
break;
@@ -318,20 +318,14 @@


/* Get the register/mode number of src register in ModRM register. */
-unsigned int decode_dest_reg(u8 m)
-{
-#if __x86_64__
- ASSERT(0); /* Need to adjust for REX prefix if applicable */
-#endif
- return (m >> 3) & 7;
-}
-
-unsigned int decode_src_reg(u8 m)
-{
-#if __x86_64__
- ASSERT(0); /* Need to adjust for REX prefix if applicable */
-#endif
- return m & 7;
+unsigned int decode_dest_reg(u8 prefix, u8 m)
+{
+ return DECODE_MODRM_REG(prefix, m);
+}
+
+unsigned int decode_src_reg(u8 prefix, u8 m)
+{
+ return DECODE_MODRM_RM(prefix, m);
}


@@ -431,7 +425,7 @@
* The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
* to enough bytes to satisfy the instruction including prefix bytes.
*/
-unsigned int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
enum instruction_index *list, unsigned int list_count,
u8 *guest_eip_buf, enum instruction_index *match)
{
diff -r a66763eb86fe -r a376bab39768 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Tue Feb 28 18:00:15 2006
+++ b/xen/arch/x86/hvm/svm/svm.c Tue Feb 28 21:57:38 2006
@@ -164,7 +164,7 @@
}

static inline void svm_inject_exception(struct vmcb_struct *vmcb,
- int trap, int error_code)
+ int trap, int ev, int error_code)
{
eventinj_t event;

@@ -172,7 +172,7 @@
event.fields.v = 1;
event.fields.type = EVENTTYPE_EXCEPTION;
event.fields.vector = trap;
- event.fields.ev = 1;
+ event.fields.ev = ev;
event.fields.errorcode = error_code;

ASSERT(vmcb->eventinj.fields.v == 0);
@@ -237,61 +237,16 @@
}

#ifdef __x86_64__
-static struct svm_msr_state percpu_msr[NR_CPUS];
-
-static u32 msr_data_index[VMX_MSR_COUNT] =
-{
- MSR_LSTAR, MSR_STAR, MSR_CSTAR,
- MSR_SYSCALL_MASK, MSR_EFER,
-};

void svm_save_segments(struct vcpu *v)
{
- rdmsrl(MSR_SHADOW_GS_BASE, v->arch.hvm_svm.msr_content.shadow_gs);
-}
-
-/*
- * To avoid MSR save/restore at every VM exit/entry time, we restore
- * the x86_64 specific MSRs at domain switch time. Since those MSRs are
- * are not modified once set for generic domains, we don't save them,
- * but simply reset them to the values set at percpu_traps_init().
- */
+}
void svm_load_msrs(void)
{
- struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
- int i;
-
- while ( host_state->flags )
- {
- i = find_first_set_bit(host_state->flags);
- wrmsrl(msr_data_index[i], host_state->msr_items[i]);
- clear_bit(i, &host_state->flags);
- }
-}
-
-static void svm_save_init_msrs(void)
-{
- struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
- int i;
-
- for ( i = 0; i < SVM_MSR_COUNT; i++ )
- rdmsrl(msr_data_index[i], host_state->msr_items[i]);
-}
-
-#define CASE_READ_MSR(address) \
- case MSR_ ## address: \
- msr_content = msr->msr_items[SVM_INDEX_MSR_ ## address]; \
- break
-
-#define CASE_WRITE_MSR(address) \
- case MSR_ ## address: \
- msr->msr_items[SVM_INDEX_MSR_ ## address] = msr_content; \
- if (!test_bit(SVM_INDEX_MSR_ ## address, &msr->flags)) \
- { \
- set_bit(SVM_INDEX_MSR_ ## address, &msr->flags); \
- } \
- break
-
+}
+void svm_restore_msrs(struct vcpu *v)
+{
+}

#define IS_CANO_ADDRESS(add) 1

@@ -299,47 +254,45 @@
{
u64 msr_content = 0;
struct vcpu *vc = current;
- struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
+ // struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;

switch (regs->ecx)
{
case MSR_EFER:
- msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
- HVM_DBG_LOG(DBG_LEVEL_2, "EFER msr_content %llx\n",
- (unsigned long long)msr_content);
-
- if (test_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state))
- msr_content |= 1 << _EFER_LME;
-
- if (SVM_LONG_GUEST(vc))
- msr_content |= 1 << _EFER_LMA;
-
+ // msr_content = msr->msr_items[SVM_INDEX_MSR_EFER];
+ msr_content = vmcb->efer;
+ msr_content &= ~EFER_SVME;
break;

case MSR_FS_BASE:
- if (!(SVM_LONG_GUEST(vc)))
- /* XXX should it be GP fault */
- domain_crash_synchronous();
-
msr_content = vmcb->fs.base;
break;

case MSR_GS_BASE:
- if (!(SVM_LONG_GUEST(vc)))
- domain_crash_synchronous();
-
msr_content = vmcb->gs.base;
break;

case MSR_SHADOW_GS_BASE:
- msr_content = msr->shadow_gs;
- break;
-
- CASE_READ_MSR(STAR);
- CASE_READ_MSR(LSTAR);
- CASE_READ_MSR(CSTAR);
- CASE_READ_MSR(SYSCALL_MASK);
+ msr_content = vmcb->kerngsbase;
+ break;
+
+ case MSR_STAR:
+ msr_content = vmcb->star;
+ break;
+
+ case MSR_LSTAR:
+ msr_content = vmcb->lstar;
+ break;
+
+ case MSR_CSTAR:
+ msr_content = vmcb->cstar;
+ break;
+
+ case MSR_SYSCALL_MASK:
+ msr_content = vmcb->sfmask;
+ break;
+
default:
return 0;
}
@@ -356,8 +309,6 @@
{
u64 msr_content = regs->eax | ((u64)regs->edx << 32);
struct vcpu *vc = current;
- struct svm_msr_state *msr = &vc->arch.hvm_svm.msr_content;
- struct svm_msr_state *host_state = &percpu_msr[smp_processor_id()];
struct vmcb_struct *vmcb = vc->arch.hvm_svm.vmcb;

HVM_DBG_LOG(DBG_LEVEL_1, "mode_do_msr_write msr %lx msr_content %lx\n",
@@ -373,26 +324,20 @@
|| !test_bit(SVM_CPU_STATE_PAE_ENABLED,
&vc->arch.hvm_svm.cpu_state))
{
- svm_inject_exception(vmcb, TRAP_gp_fault, 0);
+ svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
}
}

if (msr_content & EFER_LME)
set_bit(SVM_CPU_STATE_LME_ENABLED, &vc->arch.hvm_svm.cpu_state);

+ /* We have already recorded that we want LME, so it will be set
+ * next time CR0 gets updated. So we clear that bit and continue.
+ */
+ if ((msr_content ^ vmcb->efer) & EFER_LME)
+ msr_content &= ~EFER_LME;
/* No update for LME/LMA since it have no effect */
- msr->msr_items[SVM_INDEX_MSR_EFER] = msr_content;
- if (msr_content & ~(EFER_LME | EFER_LMA))
- {
- msr->msr_items[SVM_INDEX_MSR_EFER] = msr_content;
- if (!test_bit(SVM_INDEX_MSR_EFER, &msr->flags))
- {
- rdmsrl(MSR_EFER, host_state->msr_items[SVM_INDEX_MSR_EFER]);
- set_bit(SVM_INDEX_MSR_EFER, &host_state->flags);
- set_bit(SVM_INDEX_MSR_EFER, &msr->flags);
- wrmsrl(MSR_EFER, msr_content);
- }
- }
+ vmcb->efer = msr_content | EFER_SVME;
break;

case MSR_FS_BASE:
@@ -403,63 +348,42 @@
if (!IS_CANO_ADDRESS(msr_content))
{
HVM_DBG_LOG(DBG_LEVEL_1, "Not cano address of msr write\n");
- svm_inject_exception(vmcb, TRAP_gp_fault, 0);
+ svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
}

if (regs->ecx == MSR_FS_BASE)
- vmcb->fs.base = msr_content;
+ vmcb->fs.base = msr_content;
else
- vmcb->gs.base = msr_content;
+ vmcb->gs.base = msr_content;
break;

case MSR_SHADOW_GS_BASE:
- if (!(SVM_LONG_GUEST(vc)))
- domain_crash_synchronous();
-
- vc->arch.hvm_svm.msr_content.shadow_gs = msr_content;
- wrmsrl(MSR_SHADOW_GS_BASE, msr_content);
- break;
-
- CASE_WRITE_MSR(STAR);
- CASE_WRITE_MSR(LSTAR);
- CASE_WRITE_MSR(CSTAR);
- CASE_WRITE_MSR(SYSCALL_MASK);
+ vmcb->kerngsbase = msr_content;
+ break;
+
+ case MSR_STAR:
+ vmcb->star = msr_content;
+ break;
+
+ case MSR_LSTAR:
+ vmcb->lstar = msr_content;
+ break;
+
+ case MSR_CSTAR:
+ vmcb->cstar = msr_content;
+ break;
+
+ case MSR_SYSCALL_MASK:
+ vmcb->sfmask = msr_content;
+ break;
+
default:
return 0;
}
return 1;
}

-void
-svm_restore_msrs(struct vcpu *v)
-{
- int i = 0;
- struct svm_msr_state *guest_state;
- struct svm_msr_state *host_state;
- unsigned long guest_flags;
-
- guest_state = &v->arch.hvm_svm.msr_content;;
- host_state = &percpu_msr[smp_processor_id()];
-
- wrmsrl(MSR_SHADOW_GS_BASE, guest_state->shadow_gs);
- guest_flags = guest_state->flags;
- if (!guest_flags)
- return;
-
- while (guest_flags){
- i = find_first_set_bit(guest_flags);
-
- HVM_DBG_LOG(DBG_LEVEL_2,
- "restore guest's index %d msr %lx with %lx\n",
- i, (unsigned long) msr_data_index[i], (unsigned long) guest_state->msr_items[i]);
- set_bit(i, &host_state->flags);
- wrmsrl(msr_data_index[i], guest_state->msr_items[i]);
- clear_bit(i, &guest_flags);
- }
-}
#else
-#define svm_save_init_msrs() ((void)0)
-
static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
{
return 0;
@@ -497,9 +421,28 @@
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
unsigned long cr0 = vmcb->cr0, eflags = vmcb->rflags, mode;
-
- mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
+ /* check which operating mode the guest is running */
+ if( vmcb->efer & EFER_LMA )
+ mode = vmcb->cs.attributes.fields.l ? 8 : 4;
+ else
+ mode = (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE) ? 2 : 4;
return svm_instrlen(guest_cpu_user_regs(), mode);
+}
+
+unsigned long svm_get_ctrl_reg(struct vcpu *v, unsigned int num)
+{
+ switch ( num )
+ {
+ case 0:
+ return v->arch.hvm_svm.cpu_shadow_cr0;
+ case 2:
+ return v->arch.hvm_svm.cpu_cr2;
+ case 3:
+ return v->arch.hvm_svm.cpu_cr3;
+ default:
+ BUG();
+ }
+ return 0; /* dummy */
}

int start_svm(void)
@@ -519,8 +462,6 @@
asidpool_init(smp_processor_id());
printk("AMD SVM Extension is enabled for cpu %d.\n", smp_processor_id());

- svm_save_init_msrs();
-
/* Setup HVM interfaces */
hvm_funcs.disable = stop_svm;

@@ -542,6 +483,7 @@
hvm_funcs.realmode = svm_realmode;
hvm_funcs.paging_enabled = svm_paging_enabled;
hvm_funcs.instruction_length = svm_instruction_length;
+ hvm_funcs.get_guest_ctrl_reg = svm_get_ctrl_reg;

hvm_enabled = 1;

@@ -631,8 +573,17 @@
}

#if defined (__x86_64__)
-void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *c )
-{
+void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v )
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ regs->rip = vmcb->rip;
+ regs->rsp = vmcb->rsp;
+ regs->rflags = vmcb->rflags;
+ regs->cs = vmcb->cs.sel;
+ regs->ds = vmcb->ds.sel;
+ regs->es = vmcb->es.sel;
+ regs->ss = vmcb->ss.sel;
}
#elif defined (__i386__)
void svm_store_cpu_user_regs(struct cpu_user_regs *regs, struct vcpu *v)
@@ -882,9 +833,9 @@
/* No support for APIC */
if (!hvm_apic_support(v->domain) && gpa >= 0xFEC00000)
{
- unsigned long inst_len;
- inst_len = svm_instruction_length(v);
- if (inst_len == (unsigned long)-1)
+ int inst_len;
+ inst_len = svm_instruction_length(v);
+ if (inst_len == -1)
{
printf("%s: INST_LEN - Unable to decode properly.\n", __func__);
domain_crash_synchronous();
@@ -936,6 +887,14 @@

eip = vmcb->rip;
error_code = vmcb->exitinfo1;
+
+ if (vmcb->idtr.limit == 0) {
+ printf("Huh? We got a GP Fault with an invalid IDTR!\n");
+ svm_dump_vmcb(__func__, vmcb);
+ svm_dump_regs(__func__, regs);
+ svm_dump_inst(vmcb->rip);
+ __hvm_bug(regs);
+ }

HVM_DBG_LOG(DBG_LEVEL_1,
"svm_general_protection_fault: eip = %lx, erro_code = %lx",
@@ -949,7 +908,7 @@


/* Reflect it back into the guest */
- svm_inject_exception(vmcb, TRAP_gp_fault, error_code);
+ svm_inject_exception(vmcb, TRAP_gp_fault, 1, error_code);
}

/* Reserved bits: [31:14], [12:1] */
@@ -961,7 +920,7 @@
unsigned int eax, ebx, ecx, edx;
unsigned long eip;
struct vcpu *v = current;
- unsigned int inst_len;
+ int inst_len;

ASSERT(vmcb);

@@ -978,8 +937,10 @@

if (input == 1)
{
+#ifndef __x86_64__
if ( hvm_apic_support(v->domain) &&
!vlapic_global_enabled((VLAPIC(v))) )
+#endif
clear_bit(X86_FEATURE_APIC, &edx);

#if CONFIG_PAGING_LEVELS < 3
@@ -1019,6 +980,7 @@
eip, input, eax, ebx, ecx, edx);

inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
+ ASSERT(inst_len > 0);
__update_guest_eip(vmcb, inst_len);
}

@@ -1111,9 +1073,11 @@
unsigned long *reg_p = 0;
unsigned int gpreg = 0;
unsigned long eip;
- unsigned int inst_len;
+ int inst_len;
+ int index;
struct vmcb_struct *vmcb;
u8 buffer[MAX_INST_LEN];
+ u8 prefix = 0;

vmcb = v->arch.hvm_svm.vmcb;

@@ -1121,13 +1085,15 @@

eip = vmcb->rip;
inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
-
- ASSERT(buffer[0] == 0x0f && (buffer[1] & 0xFD) == 0x21);
-
- gpreg = decode_src_reg(buffer[2]);
-#if DEBUG
- ASSERT(reg == decode_dest_reg(buffer[2]));
-#endif
+ index = skip_prefix_bytes(buffer, sizeof(buffer));
+
+ ASSERT(buffer[index+0] == 0x0f && (buffer[index+1] & 0xFD) == 0x21);
+
+ if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
+ prefix = buffer[index-1];
+
+ gpreg = decode_src_reg(prefix, buffer[index + 2]);
+ ASSERT(reg == decode_dest_reg(prefix, buffer[index + 2]));

HVM_DBG_LOG(DBG_LEVEL_1, "svm_dr_access : eip=%lx, reg=%d, gpreg = %x",
eip, reg, gpreg);
@@ -1148,6 +1114,7 @@
__hvm_bug(regs);
break;
}
+ ASSERT(inst_len > 0);
__update_guest_eip(vmcb, inst_len);
}

@@ -1405,7 +1372,7 @@
&v->arch.hvm_svm.cpu_state))
{
HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
- svm_inject_exception(vmcb, TRAP_gp_fault, 0);
+ svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
}

if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
@@ -1468,7 +1435,7 @@
*/
if ((value & X86_CR0_PE) == 0) {
if (value & X86_CR0_PG) {
- svm_inject_exception(vmcb, TRAP_gp_fault, 0);
+ svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
return 0;
}

@@ -1503,7 +1470,7 @@
value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
break;
case 4:
- value = vmcb->cr4;
+ value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
break;
case 8:
#if 0
@@ -1602,12 +1569,19 @@

case 4:
/* CR4 */
- if (value & X86_CR4_PAE)
- __hvm_bug(regs); /* not implemented */
-
- old_cr = vmcb->cr4;
-
- vmcb->cr4 = value;
+ if (value & X86_CR4_PAE) {
+ set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+ } else {
+ if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
+ &v->arch.hvm_svm.cpu_state)) {
+ svm_inject_exception(vmcb, TRAP_gp_fault, 1, 0);
+ }
+ clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
+ }
+
+ old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ vmcb->cr4 = value | SVM_CR4_HOST_MASK;

/*
* Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
@@ -1636,10 +1610,12 @@
struct cpu_user_regs *regs)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- unsigned int inst_len = 0;
+ int inst_len = 0;
+ int index;
unsigned int gpreg;
unsigned long value;
- u8 buffer[6];
+ u8 buffer[MAX_INST_LEN];
+ u8 prefix = 0;
int result = 1;
enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
@@ -1648,29 +1624,41 @@
ASSERT(vmcb);

inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
+ /* get index to first actual instruction byte - as we will need to know where the
+ * prefix lives later on
+ */
+ index = skip_prefix_bytes(buffer, sizeof(buffer));

if (type == TYPE_MOV_TO_CR)
{
inst_len = __get_instruction_length_from_list(vmcb, list_a,
- ARR_SIZE(list_a), buffer, &match);
+ ARR_SIZE(list_a), &buffer[index], &match);
}
else
{
inst_len = __get_instruction_length_from_list(vmcb, list_b,
- ARR_SIZE(list_b), buffer, &match);
- }
+ ARR_SIZE(list_b), &buffer[index], &match);
+ }
+
+ ASSERT(inst_len > 0);
+
+ inst_len += index;
+
+ /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
+ if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
+ prefix = buffer[index-1];

HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long) vmcb->rip);

switch (match)
{
case INSTR_MOV2CR:
- gpreg = decode_src_reg(buffer[2]);
+ gpreg = decode_src_reg(prefix, buffer[index+2]);
result = mov_to_cr(gpreg, cr, regs);
break;

case INSTR_MOVCR2:
- gpreg = decode_src_reg(buffer[2]);
+ gpreg = decode_src_reg(prefix, buffer[index+2]);
mov_from_cr(cr, gpreg, regs);
break;

@@ -1686,7 +1674,7 @@
if (svm_dbg_on)
svm_dump_inst(svm_rip2pointer(vmcb));

- gpreg = decode_src_reg(buffer[2]);
+ gpreg = decode_src_reg(prefix, buffer[index+2]);
value = get_reg(gpreg, regs, vmcb) & 0xF;

if (svm_dbg_on)
@@ -1704,7 +1692,7 @@
case INSTR_SMSW:
svm_dump_inst(svm_rip2pointer(vmcb));
value = v->arch.hvm_svm.cpu_shadow_cr0;
- gpreg = decode_src_reg(buffer[2]);
+ gpreg = decode_src_reg(prefix, buffer[index+2]);
set_reg(gpreg, value, regs, vmcb);

if (svm_dbg_on)
@@ -1727,7 +1715,7 @@
static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- unsigned int inst_len;
+ int inst_len;
int64_t tsc_sum;

ASSERT(vmcb);
@@ -1868,7 +1856,7 @@
struct vcpu *v = current;
u8 opcode[MAX_INST_SIZE], prefix, length = MAX_INST_SIZE;
unsigned long g_vaddr;
- unsigned int inst_len;
+ int inst_len;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;

ASSERT(vmcb);
@@ -1885,6 +1873,7 @@
if (invlpga)
{
inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
+ ASSERT(inst_len > 0);
__update_guest_eip(vmcb, inst_len);

/*
@@ -1898,6 +1887,7 @@
/* What about multiple prefix codes? */
prefix = (is_prefix(opcode[0])?opcode[0]:0);
inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
+ ASSERT(inst_len > 0);

inst_len--;
length -= inst_len;
@@ -1949,7 +1939,10 @@
v->arch.hvm_svm.cpu_shadow_cr0 = X86_CR0_ET;

vmcb->cr2 = 0;
- vmcb->cr4 = 0;
+ vmcb->efer = EFER_SVME;
+
+ vmcb->cr4 = SVM_CR4_HOST_MASK;
+ v->arch.hvm_svm.cpu_shadow_cr4 = 0;

/* This will jump to ROMBIOS */
vmcb->rip = 0xFFF0;
@@ -2019,12 +2012,13 @@
static int svm_do_vmmcall(struct vcpu *v, struct cpu_user_regs *regs)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- unsigned int inst_len;
+ int inst_len;

ASSERT(vmcb);
ASSERT(regs);

inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
+ ASSERT(inst_len > 0);

/* VMMCALL sanity check */
if (vmcb->cpl > get_vmmcall_cpl(regs->edi))
@@ -2478,7 +2472,7 @@
{
v->arch.hvm_svm.injecting_event = 1;
/* Inject #PG using Interruption-Information Fields */
- svm_inject_exception(vmcb, TRAP_page_fault, regs.error_code);
+ svm_inject_exception(vmcb, TRAP_page_fault, 1, regs.error_code);

v->arch.hvm_svm.cpu_cr2 = va;
vmcb->cr2 = va;
diff -r a66763eb86fe -r a376bab39768 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Tue Feb 28 18:00:15 2006
+++ b/xen/arch/x86/hvm/svm/vmcb.c Tue Feb 28 21:57:38 2006
@@ -190,7 +190,6 @@
unsigned long eflags;
unsigned long shadow_cr;
struct vmcb_struct *vmcb = arch_svm->vmcb;
- struct Xgt_desc_struct desc;

/* Allows IRQs to be shares */
vmcb->vintr.fields.intr_masking = 1;
@@ -224,9 +223,9 @@
vmcb->fs.base = 0;
vmcb->gs.base = 0;

- __asm__ __volatile__ ("sidt (%0) \n" :: "a"(&desc) : "memory");
- vmcb->idtr.base = desc.address;
- vmcb->idtr.limit = desc.size;
+ /* Guest Interrupt descriptor table */
+ vmcb->idtr.base = 0;
+ vmcb->idtr.limit = 0;

/* Set up segment attributes */
attrib.bytes = 0;
@@ -248,15 +247,11 @@
attrib.fields.type = 0xb; /* type=0xb -> executable/readable, accessed */
vmcb->cs.attributes = attrib;

- /* Global descriptor table */
- //NMERGE7500 - can probably remove access to gdtr
- vmcb->gdtr.base = regs->edx;
- regs->edx = 0;
- ASSERT(regs->eax <= 0xFFFF); /* Make sure we're in the limit */
- vmcb->gdtr.limit = regs->eax;
- regs->eax = 0;
-
- /* Local Descriptor Table */
+ /* Guest Global descriptor table */
+ vmcb->gdtr.base = 0;
+ vmcb->gdtr.limit = 0;
+
+ /* Guest Local Descriptor Table */
attrib.fields.s = 0; /* not code or data segement */
attrib.fields.type = 0x2; /* LDT */
attrib.fields.db = 0; /* 16-bit */
@@ -279,11 +274,10 @@
/* CR3 is set in svm_final_setup_guest */

__asm__ __volatile__ ("mov %%cr4,%0" : "=r" (crn) :);
- shadow_cr = crn;
- vmcb->cr4 = shadow_cr;
-
-//MERGE7500 - should write a 0 instead to rsp?
- vmcb->rsp = regs->esp;
+ arch_svm->cpu_shadow_cr4 = crn & ~(X86_CR4_PGE | X86_CR4_PSE);
+ vmcb->cr4 = crn | SVM_CR4_HOST_MASK;
+
+ vmcb->rsp = 0;
vmcb->rip = regs->eip;

eflags = regs->eflags & ~HVM_EFLAGS_RESERVED_0; /* clear 0s */
diff -r a66763eb86fe -r a376bab39768 xen/arch/x86/hvm/svm/x86_64/exits.S
--- a/xen/arch/x86/hvm/svm/x86_64/exits.S Tue Feb 28 18:00:15 2006
+++ b/xen/arch/x86/hvm/svm/x86_64/exits.S Tue Feb 28 21:57:38 2006
@@ -107,8 +107,6 @@
movq %rax, VMCB_rax(%rcx)
movq VCPU_svm_hsa_pa(%rbx), %rax
VMSAVE
- /* XXX FPU SAVE */
- /* XXX DO TSC OFFSET */

movq VCPU_svm_vmcb_pa(%rbx), %rax
popq %r15
@@ -137,9 +135,7 @@
VMSAVE
/* rax is the only register we're allowed to touch here... */

- /* XXX FPU SAVE */
GET_CURRENT(%rax)
- /* XXX DO TSC OFFSET */
movq VCPU_svm_hsa_pa(%rax), %rax
VMLOAD

diff -r a66763eb86fe -r a376bab39768 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h Tue Feb 28 18:00:15 2006
+++ b/xen/include/asm-x86/hvm/svm/emulate.h Tue Feb 28 21:57:38 2006
@@ -83,15 +83,15 @@
struct cpu_user_regs *regs, const u8 prefix, const u8 *operand,
u8 *size);
extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
-extern unsigned int decode_dest_reg(u8 modrm);
-extern unsigned int decode_src_reg(u8 modrm);
+extern unsigned int decode_dest_reg(u8 prefix, u8 modrm);
+extern unsigned int decode_src_reg(u8 prefix, u8 modrm);
extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb);
-extern unsigned int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
+extern int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
enum instruction_index *list, unsigned int list_count,
u8 *guest_eip_buf, enum instruction_index *match);


-static inline unsigned int __get_instruction_length(struct vmcb_struct *vmcb,
+static inline int __get_instruction_length(struct vmcb_struct *vmcb,
enum instruction_index instr, u8 *guest_eip_buf)
{
return __get_instruction_length_from_list(vmcb, &instr, 1, guest_eip_buf,
@@ -138,9 +138,20 @@
}


+static inline int skip_prefix_bytes(u8 *buf, size_t size)
+{
+ int index;
+ for (index = 0; index < size && is_prefix(buf[index]); index ++)
+ /* do nothing */ ;
+ return index;
+}
+
+
+
static void inline __update_guest_eip(struct vmcb_struct *vmcb,
- unsigned long inst_len)
+ int inst_len)
{
+ ASSERT(inst_len > 0);
vmcb->rip += inst_len;
}

diff -r a66763eb86fe -r a376bab39768 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Tue Feb 28 18:00:15 2006
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Tue Feb 28 21:57:38 2006
@@ -269,21 +269,6 @@
#define SVM_LONG_GUEST(ed) \
(test_bit(SVM_CPU_STATE_LMA_ENABLED, &ed->arch.hvm_svm.cpu_state))

-enum {
- SVM_INDEX_MSR_LSTAR = 0,
- SVM_INDEX_MSR_STAR,
- SVM_INDEX_MSR_CSTAR,
- SVM_INDEX_MSR_SYSCALL_MASK,
- SVM_INDEX_MSR_EFER,
-
- SVM_MSR_COUNT,
-};
-
-struct svm_msr_state {
- unsigned long flags;
- unsigned long msr_items[SVM_MSR_COUNT];
- unsigned long shadow_gs;
-};

/*
* Attribute for segment selector. This is a copy of bit 40:47 & 52:55 of the
@@ -449,7 +434,7 @@

struct arch_svm_struct {
struct vmcb_struct *vmcb;
- void *host_save_area;
+ void *host_save_area;
u64 host_save_pa;
u64 vmcb_pa;
u32 *iopm;
@@ -461,11 +446,11 @@
u32 asid_core;

unsigned long flags; /* VMCB flags */
- unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
+ unsigned long cpu_shadow_cr0; /* Guest value for CR0 */
+ unsigned long cpu_shadow_cr4; /* Guest value for CR4 */
unsigned long cpu_cr2;
unsigned long cpu_cr3;
unsigned long cpu_state;
- struct svm_msr_state msr_content;
struct timer hlt_timer; /* hlt ins emulation wakeup timer */
};

@@ -486,6 +471,14 @@

#define VMCB_EFLAGS_RESERVED_0 0xffc08028 /* bitmap for 0 */
#define VMCB_EFLAGS_RESERVED_1 0x00000002 /* bitmap for 1 */
+
+/* These bits in the CR4 are owned by the host */
+#ifdef __i386__
+#define SVM_CR4_HOST_MASK (0)
+#else
+#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
+#endif
+

#endif /* ASM_X86_HVM_SVM_VMCS_H__ */
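
A brief note on the CR4 handling introduced above: the guest's own CR4 value
is kept in cpu_shadow_cr4 and returned on CR4 reads, while the CR4 actually
loaded from the VMCB always has the host-owned bits (SVM_CR4_HOST_MASK, i.e.
PAE on a 64bit hv) forced on. A minimal sketch of that scheme, reusing the
patch's field names (the helper functions themselves are illustrative and not
part of the patch):

/* Guest writes CR4: remember the guest value, force host-owned bits on. */
static void shadow_cr4_write(struct arch_svm_struct *svm, unsigned long value)
{
    svm->cpu_shadow_cr4 = value;                    /* guest-visible CR4    */
    svm->vmcb->cr4 = value | SVM_CR4_HOST_MASK;     /* CR4 the CPU runs on  */
}

/* Guest reads CR4: return the shadow, never the host-mandated bits. */
static unsigned long shadow_cr4_read(struct arch_svm_struct *svm)
{
    return svm->cpu_shadow_cr4;
}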

