
[xen-unstable] Merge with ppc/xen-unstable.hg.
# HG changeset patch
# User kfraser@localhost.localdomain
# Date 1185955143 -3600
# Node ID 36caf6f8407214c054f685c84621cbe7c6420d73
# Parent 553f64e4f6efb0482490324a8e7cd99d3a9a653d
# Parent 43836c2b595bcf7ea42a405dd5eb09f54c386d25
Merge with ppc/xen-unstable.hg.
---
 tools/firmware/hvmloader/smbios.c                              |    2 
 tools/libxc/ia64/Makefile                                      |    2 
 tools/libxc/ia64/dom_fw_acpi.c                                 |   13 
 tools/python/xen/util/acmpolicy.py                             |    7 
 tools/xm-test/lib/XmTestLib/acm.py                             |    4 
 tools/xm-test/tests/security-acm/07_security-acm_pol_update.py |    9 
 tools/xm-test/tests/security-acm/09_security-acm_pol_update.py |    9 
 xen/arch/ia64/xen/dom_fw_common.c                              |   11 
 xen/arch/ia64/xen/dom_fw_dom0.c                                |   13 
 xen/arch/x86/acpi/boot.c                                       |   15 
 xen/arch/x86/hvm/instrlen.c                                    |  113 +++---
 xen/arch/x86/hvm/platform.c                                    |   14 
 xen/arch/x86/hvm/svm/intr.c                                    |   83 ++--
 xen/arch/x86/hvm/svm/svm.c                                     |   87 ++---
 xen/arch/x86/hvm/vmx/intr.c                                    |   78 +---
 xen/arch/x86/hvm/vmx/vmcs.c                                    |   17 -
 xen/arch/x86/hvm/vmx/vmx.c                                     |  167 +++-------
 xen/arch/x86/mm/shadow/multi.c                                 |    2 
 xen/drivers/acpi/tables.c                                      |  154 +++++++++
 xen/include/asm-ia64/dom_fw_common.h                           |    1 
 xen/include/asm-x86/hvm/hvm.h                                  |   70 +++-
 xen/include/asm-x86/hvm/svm/vmcb.h                             |    8 
 xen/include/asm-x86/hvm/vmx/vmcs.h                             |    7 
 xen/include/asm-x86/hvm/vmx/vmx.h                              |   36 --
 xen/include/xen/acpi.h                                         |    3 
 25 files changed, 530 insertions(+), 395 deletions(-)

diff -r 553f64e4f6ef -r 36caf6f84072 tools/firmware/hvmloader/smbios.c
--- a/tools/firmware/hvmloader/smbios.c Mon Jul 30 17:10:45 2007 -0500
+++ b/tools/firmware/hvmloader/smbios.c Wed Aug 01 08:59:03 2007 +0100
@@ -169,7 +169,7 @@ hvm_write_smbios_tables(void)
/* temporary variables used to build up Xen version string */
char *p = NULL; /* points to next point of insertion */
unsigned len = 0; /* length of string already composed */
- char *tmp = NULL; /* holds result of itoa() */
+ char tmp[16]; /* holds result of itoa() */
unsigned tmp_len; /* length of next string to add */

hypercall_xen_version(XENVER_guest_handle, uuid);
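
For context on the smbios.c fix above: hvmloader's itoa() writes its result into storage supplied by the caller, so the old "char *tmp = NULL" handed it a null pointer to write through; the new 16-byte stack array gives it somewhere to go. A minimal standalone sketch of the pattern (my_itoa() is a hypothetical stand-in for hvmloader's helper, not its actual implementation):

#include <stdio.h>

/* Hypothetical stand-in for hvmloader's itoa(): writes the decimal
 * form of 'i' into caller-provided storage. */
static void my_itoa(char *buf, unsigned int i)
{
    sprintf(buf, "%u", i);
}

int main(void)
{
    char tmp[16];        /* storage the callee can write into */

    my_itoa(tmp, 4096);  /* with "char *tmp = NULL" this would have
                          * written through a null pointer */
    printf("%s\n", tmp);
    return 0;
}
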
diff -r 553f64e4f6ef -r 36caf6f84072 tools/libxc/ia64/Makefile
--- a/tools/libxc/ia64/Makefile Mon Jul 30 17:10:45 2007 -0500
+++ b/tools/libxc/ia64/Makefile Wed Aug 01 08:59:03 2007 +0100
@@ -5,6 +5,8 @@ GUEST_SRCS-y += ia64/xc_ia64_linux_resto
GUEST_SRCS-y += ia64/xc_ia64_linux_restore.c

GUEST_SRCS-y += ia64/xc_dom_ia64_util.c
+GUEST_SRCS-y += ia64/dom_fw_acpi.c
+
DOMFW_SRCS_BASE := dom_fw_common.c dom_fw_domu.c dom_fw_asm.S
DOMFW_SRCS := $(addprefix ia64/, $(DOMFW_SRCS_BASE))
$(DOMFW_SRCS):
diff -r 553f64e4f6ef -r 36caf6f84072 tools/libxc/ia64/dom_fw_acpi.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/ia64/dom_fw_acpi.c Wed Aug 01 08:59:03 2007 +0100
@@ -0,0 +1,13 @@
+#include <inttypes.h>
+#include <xen/acpi.h>
+
+uint8_t
+generate_acpi_checksum(void *tbl, unsigned long len)
+{
+ uint8_t *ptr, sum = 0;
+
+ for ( ptr = tbl; len > 0 ; len--, ptr++ )
+ sum += *ptr;
+
+ return 0 - sum;
+}
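
The checksum routine above (now shared with the tools via this new file) relies on the ACPI rule that the byte-wise sum of an entire table, including its checksum byte, must be 0 mod 256. Zeroing the checksum slot, summing, and storing the returned two's complement restores that invariant. A standalone sketch:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint8_t generate_acpi_checksum(void *tbl, unsigned long len)
{
    uint8_t *ptr, sum = 0;

    for ( ptr = tbl; len > 0; len--, ptr++ )
        sum += *ptr;

    return 0 - sum;
}

int main(void)
{
    uint8_t table[16];
    uint8_t sum = 0;
    unsigned int i;

    memset(table, 0xAB, sizeof(table));
    table[9] = 0;                                   /* clear checksum slot */
    table[9] = generate_acpi_checksum(table, sizeof(table));

    for ( i = 0; i < sizeof(table); i++ )
        sum += table[i];                            /* uint8_t: mod 256 */
    printf("byte sum mod 256 = %u\n", sum);         /* prints 0 */
    return 0;
}
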
diff -r 553f64e4f6ef -r 36caf6f84072 tools/python/xen/util/acmpolicy.py
--- a/tools/python/xen/util/acmpolicy.py Mon Jul 30 17:10:45 2007 -0500
+++ b/tools/python/xen/util/acmpolicy.py Wed Aug 01 08:59:03 2007 +0100
@@ -818,12 +818,13 @@ class ACMPolicy(XSPolicy):
if successful, the policy's flags will indicate that the
policy is the one loaded into the hypervisor
"""
- (ret, output) = commands.getstatusoutput(
+ if not self.isloaded():
+ (ret, output) = commands.getstatusoutput(
security.xensec_tool +
" loadpolicy " +
self.get_filename(".bin"))
- if ret != 0:
- return -xsconstants.XSERR_POLICY_LOAD_FAILED
+ if ret != 0:
+ return -xsconstants.XSERR_POLICY_LOAD_FAILED
return xsconstants.XSERR_SUCCESS

def isloaded(self):
diff -r 553f64e4f6ef -r 36caf6f84072 tools/xm-test/lib/XmTestLib/acm.py
--- a/tools/xm-test/lib/XmTestLib/acm.py Mon Jul 30 17:10:45 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/acm.py Wed Aug 01 08:59:03 2007 +0100
@@ -67,6 +67,10 @@ def ACMLoadPolicy(policy='xm-test'):
if main.serverType == main.SERVER_XEN_API:
ACMLoadPolicy_XenAPI()
else:
+ cmd='xm dumppolicy | grep -E "^POLICY REFERENCE = ' + policy + '.$"'
+ s, o = traceCommand(cmd)
+ if o != "":
+ return
s, o = traceCommand("xm makepolicy %s" % (policy))
if s != 0:
FAIL("Need to be able to do 'xm makepolicy %s' but could not" %
diff -r 553f64e4f6ef -r 36caf6f84072 tools/xm-test/tests/security-acm/07_security-acm_pol_update.py
--- a/tools/xm-test/tests/security-acm/07_security-acm_pol_update.py Mon Jul 30 17:10:45 2007 -0500
+++ b/tools/xm-test/tests/security-acm/07_security-acm_pol_update.py Wed Aug 01 08:59:03 2007 +0100
@@ -12,10 +12,19 @@ from xen.util import acmpolicy, security
from xen.util import acmpolicy, security, xsconstants
from xen.util.acmpolicy import ACMPolicy
from xen.xend.XendDomain import DOM0_UUID
+from XmTestLib.acm import *

import commands
import os
import base64
+
+if not isACMEnabled():
+ SKIP("Not running this test since ACM not enabled.")
+
+try:
+ session = xapi.connect()
+except:
+ SKIP("Skipping this test since xm is not using the Xen-API.")

xm_test = {}
xm_test['policyname'] = "xm-test"
diff -r 553f64e4f6ef -r 36caf6f84072 tools/xm-test/tests/security-acm/09_security-acm_pol_update.py
--- a/tools/xm-test/tests/security-acm/09_security-acm_pol_update.py Mon Jul 30 17:10:45 2007 -0500
+++ b/tools/xm-test/tests/security-acm/09_security-acm_pol_update.py Wed Aug 01 08:59:03 2007 +0100
@@ -7,6 +7,7 @@

from XmTestLib import xapi
from XmTestLib.XenAPIDomain import XmTestAPIDomain
+from XmTestLib.acm import *
from XmTestLib import *
from xen.xend import XendAPIConstants
from xen.util import security, xsconstants
@@ -15,6 +16,14 @@ import base64
import base64
import struct
import time
+
+if not isACMEnabled():
+ SKIP("Not running this test since ACM not enabled.")
+
+try:
+ session = xapi.connect()
+except:
+ SKIP("Skipping this test since xm is not using the Xen-API.")

def typestoxml(types):
res = ""
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/ia64/xen/dom_fw_common.c
--- a/xen/arch/ia64/xen/dom_fw_common.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/ia64/xen/dom_fw_common.c Wed Aug 01 08:59:03 2007 +0100
@@ -207,17 +207,6 @@ print_md(efi_memory_desc_t *md)
printk("(%luKB)\n", size >> 10);
}

-uint8_t
-generate_acpi_checksum(void *tbl, unsigned long len)
-{
- uint8_t *ptr, sum = 0;
-
- for (ptr = tbl; len > 0 ; len--, ptr++)
- sum += *ptr;
-
- return 0 - sum;
-}
-
struct fake_acpi_tables {
struct acpi20_table_rsdp rsdp;
struct xsdt_descriptor_rev2 xsdt;
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/ia64/xen/dom_fw_dom0.c
--- a/xen/arch/ia64/xen/dom_fw_dom0.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/ia64/xen/dom_fw_dom0.c Wed Aug 01 08:59:03 2007 +0100
@@ -103,6 +103,7 @@ acpi_update_madt_checksum(unsigned long
/* base is physical address of acpi table */
static void __init touch_acpi_table(void)
{
+ int result;
lsapic_nbr = 0;

if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, 0) < 0)
@@ -110,6 +111,18 @@ static void __init touch_acpi_table(void
if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC,
acpi_patch_plat_int_src, 0) < 0)
printk("Error parsing MADT - no PLAT_INT_SRC entries\n");
+
+ result = acpi_table_disable(ACPI_SRAT);
+ if ( result == 0 )
+ printk("Success Disabling SRAT\n");
+ else if ( result != -ENOENT )
+ printk("ERROR: Failed Disabling SRAT\n");
+
+ result = acpi_table_disable(ACPI_SLIT);
+ if ( result == 0 )
+ printk("Success Disabling SLIT\n");
+ else if ( result != -ENOENT )
+ printk("ERROR: Failed Disabling SLIT\n");

acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);

diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/acpi/boot.c Wed Aug 01 08:59:03 2007 +0100
@@ -371,11 +371,18 @@ extern u32 pmtmr_ioport;

#ifdef CONFIG_ACPI_SLEEP
/* Get pm1x_cnt and pm1x_evt information for ACPI sleep */
-static int __init
+static void __init
acpi_fadt_parse_sleep_info(struct fadt_descriptor_rev2 *fadt)
{
+ struct acpi_table_rsdp *rsdp;
+ unsigned long rsdp_phys;
struct facs_descriptor_rev2 *facs = NULL;
uint64_t facs_pa;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys || acpi_disabled)
+ goto bad;
+ rsdp = __va(rsdp_phys);

if (fadt->revision >= FADT2_REVISION_ID) {
/* Sanity check on FADT Rev. 2 */
@@ -432,8 +439,7 @@ acpi_fadt_parse_sleep_info(struct fadt_d
"FACS is shorter than ACPI spec allow: 0x%x",
facs->length);

- if ((acpi_rsdp_rev < 2) ||
- (facs->length < 32)) {
+ if ((rsdp->revision < 2) || (facs->length < 32)) {
acpi_sinfo.wakeup_vector = facs_pa +
offsetof(struct facs_descriptor_rev2,
firmware_waking_vector);
@@ -451,10 +457,9 @@ acpi_fadt_parse_sleep_info(struct fadt_d
acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt,
acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt,
acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width);
- return 0;
+ return;
bad:
memset(&acpi_sinfo, 0, sizeof(acpi_sinfo));
- return 0;
}
#endif

diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/instrlen.c
--- a/xen/arch/x86/hvm/instrlen.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/instrlen.c Wed Aug 01 08:59:03 2007 +0100
@@ -7,14 +7,6 @@
*
* Essentially a very, very stripped version of Keir Fraser's work in
* x86_emulate.c. Used for MMIO.
- */
-
-/*
- * TODO: The way in which we use hvm_instruction_length is very inefficient as
- * it now stands. It will be worthwhile to return the actual instruction buffer
- * along with the instruction length since one of the reasons we are getting
- * the instruction length is to know how many instruction bytes we need to
- * fetch.
*/

#include <xen/config.h>
@@ -194,31 +186,51 @@ static uint8_t twobyte_table[256] = {
/*
* insn_fetch - fetch the next byte from instruction stream
*/
-#define insn_fetch() \
-({ uint8_t _x; \
- if ( length >= 15 ) \
- return -1; \
- if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \
- gdprintk(XENLOG_WARNING, \
- "Cannot read from address %lx (eip %lx, mode %d)\n", \
- pc, org_pc, address_bytes); \
- return -1; \
- } \
- pc += 1; \
- length += 1; \
- _x; \
+#define insn_fetch() \
+({ uint8_t _x; \
+ if ( length >= 15 ) \
+ return -1; \
+ if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \
+ unsigned long err; \
+ struct segment_register cs; \
+ gdprintk(XENLOG_WARNING, \
+ "Cannot read from address %lx (eip %lx, mode %d)\n", \
+ pc, org_pc, address_bytes); \
+ err = 0; /* Must be not-present: we don't enforce reserved bits */ \
+ if ( hvm_nx_enabled(current) ) \
+ err |= PFEC_insn_fetch; \
+ hvm_get_segment_register(current, x86_seg_cs, &cs); \
+ if ( cs.attr.fields.dpl != 0 ) \
+ err |= PFEC_user_mode; \
+ hvm_inject_exception(TRAP_page_fault, err, pc); \
+ return -1; \
+ } \
+ if ( buf ) \
+ buf[length] = _x; \
+ length += 1; \
+ pc += 1; \
+ _x; \
})

+#define insn_skip(_n) do { \
+ int _i; \
+ for ( _i = 0; _i < (_n); _i++) { \
+ (void) insn_fetch(); \
+ } \
+} while (0)
+
/**
- * hvm_instruction_length - returns the current instructions length
+ * hvm_instruction_fetch - read the current instruction and return its length
*
* @org_pc: guest instruction pointer
- * @mode: guest operating mode
+ * @address_bytes: guest address width
+ * @buf: (optional) buffer to load actual instruction bytes into
*
- * EXTERNAL this routine calculates the length of the current instruction
- * pointed to by org_pc. The guest state is _not_ changed by this routine.
+ * Doesn't increment the guest's instruction pointer, but may
+ * issue faults to the guest. Returns -1 on failure.
*/
-int hvm_instruction_length(unsigned long org_pc, int address_bytes)
+int hvm_instruction_fetch(unsigned long org_pc, int address_bytes,
+ unsigned char *buf)
{
uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
@@ -317,18 +329,13 @@ done_prefixes:
{
case 0:
if ( modrm_rm == 6 )
- {
- length += 2;
- pc += 2; /* skip disp16 */
- }
+ insn_skip(2); /* skip disp16 */
break;
case 1:
- length += 1;
- pc += 1; /* skip disp8 */
+ insn_skip(1); /* skip disp8 */
break;
case 2:
- length += 2;
- pc += 2; /* skip disp16 */
+ insn_skip(2); /* skip disp16 */
break;
}
}
@@ -340,33 +347,19 @@ done_prefixes:
case 0:
if ( (modrm_rm == 4) &&
((insn_fetch() & 7) == 5) )
- {
- length += 4;
- pc += 4; /* skip disp32 specified by SIB.base */
- }
+ insn_skip(4); /* skip disp32 specified by SIB.base */
else if ( modrm_rm == 5 )
- {
- length += 4;
- pc += 4; /* skip disp32 */
- }
+ insn_skip(4); /* skip disp32 */
break;
case 1:
if ( modrm_rm == 4 )
- {
- length += 1;
- pc += 1;
- }
- length += 1;
- pc += 1; /* skip disp8 */
+ insn_skip(1);
+ insn_skip(1); /* skip disp8 */
break;
case 2:
if ( modrm_rm == 4 )
- {
- length += 1;
- pc += 1;
- }
- length += 4;
- pc += 4; /* skip disp32 */
+ insn_skip(1);
+ insn_skip(4); /* skip disp32 */
break;
}
}
@@ -387,12 +380,10 @@ done_prefixes:
tmp = (d & ByteOp) ? 1 : op_bytes;
if ( tmp == 8 ) tmp = 4;
/* NB. Immediates are sign-extended as necessary. */
- length += tmp;
- pc += tmp;
+ insn_skip(tmp);
break;
case SrcImmByte:
- length += 1;
- pc += 1;
+ insn_skip(1);
break;
}

@@ -402,8 +393,7 @@ done_prefixes:
switch ( b )
{
case 0xa0 ... 0xa3: /* mov */
- length += ad_bytes;
- pc += ad_bytes; /* skip src/dst displacement */
+ insn_skip(ad_bytes); /* skip src/dst displacement */
break;
case 0xf6 ... 0xf7: /* Grp3 */
switch ( modrm_reg )
@@ -412,8 +402,7 @@ done_prefixes:
/* Special case in Grp3: test has an immediate source operand. */
tmp = (d & ByteOp) ? 1 : op_bytes;
if ( tmp == 8 ) tmp = 4;
- length += tmp;
- pc += tmp;
+ insn_skip(tmp);
break;
}
break;
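
A note on the error code built inside the reworked insn_fetch() above: it is composed from the architectural x86 page-fault error-code bits, with the instruction-fetch bit only reported when NX is enabled and the user bit marking a CPL-3 access. The PFEC_* constants below mirror those architectural bit positions for illustration; treat them as assumptions rather than copies of Xen's headers:

#include <stdio.h>

#define PFEC_page_present (1u << 0)   /* left 0 here: not-present fault */
#define PFEC_write_access (1u << 1)
#define PFEC_user_mode    (1u << 2)   /* faulting access was from CPL 3 */
#define PFEC_insn_fetch   (1u << 4)   /* fault during instruction fetch */

static unsigned long fetch_fault_errcode(int nx_enabled, int guest_dpl)
{
    unsigned long err = 0;            /* not-present, reserved bits clean */

    if ( nx_enabled )                 /* I/D bit only reported with NX */
        err |= PFEC_insn_fetch;
    if ( guest_dpl != 0 )
        err |= PFEC_user_mode;
    return err;
}

int main(void)
{
    printf("ring-3 guest, NX on : %#lx\n", fetch_fault_errcode(1, 3));
    printf("ring-0 guest, NX off: %#lx\n", fetch_fault_errcode(0, 0));
    return 0;
}
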
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/platform.c Wed Aug 01 08:59:03 2007 +0100
@@ -1041,17 +1041,13 @@ void handle_mmio(unsigned long gpa)
/* real or vm86 modes */
address_bytes = 2;
inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip;
- inst_len = hvm_instruction_length(inst_addr, address_bytes);
+ memset(inst, 0, MAX_INST_LEN);
+ inst_len = hvm_instruction_fetch(inst_addr, address_bytes, inst);
if ( inst_len <= 0 )
{
- printk("handle_mmio: failed to get instruction length\n");
- domain_crash_synchronous();
- }
-
- memset(inst, 0, MAX_INST_LEN);
- if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) {
- printk("handle_mmio: failed to copy instruction\n");
- domain_crash_synchronous();
+ gdprintk(XENLOG_DEBUG, "handle_mmio: failed to get instruction\n");
+ /* hvm_instruction_fetch() will have injected a #PF; get out now */
+ return;
}

if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size,
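
The handle_mmio() change above switches to a single fetch-with-buffer call and a new failure contract: a return value <= 0 means the fetch failed and a #PF has already been queued for the guest, so the caller simply returns and lets the fault be delivered instead of crashing the domain. A sketch of that calling pattern, with a hypothetical stub standing in for hvm_instruction_fetch():

#include <stdio.h>
#include <string.h>

#define MAX_INST_LEN 15

/* Hypothetical stub: pretend the guest's code page was unmapped. */
static int fake_fetch(unsigned long pc, int address_bytes,
                      unsigned char *buf)
{
    (void)pc; (void)address_bytes; (void)buf;
    return -1;                  /* failure: a #PF would now be pending */
}

int main(void)
{
    unsigned char inst[MAX_INST_LEN];
    int inst_len;

    memset(inst, 0, MAX_INST_LEN);
    inst_len = fake_fetch(0x1000, 2, inst);
    if ( inst_len <= 0 )
    {
        printf("fetch failed; fault already injected, bail out\n");
        return 0;               /* mirrors handle_mmio()'s early return */
    }
    /* ... otherwise decode 'inst', as handle_mmio() does ... */
    return 0;
}
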
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/svm/intr.c Wed Aug 01 08:59:03 2007 +0100
@@ -58,7 +58,7 @@ static void svm_inject_nmi(struct vcpu *

event.bytes = 0;
event.fields.v = 1;
- event.fields.type = EVENTTYPE_NMI;
+ event.fields.type = X86_EVENTTYPE_NMI;
event.fields.vector = 2;

ASSERT(vmcb->eventinj.fields.v == 0);
@@ -72,34 +72,39 @@ static void svm_inject_extint(struct vcp

event.bytes = 0;
event.fields.v = 1;
- event.fields.type = EVENTTYPE_INTR;
+ event.fields.type = X86_EVENTTYPE_EXT_INTR;
event.fields.vector = vector;

ASSERT(vmcb->eventinj.fields.v == 0);
vmcb->eventinj = event;
}

+static void enable_intr_window(struct vcpu *v, enum hvm_intack intr_source)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ ASSERT(intr_source != hvm_intack_none);
+
+ /*
+ * Create a dummy virtual interrupt to intercept as soon as the
+ * guest can accept the real interrupt.
+ *
+ * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
+ * shadow. This is hard to do without hardware support. We should also
+ * track 'NMI blocking' from NMI injection until IRET. This can be done
+ * quite easily in software by intercepting the unblocking IRET.
+ */
+ vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
+ HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+ svm_inject_dummy_vintr(v);
+}
+
asmlinkage void svm_intr_assist(void)
{
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
enum hvm_intack intr_source;
int intr_vector;
-
- /*
- * Previous event delivery caused this intercept?
- * This will happen if the injection is latched by the processor (hence
- * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
- * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
- * stack).
- */
- if ( vmcb->exitintinfo.fields.v )
- {
- vmcb->eventinj = vmcb->exitintinfo;
- vmcb->exitintinfo.bytes = 0;
- HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
- return;
- }

/* Crank the handle on interrupt state. */
pt_update_irq(v);
@@ -111,32 +116,23 @@ asmlinkage void svm_intr_assist(void)
return;

/*
- * If the guest can't take an interrupt right now, create a 'fake'
- * virtual interrupt on to intercept as soon as the guest _can_ take
- * interrupts. Do not obtain the next interrupt from the vlapic/pic
- * if unable to inject.
- *
- * Also do this if there is an injection already pending. This is
- * because the event delivery can arbitrarily delay the injection
- * of the vintr (for example, if the exception is handled via an
- * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
- * - the vTPR could be modified upwards, so we need to wait until the
- * exception is delivered before we can safely decide that an
- * interrupt is deliverable; and
- * - the guest might look at the APIC/PIC state, so we ought not to
- * have cleared the interrupt out of the IRR.
- *
- * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
- * shadow. This is hard to do without hardware support. We should also
- * track 'NMI blocking' from NMI injection until IRET. This can be done
- * quite easily in software by intercepting the unblocking IRET.
+ * Pending IRQs must be delayed if:
+ * 1. An event is already pending. This is despite the fact that SVM
+ * provides a VINTR delivery method quite separate from the EVENTINJ
+ * mechanism. The event delivery can arbitrarily delay the injection
+ * of the vintr (for example, if the exception is handled via an
+ * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
+ * - the vTPR could be modified upwards, so we need to wait until
+ * the exception is delivered before we can safely decide that an
+ * interrupt is deliverable; and
+ * - the guest might look at the APIC/PIC state, so we ought not to
+ * have cleared the interrupt out of the IRR.
+ * 2. The IRQ is masked.
*/
- if ( !hvm_interrupts_enabled(v, intr_source) ||
- vmcb->eventinj.fields.v )
+ if ( unlikely(vmcb->eventinj.fields.v) ||
+ !hvm_interrupts_enabled(v, intr_source) )
{
- vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
- HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
- svm_inject_dummy_vintr(v);
+ enable_intr_window(v, intr_source);
return;
}
} while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
@@ -151,6 +147,11 @@ asmlinkage void svm_intr_assist(void)
svm_inject_extint(v, intr_vector);
pt_intr_post(v, intr_vector, intr_source);
}
+
+ /* Is there another IRQ to queue up behind this one? */
+ intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_intr_window(v, intr_source);
}

/*
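
The restructured svm_intr_assist() (and its VMX twin further down) reduces to one decision: deliver the pending IRQ now, or open an interrupt window and retry later. Deferral happens when an event injection is already latched (its delivery could raise the vTPR or transit an interrupt gate, so the IRQ must not be acked out of the APIC/PIC yet) or when the guest has interrupts masked. A plain-C model of just that predicate, with booleans standing in for vmcb->eventinj.fields.v and !hvm_interrupts_enabled(); this is an illustrative reduction, not hypervisor code:

#include <stdio.h>

static int must_defer(int event_already_pending, int irq_masked)
{
    /* Defer (and open an interrupt window) if either an event is
     * already being injected or the IRQ is currently masked. */
    return event_already_pending || irq_masked;
}

int main(void)
{
    printf("%d %d %d\n",
           must_defer(1, 0),    /* pending event -> defer (1)      */
           must_defer(0, 1),    /* masked        -> defer (1)      */
           must_defer(0, 0));   /* deliverable   -> inject now (0) */
    return 0;
}
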
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c Wed Aug 01 08:59:03 2007 +0100
@@ -71,8 +71,8 @@ static void *root_vmcb[NR_CPUS] __read_m
/* hardware assisted paging bits */
extern int opt_hap_enabled;

-static void svm_inject_exception(struct vcpu *v, int trap,
- int ev, int error_code)
+static void svm_inject_exception(
+ struct vcpu *v, int trap, int ev, int error_code)
{
eventinj_t event;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -84,13 +84,11 @@ static void svm_inject_exception(struct

event.bytes = 0;
event.fields.v = 1;
- event.fields.type = EVENTTYPE_EXCEPTION;
+ event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
event.fields.vector = trap;
event.fields.ev = ev;
event.fields.errorcode = error_code;

- ASSERT(vmcb->eventinj.fields.v == 0);
-
vmcb->eventinj = event;
}

@@ -362,21 +360,14 @@ int svm_vmcb_save(struct vcpu *v, struct
c->sysenter_esp = vmcb->sysenter_esp;
c->sysenter_eip = vmcb->sysenter_eip;

- /* Save any event/interrupt that was being injected when we last exited. */
- if ( vmcb->exitintinfo.fields.v )
- {
- c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
- c->error_code = vmcb->exitintinfo.fields.errorcode;
- }
- else if ( vmcb->eventinj.fields.v )
- {
- c->pending_event = vmcb->eventinj.bytes & 0xffffffff;
+ c->pending_event = 0;
+ c->error_code = 0;
+ if ( vmcb->eventinj.fields.v &&
+ hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
+ vmcb->eventinj.fields.vector) )
+ {
+ c->pending_event = (uint32_t)vmcb->eventinj.bytes;
c->error_code = vmcb->eventinj.fields.errorcode;
- }
- else
- {
- c->pending_event = 0;
- c->error_code = 0;
}

return 1;
@@ -495,11 +486,11 @@ int svm_vmcb_restore(struct vcpu *v, str
vmcb->sysenter_esp = c->sysenter_esp;
vmcb->sysenter_eip = c->sysenter_eip;

- /* update VMCB for nested paging restore */
- if ( paging_mode_hap(v->domain) ) {
+ if ( paging_mode_hap(v->domain) )
+ {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
- (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+ vmcb->cr4 = (v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE));
vmcb->cr3 = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
@@ -514,26 +505,23 @@ int svm_vmcb_restore(struct vcpu *v, str
gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
c->pending_event, c->error_code);

- /* VMX uses a different type for #OF and #BP; fold into "Exception" */
- if ( c->pending_type == 6 )
- c->pending_type = 3;
- /* Sanity check */
- if ( c->pending_type == 1 || c->pending_type > 4
- || c->pending_reserved != 0 )
+ if ( (c->pending_type == 1) || (c->pending_type > 6) ||
+ (c->pending_reserved != 0) )
{
gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32"\n",
c->pending_event);
return -EINVAL;
}
- /* Put this pending event in exitintinfo and svm_intr_assist()
- * will reinject it when we return to the guest. */
- vmcb->exitintinfo.bytes = c->pending_event;
- vmcb->exitintinfo.fields.errorcode = c->error_code;
+
+ if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
+ {
+ vmcb->eventinj.bytes = c->pending_event;
+ vmcb->eventinj.fields.errorcode = c->error_code;
+ }
}

paging_update_paging_modes(v);
- /* signal paging update to ASID handler */
- svm_asid_g_update_paging (v);
+ svm_asid_g_update_paging(v);

return 0;

@@ -965,10 +953,10 @@ static void svm_hvm_inject_exception(
svm_inject_exception(v, trapnr, (errcode != -1), errcode);
}

-static int svm_event_injection_faulted(struct vcpu *v)
-{
- struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- return vmcb->exitintinfo.fields.v;
+static int svm_event_pending(struct vcpu *v)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ return vmcb->eventinj.fields.v;
}

static struct hvm_function_table svm_function_table = {
@@ -1000,7 +988,7 @@ static struct hvm_function_table svm_fun
.inject_exception = svm_hvm_inject_exception,
.init_ap_context = svm_init_ap_context,
.init_hypercall_page = svm_init_hypercall_page,
- .event_injection_faulted = svm_event_injection_faulted
+ .event_pending = svm_event_pending
};

static void svm_npt_detect(void)
@@ -1667,6 +1655,17 @@ static int svm_set_cr0(unsigned long val
unsigned long old_base_mfn;

HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
+
+ if ( (u32)value != value )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set upper 32 bits in CR0: %lx",
+ value);
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ return 0;
+ }
+
+ value &= ~HVM_CR0_GUEST_RESERVED_BITS;

/* ET is reserved and should always be 1. */
value |= X86_CR0_ET;
@@ -2420,6 +2419,7 @@ asmlinkage void svm_vmexit_handler(struc
unsigned long eip;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ eventinj_t eventinj;
int inst_len, rc;

exit_reason = vmcb->exitcode;
@@ -2434,6 +2434,13 @@ asmlinkage void svm_vmexit_handler(struc

perfc_incra(svmexits, exit_reason);
eip = vmcb->rip;
+
+ /* Event delivery caused this intercept? Queue for redelivery. */
+ eventinj = vmcb->exitintinfo;
+ if ( unlikely(eventinj.fields.v) &&
+ hvm_event_needs_reinjection(eventinj.fields.type,
+ eventinj.fields.vector) )
+ vmcb->eventinj = eventinj;

switch ( exit_reason )
{
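
One detail of the svm_set_cr0() hunk above worth spelling out: "(u32)value != value" is true exactly when any of bits 63:32 are set, which is what earns the guest a #GP. Standalone (assuming a 64-bit unsigned long, as on the x86_64 hypervisor build):

#include <stdio.h>

typedef unsigned int u32;

static int cr0_upper_bits_set(unsigned long value)
{
    return (u32)value != value;  /* nonzero iff bits 63:32 are set */
}

int main(void)
{
    printf("%d\n", cr0_upper_bits_set(0x80000011UL));   /* 0: legal   */
    printf("%d\n", cr0_upper_bits_set(0x100000000UL));  /* 1: gets #GP */
    return 0;
}
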
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/intr.c Wed Aug 01 08:59:03 2007 +0100
@@ -76,10 +76,9 @@ static void enable_intr_window(struct vc
u32 *cpu_exec_control = &v->arch.hvm_vmx.exec_control;
u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

- if ( unlikely(intr_source == hvm_intack_none) )
- return;
+ ASSERT(intr_source != hvm_intack_none);

- if ( unlikely(intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi )
+ if ( (intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi )
{
/*
* We set MOV-SS blocking in lieu of STI blocking when delivering an
@@ -131,68 +130,27 @@ asmlinkage void vmx_intr_assist(void)
int intr_vector;
enum hvm_intack intr_source;
struct vcpu *v = current;
- unsigned int idtv_info_field;
- unsigned long inst_len;
+ unsigned int intr_info;

+ /* Crank the handle on interrupt state. */
pt_update_irq(v);
-
hvm_set_callback_irq_level();
-
- update_tpr_threshold(vcpu_vlapic(v));

do {
intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( likely(intr_source == hvm_intack_none) )
+ goto out;

- if ( unlikely(v->arch.hvm_vmx.vector_injected) )
- {
- v->arch.hvm_vmx.vector_injected = 0;
- enable_intr_window(v, intr_source);
- return;
- }
-
- /* This could be moved earlier in the VMX resume sequence. */
- idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
- if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
- {
- /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD,
- idtv_info_field & ~INTR_INFO_RESVD_BITS_MASK);
-
- /*
- * Safe: the length will only be interpreted for software
- * exceptions and interrupts. If we get here then delivery of some
- * event caused a fault, and this always results in defined
- * VM_EXIT_INSTRUCTION_LEN.
- */
- inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
-
- if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
- __vmread(IDT_VECTORING_ERROR_CODE));
-
- /*
- * Clear NMI-blocking interruptibility info if an NMI delivery
- * faulted. Re-delivery will re-set it (see SDM 3B 25.7.1.2).
- */
- if ( (idtv_info_field&INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
- __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
- __vmread(GUEST_INTERRUPTIBILITY_INFO) &
- ~VMX_INTR_SHADOW_NMI);
-
- enable_intr_window(v, intr_source);
-
- HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
- return;
- }
-
- if ( likely(intr_source == hvm_intack_none) )
- return;
-
- if ( !hvm_interrupts_enabled(v, intr_source) )
+ /*
+ * An event is already pending or the pending interrupt is masked?
+ * Then the pending interrupt must be delayed.
+ */
+ intr_info = __vmread(VM_ENTRY_INTR_INFO);
+ if ( unlikely(intr_info & INTR_INFO_VALID_MASK) ||
+ !hvm_interrupts_enabled(v, intr_source) )
{
enable_intr_window(v, intr_source);
- return;
+ goto out;
}
} while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );

@@ -206,6 +164,14 @@ asmlinkage void vmx_intr_assist(void)
vmx_inject_extint(v, intr_vector);
pt_intr_post(v, intr_vector, intr_source);
}
+
+ /* Is there another IRQ to queue up behind this one? */
+ intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_intr_window(v, intr_source);
+
+ out:
+ update_tpr_threshold(vcpu_vlapic(v));
}

/*
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Aug 01 08:59:03 2007 +0100
@@ -240,8 +240,23 @@ int vmx_cpu_up(void)
{
u32 eax, edx;
int cpu = smp_processor_id();
+ u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;

BUG_ON(!(read_cr4() & X86_CR4_VMXE));
+
+ /*
+ * Ensure the current processor operating mode meets
+ * the required CR0 fixed bits in VMX operation.
+ */
+ cr0 = read_cr0();
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
+ if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
+ {
+ printk("CPU%d: some settings of host CR0 are "
+ "not allowed in VMX operation.\n", cpu);
+ return 0;
+ }

rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);

@@ -418,7 +433,7 @@ static void construct_vmcs(struct vcpu *
__vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
__vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);

- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ __vmwrite(VM_ENTRY_INTR_INFO, 0);

__vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
__vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
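
The new vmx_cpu_up() test encodes the VMX fixed-bit rules: every bit set in IA32_VMX_CR0_FIXED0 must be 1 in CR0, and every bit clear in IA32_VMX_CR0_FIXED1 must be 0. A worked standalone example follows; the FIXED0/FIXED1 values are typical illustrative ones, not read from real hardware:

#include <stdio.h>
#include <stdint.h>

static int cr0_ok_for_vmx(uint64_t cr0, uint64_t fixed0, uint64_t fixed1)
{
    /* Fails on a required-1 bit that is clear, or a must-be-0 bit set. */
    return !((~cr0 & fixed0) || (cr0 & ~fixed1));
}

int main(void)
{
    uint64_t fixed0 = 0x80000021;   /* PG, NE, PE required (typical)   */
    uint64_t fixed1 = 0xFFFFFFFF;   /* low 32 bits allowed (typical)   */

    printf("%d\n", cr0_ok_for_vmx(0x80000031, fixed0, fixed1)); /* 1 */
    printf("%d\n", cr0_ok_for_vmx(0x00000031, fixed0, fixed1)); /* 0: PG clear */
    return 0;
}
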
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Aug 01 08:59:03 2007 +0100
@@ -613,28 +613,13 @@ void vmx_vmcs_save(struct vcpu *v, struc
c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);

- /*
- * Save any event/interrupt that was being injected when we last
- * exited. IDT_VECTORING_INFO_FIELD has priority, as anything in
- * VM_ENTRY_INTR_INFO_FIELD is either a fault caused by the first
- * event, which will happen the next time, or an interrupt, which we
- * never inject when IDT_VECTORING_INFO_FIELD is valid.
- */
- if ( (ev = __vmread(IDT_VECTORING_INFO_FIELD)) & INTR_INFO_VALID_MASK )
- {
- c->pending_event = ev;
- c->error_code = __vmread(IDT_VECTORING_ERROR_CODE);
- }
- else if ( (ev = __vmread(VM_ENTRY_INTR_INFO_FIELD)) &
- INTR_INFO_VALID_MASK )
+ c->pending_event = 0;
+ c->error_code = 0;
+ if ( ((ev = __vmread(VM_ENTRY_INTR_INFO)) & INTR_INFO_VALID_MASK) &&
+ hvm_event_needs_reinjection((ev >> 8) & 7, ev & 0xff) )
{
c->pending_event = ev;
c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE);
- }
- else
- {
- c->pending_event = 0;
- c->error_code = 0;
}

vmx_vmcs_exit(v);
@@ -754,34 +739,9 @@ int vmx_vmcs_restore(struct vcpu *v, str

if ( c->pending_valid )
{
- vmx_vmcs_enter(v);
-
gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
c->pending_event, c->error_code);

- /* SVM uses type 3 ("Exception") for #OF and #BP; VMX uses type 6 */
- if ( (c->pending_type == 3) &&
- ((c->pending_vector == 3) || (c->pending_vector == 4)) )
- c->pending_type = 6;
-
- /* For software exceptions, we need to tell the hardware the
- * instruction length as well (hmmm). */
- if ( c->pending_type > 4 )
- {
- int addrbytes, ilen;
- if ( (c->cs_arbytes & X86_SEG_AR_CS_LM_ACTIVE) &&
- (c->msr_efer & EFER_LMA) )
- addrbytes = 8;
- else if ( c->cs_arbytes & X86_SEG_AR_DEF_OP_SIZE )
- addrbytes = 4;
- else
- addrbytes = 2;
-
- ilen = hvm_instruction_length(c->rip, addrbytes);
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen);
- }
-
- /* Sanity check */
if ( (c->pending_type == 1) || (c->pending_type > 6) ||
(c->pending_reserved != 0) )
{
@@ -790,12 +750,13 @@ int vmx_vmcs_restore(struct vcpu *v, str
return -EINVAL;
}

- /* Re-inject the exception */
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, c->pending_event);
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
- v->arch.hvm_vmx.vector_injected = 1;
-
- vmx_vmcs_exit(v);
+ if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
+ {
+ vmx_vmcs_enter(v);
+ __vmwrite(VM_ENTRY_INTR_INFO, c->pending_event);
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
+ vmx_vmcs_exit(v);
+ }
}

return 0;
@@ -1203,14 +1164,10 @@ static void vmx_update_vtpr(struct vcpu
/* VMX doesn't have a V_TPR field */
}

-static int vmx_event_injection_faulted(struct vcpu *v)
-{
- unsigned int idtv_info_field;
-
+static int vmx_event_pending(struct vcpu *v)
+{
ASSERT(v == current);
-
- idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
- return (idtv_info_field & INTR_INFO_VALID_MASK);
+ return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
}

static void disable_intercept_for_msr(u32 msr)
@@ -1261,7 +1218,7 @@ static struct hvm_function_table vmx_fun
.inject_exception = vmx_inject_exception,
.init_ap_context = vmx_init_ap_context,
.init_hypercall_page = vmx_init_hypercall_page,
- .event_injection_faulted = vmx_event_injection_faulted,
+ .event_pending = vmx_event_pending,
.cpu_up = vmx_cpu_up,
.cpu_down = vmx_cpu_down,
};
@@ -2199,6 +2156,17 @@ static int vmx_set_cr0(unsigned long val
unsigned long old_base_mfn;

HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
+
+ if ( (u32)value != value )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set upper 32 bits in CR0: %lx",
+ value);
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ return 0;
+ }
+
+ value &= ~HVM_CR0_GUEST_RESERVED_BITS;

/* ET is reserved and should always be 1. */
value |= X86_CR0_ET;
@@ -2842,47 +2810,6 @@ static void vmx_do_extint(struct cpu_use
}
}

-static void vmx_reflect_exception(struct vcpu *v)
-{
- int error_code, intr_info, vector;
-
- intr_info = __vmread(VM_EXIT_INTR_INFO);
- vector = intr_info & 0xff;
- if ( intr_info & INTR_INFO_DELIVER_CODE_MASK )
- error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
- else
- error_code = VMX_DELIVER_NO_ERROR_CODE;
-
-#ifndef NDEBUG
- {
- unsigned long rip;
-
- rip = __vmread(GUEST_RIP);
- HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, error_code = %x",
- rip, error_code);
- }
-#endif /* NDEBUG */
-
- /*
- * According to Intel Virtualization Technology Specification for
- * the IA-32 Intel Architecture (C97063-002 April 2005), section
- * 2.8.3, SW_EXCEPTION should be used for #BP and #OV, and
- * HW_EXCEPTION used for everything else. The main difference
- * appears to be that for SW_EXCEPTION, the EIP/RIP is incremented
- * by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION,
- * it is not.
- */
- if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION )
- {
- int ilen = __get_instruction_length(); /* Safe: software exception */
- vmx_inject_sw_exception(v, vector, ilen);
- }
- else
- {
- vmx_inject_hw_exception(v, vector, error_code);
- }
-}
-
static void vmx_failed_vmentry(unsigned int exit_reason,
struct cpu_user_regs *regs)
{
@@ -2919,7 +2846,7 @@ static void vmx_failed_vmentry(unsigned

asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
- unsigned int exit_reason;
+ unsigned int exit_reason, idtv_info;
unsigned long exit_qualification, inst_len = 0;
struct vcpu *v = current;

@@ -2934,6 +2861,30 @@ asmlinkage void vmx_vmexit_handler(struc

if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
return vmx_failed_vmentry(exit_reason, regs);
+
+ /* Event delivery caused this intercept? Queue for redelivery. */
+ idtv_info = __vmread(IDT_VECTORING_INFO);
+ if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) )
+ {
+ if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
+ {
+ /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
+ __vmwrite(VM_ENTRY_INTR_INFO,
+ idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+ if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ __vmread(IDT_VECTORING_ERROR_CODE));
+ }
+
+ /*
+ * Clear NMI-blocking interruptibility info if an NMI delivery faulted.
+ * Re-delivery will re-set it (see SDM 3B 25.7.1.2).
+ */
+ if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) )
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
+ __vmread(GUEST_INTERRUPTIBILITY_INFO) &
+ ~VMX_INTR_SHADOW_NMI);
+ }

switch ( exit_reason )
{
@@ -2957,7 +2908,7 @@ asmlinkage void vmx_vmexit_handler(struc
* (NB. If we emulate this IRET for any reason, we should re-clear!)
*/
if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
- !(__vmread(IDT_VECTORING_INFO_FIELD) & INTR_INFO_VALID_MASK) &&
+ !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
(vector != TRAP_double_fault) )
__vmwrite(GUEST_INTERRUPTIBILITY_INFO,
__vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
@@ -2995,14 +2946,12 @@ asmlinkage void vmx_vmexit_handler(struc
vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
- if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
- {
- HVMTRACE_0D(NMI, v);
- vmx_store_cpu_guest_regs(v, regs, NULL);
- do_nmi(regs); /* Real NMI, vector 2: normal processing. */
- }
- else
- vmx_reflect_exception(v);
+ if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
+ (X86_EVENTTYPE_NMI << 8) )
+ goto exit_and_crash;
+ HVMTRACE_0D(NMI, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
+ do_nmi(regs); /* Real NMI, vector 2: normal processing. */
break;
case TRAP_machine_check:
HVMTRACE_0D(MCE, v);
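
The "(idtv_info >> 8) & 7" and "idtv_info & 0xff" expressions in the new vmexit-handler hunk above decode the VMX interruption-information layout: vector in bits 7:0, event type in bits 10:8, error-code-valid in bit 11, valid in bit 31. A small decoder using the masks from this changeset:

#include <stdio.h>
#include <stdint.h>

#define INTR_INFO_VALID_MASK        0x80000000u
#define INTR_INFO_DELIVER_CODE_MASK 0x00000800u

int main(void)
{
    /* e.g. a valid hardware exception (#PF, vector 14) with error code */
    uint32_t info = INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK |
                    (3u << 8) | 14;     /* type 3 = hw exception */

    printf("valid=%u type=%u vector=%u has_errcode=%u\n",
           info >> 31, (info >> 8) & 7, info & 0xff,
           !!(info & INTR_INFO_DELIVER_CODE_MASK));
    return 0;
}
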
diff -r 553f64e4f6ef -r 36caf6f84072 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/arch/x86/mm/shadow/multi.c Wed Aug 01 08:59:03 2007 +0100
@@ -2905,7 +2905,7 @@ static int sh_page_fault(struct vcpu *v,
* stack is currently considered to be a page table, so we should
* unshadow the faulting page before exiting.
*/
- if ( unlikely(hvm_event_injection_faulted(v)) )
+ if ( unlikely(hvm_event_pending(v)) )
{
gdprintk(XENLOG_DEBUG, "write to pagetable during event "
"injection: cr2=%#lx, mfn=%#lx\n",
diff -r 553f64e4f6ef -r 36caf6f84072 xen/drivers/acpi/tables.c
--- a/xen/drivers/acpi/tables.c Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/drivers/acpi/tables.c Wed Aug 01 08:59:03 2007 +0100
@@ -73,7 +73,6 @@ struct acpi_table_sdt {

static unsigned long sdt_pa; /* Physical Address */
static unsigned long sdt_count; /* Table count */
-unsigned char acpi_rsdp_rev;

static struct acpi_table_sdt sdt_entry[ACPI_MAX_TABLES] __initdata;

@@ -227,6 +226,17 @@ void acpi_table_print_madt_entry(acpi_ta
}
}

+uint8_t
+generate_acpi_checksum(void *tbl, unsigned long len)
+{
+ uint8_t *ptr, sum = 0;
+
+ for (ptr = tbl; len > 0 ; len--, ptr++)
+ sum += *ptr;
+
+ return 0 - sum;
+}
+
static int
acpi_table_compute_checksum(void *table_pointer, unsigned long length)
{
@@ -599,8 +609,6 @@ int __init acpi_table_init(void)
"RSDP (v%3.3d %6.6s ) @ 0x%p\n",
rsdp->revision, rsdp->oem_id, (void *)rsdp_phys);

- acpi_rsdp_rev = rsdp->revision;
-
if (rsdp->revision < 2)
result =
acpi_table_compute_checksum(rsdp,
@@ -623,3 +631,143 @@ int __init acpi_table_init(void)

return 0;
}
+
+int __init
+acpi_table_disable(enum acpi_table_id table_id)
+{
+ struct acpi_table_header *header = NULL;
+ struct acpi_table_rsdp *rsdp;
+ unsigned long rsdp_phys;
+ char *table_name;
+ int id;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys)
+ return -ENODEV;
+
+ rsdp = (struct acpi_table_rsdp *)__acpi_map_table(rsdp_phys,
+ sizeof(struct acpi_table_rsdp));
+ if (!rsdp)
+ return -ENODEV;
+
+ for (id = 0; id < sdt_count; id++)
+ if (sdt_entry[id].id == table_id)
+ break;
+
+ if (id == sdt_count)
+ return -ENOENT;
+
+ table_name = acpi_table_signatures[table_id];
+
+ /* First check XSDT (but only on ACPI 2.0-compatible systems) */
+
+ if ((rsdp->revision >= 2) &&
+ (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) {
+
+ struct acpi_table_xsdt *mapped_xsdt = NULL;
+
+ sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address;
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
+
+ if (!header) {
+ printk(KERN_WARNING PREFIX
+ "Unable to map XSDT header\n");
+ return -ENODEV;
+ }
+
+ /* remap in the entire table before processing */
+ mapped_xsdt = (struct acpi_table_xsdt *)
+ __acpi_map_table(sdt_pa, header->length);
+ if (!mapped_xsdt) {
+ printk(KERN_WARNING PREFIX "Unable to map XSDT\n");
+ return -ENODEV;
+ }
+ header = &mapped_xsdt->header;
+
+ if (strncmp(header->signature, "XSDT", 4)) {
+ printk(KERN_WARNING PREFIX
+ "XSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING PREFIX "Invalid XSDT checksum\n");
+ return -ENODEV;
+ }
+
+ if (id < sdt_count) {
+ header = (struct acpi_table_header *)
+ __acpi_map_table(mapped_xsdt->entry[id], sizeof(struct acpi_table_header));
+ } else {
+ printk(KERN_WARNING PREFIX
+ "Unable to disable entry %d\n",
+ id);
+ return -ENODEV;
+ }
+ }
+
+ /* Then check RSDT */
+
+ else if (rsdp->rsdt_address) {
+
+ struct acpi_table_rsdt *mapped_rsdt = NULL;
+
+ sdt_pa = rsdp->rsdt_address;
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
+ if (!header) {
+ printk(KERN_WARNING PREFIX
+ "Unable to map RSDT header\n");
+ return -ENODEV;
+ }
+
+ /* remap in the entire table before processing */
+ mapped_rsdt = (struct acpi_table_rsdt *)
+ __acpi_map_table(sdt_pa, header->length);
+ if (!mapped_rsdt) {
+ printk(KERN_WARNING PREFIX "Unable to map RSDT\n");
+ return -ENODEV;
+ }
+ header = &mapped_rsdt->header;
+
+ if (strncmp(header->signature, "RSDT", 4)) {
+ printk(KERN_WARNING PREFIX
+ "RSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING PREFIX "Invalid RSDT checksum\n");
+ return -ENODEV;
+ }
+ if (id < sdt_count) {
+ header = (struct acpi_table_header *)
+ __acpi_map_table(mapped_rsdt->entry[id], sizeof(struct acpi_table_header));
+ } else {
+ printk(KERN_WARNING PREFIX
+ "Unable to disable entry %d\n",
+ id);
+ return -ENODEV;
+ }
+ }
+
+ else {
+ printk(KERN_WARNING PREFIX
+ "No System Description Table (RSDT/XSDT) specified in RSDP\n");
+ return -ENODEV;
+ }
+
+ memcpy(header->signature, "OEMx", 4);
+ memcpy(header->oem_id, "xxxxxx", 6);
+ memcpy(header->oem_id+1, table_name, 4);
+ memcpy(header->oem_table_id, "Xen ", 8);
+ header->checksum = 0;
+ header->checksum = generate_acpi_checksum(header, header->length);
+
+ return 0;
+}
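
Note that acpi_table_disable() never deletes anything: it rewrites the table's signature (to "OEMx", preserving the old name inside the OEM ID) so signature-based lookups stop matching, then regenerates the checksum so the table still verifies. A cut-down standalone illustration; mini_header is a stand-in for struct acpi_table_header, not the real layout:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct mini_header {
    char     signature[4];
    uint32_t length;
    uint8_t  checksum;
    uint8_t  pad[7];
};

static uint8_t generate_acpi_checksum(void *tbl, unsigned long len)
{
    uint8_t *ptr, sum = 0;
    for ( ptr = tbl; len > 0; len--, ptr++ )
        sum += *ptr;
    return 0 - sum;
}

int main(void)
{
    struct mini_header h;

    memset(&h, 0, sizeof(h));
    memcpy(h.signature, "SRAT", 4);
    h.length = sizeof(h);

    memcpy(h.signature, "OEMx", 4);    /* lookups for "SRAT" now miss */
    h.checksum = 0;
    h.checksum = generate_acpi_checksum(&h, h.length);

    printf("signature now %.4s, checksum %#x\n", h.signature, h.checksum);
    return 0;
}
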
diff -r 553f64e4f6ef -r 36caf6f84072 xen/include/asm-ia64/dom_fw_common.h
--- a/xen/include/asm-ia64/dom_fw_common.h Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/include/asm-ia64/dom_fw_common.h Wed Aug 01 08:59:03 2007 +0100
@@ -85,7 +85,6 @@ xen_ia64_efi_make_md(efi_memory_desc_t *
xen_ia64_efi_make_md(efi_memory_desc_t *md,
uint32_t type, uint64_t attr,
uint64_t start, uint64_t end);
-uint8_t generate_acpi_checksum(void *tbl, unsigned long len);
struct fake_acpi_tables;
void dom_fw_fake_acpi(domain_t *d, struct fake_acpi_tables *tables);
int efi_mdt_cmp(const void *a, const void *b);
diff -r 553f64e4f6ef -r 36caf6f84072 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/include/asm-x86/hvm/hvm.h Wed Aug 01 08:59:03 2007 +0100
@@ -154,7 +154,7 @@ struct hvm_function_table {

void (*init_hypercall_page)(struct domain *d, void *hypercall_page);

- int (*event_injection_faulted)(struct vcpu *v);
+ int (*event_pending)(struct vcpu *v);

int (*cpu_up)(void);
void (*cpu_down)(void);
@@ -229,7 +229,8 @@ hvm_guest_x86_mode(struct vcpu *v)
return hvm_funcs.guest_x86_mode(v);
}

-int hvm_instruction_length(unsigned long pc, int address_bytes);
+int hvm_instruction_fetch(unsigned long pc, int address_bytes,
+ unsigned char *buf);

static inline void
hvm_update_host_cr3(struct vcpu *v)
@@ -295,24 +296,71 @@ hvm_inject_exception(unsigned int trapnr

int hvm_bringup_ap(int vcpuid, int trampoline_vector);

-static inline int hvm_event_injection_faulted(struct vcpu *v)
-{
- return hvm_funcs.event_injection_faulted(v);
-}
+static inline int hvm_event_pending(struct vcpu *v)
+{
+ return hvm_funcs.event_pending(v);
+}
+
+/* These reserved bits in lower 32 remain 0 after any load of CR0 */
+#define HVM_CR0_GUEST_RESERVED_BITS \
+ (~((unsigned long) \
+ (X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | \
+ X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | \
+ X86_CR0_WP | X86_CR0_AM | X86_CR0_NW | \
+ X86_CR0_CD | X86_CR0_PG)))

/* These bits in CR4 are owned by the host. */
#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
(X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))

/* These bits in CR4 cannot be set by the guest. */
-#define HVM_CR4_GUEST_RESERVED_BITS \
- ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
- X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
- X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
- X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+#define HVM_CR4_GUEST_RESERVED_BITS \
+ (~((unsigned long) \
+ (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)))

/* These exceptions must always be intercepted. */
#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
+/*
+ * x86 event types. This enumeration is valid for:
+ * Intel VMX: {VM_ENTRY,VM_EXIT,IDT_VECTORING}_INTR_INFO[10:8]
+ * AMD SVM: eventinj[10:8] and exitintinfo[10:8] (types 0-4 only)
+ */
+#define X86_EVENTTYPE_EXT_INTR 0 /* external interrupt */
+#define X86_EVENTTYPE_NMI 2 /* NMI */
+#define X86_EVENTTYPE_HW_EXCEPTION 3 /* hardware exception */
+#define X86_EVENTTYPE_SW_INTERRUPT 4 /* software interrupt */
+#define X86_EVENTTYPE_SW_EXCEPTION 6 /* software exception */
+
+/*
+ * Need to re-inject a given event? We avoid re-injecting software exceptions
+ * and interrupts because the faulting/trapping instruction can simply be
+ * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
+ * INT3/INTO/INTn).
+ */
+static inline int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
+{
+ switch ( type )
+ {
+ case X86_EVENTTYPE_EXT_INTR:
+ case X86_EVENTTYPE_NMI:
+ return 1;
+ case X86_EVENTTYPE_HW_EXCEPTION:
+ /*
+ * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
+ * check for these vectors, as they are really SW Exceptions. SVM has
+ * not updated RIP to point after the trapping instruction (INT3/INTO).
+ */
+ return (vector != 3) && (vector != 4);
+ default:
+ /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
+ break;
+ }
+ return 0;
+}

static inline int hvm_cpu_up(void)
{
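
hvm_event_needs_reinjection() above is the policy at the heart of this changeset: external interrupts and NMIs must be replayed if their delivery faulted, and so must hardware exceptions except for #BP/#OF (which SVM reports as type 3 but are really software traps), while software events are simply re-executed. Exercising the decision table standalone, with the function body copied from the header above:

#include <stdio.h>
#include <stdint.h>

#define X86_EVENTTYPE_EXT_INTR         0
#define X86_EVENTTYPE_NMI              2
#define X86_EVENTTYPE_HW_EXCEPTION     3
#define X86_EVENTTYPE_SW_INTERRUPT     4
#define X86_EVENTTYPE_SW_EXCEPTION     6

static int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
{
    switch ( type )
    {
    case X86_EVENTTYPE_EXT_INTR:
    case X86_EVENTTYPE_NMI:
        return 1;
    case X86_EVENTTYPE_HW_EXCEPTION:
        /* SVM reports #BP/#OF as type 3; they are really SW traps. */
        return (vector != 3) && (vector != 4);
    default:
        break;                  /* SW events: just re-execute the insn */
    }
    return 0;
}

int main(void)
{
    printf("NMI          -> %d\n", hvm_event_needs_reinjection(2, 2));
    printf("#PF (hw, 14) -> %d\n", hvm_event_needs_reinjection(3, 14));
    printf("#BP (hw, 3)  -> %d\n", hvm_event_needs_reinjection(3, 3));
    printf("INT n (sw)   -> %d\n", hvm_event_needs_reinjection(4, 0x80));
    return 0;
}
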
diff -r 553f64e4f6ef -r 36caf6f84072 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Wed Aug 01 08:59:03 2007 +0100
@@ -319,14 +319,6 @@ typedef union
u64 errorcode:32;
} fields;
} __attribute__ ((packed)) eventinj_t;
-
-enum EVENTTYPES
-{
- EVENTTYPE_INTR = 0,
- EVENTTYPE_NMI = 2,
- EVENTTYPE_EXCEPTION = 3,
- EVENTTYPE_SWINT = 4,
-};

typedef union
{
diff -r 553f64e4f6ef -r 36caf6f84072 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Wed Aug 01 08:59:03 2007 +0100
@@ -66,9 +66,6 @@ struct arch_vmx_struct {

/* Cache of cpu execution control. */
u32 exec_control;
-
- /* If there is vector installed in the INTR_INFO_FIELD. */
- u32 vector_injected;

unsigned long cpu_cr0; /* copy of guest CR0 */
unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
@@ -198,7 +195,7 @@ enum vmcs_field {
VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
VM_ENTRY_CONTROLS = 0x00004012,
VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
- VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_INTR_INFO = 0x00004016,
VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
@@ -207,7 +204,7 @@ enum vmcs_field {
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
VM_EXIT_INTR_ERROR_CODE = 0x00004406,
- IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_INFO = 0x00004408,
IDT_VECTORING_ERROR_CODE = 0x0000440a,
VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
VMX_INSTRUCTION_INFO = 0x0000440e,
diff -r 553f64e4f6ef -r 36caf6f84072 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Aug 01 08:59:03 2007 +0100
@@ -94,11 +94,6 @@ void vmx_vlapic_msr_changed(struct vcpu
#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000

-#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
-#define INTR_TYPE_NMI (2 << 8) /* NMI */
-#define INTR_TYPE_HW_EXCEPTION (3 << 8) /* hardware exception */
-#define INTR_TYPE_SW_EXCEPTION (6 << 8) /* software exception */
-
/*
* Exit Qualifications for MOV for Control Register Access
*/
@@ -263,8 +258,8 @@ static inline int __vmxon (u64 addr)
return rc;
}

-static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type,
- int error_code, int ilen)
+static inline void __vmx_inject_exception(
+ struct vcpu *v, int trap, int type, int error_code)
{
unsigned long intr_fields;

@@ -276,16 +271,13 @@ static inline void __vmx_inject_exceptio
* VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State).
*/

- intr_fields = (INTR_INFO_VALID_MASK | type | trap);
+ intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap);
if ( error_code != VMX_DELIVER_NO_ERROR_CODE ) {
__vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
intr_fields |= INTR_INFO_DELIVER_CODE_MASK;
}

- if ( ilen )
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen);
-
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
+ __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);

if (trap == TRAP_page_fault)
HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vmx.cpu_cr2, error_code);
@@ -296,29 +288,19 @@ static inline void vmx_inject_hw_excepti
static inline void vmx_inject_hw_exception(
struct vcpu *v, int trap, int error_code)
{
- v->arch.hvm_vmx.vector_injected = 1;
- __vmx_inject_exception(v, trap, INTR_TYPE_HW_EXCEPTION, error_code, 0);
-}
-
-static inline void vmx_inject_sw_exception(
- struct vcpu *v, int trap, int instruction_len)
-{
- v->arch.hvm_vmx.vector_injected = 1;
- __vmx_inject_exception(v, trap, INTR_TYPE_SW_EXCEPTION,
- VMX_DELIVER_NO_ERROR_CODE,
- instruction_len);
+ __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
}

static inline void vmx_inject_extint(struct vcpu *v, int trap)
{
- __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
- VMX_DELIVER_NO_ERROR_CODE, 0);
+ __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR,
+ VMX_DELIVER_NO_ERROR_CODE);
}

static inline void vmx_inject_nmi(struct vcpu *v)
{
- __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
- VMX_DELIVER_NO_ERROR_CODE, 0);
+ __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI,
+ VMX_DELIVER_NO_ERROR_CODE);
}

#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
diff -r 553f64e4f6ef -r 36caf6f84072 xen/include/xen/acpi.h
--- a/xen/include/xen/acpi.h Mon Jul 30 17:10:45 2007 -0500
+++ b/xen/include/xen/acpi.h Wed Aug 01 08:59:03 2007 +0100
@@ -383,6 +383,7 @@ int acpi_numa_init (void);
int acpi_numa_init (void);

int acpi_table_init (void);
+int acpi_table_disable(enum acpi_table_id table_id);
int acpi_table_parse (enum acpi_table_id id, acpi_table_handler handler);
int acpi_get_table_header_early (enum acpi_table_id id, struct acpi_table_header **header);
int acpi_table_parse_madt (enum acpi_madt_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries);
@@ -390,6 +391,7 @@ void acpi_table_print (struct acpi_table
void acpi_table_print (struct acpi_table_header *header, unsigned long phys_addr);
void acpi_table_print_madt_entry (acpi_table_entry_header *madt);
void acpi_table_print_srat_entry (acpi_table_entry_header *srat);
+uint8_t generate_acpi_checksum(void *tbl, unsigned long len);

/* the following four functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
@@ -534,6 +536,5 @@ static inline int acpi_get_pxm(acpi_hand
#endif

extern int pnpacpi_disabled;
-extern unsigned char acpi_rsdp_rev;

#endif /*_LINUX_ACPI_H*/
