Mailing List Archive

[xen-unstable] EPT/VT-d: Enhance MTRR/PAT virtualization when EPT/VT-d both enabled
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1232721749 0
# Node ID 175a425e9b55e63c240b0a2ad61f5ed251e85ead
# Parent f3240cd3cd2b9d48acf3d82caa2ca1cab1f66325
EPT/VT-d: Enhance MTRR/PAT virtualization when EPT/VT-d both enabled

Set effective memory type for EPT according to the VT-d snoop control
capability, and also includes some cleanups for EPT & VT-d both enabled.

Signed-off-by: Edwin Zhai <Edwin.Zhai@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
---
xen/arch/x86/hvm/mtrr.c | 20 +++++++++++
xen/arch/x86/mm/hap/p2m-ept.c | 61 +++++++++++++++++++++++++++++-------
xen/drivers/passthrough/iommu.c | 1
xen/drivers/passthrough/vtd/dmar.c | 18 ++++++++++
xen/drivers/passthrough/vtd/iommu.c | 5 ++
xen/drivers/passthrough/vtd/iommu.h | 3 +
xen/include/asm-x86/hvm/vmx/vmx.h | 3 +
xen/include/asm-x86/mtrr.h | 8 ++--
xen/include/xen/iommu.h | 1
9 files changed, 103 insertions(+), 17 deletions(-)

diff -r f3240cd3cd2b -r 175a425e9b55 xen/arch/x86/hvm/mtrr.c
--- a/xen/arch/x86/hvm/mtrr.c Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/arch/x86/hvm/mtrr.c Fri Jan 23 14:42:29 2009 +0000
@@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
1, HVMSR_PER_VCPU);

uint8_t epte_get_entry_emt(
- struct domain *d, unsigned long gfn, unsigned long mfn)
+ struct domain *d, unsigned long gfn,
+ unsigned long mfn, uint8_t *igmt, int direct_mmio)
{
uint8_t gmtrr_mtype, hmtrr_mtype;
uint32_t type;
struct vcpu *v = current;

+ *igmt = 0;
+
if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) )
return MTRR_TYPE_WRBACK;

@@ -722,6 +725,21 @@ uint8_t epte_get_entry_emt(

if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
return type;
+
+ if ( !iommu_enabled )
+ {
+ *igmt = 1;
+ return MTRR_TYPE_WRBACK;
+ }
+
+ if ( direct_mmio )
+ return MTRR_TYPE_UNCACHABLE;
+
+ if ( iommu_snoop )
+ {
+ *igmt = 1;
+ return MTRR_TYPE_WRBACK;
+ }

gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT));
hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT));
diff -r f3240cd3cd2b -r 175a425e9b55 xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri Jan 23 14:42:29 2009 +0000
@@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d
list_add_tail(&pg->list, &d->arch.p2m->pages);

ept_entry->emt = 0;
+ ept_entry->igmt = 0;
ept_entry->sp_avail = 0;
ept_entry->avail1 = 0;
ept_entry->mfn = page_to_mfn(pg);
@@ -114,9 +115,13 @@ static int ept_next_level(struct domain
}
}

+/*
+ * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * by observing whether any gfn->mfn translations are modified.
+ */
static int
-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
- unsigned int order, p2m_type_t p2mt)
+_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
{
ept_entry_t *table = NULL;
unsigned long gfn_remainder = gfn, offset = 0;
@@ -124,6 +129,8 @@ ept_set_entry(struct domain *d, unsigned
u32 index;
int i, rv = 0, ret = 0;
int walk_level = order / EPT_TABLE_ORDER;
+ int direct_mmio = (p2mt == p2m_mmio_direct);
+ uint8_t igmt = 0;

/* we only support 4k and 2m pages now */

@@ -157,7 +164,9 @@ ept_set_entry(struct domain *d, unsigned
{
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
{
- ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+ ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+ &igmt, direct_mmio);
+ ept_entry->igmt = igmt;
ept_entry->sp_avail = walk_level ? 1 : 0;

if ( ret == GUEST_TABLE_SUPER_PAGE )
@@ -208,7 +217,10 @@ ept_set_entry(struct domain *d, unsigned
{
split_ept_entry = split_table + i;
split_ept_entry->emt = epte_get_entry_emt(d,
- gfn-offset+i, split_mfn+i);
+ gfn-offset+i, split_mfn+i,
+ &igmt, direct_mmio);
+ split_ept_entry->igmt = igmt;
+
split_ept_entry->sp_avail = 0;

split_ept_entry->mfn = split_mfn+i;
@@ -223,7 +235,10 @@ ept_set_entry(struct domain *d, unsigned

/* Set the destinated 4k page as normal */
split_ept_entry = split_table + offset;
- split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+ split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+ &igmt, direct_mmio);
+ split_ept_entry->igmt = igmt;
+
split_ept_entry->mfn = mfn_x(mfn);
split_ept_entry->avail1 = p2mt;
ept_p2m_type_to_flags(split_ept_entry, p2mt);
@@ -246,7 +261,8 @@ out:

/* Now the p2m table is not shared with vt-d page table */

- if ( iommu_enabled && is_hvm_domain(d) )
+ if ( iommu_enabled && is_hvm_domain(d)
+ && need_modify_vtd_table )
{
if ( p2mt == p2m_ram_rw )
{
@@ -271,6 +287,17 @@ out:
}

return rv;
+}
+
+static int
+ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int order, p2m_type_t p2mt)
+{
+ /* ept_set_entry() is called from set_entry();
+ * we should always update the VT-d page tables according
+ * to the gfn-to-mfn translation changes.
+ */
+ return _ept_set_entry(d, gfn, mfn, order, p2mt, 1);
}

/* Read ept p2m entries */
@@ -395,18 +422,30 @@ void ept_change_entry_emt_with_range(str
* Set emt for super page.
*/
order = EPT_TABLE_ORDER;
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ /* vmx_set_uc_mode() doesn't touch the gfn-to-mfn
+ * translations, it only modifies the emt field of the EPT entries,
+ * so we need not modify the current VT-d page tables.
+ */
+ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
gfn += 0x1FF;
}
else
{
- /* change emt for partial entries of the 2m area */
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ /* 1) change emt for partial entries of the 2M area.
+ * 2) vmx_set_uc_mode() doesn't touch the gfn-to-mfn
+ * translations, it only modifies the emt field of the EPT entries,
+ * so we need not modify the current VT-d page tables.
+ */
+ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
}
}
- else /* gfn assigned with 4k */
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ else /* 1) gfn assigned with 4k.
+ * 2) vmx_set_uc_mode() doesn't touch the gfn-to-mfn
+ * translations, it only modifies the emt field of the EPT entries,
+ * so we need not modify the current VT-d page tables.
+ */
+ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
}
}

diff -r f3240cd3cd2b -r 175a425e9b55 xen/drivers/passthrough/iommu.c
--- a/xen/drivers/passthrough/iommu.c Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/drivers/passthrough/iommu.c Fri Jan 23 14:42:29 2009 +0000
@@ -39,6 +39,7 @@ int iommu_pv_enabled = 0;
int iommu_pv_enabled = 0;
int force_iommu = 0;
int iommu_passthrough = 0;
+int iommu_snoop = 0;

static void __init parse_iommu_param(char *s)
{
diff -r f3240cd3cd2b -r 175a425e9b55 xen/drivers/passthrough/vtd/dmar.c
--- a/xen/drivers/passthrough/vtd/dmar.c Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/drivers/passthrough/vtd/dmar.c Fri Jan 23 14:42:29 2009 +0000
@@ -29,6 +29,7 @@
#include <xen/pci_regs.h>
#include <asm/string.h>
#include "dmar.h"
+#include "iommu.h"

int vtd_enabled = 1;

@@ -508,6 +509,8 @@ int acpi_dmar_init(void)
int acpi_dmar_init(void)
{
int rc;
+ struct acpi_drhd_unit *drhd;
+ struct iommu *iommu;

rc = -ENODEV;
if ( force_iommu )
@@ -524,7 +527,20 @@ int acpi_dmar_init(void)
if ( list_empty(&acpi_drhd_units) )
goto fail;

- printk("Intel VT-d has been enabled\n");
+ /* Given that all devices within a guest use the same IO page table,
+ * enable snoop control only if all VT-d engines support it.
+ */
+ iommu_snoop = 1;
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( !ecap_snp_ctl(iommu->ecap) ) {
+ iommu_snoop = 0;
+ break;
+ }
+ }
+
+ printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);

return 0;

diff -r f3240cd3cd2b -r 175a425e9b55 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.c Fri Jan 23 14:42:29 2009 +0000
@@ -1482,6 +1482,11 @@ int intel_iommu_map_page(
pte_present = dma_pte_present(*pte);
dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
+
+ /* Set the SNP on leaf page table if Snoop Control available */
+ if ( iommu_snoop )
+ dma_set_pte_snp(*pte);
+
iommu_flush_cache_entry(pte);
spin_unlock(&hd->mapping_lock);
unmap_vtd_domain_page(page);
diff -r f3240cd3cd2b -r 175a425e9b55 xen/drivers/passthrough/vtd/iommu.h
--- a/xen/drivers/passthrough/vtd/iommu.h Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.h Fri Jan 23 14:42:29 2009 +0000
@@ -104,6 +104,7 @@
#define ecap_ext_intr(e) ((e >> 4) & 0x1)
#define ecap_cache_hints(e) ((e >> 5) & 0x1)
#define ecap_pass_thru(e) ((e >> 6) & 0x1)
+#define ecap_snp_ctl(e) ((e >> 7) & 0x1)

/* IOTLB_REG */
#define DMA_TLB_FLUSH_GRANU_OFFSET 60
@@ -260,10 +261,12 @@ struct dma_pte {
};
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
+#define DMA_PTE_SNP (1 << 11)
#define dma_clear_pte(p) do {(p).val = 0;} while(0)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while(0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while(0)
#define dma_set_pte_superpage(p) do {(p).val |= (1 << 7);} while(0)
+#define dma_set_pte_snp(p) do {(p).val |= DMA_PTE_SNP;} while(0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
diff -r f3240cd3cd2b -r 175a425e9b55 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Jan 23 14:42:29 2009 +0000
@@ -33,7 +33,8 @@ typedef union {
u64 r : 1,
w : 1,
x : 1,
- emt : 4,
+ emt : 3,
+ igmt : 1,
sp_avail : 1,
avail1 : 4,
mfn : 45,
diff -r f3240cd3cd2b -r 175a425e9b55 xen/include/asm-x86/mtrr.h
--- a/xen/include/asm-x86/mtrr.h Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/include/asm-x86/mtrr.h Fri Jan 23 14:42:29 2009 +0000
@@ -64,9 +64,11 @@ extern void mtrr_centaur_report_mcr(int
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
paddr_t spaddr);
-extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, unsigned long mfn);
-extern void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
- unsigned long end_gfn);
+extern uint8_t epte_get_entry_emt(
+ struct domain *d, unsigned long gfn, unsigned long mfn,
+ uint8_t *igmt, int direct_mmio);
+extern void ept_change_entry_emt_with_range(
+ struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);

#endif /* __ASM_X86_MTRR_H__ */
diff -r f3240cd3cd2b -r 175a425e9b55 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Fri Jan 23 14:32:41 2009 +0000
+++ b/xen/include/xen/iommu.h Fri Jan 23 14:42:29 2009 +0000
@@ -31,6 +31,7 @@ extern int iommu_pv_enabled;
extern int iommu_pv_enabled;
extern int force_iommu;
extern int iommu_passthrough;
+extern int iommu_snoop;

#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)


_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog