[xen master] x86/shadow: dirty VRAM tracking is needed for HVM only
commit ded576ce07e9328f66842bef67d8cfc14c3088b7
Author: Jan Beulich <jbeulich@suse.com>
AuthorDate: Tue Jul 21 13:57:06 2020 +0200
Commit: Jan Beulich <jbeulich@suse.com>
CommitDate: Tue Jul 21 13:57:06 2020 +0200

x86/shadow: dirty VRAM tracking is needed for HVM only

Move shadow_track_dirty_vram() into hvm.c (requiring two static
functions to become non-static). More importantly, though, make sure we
don't de-reference d->arch.hvm.dirty_vram for a non-HVM guest. This was
only a latent issue because the field happens to live far enough into
struct hvm_domain to fall outside the part overlapping with struct
pv_domain.
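
The hazard comes from the way Xen overlays the PV and HVM per-domain
state. A minimal sketch of the aliasing, with hypothetical field names
(only the union mirrors Xen's actual struct arch_domain):

    struct pv_state  { unsigned long pv_fields[4]; };
    struct hvm_state {
        unsigned long other_fields[8];
        void *dirty_vram;            /* lies beyond the end of pv_state */
    };
    struct arch_state {
        union {
            struct pv_state  pv;
            struct hvm_state hvm;    /* same storage as 'pv' */
        };
    };

    /*
     * For a PV guest nothing ever writes the bytes past struct
     * pv_state, so hvm.dirty_vram merely reads back as NULL from the
     * zeroed allocation: a latent bug, fixed by checking
     * is_hvm_domain(d) before any dereference.
     */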

While moving shadow_track_dirty_vram(), some purely typographic
adjustments are made as well, such as inserting missing blanks and
putting braces on their own lines.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Tim Deegan <tim@xen.org>
---
xen/arch/x86/mm/shadow/common.c  | 201 +------------------------------------
xen/arch/x86/mm/shadow/hvm.c     | 212 +++++++++++++++++++++++++++++++++++++++
xen/arch/x86/mm/shadow/multi.c   |  25 +++--
xen/arch/x86/mm/shadow/private.h |   8 ++
4 files changed, 236 insertions(+), 210 deletions(-)
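
As an orientation aid for the diff below: the moved function tracks
dirtiness with one bit per pfn in [begin_pfn, begin_pfn + nr). Its
packing convention, distilled into a stand-alone sketch (helper names
are hypothetical):

    #include <stdint.h>

    static unsigned long dirty_bitmap_bytes(unsigned long nr)
    {
        return (nr + 7) / 8;             /* round up to whole bytes */
    }

    static void set_dirty(uint8_t *bitmap, unsigned long i)
    {
        bitmap[i / 8] |= 1 << (i % 8);   /* mark pfn begin_pfn + i */
    }

So tracking e.g. nr = 20 pfns needs dirty_bitmap_bytes(20) == 3 bytes.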

diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 773777321f..05a155d1bc 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -999,7 +999,7 @@ void shadow_prealloc(struct domain *d, u32 type, unsigned int count)

/* Deliberately free all the memory we can: this will tear down all of
* this domain's shadows */
-static void shadow_blow_tables(struct domain *d)
+void shadow_blow_tables(struct domain *d)
{
struct page_info *sp, *t;
struct vcpu *v;
@@ -2029,7 +2029,7 @@ int sh_remove_write_access(struct domain *d, mfn_t gmfn,
/* Remove all mappings of a guest frame from the shadow tables.
* Returns non-zero if we need to flush TLBs. */

-static int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn)
+int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn)
{
struct page_info *page = mfn_to_page(gmfn);

@@ -3162,203 +3162,6 @@ static void sh_clean_dirty_bitmap(struct domain *d)
}


-/**************************************************************************/
-/* VRAM dirty tracking support */
-int shadow_track_dirty_vram(struct domain *d,
- unsigned long begin_pfn,
- unsigned long nr,
- XEN_GUEST_HANDLE(void) guest_dirty_bitmap)
-{
- int rc = 0;
- unsigned long end_pfn = begin_pfn + nr;
- unsigned long dirty_size = (nr + 7) / 8;
- int flush_tlb = 0;
- unsigned long i;
- p2m_type_t t;
- struct sh_dirty_vram *dirty_vram;
- struct p2m_domain *p2m = p2m_get_hostp2m(d);
- uint8_t *dirty_bitmap = NULL;
-
- if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
- return -EINVAL;
-
- /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
- p2m_lock(p2m_get_hostp2m(d));
- paging_lock(d);
-
- dirty_vram = d->arch.hvm.dirty_vram;
-
- if ( dirty_vram && (!nr ||
- ( begin_pfn != dirty_vram->begin_pfn
- || end_pfn != dirty_vram->end_pfn )) )
- {
- /* Different tracking, tear the previous down. */
- gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
- xfree(dirty_vram->sl1ma);
- xfree(dirty_vram->dirty_bitmap);
- xfree(dirty_vram);
- dirty_vram = d->arch.hvm.dirty_vram = NULL;
- }
-
- if ( !nr )
- goto out;
-
- dirty_bitmap = vzalloc(dirty_size);
- if ( dirty_bitmap == NULL )
- {
- rc = -ENOMEM;
- goto out;
- }
- /* This should happen seldomly (Video mode change),
- * no need to be careful. */
- if ( !dirty_vram )
- {
- /* Throw away all the shadows rather than walking through them
- * up to nr times getting rid of mappings of each pfn */
- shadow_blow_tables(d);
-
- gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
-
- rc = -ENOMEM;
- if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
- goto out;
- dirty_vram->begin_pfn = begin_pfn;
- dirty_vram->end_pfn = end_pfn;
- d->arch.hvm.dirty_vram = dirty_vram;
-
- if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
- goto out_dirty_vram;
- memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
-
- if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
- goto out_sl1ma;
-
- dirty_vram->last_dirty = NOW();
-
- /* Tell the caller that this time we could not track dirty bits. */
- rc = -ENODATA;
- }
- else if (dirty_vram->last_dirty == -1)
- /* still completely clean, just copy our empty bitmap */
- memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
- else
- {
- mfn_t map_mfn = INVALID_MFN;
- void *map_sl1p = NULL;
-
- /* Iterate over VRAM to track dirty bits. */
- for ( i = 0; i < nr; i++ ) {
- mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
- struct page_info *page;
- int dirty = 0;
- paddr_t sl1ma = dirty_vram->sl1ma[i];
-
- if ( mfn_eq(mfn, INVALID_MFN) )
- dirty = 1;
- else
- {
- page = mfn_to_page(mfn);
- switch (page->u.inuse.type_info & PGT_count_mask)
- {
- case 0:
- /* No guest reference, nothing to track. */
- break;
- case 1:
- /* One guest reference. */
- if ( sl1ma == INVALID_PADDR )
- {
- /* We don't know which sl1e points to this, too bad. */
- dirty = 1;
- /* TODO: Heuristics for finding the single mapping of
- * this gmfn */
- flush_tlb |= sh_remove_all_mappings(d, mfn,
- _gfn(begin_pfn + i));
- }
- else
- {
- /* Hopefully the most common case: only one mapping,
- * whose dirty bit we can use. */
- l1_pgentry_t *sl1e;
- mfn_t sl1mfn = maddr_to_mfn(sl1ma);
-
- if ( !mfn_eq(sl1mfn, map_mfn) )
- {
- if ( map_sl1p )
- unmap_domain_page(map_sl1p);
- map_sl1p = map_domain_page(sl1mfn);
- map_mfn = sl1mfn;
- }
- sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
-
- if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
- {
- dirty = 1;
- /* Note: this is atomic, so we may clear a
- * _PAGE_ACCESSED set by another processor. */
- l1e_remove_flags(*sl1e, _PAGE_DIRTY);
- flush_tlb = 1;
- }
- }
- break;
- default:
- /* More than one guest reference,
- * we don't afford tracking that. */
- dirty = 1;
- break;
- }
- }
-
- if ( dirty )
- {
- dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
- dirty_vram->last_dirty = NOW();
- }
- }
-
- if ( map_sl1p )
- unmap_domain_page(map_sl1p);
-
- memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
- memset(dirty_vram->dirty_bitmap, 0, dirty_size);
- if ( dirty_vram->last_dirty + SECONDS(2) < NOW() )
- {
- /* was clean for more than two seconds, try to disable guest
- * write access */
- for ( i = begin_pfn; i < end_pfn; i++ )
- {
- mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
- if ( !mfn_eq(mfn, INVALID_MFN) )
- flush_tlb |= sh_remove_write_access(d, mfn, 1, 0);
- }
- dirty_vram->last_dirty = -1;
- }
- }
- if ( flush_tlb )
- guest_flush_tlb_mask(d, d->dirty_cpumask);
- goto out;
-
-out_sl1ma:
- xfree(dirty_vram->sl1ma);
-out_dirty_vram:
- xfree(dirty_vram);
- dirty_vram = d->arch.hvm.dirty_vram = NULL;
-
-out:
- paging_unlock(d);
- if ( rc == 0 && dirty_bitmap != NULL &&
- copy_to_guest(guest_dirty_bitmap, dirty_bitmap, dirty_size) )
- {
- paging_lock(d);
- for ( i = 0; i < dirty_size; i++ )
- dirty_vram->dirty_bitmap[i] |= dirty_bitmap[i];
- paging_unlock(d);
- rc = -EFAULT;
- }
- vfree(dirty_bitmap);
- p2m_unlock(p2m_get_hostp2m(d));
- return rc;
-}
-
/* Flush TLB of selected vCPUs. */
bool shadow_flush_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
void *ctxt)
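
One detail worth noting before the code reappears in its new home: the
lock discipline, unchanged by the move. A skeleton of just the ordering,
using the calls visible in the hunk above (not compilable on its own):

    /* p2m lock first: the body performs p2m lookups while holding the
     * paging lock, so taking the p2m lock upfront avoids a lock-order
     * inversion. Release in reverse order; the copy to the guest's
     * bitmap happens after paging_unlock() but before p2m_unlock(). */
    p2m_lock(p2m_get_hostp2m(d));
    paging_lock(d);
    /* ... scan sl1 entries, harvest _PAGE_DIRTY bits ... */
    paging_unlock(d);
    /* copy_to_guest(guest_dirty_bitmap, dirty_bitmap, dirty_size) */
    p2m_unlock(p2m_get_hostp2m(d));
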
diff --git a/xen/arch/x86/mm/shadow/hvm.c b/xen/arch/x86/mm/shadow/hvm.c
index 608360daec..c5da7a071c 100644
--- a/xen/arch/x86/mm/shadow/hvm.c
+++ b/xen/arch/x86/mm/shadow/hvm.c
@@ -691,6 +691,218 @@ static void sh_emulate_unmap_dest(struct vcpu *v, void *addr,
atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
}

+/**************************************************************************/
+/* VRAM dirty tracking support */
+int shadow_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE(void) guest_dirty_bitmap)
+{
+ int rc = 0;
+ unsigned long end_pfn = begin_pfn + nr;
+ unsigned long dirty_size = (nr + 7) / 8;
+ int flush_tlb = 0;
+ unsigned long i;
+ p2m_type_t t;
+ struct sh_dirty_vram *dirty_vram;
+ struct p2m_domain *p2m = p2m_get_hostp2m(d);
+ uint8_t *dirty_bitmap = NULL;
+
+ if ( end_pfn < begin_pfn || end_pfn > p2m->max_mapped_pfn + 1 )
+ return -EINVAL;
+
+ /* We perform p2m lookups, so lock the p2m upfront to avoid deadlock */
+ p2m_lock(p2m_get_hostp2m(d));
+ paging_lock(d);
+
+ dirty_vram = d->arch.hvm.dirty_vram;
+
+ if ( dirty_vram && (!nr ||
+ ( begin_pfn != dirty_vram->begin_pfn
+ || end_pfn != dirty_vram->end_pfn )) )
+ {
+ /* Different tracking, tear the previous down. */
+ gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", dirty_vram->begin_pfn, dirty_vram->end_pfn);
+ xfree(dirty_vram->sl1ma);
+ xfree(dirty_vram->dirty_bitmap);
+ xfree(dirty_vram);
+ dirty_vram = d->arch.hvm.dirty_vram = NULL;
+ }
+
+ if ( !nr )
+ goto out;
+
+ dirty_bitmap = vzalloc(dirty_size);
+ if ( dirty_bitmap == NULL )
+ {
+ rc = -ENOMEM;
+ goto out;
+ }
+ /*
+ * This should happen seldomly (Video mode change),
+ * no need to be careful.
+ */
+ if ( !dirty_vram )
+ {
+ /*
+ * Throw away all the shadows rather than walking through them
+ * up to nr times getting rid of mappings of each pfn.
+ */
+ shadow_blow_tables(d);
+
+ gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
+
+ rc = -ENOMEM;
+ if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+ goto out;
+ dirty_vram->begin_pfn = begin_pfn;
+ dirty_vram->end_pfn = end_pfn;
+ d->arch.hvm.dirty_vram = dirty_vram;
+
+ if ( (dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
+ goto out_dirty_vram;
+ memset(dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr);
+
+ if ( (dirty_vram->dirty_bitmap = xzalloc_array(uint8_t, dirty_size)) == NULL )
+ goto out_sl1ma;
+
+ dirty_vram->last_dirty = NOW();
+
+ /* Tell the caller that this time we could not track dirty bits. */
+ rc = -ENODATA;
+ }
+ else if ( dirty_vram->last_dirty == -1 )
+ /* still completely clean, just copy our empty bitmap */
+ memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
+ else
+ {
+ mfn_t map_mfn = INVALID_MFN;
+ void *map_sl1p = NULL;
+
+ /* Iterate over VRAM to track dirty bits. */
+ for ( i = 0; i < nr; i++ )
+ {
+ mfn_t mfn = get_gfn_query_unlocked(d, begin_pfn + i, &t);
+ struct page_info *page;
+ int dirty = 0;
+ paddr_t sl1ma = dirty_vram->sl1ma[i];
+
+ if ( mfn_eq(mfn, INVALID_MFN) )
+ dirty = 1;
+ else
+ {
+ page = mfn_to_page(mfn);
+ switch ( page->u.inuse.type_info & PGT_count_mask )
+ {
+ case 0:
+ /* No guest reference, nothing to track. */
+ break;
+
+ case 1:
+ /* One guest reference. */
+ if ( sl1ma == INVALID_PADDR )
+ {
+ /* We don't know which sl1e points to this, too bad. */
+ dirty = 1;
+ /*
+ * TODO: Heuristics for finding the single mapping of
+ * this gmfn
+ */
+ flush_tlb |= sh_remove_all_mappings(d, mfn,
+ _gfn(begin_pfn + i));
+ }
+ else
+ {
+ /*
+ * Hopefully the most common case: only one mapping,
+ * whose dirty bit we can use.
+ */
+ l1_pgentry_t *sl1e;
+ mfn_t sl1mfn = maddr_to_mfn(sl1ma);
+
+ if ( !mfn_eq(sl1mfn, map_mfn) )
+ {
+ if ( map_sl1p )
+ unmap_domain_page(map_sl1p);
+ map_sl1p = map_domain_page(sl1mfn);
+ map_mfn = sl1mfn;
+ }
+ sl1e = map_sl1p + (sl1ma & ~PAGE_MASK);
+
+ if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
+ {
+ dirty = 1;
+ /*
+ * Note: this is atomic, so we may clear a
+ * _PAGE_ACCESSED set by another processor.
+ */
+ l1e_remove_flags(*sl1e, _PAGE_DIRTY);
+ flush_tlb = 1;
+ }
+ }
+ break;
+
+ default:
+ /* More than one guest reference,
+ * we don't afford tracking that. */
+ dirty = 1;
+ break;
+ }
+ }
+
+ if ( dirty )
+ {
+ dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
+ dirty_vram->last_dirty = NOW();
+ }
+ }
+
+ if ( map_sl1p )
+ unmap_domain_page(map_sl1p);
+
+ memcpy(dirty_bitmap, dirty_vram->dirty_bitmap, dirty_size);
+ memset(dirty_vram->dirty_bitmap, 0, dirty_size);
+ if ( dirty_vram->last_dirty + SECONDS(2) < NOW() )
+ {
+ /*
+ * Was clean for more than two seconds, try to disable guest
+ * write access.
+ */
+ for ( i = begin_pfn; i < end_pfn; i++ )
+ {
+ mfn_t mfn = get_gfn_query_unlocked(d, i, &t);
+ if ( !mfn_eq(mfn, INVALID_MFN) )
+ flush_tlb |= sh_remove_write_access(d, mfn, 1, 0);
+ }
+ dirty_vram->last_dirty = -1;
+ }
+ }
+ if ( flush_tlb )
+ guest_flush_tlb_mask(d, d->dirty_cpumask);
+ goto out;
+
+ out_sl1ma:
+ xfree(dirty_vram->sl1ma);
+ out_dirty_vram:
+ xfree(dirty_vram);
+ dirty_vram = d->arch.hvm.dirty_vram = NULL;
+
+ out:
+ paging_unlock(d);
+ if ( rc == 0 && dirty_bitmap != NULL &&
+ copy_to_guest(guest_dirty_bitmap, dirty_bitmap, dirty_size) )
+ {
+ paging_lock(d);
+ for ( i = 0; i < dirty_size; i++ )
+ dirty_vram->dirty_bitmap[i] |= dirty_bitmap[i];
+ paging_unlock(d);
+ rc = -EFAULT;
+ }
+ vfree(dirty_bitmap);
+ p2m_unlock(p2m_get_hostp2m(d));
+ return rc;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index 7d16d1c1a9..e34937f43b 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -494,7 +494,6 @@ _sh_propagate(struct vcpu *v,
guest_l1e_t guest_entry = { guest_intpte };
shadow_l1e_t *sp = shadow_entry_ptr;
struct domain *d = v->domain;
- struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
u32 pass_thru_flags;
u32 gflags, sflags;
@@ -649,15 +648,19 @@ _sh_propagate(struct vcpu *v,
}
}

- if ( unlikely((level == 1) && dirty_vram
- && dirty_vram->last_dirty == -1
- && gfn_x(target_gfn) >= dirty_vram->begin_pfn
- && gfn_x(target_gfn) < dirty_vram->end_pfn) )
+ if ( unlikely(level == 1) && is_hvm_domain(d) )
{
- if ( ft & FETCH_TYPE_WRITE )
- dirty_vram->last_dirty = NOW();
- else
- sflags &= ~_PAGE_RW;
+ struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;
+
+ if ( dirty_vram && dirty_vram->last_dirty == -1 &&
+ gfn_x(target_gfn) >= dirty_vram->begin_pfn &&
+ gfn_x(target_gfn) < dirty_vram->end_pfn )
+ {
+ if ( ft & FETCH_TYPE_WRITE )
+ dirty_vram->last_dirty = NOW();
+ else
+ sflags &= ~_PAGE_RW;
+ }
}

/* Read-only memory */
@@ -1082,7 +1085,7 @@ static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e,
unsigned long gfn;
struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;

- if ( !dirty_vram /* tracking disabled? */
+ if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
|| !(flags & _PAGE_RW) /* read-only mapping? */
|| !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */
return;
@@ -1113,7 +1116,7 @@ static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e,
unsigned long gfn;
struct sh_dirty_vram *dirty_vram = d->arch.hvm.dirty_vram;

- if ( !dirty_vram /* tracking disabled? */
+ if ( !is_hvm_domain(d) || !dirty_vram /* tracking disabled? */
|| !(flags & _PAGE_RW) /* read-only mapping? */
|| !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */
return;
diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h
index 3fd3f0617a..eb5d1e3fab 100644
--- a/xen/arch/x86/mm/shadow/private.h
+++ b/xen/arch/x86/mm/shadow/private.h
@@ -438,6 +438,14 @@ mfn_t oos_snapshot_lookup(struct domain *d, mfn_t gmfn);

#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */

+/* Deliberately free all the memory we can: tear down all of d's shadows. */
+void shadow_blow_tables(struct domain *d);
+
+/*
+ * Remove all mappings of a guest frame from the shadow tables.
+ * Returns non-zero if we need to flush TLBs.
+ */
+int sh_remove_all_mappings(struct domain *d, mfn_t gmfn, gfn_t gfn);

/* Reset the up-pointers of every L3 shadow to 0.
* This is called when l3 shadows stop being pinnable, to clear out all
--
generated by git-patchbot for /home/xen/git/xen.git#master