Mailing List Archive

[PATCH v3 02/11] x86/iommu: add common page-table allocator
From: Paul Durrant <pdurrant@amazon.com>

Instead of having separate page table allocation functions in VT-d and AMD
IOMMU code, we could use a common allocation function in the general x86 code.

This patch adds a new allocation function, iommu_alloc_pgtable(), for this
purpose. The function adds the page table pages to a list. The pages in this
list are then freed by iommu_free_pgtables(), which is called by
domain_relinquish_resources() after PCI devices have been de-assigned.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Wei Liu <wl@xen.org>
Cc: "Roger Pau Monné" <roger.pau@citrix.com>

v2:
- This is split out from a larger patch of the same name in v1
---
xen/arch/x86/domain.c | 9 +++++-
xen/drivers/passthrough/x86/iommu.c | 50 +++++++++++++++++++++++++++++
xen/include/asm-x86/iommu.h | 7 ++++
3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index f8084dc9e3..d1ecc7b83b 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2153,7 +2153,8 @@ int domain_relinquish_resources(struct domain *d)
d->arch.rel_priv = PROG_ ## x; /* Fallthrough */ case PROG_ ## x

enum {
- PROG_paging = 1,
+ PROG_iommu_pagetables = 1,
+ PROG_paging,
PROG_vcpu_pagetables,
PROG_shared,
PROG_xen,
@@ -2168,6 +2169,12 @@ int domain_relinquish_resources(struct domain *d)
if ( ret )
return ret;

+ PROGRESS(iommu_pagetables):
+
+ ret = iommu_free_pgtables(d);
+ if ( ret )
+ return ret;
+
PROGRESS(paging):

/* Tear down paging-assistance stuff. */
diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c
index a12109a1de..c0d4865dd7 100644
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -140,6 +140,9 @@ int arch_iommu_domain_init(struct domain *d)

spin_lock_init(&hd->arch.mapping_lock);

+ INIT_PAGE_LIST_HEAD(&hd->arch.pgtables.list);
+ spin_lock_init(&hd->arch.pgtables.lock);
+
return 0;
}

@@ -257,6 +260,53 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
return;
}

+int iommu_free_pgtables(struct domain *d)
+{
+ struct domain_iommu *hd = dom_iommu(d);
+ struct page_info *pg;
+
+ while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
+ {
+ free_domheap_page(pg);
+
+ if ( general_preempt_check() )
+ return -ERESTART;
+ }
+
+ return 0;
+}
+
+struct page_info *iommu_alloc_pgtable(struct domain *d)
+{
+ struct domain_iommu *hd = dom_iommu(d);
+ unsigned int memflags = 0;
+ struct page_info *pg;
+ void *p;
+
+#ifdef CONFIG_NUMA
+ if (hd->node != NUMA_NO_NODE)
+ memflags = MEMF_node(hd->node);
+#endif
+
+ pg = alloc_domheap_page(NULL, memflags);
+ if ( !pg )
+ return NULL;
+
+ p = __map_domain_page(pg);
+ clear_page(p);
+
+ if ( hd->platform_ops->sync_cache )
+ iommu_vcall(hd->platform_ops, sync_cache, p, PAGE_SIZE);
+
+ unmap_domain_page(p);
+
+ spin_lock(&hd->arch.pgtables.lock);
+ page_list_add(pg, &hd->arch.pgtables.list);
+ spin_unlock(&hd->arch.pgtables.lock);
+
+ return pg;
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index 8ce97c981f..31f6d4a8d8 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -46,6 +46,10 @@ typedef uint64_t daddr_t;
struct arch_iommu
{
spinlock_t mapping_lock; /* io page table lock */
+ struct {
+ struct page_list_head list;
+ spinlock_t lock;
+ } pgtables;

union {
/* Intel VT-d */
@@ -131,6 +135,9 @@ int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
iommu_vcall(ops, sync_cache, addr, size); \
})

+int __must_check iommu_free_pgtables(struct domain *d);
+struct page_info * __must_check iommu_alloc_pgtable(struct domain *d);
+
#endif /* !__ARCH_X86_IOMMU_H__ */
/*
* Local variables:
--
2.20.1
Re: [PATCH v3 02/11] x86/iommu: add common page-table allocator [ In reply to ]
On 03.08.2020 14:29, Paul Durrant wrote:
> From: Paul Durrant <pdurrant@amazon.com>
>
> Instead of having separate page table allocation functions in VT-d and AMD
> IOMMU code, we could use a common allocation function in the general x86 code.
>
> This patch adds a new allocation function, iommu_alloc_pgtable(), for this
> purpose. The function adds the page table pages to a list. The pages in this
> list are then freed by iommu_free_pgtables(), which is called by
> domain_relinquish_resources() after PCI devices have been de-assigned.
>
> Signed-off-by: Paul Durrant <pdurrant@amazon.com>
> ---
> Cc: Jan Beulich <jbeulich@suse.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Wei Liu <wl@xen.org>
> Cc: "Roger Pau Monné" <roger.pau@citrix.com>
>
> v2:
> - This is split out from a larger patch of the same name in v1
> ---
> xen/arch/x86/domain.c | 9 +++++-
> xen/drivers/passthrough/x86/iommu.c | 50 +++++++++++++++++++++++++++++
> xen/include/asm-x86/iommu.h | 7 ++++
> 3 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index f8084dc9e3..d1ecc7b83b 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -2153,7 +2153,8 @@ int domain_relinquish_resources(struct domain *d)
> d->arch.rel_priv = PROG_ ## x; /* Fallthrough */ case PROG_ ## x
>
> enum {
> - PROG_paging = 1,
> + PROG_iommu_pagetables = 1,
> + PROG_paging,
> PROG_vcpu_pagetables,
> PROG_shared,
> PROG_xen,

Is there a particular reason to make this new item the very first
one?

> @@ -257,6 +260,53 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> return;
> }
>
> +int iommu_free_pgtables(struct domain *d)
> +{
> + struct domain_iommu *hd = dom_iommu(d);
> + struct page_info *pg;
> +
> + while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
> + {
> + free_domheap_page(pg);
> +
> + if ( general_preempt_check() )
> + return -ERESTART;

Perhaps better only check once every so many pages?

> +struct page_info *iommu_alloc_pgtable(struct domain *d)
> +{
> + struct domain_iommu *hd = dom_iommu(d);
> + unsigned int memflags = 0;
> + struct page_info *pg;
> + void *p;
> +
> +#ifdef CONFIG_NUMA
> + if (hd->node != NUMA_NO_NODE)

Missing blanks inside parentheses.

> @@ -131,6 +135,9 @@ int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
> iommu_vcall(ops, sync_cache, addr, size); \
> })
>
> +int __must_check iommu_free_pgtables(struct domain *d);
> +struct page_info * __must_check iommu_alloc_pgtable(struct domain *d);

Commonly we put a blank on the left side of *, but none to its right.

Jan
RE: [PATCH v3 02/11] x86/iommu: add common page-table allocator [ In reply to ]
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: 03 August 2020 16:59
> To: Paul Durrant <paul@xen.org>
> Cc: xen-devel@lists.xenproject.org; Durrant, Paul <pdurrant@amazon.co.uk>; Andrew Cooper
> <andrew.cooper3@citrix.com>; Wei Liu <wl@xen.org>; Roger Pau Monné <roger.pau@citrix.com>
> Subject: RE: [EXTERNAL] [PATCH v3 02/11] x86/iommu: add common page-table allocator
>
> CAUTION: This email originated from outside of the organization. Do not click links or open
> attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On 03.08.2020 14:29, Paul Durrant wrote:
> > From: Paul Durrant <pdurrant@amazon.com>
> >
> > Instead of having separate page table allocation functions in VT-d and AMD
> > IOMMU code, we could use a common allocation function in the general x86 code.
> >
> > This patch adds a new allocation function, iommu_alloc_pgtable(), for this
> > purpose. The function adds the page table pages to a list. The pages in this
> > list are then freed by iommu_free_pgtables(), which is called by
> > domain_relinquish_resources() after PCI devices have been de-assigned.
> >
> > Signed-off-by: Paul Durrant <pdurrant@amazon.com>
> > ---
> > Cc: Jan Beulich <jbeulich@suse.com>
> > Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> > Cc: Wei Liu <wl@xen.org>
> > Cc: "Roger Pau Monné" <roger.pau@citrix.com>
> >
> > v2:
> > - This is split out from a larger patch of the same name in v1
> > ---
> > xen/arch/x86/domain.c | 9 +++++-
> > xen/drivers/passthrough/x86/iommu.c | 50 +++++++++++++++++++++++++++++
> > xen/include/asm-x86/iommu.h | 7 ++++
> > 3 files changed, 65 insertions(+), 1 deletion(-)
> >
> > diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> > index f8084dc9e3..d1ecc7b83b 100644
> > --- a/xen/arch/x86/domain.c
> > +++ b/xen/arch/x86/domain.c
> > @@ -2153,7 +2153,8 @@ int domain_relinquish_resources(struct domain *d)
> > d->arch.rel_priv = PROG_ ## x; /* Fallthrough */ case PROG_ ## x
> >
> > enum {
> > - PROG_paging = 1,
> > + PROG_iommu_pagetables = 1,
> > + PROG_paging,
> > PROG_vcpu_pagetables,
> > PROG_shared,
> > PROG_xen,
>
> Is there a particular reason to make this new item the very first
> one?

It seems like the logical place as it comes straight after device de-assignment.

>
> > @@ -257,6 +260,53 @@ void __hwdom_init arch_iommu_hwdom_init(struct domain *d)
> > return;
> > }
> >
> > +int iommu_free_pgtables(struct domain *d)
> > +{
> > + struct domain_iommu *hd = dom_iommu(d);
> > + struct page_info *pg;
> > +
> > + while ( (pg = page_list_remove_head(&hd->arch.pgtables.list)) )
> > + {
> > + free_domheap_page(pg);
> > +
> > + if ( general_preempt_check() )
> > + return -ERESTART;
>
> Perhaps better only check once every so many pages?
>

Yes, that would be reasonable.

> > +struct page_info *iommu_alloc_pgtable(struct domain *d)
> > +{
> > + struct domain_iommu *hd = dom_iommu(d);
> > + unsigned int memflags = 0;
> > + struct page_info *pg;
> > + void *p;
> > +
> > +#ifdef CONFIG_NUMA
> > + if (hd->node != NUMA_NO_NODE)
>
> Missing blanks inside parentheses.
>

Oh yes... bad conversion from a ternary expression in the previous version.

> > @@ -131,6 +135,9 @@ int pi_update_irte(const struct pi_desc *pi_desc, const struct pirq *pirq,
> > iommu_vcall(ops, sync_cache, addr, size); \
> > })
> >
> > +int __must_check iommu_free_pgtables(struct domain *d);
> > +struct page_info * __must_check iommu_alloc_pgtable(struct domain *d);
>
> Commonly we put a blank on the left side of *, but none to its right.
>

Kind of felt wrong not to separate it from '__must_check'.

Paul

> Jan