Mailing List Archive

[PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API
Kernel workqueues were disabled due to flawed use of kernel VA and SVA
API. Now That we have the support for attaching PASID to the device's
default domain and the ability to reserve global PASIDs from SVA APIs,
we can re-enable the kernel work queues and use them under DMA API.

We also use non-privileged access for in-kernel DMA to be consistent
with the IOMMU settings. Consequently, interrupt for user privilege is
enabled for work completion IRQs.

Link:https://lore.kernel.org/linux-iommu/20210511194726.GP1002214@nvidia.com/
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
---
drivers/dma/idxd/device.c | 30 ++++-----------------
drivers/dma/idxd/init.c | 56 ++++++++++++++++++++++++++++++++++++---
drivers/dma/idxd/sysfs.c | 7 -----
3 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 6fca8fa8d3a8..f6b133d61a04 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -299,21 +299,6 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd)
}
}

-static void __idxd_wq_set_priv_locked(struct idxd_wq *wq, int priv)
-{
- struct idxd_device *idxd = wq->idxd;
- union wqcfg wqcfg;
- unsigned int offset;
-
- offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PRIVL_IDX);
- spin_lock(&idxd->dev_lock);
- wqcfg.bits[WQCFG_PRIVL_IDX] = ioread32(idxd->reg_base + offset);
- wqcfg.priv = priv;
- wq->wqcfg->bits[WQCFG_PRIVL_IDX] = wqcfg.bits[WQCFG_PRIVL_IDX];
- iowrite32(wqcfg.bits[WQCFG_PRIVL_IDX], idxd->reg_base + offset);
- spin_unlock(&idxd->dev_lock);
-}
-
static void __idxd_wq_set_pasid_locked(struct idxd_wq *wq, int pasid)
{
struct idxd_device *idxd = wq->idxd;
@@ -1324,15 +1309,14 @@ int drv_enable_wq(struct idxd_wq *wq)
}

/*
- * In the event that the WQ is configurable for pasid and priv bits.
- * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit.
- * However, for non-kernel wq, the driver should only set the pasid_en bit for
- * shared wq. A dedicated wq that is not 'kernel' type will configure pasid and
+ * In the event that the WQ is configurable for pasid, the driver
+ * should setup the pasid, pasid_en bit. This is true for both kernel
+ * and user shared workqueues. There is no need to setup priv bit in
+ * that in-kernel DMA will also do user privileged requests.
+ * A dedicated wq that is not 'kernel' type will configure pasid and
* pasid_en later on so there is no need to setup.
*/
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
- int priv = 0;
-
if (wq_pasid_enabled(wq)) {
if (is_idxd_wq_kernel(wq) || wq_shared(wq)) {
u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0;
@@ -1340,10 +1324,6 @@ int drv_enable_wq(struct idxd_wq *wq)
__idxd_wq_set_pasid_locked(wq, pasid);
}
}
-
- if (is_idxd_wq_kernel(wq))
- priv = 1;
- __idxd_wq_set_priv_locked(wq, priv);
}

rc = 0;
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index e6ee267da0ff..6f7778e1e936 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -506,14 +506,61 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d

static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
- return -EOPNOTSUPP;
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ struct iommu_domain *domain;
+ union gencfg_reg gencfg;
+ ioasid_t pasid;
+ int ret;
+
+ /*
+ * Attach a global PASID to the DMA domain so that we can use ENQCMDS
+ * to submit work on buffers mapped by DMA API.
+ */
+ domain = iommu_get_domain_for_dev(dev);
+ if (!domain)
+ return -EPERM;
+
+ pasid = iommu_alloc_global_pasid(0, dev->iommu->max_pasids);
+ if (!pasid_valid(pasid))
+ return -ENOSPC;
+
+ ret = iommu_attach_device_pasid(domain, dev, pasid);
+ if (ret) {
+ dev_err(dev, "failed to attach device pasid %d, domain type %d",
+ pasid, domain->type);
+ iommu_free_global_pasid(pasid);
+ return ret;
+ }
+
+ /* Since we set user privilege for kernel DMA, enable completion IRQ */
+ gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ gencfg.user_int_en = 1;
+ iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+ idxd->pasid = pasid;
+
+ return ret;
}

static void idxd_disable_system_pasid(struct idxd_device *idxd)
{
+ struct pci_dev *pdev = idxd->pdev;
+ struct device *dev = &pdev->dev;
+ struct iommu_domain *domain;
+ union gencfg_reg gencfg;
+
+ domain = iommu_get_domain_for_dev(dev);
+ if (!domain || domain->type == IOMMU_DOMAIN_BLOCKED)
+ return;
+
+ iommu_detach_device_pasid(domain, dev, idxd->pasid);
+ iommu_free_global_pasid(idxd->pasid);

- iommu_sva_unbind_device(idxd->sva);
+ gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ gencfg.user_int_en = 0;
+ iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
idxd->sva = NULL;
+ idxd->pasid = IOMMU_PASID_INVALID;
}

static int idxd_probe(struct idxd_device *idxd)
@@ -535,8 +582,9 @@ static int idxd_probe(struct idxd_device *idxd)
} else {
set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);

- if (idxd_enable_system_pasid(idxd))
- dev_warn(dev, "No in-kernel DMA with PASID.\n");
+ rc = idxd_enable_system_pasid(idxd);
+ if (rc)
+ dev_warn(dev, "No in-kernel DMA with PASID. %d\n", rc);
else
set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 18cd8151dee0..c5561c00a503 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -944,13 +944,6 @@ static ssize_t wq_name_store(struct device *dev,
if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0)
return -EINVAL;

- /*
- * This is temporarily placed here until we have SVM support for
- * dmaengine.
- */
- if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd))
- return -EOPNOTSUPP;
-
input = kstrndup(buf, count, GFP_KERNEL);
if (!input)
return -ENOMEM;
--
2.25.1
RE: [PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API [ In reply to ]
Hi, Jacob,

> Kernel workqueues were disabled due to flawed use of kernel VA and SVA API.
> Now That we have the support for attaching PASID to the device's default

s/That/that/

> domain and the ability to reserve global PASIDs from SVA APIs, we can re-enable
> the kernel work queues and use them under DMA API.
>
> We also use non-privileged access for in-kernel DMA to be consistent with the
> IOMMU settings. Consequently, interrupt for user privilege is enabled for work
> completion IRQs.
>
> Link:https://lore.kernel.org/linux-
> iommu/20210511194726.GP1002214@nvidia.com/
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>

Other than the typo,

Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>

Thanks.

-Fenghua
Re: [PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API [ In reply to ]
On 2023/4/1 7:11, Jacob Pan wrote:
> static void idxd_disable_system_pasid(struct idxd_device *idxd)
> {
> + struct pci_dev *pdev = idxd->pdev;
> + struct device *dev = &pdev->dev;
> + struct iommu_domain *domain;
> + union gencfg_reg gencfg;
> +
> + domain = iommu_get_domain_for_dev(dev);
> + if (!domain || domain->type == IOMMU_DOMAIN_BLOCKED)
> + return;

Out of curiosity, why do you need to check the domain type? And, in
which case could the domain for the device be changed to a blocking one?

Once a driver is bound to the device, the driver "owns" the DMA of the
device. No one else could change the domain except the driver itself.

> +
> + iommu_detach_device_pasid(domain, dev, idxd->pasid);
> + iommu_free_global_pasid(idxd->pasid);
>
> - iommu_sva_unbind_device(idxd->sva);
> + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
> + gencfg.user_int_en = 0;
> + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
> idxd->sva = NULL;
> + idxd->pasid = IOMMU_PASID_INVALID;
> }

Best regards,
baolu
Re: [PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API [ In reply to ]
Hi Baolu,

On Sat, 1 Apr 2023 21:39:32 +0800, Baolu Lu <baolu.lu@linux.intel.com>
wrote:

> On 2023/4/1 7:11, Jacob Pan wrote:
> > static void idxd_disable_system_pasid(struct idxd_device *idxd)
> > {
> > + struct pci_dev *pdev = idxd->pdev;
> > + struct device *dev = &pdev->dev;
> > + struct iommu_domain *domain;
> > + union gencfg_reg gencfg;
> > +
> > + domain = iommu_get_domain_for_dev(dev);
> > + if (!domain || domain->type == IOMMU_DOMAIN_BLOCKED)
> > + return;
>
> Out of curiosity, why do you need to check the domain type? And, in
> which case could the domain for the device be changed to a blocking one?
>
> Once a driver is bound to the device, the driver "owns" the DMA of the
> device. No one else could change the domain except the driver itself.
nothing particular just for precaution, I can drop the check or add a
warn_on.

> > +
> > + iommu_detach_device_pasid(domain, dev, idxd->pasid);
> > + iommu_free_global_pasid(idxd->pasid);
> >
> > - iommu_sva_unbind_device(idxd->sva);
> > + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
> > + gencfg.user_int_en = 0;
> > + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
> > idxd->sva = NULL;
> > + idxd->pasid = IOMMU_PASID_INVALID;
> > }
>
> Best regards,
> baolu


Thanks,

Jacob
Re: [PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API [ In reply to ]
Hi Fenghua,

On Fri, 31 Mar 2023 23:31:13 +0000, "Yu, Fenghua" <fenghua.yu@intel.com>
wrote:

> Hi, Jacob,
>
> > Kernel workqueues were disabled due to flawed use of kernel VA and SVA
> > API. Now That we have the support for attaching PASID to the device's
> > default
>
> s/That/that/
will fix, for real this time :) you pointed it out before.

> > domain and the ability to reserve global PASIDs from SVA APIs, we can
> > re-enable the kernel work queues and use them under DMA API.
> >
> > We also use non-privileged access for in-kernel DMA to be consistent
> > with the IOMMU settings. Consequently, interrupt for user privilege is
> > enabled for work completion IRQs.
> >
> > Link:https://lore.kernel.org/linux-
> > iommu/20210511194726.GP1002214@nvidia.com/
> > Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
>
> Other than the typo,
>
> Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
>
> Thanks.
>
> -Fenghua


Thanks,

Jacob
Re: [PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API [ In reply to ]
On Fri, Mar 31, 2023 at 04:11:37PM -0700, Jacob Pan wrote:
> static void idxd_disable_system_pasid(struct idxd_device *idxd)
> {
> + struct pci_dev *pdev = idxd->pdev;
> + struct device *dev = &pdev->dev;
> + struct iommu_domain *domain;
> + union gencfg_reg gencfg;
> +
> + domain = iommu_get_domain_for_dev(dev);
> + if (!domain || domain->type == IOMMU_DOMAIN_BLOCKED)
> + return;
> +
> + iommu_detach_device_pasid(domain, dev, idxd->pasid);

This sequence is kinda weird, we shouldn't pass in domain to
detach_device_pasid, IMHO. We already know the domain because we store
it in an xarray, it just creates weirdness if the user passes in the
wrong domain.

Jason
Re: [PATCH v3 7/7] dmaengine/idxd: Re-enable kernel workqueue under DMA API [ In reply to ]
On 4/5/23 8:15 PM, Jason Gunthorpe wrote:
> On Fri, Mar 31, 2023 at 04:11:37PM -0700, Jacob Pan wrote:
>> static void idxd_disable_system_pasid(struct idxd_device *idxd)
>> {
>> + struct pci_dev *pdev = idxd->pdev;
>> + struct device *dev = &pdev->dev;
>> + struct iommu_domain *domain;
>> + union gencfg_reg gencfg;
>> +
>> + domain = iommu_get_domain_for_dev(dev);
>> + if (!domain || domain->type == IOMMU_DOMAIN_BLOCKED)
>> + return;
>> +
>> + iommu_detach_device_pasid(domain, dev, idxd->pasid);
> This sequence is kinda weird, we shouldn't pass in domain to
> detach_device_pasid, IMHO. We already know the domain because we store
> it in an xarray, it just creates weirdness if the user passes in the
> wrong domain.

The initial idea was that the driver has a domain and it wants to attach
the domain to a pasid of the device. During attaching, iommu core will
save the domain in its xarray.

After use, driver want to detach the domain from the pasid by calling
iommu_detach_device_pasid(). The iommu core will compare the input
domain and the one it saved. A warning will be triggered if they are
different.

WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);

Logically speaking, @domain for detach_device_pasid is unnecessary,
because the pasid array is essentially per-device (as we discussed
before. the pci_enable_pasid() ensures this), although it is currently
placed in the group structure. In that case, the driver can and should
own everything about the pasid and domain. The roles of the iommu core
and the individual driver are only to handle requests of installing or
withdrawing a domain on/from a device's pasid.

Best regards,
baolu