Mailing List Archive

[RFC PATCH 8/8]: PVH: privcmd changes
---
drivers/xen/privcmd.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ccee0f1..0a240ab 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -33,6 +33,7 @@
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include <xen/balloon.h>

#include "privcmd.h"

@@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
if (!xen_initial_domain())
return -EPERM;

+ /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */
+ if (xen_pvh_domain())
+ return -ENOSYS;
+
if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
return -EFAULT;

@@ -251,6 +256,8 @@ struct mmap_batch_state {
xen_pfn_t __user *user;
};

+/* PVH dom0: if domU being created is PV, then mfn is mfn(addr on bus). If
+ * it's PVH then mfn is pfn (input to HAP). */
static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
@@ -274,6 +281,40 @@ static int mmap_return_errors(void *data, void *state)
return put_user(*mfnp, st->user++);
}

+/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
+ * the vma with the page info to use later.
+ * Returns: 0 if success, otherwise -errno
+ */
+static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs)
+{
+ int rc;
+ struct xen_pvh_sav_pfn_info *savp;
+
+ savp = kzalloc(sizeof(struct xen_pvh_sav_pfn_info), GFP_KERNEL);
+ if (savp == NULL)
+ return -ENOMEM;
+
+ savp->sp_paga = kcalloc(numpgs, sizeof(savp->sp_paga[0]), GFP_KERNEL);
+ if (savp->sp_paga == NULL) {
+ kfree(savp);
+ return -ENOMEM;
+ }
+
+ rc = alloc_xenballooned_pages(numpgs, savp->sp_paga, 0);
+ if (rc != 0) {
+ pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__,
+ numpgs, rc);
+ kfree(savp->sp_paga);
+ kfree(savp);
+ return -ENOMEM;
+ }
+ savp->sp_num_pgs = numpgs;
+ BUG_ON(vma->vm_private_data);
+ vma->vm_private_data = savp;
+
+ return 0;
+}
+
static struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(void __user *udata)
@@ -315,6 +356,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
goto out;
}

+ if (xen_pvh_domain()) {
+ if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) {
+ up_write(&mm->mmap_sem);
+ goto out;
+ }
+ }
state.domain = m.dom;
state.vma = vma;
state.va = m.addr;
@@ -365,6 +412,19 @@ static long privcmd_ioctl(struct file *file,
return ret;
}

+static void privcmd_close(struct vm_area_struct *vma)
+{
+ struct xen_pvh_sav_pfn_info *savp;
+
+ if (!xen_pvh_domain() || ((savp=vma->vm_private_data) == NULL))
+ return;
+
+ while (savp->sp_next_todo--) {
+ xen_pfn_t pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo]);
+ pvh_rem_xen_p2m(pfn, 1);
+ }
+}
+
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -375,13 +435,14 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}

static struct vm_operations_struct privcmd_vm_ops = {
+ .close = privcmd_close,
.fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
- /* Unsupported for auto-translate guests. */
- if (xen_feature(XENFEAT_auto_translated_physmap))
+ /* Unsupported for auto-translate guests unless PVH */
+ if (xen_feature(XENFEAT_auto_translated_physmap) && !xen_pvh_domain())
return -ENOSYS;

/* DONTCOPY is essential for Xen because copy_page_range doesn't know
@@ -395,6 +456,9 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)

static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
+ if (xen_pvh_domain())
+ return (vma->vm_private_data == NULL);
+
return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}

--
1.7.2.3


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, 2012-08-16 at 02:07 +0100, Mukesh Rathor wrote:
> ---
> drivers/xen/privcmd.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 66 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..0a240ab 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -33,6 +33,7 @@
> #include <xen/features.h>
> #include <xen/page.h>
> #include <xen/xen-ops.h>
> +#include <xen/balloon.h>
>
> #include "privcmd.h"
>
> @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> + /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */
> + if (xen_pvh_domain())
> + return -ENOSYS;
> +
> if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
> return -EFAULT;
>
> @@ -251,6 +256,8 @@ struct mmap_batch_state {
> xen_pfn_t __user *user;
> };
>
> +/* PVH dom0: if domU being created is PV, then mfn is mfn(addr on bus). If
> + * it's PVH then mfn is pfn (input to HAP). */
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> @@ -274,6 +281,40 @@ static int mmap_return_errors(void *data, void *state)
> return put_user(*mfnp, st->user++);
> }
>
> +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
> + * the vma with the page info to use later.
> + * Returns: 0 if success, otherwise -errno
> + */
> +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs)
> +{
> + int rc;
> + struct xen_pvh_sav_pfn_info *savp;
> +
> + savp = kzalloc(sizeof(struct xen_pvh_sav_pfn_info), GFP_KERNEL);
> + if (savp == NULL)
> + return -ENOMEM;
> +
> + savp->sp_paga = kcalloc(numpgs, sizeof(savp->sp_paga[0]), GFP_KERNEL);
> + if (savp->sp_paga == NULL) {
> + kfree(savp);
> + return -ENOMEM;
> + }
> +
> + rc = alloc_xenballooned_pages(numpgs, savp->sp_paga, 0);
> + if (rc != 0) {
> + pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__,
> + numpgs, rc);
> + kfree(savp->sp_paga);
> + kfree(savp);
> + return -ENOMEM;
> + }
> + savp->sp_num_pgs = numpgs;
> + BUG_ON(vma->vm_private_data);
> + vma->vm_private_data = savp;
> +
> + return 0;
> +}
> +
> static struct vm_operations_struct privcmd_vm_ops;
>
> static long privcmd_ioctl_mmap_batch(void __user *udata)
> @@ -315,6 +356,12 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> goto out;
> }
>
> + if (xen_pvh_domain()) {
> + if ((ret=pvh_privcmd_resv_pfns(vma, m.num))) {
> + up_write(&mm->mmap_sem);
> + goto out;
> + }
> + }
> state.domain = m.dom;
> state.vma = vma;
> state.va = m.addr;
> @@ -365,6 +412,19 @@ static long privcmd_ioctl(struct file *file,
> return ret;
> }
>
> +static void privcmd_close(struct vm_area_struct *vma)
> +{
> + struct xen_pvh_sav_pfn_info *savp;
> +
> + if (!xen_pvh_domain() || ((savp=vma->vm_private_data) == NULL))
> + return;
> +
> + while (savp->sp_next_todo--) {
> + xen_pfn_t pfn = page_to_pfn(savp->sp_paga[savp->sp_next_todo]);
> + pvh_rem_xen_p2m(pfn, 1);
> + }
> +}
> +
> static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
> @@ -375,13 +435,14 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> static struct vm_operations_struct privcmd_vm_ops = {
> + .close = privcmd_close,
> .fault = privcmd_fault
> };
>
> static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
> {
> - /* Unsupported for auto-translate guests. */
> - if (xen_feature(XENFEAT_auto_translated_physmap))
> + /* Unsupported for auto-translate guests unless PVH */
> + if (xen_feature(XENFEAT_auto_translated_physmap) && !xen_pvh_domain())
> return -ENOSYS;
>
> /* DONTCOPY is essential for Xen because copy_page_range doesn't know
> @@ -395,6 +456,9 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
>
> static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
> {
> + if (xen_pvh_domain())
> + return (vma->vm_private_data == NULL);
> +
> return (xchg(&vma->vm_private_data, (void *)1) == NULL);

How come this is different for pvh?

Your version doesn't appear to enforce anything, since it doesn't set
it. Oh I see, you set it to some actual useful data in
pvh_privcmd_resv_pfns. I have a feeling you might want to use some sort
of atomic construct for that.

Which I suspect then means you need to be prepared for it to fail.

Can we set vm_private_data => 1 in privcmd_enforce_singleshot_mapping to
"open the transaction" and then up in pvh_privcmd_resv_pfns set it to
the final pointer up in pvh_privcmd_resv_pfns?

That way it would be non-NULL both while the mapping is being
constructed and in use. Might need some checks elsewhere that it is != 1
when you expect a real pointer.

Ian.



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, 2012-08-16 at 02:07 +0100, Mukesh Rathor wrote:
> ---
> drivers/xen/privcmd.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 66 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..0a240ab 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -33,6 +33,7 @@
> #include <xen/features.h>
> #include <xen/page.h>
> #include <xen/xen-ops.h>
> +#include <xen/balloon.h>
>
> #include "privcmd.h"
>
> @@ -199,6 +200,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> + /* PVH: TBD/FIXME. For now we only support privcmd_ioctl_mmap_batch */
> + if (xen_pvh_domain())
> + return -ENOSYS;
> +
> if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
> return -EFAULT;
>
> @@ -251,6 +256,8 @@ struct mmap_batch_state {
> xen_pfn_t __user *user;
> };
>
> +/* PVH dom0: if domU being created is PV, then mfn is mfn(addr on bus). If
> + * it's PVH then mfn is pfn (input to HAP). */
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> @@ -274,6 +281,40 @@ static int mmap_return_errors(void *data, void *state)
> return put_user(*mfnp, st->user++);
> }
>
> +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
> + * the vma with the page info to use later.
> + * Returns: 0 if success, otherwise -errno
> + */
> +static int pvh_privcmd_resv_pfns(struct vm_area_struct *vma, int numpgs)
> +{
> + int rc;
> + struct xen_pvh_sav_pfn_info *savp;
> +
> + savp = kzalloc(sizeof(struct xen_pvh_sav_pfn_info), GFP_KERNEL);
> + if (savp == NULL)
> + return -ENOMEM;
> +
> + savp->sp_paga = kcalloc(numpgs, sizeof(savp->sp_paga[0]), GFP_KERNEL);
> + if (savp->sp_paga == NULL) {
> + kfree(savp);
> + return -ENOMEM;
> + }
> +
> + rc = alloc_xenballooned_pages(numpgs, savp->sp_paga, 0);
> + if (rc != 0) {
> + pr_warn("%s Could not alloc %d pfns rc:%d\n", __FUNCTION__,
> + numpgs, rc);
> + kfree(savp->sp_paga);
> + kfree(savp);
> + return -ENOMEM;
> + }

I've just been building on this patch to make proper mmap foreign
support on ARM and I was looking for the place which freed this, both
the pages back to the balloon and then the array itself. There is code
in privcmd_close which unmaps the P2M, but I can't find the code which
frees things back to the balloon. Have I missed something?

I think we also need to think about the layering / abstraction a bit
here too. Currently on setup the caller allocates the array iff pvh and
stores it in the opaque vma->vm_private, then calls
xen_remap_domain_mfn_range which iff pvh calls pvh_add_to_xen_p2m which
assumes that the caller has seeded the vm_private with the correct
struct. xen_remap_domain_mfn_range also sets up the stage 1 PT mappings.

On teardown the iff pvh is in the caller which calls pvh_rem_xen_p2m
directly (this API is unbalanced with the setup side). The stage 1 PT
mappings are torn down implicitly somewhere else (in generic code, I
think you said).

(BTW, the terminology I'm using is stage 1 == guest OS page tables,
stage 2 == HAP)

I think you can't rely on the implicit teardown here since you need to
unmap before you hand the page back to the balloon. The reason this
doesn't look necessary now is that you don't give the page back.

Also not ordering the stage 1 and stage 2 teardown correctly is
dangerous, depending on the eventual ordering you potentially turn an
erroneous access to a virtual address, which should result in a guest OS
level page fault (and e.g. a seg fault to the offending process) into a
hypervisor shoots the guest due to an unexpected stage 2 fault type
failure, which is somewhat undesirable ;-)

With that in mind I think you do in the end need to add
xen_unmap_domain_mfn_range which does the unmap from both stage 1 and
stage 2 -- that balances out the interface (making pvh_rem_xen_p2m
internal) too, which is nice. This function may turn out to be a nop
on !pvh, but that's ok (although maybe there would be no harm in doing
explicit unmaps, for consistency?).

WRT passing data between interfaces in vma->vm_private, which is pretty
subtle, can we push that whole thing down into
xen_{remap,unmap}_domain_mfn_range too? This would make the requirement
on the caller be simple "never use vm_private", as opposed to now where
the requirement is "sometimes you might have to allocate some stuff and
stick it in here". The downside is that it pushes code which could be
generic down into per-arch stuff, although with suitable generic helper
functions this isn't so bad (whatever happened to
{alloc,free}_empty_pages_and_pagevec from the classic kernels? Those did
exactly what we want here, I think)

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Tue, 11 Sep 2012 15:10:23 +0100
Ian Campbell <Ian.Campbell@citrix.com> wrote:

> On Thu, 2012-08-16 at 02:07 +0100, Mukesh Rathor wrote:
> > ---
> > drivers/xen/privcmd.c | 68
> > + if (rc != 0) {
> > + pr_warn("%s Could not alloc %d pfns rc:%d\n",
> > __FUNCTION__,
> > + numpgs, rc);
> > + kfree(savp->sp_paga);
> > + kfree(savp);
> > + return -ENOMEM;
> > + }
>
> I've just been building on this patch to make proper mmap foreign
> support on ARM and I was looking for the place which freed this, both
> the pages back to the balloon and then the array itself. There is code
> in privcmd_close which unmaps the P2M, but I can't find the code which
> frees things back to the balloon. Have I missed something?

You are right, I missed the free. Let me revisit it and make some changes.

thanks,
mukesh


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Wed, 2012-09-12 at 02:32 +0100, Mukesh Rathor wrote:
> On Tue, 11 Sep 2012 15:10:23 +0100
> Ian Campbell <Ian.Campbell@citrix.com> wrote:
>
> > On Thu, 2012-08-16 at 02:07 +0100, Mukesh Rathor wrote:
> > > ---
> > > drivers/xen/privcmd.c | 68
> > > + if (rc != 0) {
> > > + pr_warn("%s Could not alloc %d pfns rc:%d\n",
> > > __FUNCTION__,
> > > + numpgs, rc);
> > > + kfree(savp->sp_paga);
> > > + kfree(savp);
> > > + return -ENOMEM;
> > > + }
> >
> > I've just been building on this patch to make proper mmap foreign
> > support on ARM and I was looking for the place which freed this, both
> > the pages back to the balloon and then the array itself. There is code
> > in privcmd_close which unmaps the P2M, but I can't find the code which
> > frees things back to the balloon. Have I missed something?
>
> You are right, I missed the free. Let me revisit it and make some changes.

Thanks.

Any comments on the rest of my mail? We need to agree what the interface
between the generic and the per-arch code is going to look like here.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Wed, 12 Sep 2012 08:36:59 +0100
Ian Campbell <Ian.Campbell@citrix.com> wrote:

> On Wed, 2012-09-12 at 02:32 +0100, Mukesh Rathor wrote:
> > On Tue, 11 Sep 2012 15:10:23 +0100
> > Ian Campbell <Ian.Campbell@citrix.com> wrote:
> >
> > > On Thu, 2012-08-16 at 02:07 +0100, Mukesh Rathor wrote:
> > > > ---
> > > > drivers/xen/privcmd.c | 68
> > > > + if (rc != 0) {
> > > > + pr_warn("%s Could not alloc %d pfns rc:%d\n",
> > > > __FUNCTION__,
> > > > + numpgs, rc);
> > > > + kfree(savp->sp_paga);
> > > > + kfree(savp);
> > > > + return -ENOMEM;
> > > > + }
> > >
> > > I've just been building on this patch to make proper mmap foreign
> > > support on ARM and I was looking for the place which freed this,
> > > both the pages back to the balloon and then the array itself.
> > > There is code in privcmd_close which unmaps the P2M, but I can't
> > > find the code which frees things back to the balloon. Have I
> > > missed something?
> >
> > You are right, I missed the free. Let me revisit it and make some
> > changes.
>
> Thanks.
>
> Any comments on the rest of my mail? We need to agree what the
> interface between the generic and the per-arch code is going to look
> like here.
>
> Ian.

Right, I am paging it all in the brain right now, as I made the changes
a while ago :). I will attempt to change the code according to
your email, to come up with generic interface. Also I am going to come
up with xen_unmap_domain_mfn_range. Mostly agree with your email. More
soon.

thanks,
Mukesh

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Tue, 11 Sep 2012 15:10:23 +0100
Ian Campbell <Ian.Campbell@citrix.com> wrote:

> I think you can't rely on the implicit teardown here since you need to
> unmap before you hand the page back to the balloon. The reason this
> doesn't look necessary now is that you don't give the page back.
> Also not ordering the stage 1 and stage 2 teardown correctly is
> dangerous, depending on the eventual ordering you potentially turn an
> erroneous access to a virtual address, which should result in a guest
> OS level page fault (and e.g. a seg fault to the offending process)
> into a hypervisor shoots the guest due to an unexpected stage 2 fault
> type failure, which is somewhat undesirable ;-)
>
> With that in mind I think you do in the end need to add
> xen_unmap_domain_mfn_range which does the unmap from both stage 1 and
> stage 2 -- that balances out the interface (making pvh_rem_xen_p2m
> internal) too, which is nice. This function may turn out to be a nop
> on !pvh, but that's ok (although maybe there would be no harm in doing
> explicit unmaps, for consistency?).

Ok, I added xen_unmap_domain_mfn_range(). Take a look. It appears that
by the time privcmd_close() is called, the kernel has already freed
process resources and lookup_address() returns NULL. Now I am wondering
if the kernel/mmu does anything to the page while shooting the pte
entry. Well the page was orig from balloon, so the cleanup hopefully
leaves it alone.

I had looked for other hooks initially when I did this, but
vm_operations_struct->close was the only one to pan out.

I can't really move pvh_privcmd_resv_pfns to mmu.c because the
xen_remap_domain_mfn_range is called one page at a time, and I need
to allocate the array first. I'd have to change it to linked list, worth
it? Or I'd have to move and export it.


> WRT passing data between interfaces in vma->vm_private, which is
> pretty subtle, can we push that whole thing down into
> xen_{remap,unmap}_domain_mfn_range too? This would make the
> requirement on the caller be simple "never use vm_private", as
> opposed to now where the requirement is "sometimes you might have to
> allocate some stuff and stick it in here". The downside is that it
> pushes code which could be generic down into per-arch stuff, although
> with suitable generic helper functions this isn't so bad (whatever
> happened to {alloc,free}_empty_pages_and_pagevec from the classic
> kernels? Those did exactly what we want here, I think)

Well, it has to hang off of vma->vm_private. The alternative would be to
have a hash table by process id or something, again not sure if worth it.

Take a look at my latest files attached. Now the alloc_balloon and free
are split between privcmd and mmu.c. The alternative would be to call
xen_unmap_domain_mfn_range one pfn at a time and have it call
pvh_rem_xen_p2m(), and move free_xenballooned_pages to privcmd. But
that would be same as just changing the name of pvh_rem_xen_p2m to
xen_unmap_domain_mfn_range(). Also, remap and unmap won't be much
symmetric then.

Not sure if there's a lot we could do here to be honest. LMK what you
think.

thanks,
Mukesh
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, 2012-09-13 at 02:19 +0100, Mukesh Rathor wrote:
> On Tue, 11 Sep 2012 15:10:23 +0100
> Ian Campbell <Ian.Campbell@citrix.com> wrote:
>
> > I think you can't rely on the implicit teardown here since you need to
> > unmap before you hand the page back to the balloon. The reason this
> > doesn't look necessary now is that you don't give the page back.
> > Also not ordering the stage 1 and stage 2 teardown correctly is
> > dangerous, depending on the eventual ordering you potentially turn an
> > erroneous access to a virtual address, which should result in a guest
> > OS level page fault (and e.g. a seg fault to the offending process)
> > into a hypervisor shoots the guest due to an unexpected stage 2 fault
> > type failure, which is somewhat undesirable ;-)
> >
> > With that in mind I think you do in the end need to add
> > xen_unmap_domain_mfn_range which does the unmap from both stage 1 and
> > stage 2 -- that balances out the interface (making pvh_rem_xen_p2m
> > internal) too, which is nice. This function may turn out to be a nop
> > on !pvh, but that's ok (although maybe there would be no harm in doing
> > explicit unmaps, for consistency?).
>
> Ok, I added xen_unmap_domain_mfn_range(). Take a look.

Thanks, I'll take a gander once I get ballooning working on ARM.

> It appears that
> by the time privcmd_close() is called, the kernel has already freed
> process resources and lookup_address() returns NULL. Now I am wondering
> if the kernel/mmu does anything to the page while shooting the pte
> entry. Well the page was orig from balloon, so the cleanup hopefully
> leaves it alone.

I suppose it depends on whether the core "takes over" the reference
which you hold. I think it doesn't, so this is just a leak, rather than
putting a ballooned page back into the general allocation pool (things
would be crashing left & right if it was doing this I reckon)

>
> I had looked for other hooks initially when I did this, but
> vm_operations_struct->close was the only one to pan out.
>
> I can't really move pvh_privcmd_resv_pfns to mmu.c because the
> xen_remap_domain_mfn_range is called one page at a time, and I need
> to allocate the array first. I'd have to change it to linked list, worth
> it? Or I'd have to move and export it.

Another alternative would be to add the page array as a parameter to the
map/unmap function, rather than relying on it propagating via
vma_private.

The other option I can see is for privcmd to use traverse_pages to hook
all the struct pages in at the right virtual address and then have
remap_mfn_range do a walk for each page. That's O(N) walks for each
mapping though, unless perhaps apply_to_page_range gives the callback
something which can be turned back into a struct page or a pfn?

> > WRT passing data between interfaces in vma->vm_private, which is
> > pretty subtle, can we push that whole thing down into
> > xen_{remap,unmap}_domain_mfn_range too? This would make the
> > requirement on the caller be simple "never use vm_private", as
> > opposed to now where the requirement is "sometimes you might have to
> > allocate some stuff and stick it in here". The downside is that it
> > pushes code which could be generic down into per-arch stuff, although
> > with suitable generic helper functions this isn't so bad (whatever
> > happened to {alloc,free}_empty_pages_and_pagevec from the classic
> > kernels? Those did exactly what we want here, I think)
>
> Well, it has to hang off of vma->vm_private. The alternative would be to
> have a hash table by process id or something, again not sure if worth it.

I think using vm_private within a subsystem/layer is ok, what I think we
should avoid is having layers pass data back and forth in that field.

>
> Take a look at my latest files attached. Now the alloc_balloon and free
> are split between privcmd and mmu.c. The alternative would be to call
> xen_unmap_domain_mfn_range one pfn at a time and have it call
> pvh_rem_xen_p2m(), and move free_xenballooned_pages to privcmd. But
> that would be same as just changing the name of pvh_rem_xen_p2m to
> xen_unmap_domain_mfn_range(). Also, remap and unmap won't be much
> symmetric then.
>
> Not sure if there's a lot we could do here to be honest. LMK what you
> think.
>
> thanks,
> Mukesh
>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
> I suppose it depends on whether the core "takes over" the reference
> which you hold. I think it doesn't, so this is just a leak, rather
> than putting a ballooned page back into the general allocation pool
> (things would be crashing left & right if it was doing this I reckon)
>
> >
> > I had looked for other hooks initially when I did this, but
> > vm_operations_struct->close was the only one to pan out.
> >
> > I can't really move pvh_privcmd_resv_pfns to mmu.c because the
> > xen_remap_domain_mfn_range is called one page at a time, and I need
> > to allocate the array first. I'd have to change it to linked list,
> > worth it? Or I'd have to move and export it.
>
> Another alternative would be to add the page array as a parameter to
> the map/unmap function, rather than relying on it propagating via
> vma_private.

I thought it was a no-no to change an exported API. Konrad, is it OK
to change an exported API like xen_remap_domain_mfn_range, I mean, are
there any guidelines?

thanks,
Mukesh



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, 2012-09-13 at 19:27 +0100, Mukesh Rathor wrote:
> > I suppose it depends on whether the core "takes over" the reference
> > which you hold. I think it doesn't, so this is just a leak, rather
> > than putting a ballooned page back into the general allocation pool
> > (things would be crashing left & right if it was doing this I reckon)
> >
> > >
> > > I had looked for other hooks initially when I did this, but
> > > vm_operations_struct->close was the only one to pan out.
> > >
> > > I can't really move pvh_privcmd_resv_pfns to mmu.c because the
> > > xen_remap_domain_mfn_range is called one page at a time, and I need
> > > to allocate the array first. I'd have to change it to linked list,
> > > worth it? Or I'd have to move and export it.
> >
> > Another alternative would be to add the page array as a parameter to
> > the map/unmap function, rather than relying on it propagating via
> > vma_private.
>
> I thought it was a no-no to change an exported API. Konrad, is it OK
> to change an exported API like xen_remap_domain_mfn_range, I mean, are
> there any guidelines?

You are thinking of system calls, which cannot be changed.

Kernel internal APIs, including those exported to modules are fair game
to change. See Documentation/stable_api_nonsense.txt.

Ian.

>
> thanks,
> Mukesh
>
>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, Sep 13, 2012 at 11:27:53AM -0700, Mukesh Rathor wrote:
> > I suppose it depends on whether the core "takes over" the reference
> > which you hold. I think it doesn't, so this is just a leak, rather
> > than putting a ballooned page back into the general allocation pool
> > (things would be crashing left & right if it was doing this I reckon)
> >
> > >
> > > I had looked for other hooks initially when I did this, but
> > > vm_operations_struct->close was the only one to pan out.
> > >
> > > I can't really move pvh_privcmd_resv_pfns to mmu.c because the
> > > xen_remap_domain_mfn_range is called one page at a time, and I need
> > > to allocate the array first. I'd have to change it to linked list,
> > > worth it? Or I'd have to move and export it.
> >
> > Another alternative would be to add the page array as a parameter to
> > the map/unmap function, rather than relying on it propagating via
> > vma_private.
>
> I thought it was a no-no to change an exported API. Konrad, is it OK
> to change an exported API like xen_remap_domain_mfn_range, I mean, are
> there any guidelines?

Make it work right. We (upstream) don't care about APIs stability. Only
distros need to do it.
>
> thanks,
> Mukesh
>
>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, 13 Sep 2012 12:37:46 +0100
Ian Campbell <Ian.Campbell@citrix.com> wrote:

> On Thu, 2012-09-13 at 02:19 +0100, Mukesh Rathor wrote:
> > On Tue, 11 Sep 2012 15:10:23 +0100
> > Ian Campbell <Ian.Campbell@citrix.com> wrote:
> >
> >
> > Well, it has to hang off of vma->vm_private. The alternative would
> > be to have a hash table by process id or something, again not sure
> > if worth it.
>
> I think using vm_private within a subsystem/layer is ok, what I think
> we should avoid is having layers pass data back and forth in that
> field.

Ah I see your point. Ok, let me play around a bit see what I can do.

thanks
Mukesh


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Thu, 13 Sep 2012 17:34:20 -0700
Mukesh Rathor <mukesh.rathor@oracle.com> wrote:

> On Thu, 13 Sep 2012 12:37:46 +0100
> Ian Campbell <Ian.Campbell@citrix.com> wrote:
>
> > On Thu, 2012-09-13 at 02:19 +0100, Mukesh Rathor wrote:
> > > On Tue, 11 Sep 2012 15:10:23 +0100
> > >
> > I think using vm_private within a subsystem/layer is ok, what I
> > think we should avoid is having layers pass data back and forth in
> > that field.
>
> Ah I see your point. Ok, let me play around a bit see what I can do.

Hey Ian,

I played around a bit with privcmd, but not a whole lot can be done. I
did change things around a bit to make the APIs more symmetric and hide
vm_private_data from mmu.c. Please take a look. Unless major objections,
I'd like to resubmit all linux patches asap.

thanks,
Mukesh
Re: [RFC PATCH 8/8]: PVH: privcmd changes [ In reply to ]
On Tue, 2012-09-18 at 00:50 +0100, Mukesh Rathor wrote:
> On Thu, 13 Sep 2012 17:34:20 -0700
> Mukesh Rathor <mukesh.rathor@oracle.com> wrote:
>
> > On Thu, 13 Sep 2012 12:37:46 +0100
> > Ian Campbell <Ian.Campbell@citrix.com> wrote:
> >
> > > On Thu, 2012-09-13 at 02:19 +0100, Mukesh Rathor wrote:
> > > > On Tue, 11 Sep 2012 15:10:23 +0100
> > > >
> > > I think using vm_private within a subsystem/layer is ok, what I
> > > think we should avoid is having layers pass data back and forth in
> > > that field.
> >
> > Ah I see your point. Ok, let me play around a bit see what I can do.
>
> Hey Ian,
>
> I played around a bit with privcmd, but not a whole lot can be done. I
> did change things around a bit to make the APIs more symmetric and hide
> vm_private_data from mmu.c. Please take a look. Unless major objections,
> I'd like to resubmit all linux patches asap.

It's pretty hard to review complete .c files rather than diffs. I think
you can/should just go ahead and resubmit the latest patches.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel