Mailing List Archive

[PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify
Normally, when a userspace process mapping a grant crashes, the domain
providing the reference receives no indication that its peer has
crashed, possibly leading to unexpected freezes or timeouts. This
function provides a notification of the unmap by signalling an event
channel and/or clearing a specific byte in the page.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
tools/libxc/xc_gnttab.c | 15 ++++++++++++
tools/libxc/xc_linux_osdep.c | 52 ++++++++++++++++++++++++++++++++++++++++++
tools/libxc/xenctrl.h | 21 +++++++++++++++++
tools/libxc/xenctrlosdep.h | 5 ++++
4 files changed, 93 insertions(+), 0 deletions(-)

diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
index 4f55fce..dc7aa0c 100644
--- a/tools/libxc/xc_gnttab.c
+++ b/tools/libxc/xc_gnttab.c
@@ -174,6 +174,21 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
count, domid, refs, prot);
}

+void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
+ uint32_t domid,
+ uint32_t ref,
+ uint32_t notify_offset,
+ evtchn_port_t notify_port)
+{
+ if (xcg->ops->u.gnttab.map_grant_ref_notify)
+ return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
+ domid, ref, notify_offset, notify_port);
+ else
+ return xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
+ domid, ref, PROT_READ|PROT_WRITE);
+}
+
+
int xc_gnttab_munmap(xc_gnttab *xcg,
void *start_address,
uint32_t count)
diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
index dca6718..8f7718f 100644
--- a/tools/libxc/xc_linux_osdep.c
+++ b/tools/libxc/xc_linux_osdep.c
@@ -613,6 +613,57 @@ static void *linux_gnttab_map_domain_grant_refs(xc_gnttab *xcg, xc_osdep_handle
return do_gnttab_map_grant_refs(xcg, h, count, &domid, 0, refs, prot);
}

+static void *linux_gnttab_map_grant_ref_notify(xc_gnttab *xch, xc_osdep_handle h,
+ uint32_t domid, uint32_t ref,
+ uint32_t notify_offset,
+ evtchn_port_t notify_port)
+{
+ int fd = (int)h;
+ struct ioctl_gntdev_map_grant_ref map;
+ struct ioctl_gntdev_unmap_notify notify;
+ void *addr;
+
+ map.count = 1;
+ map.refs[0].domid = domid;
+ map.refs[0].ref = ref;
+
+ if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) ) {
+ PERROR("xc_gnttab_map_grant_ref: ioctl MAP_GRANT_REF failed");
+ return NULL;
+ }
+
+ addr = mmap(NULL, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, map.index);
+ if ( addr == MAP_FAILED )
+ {
+ int saved_errno = errno;
+ struct ioctl_gntdev_unmap_grant_ref unmap_grant;
+
+ PERROR("xc_gnttab_map_grant_ref: mmap failed");
+ unmap_grant.index = map.index;
+ unmap_grant.count = 1;
+ ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
+ errno = saved_errno;
+ return NULL;
+ }
+
+ notify.index = map.index;
+ notify.action = 0;
+ if (notify_offset >= 0) {
+ notify.index += notify_offset;
+ notify.action |= UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ if (notify_port >= 0) {
+ notify.event_channel_port = notify_port;
+ notify.action |= UNMAP_NOTIFY_SEND_EVENT;
+ }
+ if (notify.action && ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify)) {
+ PERROR("linux_gnttab_map_grant_ref_notify: ioctl SET_UNMAP_NOTIFY failed");
+ }
+
+ return addr;
+}
+
+
static int linux_gnttab_munmap(xc_gnttab *xcg, xc_osdep_handle h,
void *start_address, uint32_t count)
{
@@ -662,6 +713,7 @@ static struct xc_osdep_ops linux_gnttab_ops = {
.map_grant_ref = &linux_gnttab_map_grant_ref,
.map_grant_refs = &linux_gnttab_map_grant_refs,
.map_domain_grant_refs = &linux_gnttab_map_domain_grant_refs,
+ .map_grant_ref_notify = &linux_gnttab_map_grant_ref_notify,
.munmap = &linux_gnttab_munmap,
},
};
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 1b82ee0..7859571 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -1349,6 +1349,27 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
int prot);

/*
+ * Memory maps a grant reference from one domain to a local address range.
+ * Mappings should be unmapped with xc_gnttab_munmap. Logs errors.
+ * This version always maps writable pages, and will attempt to set up
+ * an unmap notification at the given offset and event channel. When the
+ * page is unmapped, the byte at the given offset will be zeroed and a
+ * wakeup will be sent to the given event channel.
+ *
+ * @parm xcg a handle on an open grant table interface
+ * @parm domid the domain to map memory from
+ * @parm ref the grant reference ID to map
+ * @parm notify_offset The byte offset in the page to use for unmap
+ * notification; -1 for none.
+ * @parm notify_port The event channel port to use for unmap notify, or -1
+ */
+void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
+ uint32_t domid,
+ uint32_t ref,
+ uint32_t notify_offset,
+ evtchn_port_t notify_port);
+
+/*
* Unmaps the @count pages starting at @start_address, which were mapped by a
* call to xc_gnttab_map_grant_ref or xc_gnttab_map_grant_refs. Never logs.
*/
diff --git a/tools/libxc/xenctrlosdep.h b/tools/libxc/xenctrlosdep.h
index bfe46e0..01969c5 100644
--- a/tools/libxc/xenctrlosdep.h
+++ b/tools/libxc/xenctrlosdep.h
@@ -119,6 +119,11 @@ struct xc_osdep_ops
uint32_t domid,
uint32_t *refs,
int prot);
+ void *(*map_grant_ref_notify)(xc_gnttab *xcg, xc_osdep_handle h,
+ uint32_t domid,
+ uint32_t ref,
+ uint32_t notify_offset,
+ evtchn_port_t notify_port);
int (*munmap)(xc_gnttab *xcg, xc_osdep_handle h,
void *start_address,
uint32_t count);
--
1.7.6


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On Thu, Sep 01, 2011 at 12:22:16PM -0400, Daniel De Graaf wrote:
> Normally, when a userspace process mapping a grant crashes, the domain
> providing the reference receives no indication that its peer has
> crashed, possibly leading to unexpected freezes or timeouts. This
> function provides a notification of the unmap by signalling an event
> channel and/or clearing a specific byte in the page.
>
> Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
> ---
> tools/libxc/xc_gnttab.c | 15 ++++++++++++
> tools/libxc/xc_linux_osdep.c | 52 ++++++++++++++++++++++++++++++++++++++++++
> tools/libxc/xenctrl.h | 21 +++++++++++++++++
> tools/libxc/xenctrlosdep.h | 5 ++++
> 4 files changed, 93 insertions(+), 0 deletions(-)
>
> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
> index 4f55fce..dc7aa0c 100644
> --- a/tools/libxc/xc_gnttab.c
> +++ b/tools/libxc/xc_gnttab.c
> @@ -174,6 +174,21 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
> count, domid, refs, prot);
> }
>
> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
> + uint32_t domid,
> + uint32_t ref,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port)
> +{
> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
> + domid, ref, notify_offset, notify_port);
> + else
> + return xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
> + domid, ref, PROT_READ|PROT_WRITE);
> +}
> +
> +
> int xc_gnttab_munmap(xc_gnttab *xcg,
> void *start_address,
> uint32_t count)
> diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
> index dca6718..8f7718f 100644
> --- a/tools/libxc/xc_linux_osdep.c
> +++ b/tools/libxc/xc_linux_osdep.c
> @@ -613,6 +613,57 @@ static void *linux_gnttab_map_domain_grant_refs(xc_gnttab *xcg, xc_osdep_handle
> return do_gnttab_map_grant_refs(xcg, h, count, &domid, 0, refs, prot);
> }
>
> +static void *linux_gnttab_map_grant_ref_notify(xc_gnttab *xch, xc_osdep_handle h,
> + uint32_t domid, uint32_t ref,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port)
> +{
> + int fd = (int)h;
> + struct ioctl_gntdev_map_grant_ref map;
> + struct ioctl_gntdev_unmap_notify notify;

That looks a bit odd. Like the formatting is off?

> + void *addr;
> +
> + map.count = 1;
> + map.refs[0].domid = domid;
> + map.refs[0].ref = ref;
> +
> + if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) ) {
> + PERROR("xc_gnttab_map_grant_ref: ioctl MAP_GRANT_REF failed");
> + return NULL;
> + }
> +
> + addr = mmap(NULL, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, map.index);
> + if ( addr == MAP_FAILED )
> + {
> + int saved_errno = errno;
> + struct ioctl_gntdev_unmap_grant_ref unmap_grant;
> +
> + PERROR("xc_gnttab_map_grant_ref: mmap failed");
> + unmap_grant.index = map.index;
> + unmap_grant.count = 1;
> + ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
> + errno = saved_errno;
> + return NULL;
> + }
> +
> + notify.index = map.index;
> + notify.action = 0;
> + if (notify_offset >= 0) {
> + notify.index += notify_offset;
> + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE;
> + }
> + if (notify_port >= 0) {
> + notify.event_channel_port = notify_port;
> + notify.action |= UNMAP_NOTIFY_SEND_EVENT;
> + }
> + if (notify.action && ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify)) {
> + PERROR("linux_gnttab_map_grant_ref_notify: ioctl SET_UNMAP_NOTIFY failed");

Perhaps reporting via an argument that we failed at doing the notify would
be useful? That way at least you know you need to do polling.

> + }
> +
> + return addr;
> +}
> +
> +
> static int linux_gnttab_munmap(xc_gnttab *xcg, xc_osdep_handle h,
> void *start_address, uint32_t count)
> {
> @@ -662,6 +713,7 @@ static struct xc_osdep_ops linux_gnttab_ops = {
> .map_grant_ref = &linux_gnttab_map_grant_ref,
> .map_grant_refs = &linux_gnttab_map_grant_refs,
> .map_domain_grant_refs = &linux_gnttab_map_domain_grant_refs,
> + .map_grant_ref_notify = &linux_gnttab_map_grant_ref_notify,
> .munmap = &linux_gnttab_munmap,
> },
> };
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index 1b82ee0..7859571 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1349,6 +1349,27 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
> int prot);
>
> /*
> + * Memory maps a grant reference from one domain to a local address range.
> + * Mappings should be unmapped with xc_gnttab_munmap. Logs errors.
^^^^^^^^^^^^ .. that looks odd?

> + * This version always maps writable pages, and will attempt to set up
> + * an unmap notification at the given offset and event channel. When the
> + * page is unmapped, the byte at the given offset will be zeroed and a
> + * wakeup will be sent to the given event channel.
> + *
> + * @parm xcg a handle on an open grant table interface
> + * @parm domid the domain to map memory from
> + * @parm ref the grant reference ID to map
> + * @parm notify_offset The byte offset in the page to use for unmap
> + * notification; -1 for none.
> + * @parm notify_port The event channel port to use for unmap notify, or -1
> + */
> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
> + uint32_t domid,
> + uint32_t ref,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port);
> +
> +/*
> * Unmaps the @count pages starting at @start_address, which were mapped by a
> * call to xc_gnttab_map_grant_ref or xc_gnttab_map_grant_refs. Never logs.
> */
> diff --git a/tools/libxc/xenctrlosdep.h b/tools/libxc/xenctrlosdep.h
> index bfe46e0..01969c5 100644
> --- a/tools/libxc/xenctrlosdep.h
> +++ b/tools/libxc/xenctrlosdep.h
> @@ -119,6 +119,11 @@ struct xc_osdep_ops
> uint32_t domid,
> uint32_t *refs,
> int prot);
> + void *(*map_grant_ref_notify)(xc_gnttab *xcg, xc_osdep_handle h,
> + uint32_t domid,
> + uint32_t ref,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port);
> int (*munmap)(xc_gnttab *xcg, xc_osdep_handle h,
> void *start_address,
> uint32_t count);
> --
> 1.7.6

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On Mon, 2011-09-19 at 23:43 +0100, Daniel De Graaf wrote:
> @@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
> uint32_t count;
> };
>
> +/*
> + * Sets up an unmap notification within the page, so that the other side can do
> + * cleanup if this side crashes. Required to implement cross-domain robust
> + * mutexes or close notification on communication channels.
> + *
> + * Each mapped page only supports one notification; multiple calls referring to
> + * the same page overwrite the previous notification. You must clear the
> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
> + * to occur.
> + */
> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
> +struct ioctl_gntdev_unmap_notify {
> + /* IN parameters */
> + /* Offset in the file descriptor for a byte within the page (same as
> + * used in mmap).

I'm probably being thick but I don't understand what this means, i.e.
what this thing is relative to.

> If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
> + * be cleared. Otherwise, it can be any byte in the page whose
> + * notification we are adjusting.
> + */
> + uint64_t index;
> + /* Action(s) to take on unmap */
> + uint32_t action;
> + /* Event channel to notify */
> + uint32_t event_channel_port;

evtchn_port_t ?

> +};
> +
> +/* Clear (set to zero) the byte specified by index */
> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> +/* Send an interrupt on the indicated event channel */
> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> +
> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
> index 4f55fce..3d3c63b 100644
> --- a/tools/libxc/xc_gnttab.c
> +++ b/tools/libxc/xc_gnttab.c
> @@ -18,6 +18,7 @@
> */
>
> #include "xc_private.h"
> +#include <errno.h>
>
> int xc_gnttab_op(xc_interface *xch, int cmd, void * op, int op_size, int count)
> {
> @@ -174,6 +175,28 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
> count, domid, refs, prot);
> }
>
> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
> + uint32_t domid,
> + uint32_t ref,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port,
> + int *notify_result)
> +{
> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
> + domid, ref, notify_offset, notify_port, notify_result);
> + else {
> + void* area = xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
> + domid, ref, PROT_READ|PROT_WRITE);
> + if (area && notify_result) {
> + *notify_result = -1;
> + errno = ENOSYS;
> + }
> + return area;
> + }

I think the new public interface is fine but do we really need a new
internal interface here?

I think you can just add the notify_* arguments to the existing OSDEP
function and have those OS backends which don't implement that feature
return ENOSYS if notify_offset != 0 (or ~0 or whatever invalid value
works).

Why doesn't the *_notify variant take a prot argument?

I'd be tempted to do away with notify_result too -- if the caller asked
for notification and we fail to give that then we can cleanup and return
an error. If they want to try again without the notification then that's
up to them.

> +}
> +
> +
> int xc_gnttab_munmap(xc_gnttab *xcg,
> void *start_address,
> uint32_t count)
> diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
> index dca6718..3040cb6 100644
> --- a/tools/libxc/xc_linux_osdep.c
> +++ b/tools/libxc/xc_linux_osdep.c
> @@ -613,6 +613,62 @@ static void *linux_gnttab_map_domain_grant_refs(xc_gnttab *xcg, xc_osdep_handle
> return do_gnttab_map_grant_refs(xcg, h, count, &domid, 0, refs, prot);
> }
>
> +static void *linux_gnttab_map_grant_ref_notify(xc_gnttab *xch, xc_osdep_handle h,
> + uint32_t domid, uint32_t ref,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port,
> + int *notify_result)
> +{
> + int fd = (int)h;
> + int rv = 0;
> + struct ioctl_gntdev_map_grant_ref map;
> + struct ioctl_gntdev_unmap_notify notify;
> + void *addr;
> +
> + map.count = 1;
> + map.refs[0].domid = domid;
> + map.refs[0].ref = ref;
> +
> + if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) ) {
> + PERROR("xc_gnttab_map_grant_ref: ioctl MAP_GRANT_REF failed");
> + return NULL;
> + }
> +
> + addr = mmap(NULL, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, map.index);
> + if ( addr == MAP_FAILED )
> + {
> + int saved_errno = errno;
> + struct ioctl_gntdev_unmap_grant_ref unmap_grant;
> +
> + PERROR("xc_gnttab_map_grant_ref: mmap failed");
> + unmap_grant.index = map.index;
> + unmap_grant.count = 1;
> + ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
> + errno = saved_errno;
> + return NULL;
> + }

The non-notify variant handles EAGAIN, why doesn't this one need to do
so?

> +
> + notify.index = map.index;
> + notify.action = 0;
> + if (notify_offset >= 0) {
> + notify.index += notify_offset;
> + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE;
> + }
> + if (notify_port >= 0) {
> + notify.event_channel_port = notify_port;
> + notify.action |= UNMAP_NOTIFY_SEND_EVENT;
> + }
> + if (notify.action)
> + rv = ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);

Is there a race if the other end (or this process) dies between the MAP
ioctl and here?

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On 09/21/2011 06:03 AM, Ian Campbell wrote:
> On Mon, 2011-09-19 at 23:43 +0100, Daniel De Graaf wrote:
>> @@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
>> uint32_t count;
>> };
>>
>> +/*
>> + * Sets up an unmap notification within the page, so that the other side can do
>> + * cleanup if this side crashes. Required to implement cross-domain robust
>> + * mutexes or close notification on communication channels.
>> + *
>> + * Each mapped page only supports one notification; multiple calls referring to
>> + * the same page overwrite the previous notification. You must clear the
>> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
>> + * to occur.
>> + */
>> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
>> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
>> +struct ioctl_gntdev_unmap_notify {
>> + /* IN parameters */
>> + /* Offset in the file descriptor for a byte within the page (same as
>> + * used in mmap).
>
> I'm probably being thick but I don't understand what this means, i.e.
> what this thing is relative to.

This is an offset that acts like a byte offset in the /dev/xen/gntdev device.
Conceptually, if /dev/xen/evtchn implemented pwrite() then this offset would
be the offset you would pass to modify your target byte.

For example, if you use gntdev to map two pages, the first will be at offset
0 and the second at 4096; you would pass 4098 here to indicate the third byte
of the second page. The offsets (0, 4096) are returned by the map-grant ioctls.

>> If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
>> + * be cleared. Otherwise, it can be any byte in the page whose
>> + * notification we are adjusting.
>> + */
>> + uint64_t index;
>> + /* Action(s) to take on unmap */
>> + uint32_t action;
>> + /* Event channel to notify */
>> + uint32_t event_channel_port;
>
> evtchn_port_t ?

Using that would require an include dependency on event_channel.h which is
not exposed as a userspace-visible header. Linux's evtchn.h also does not
use evtchn_port_t (it uses unsigned int).

Since this is just a direct copy of the header from the linux source tree,
any changes really need to happen there first.

>> +};
>> +
>> +/* Clear (set to zero) the byte specified by index */
>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
>> +/* Send an interrupt on the indicated event channel */
>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
>> +
>> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
>> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
>> index 4f55fce..3d3c63b 100644
>> --- a/tools/libxc/xc_gnttab.c
>> +++ b/tools/libxc/xc_gnttab.c
>> @@ -18,6 +18,7 @@
>> */
>>
>> #include "xc_private.h"
>> +#include <errno.h>
>>
>> int xc_gnttab_op(xc_interface *xch, int cmd, void * op, int op_size, int count)
>> {
>> @@ -174,6 +175,28 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
>> count, domid, refs, prot);
>> }
>>
>> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
>> + uint32_t domid,
>> + uint32_t ref,
>> + uint32_t notify_offset,
>> + evtchn_port_t notify_port,
>> + int *notify_result)
>> +{
>> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
>> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
>> + domid, ref, notify_offset, notify_port, notify_result);
>> + else {
>> + void* area = xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
>> + domid, ref, PROT_READ|PROT_WRITE);
>> + if (area && notify_result) {
>> + *notify_result = -1;
>> + errno = ENOSYS;
>> + }
>> + return area;
>> + }
>
> I think the new public interface is fine but do we really need a new
> internal interface here?
>
> I think you can just add the notify_* arguments to the existing OSDEP
> function and have those OS backends which don't implement that feature
> return ENOSYS if notify_offset != 0 (or ~0 or whatever invalid value
> works).
>
> Why doesn't the *_notify variant take a prot argument?

At least for the byte-clear portion of the notify, the page must be writable
or the notify will not work. I suppose an event channel alone could be used
for a read-only mapping, but normally the unmapping of a read-only mapping is
not an important event - although I guess you could contrive a use for this
type of notificaiton, so there's no reason not to allow it.

> I'd be tempted to do away with notify_result too -- if the caller asked
> for notification and we fail to give that then we can cleanup and return
> an error. If they want to try again without the notification then that's
> up to them.

The source of the error might be unclear, but this would make the interface
cleaner.

>
>> +}
>> +
>> +
>> int xc_gnttab_munmap(xc_gnttab *xcg,
>> void *start_address,
>> uint32_t count)
>> diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
>> index dca6718..3040cb6 100644
>> --- a/tools/libxc/xc_linux_osdep.c
>> +++ b/tools/libxc/xc_linux_osdep.c
>> @@ -613,6 +613,62 @@ static void *linux_gnttab_map_domain_grant_refs(xc_gnttab *xcg, xc_osdep_handle
>> return do_gnttab_map_grant_refs(xcg, h, count, &domid, 0, refs, prot);
>> }
>>
>> +static void *linux_gnttab_map_grant_ref_notify(xc_gnttab *xch, xc_osdep_handle h,
>> + uint32_t domid, uint32_t ref,
>> + uint32_t notify_offset,
>> + evtchn_port_t notify_port,
>> + int *notify_result)
>> +{
>> + int fd = (int)h;
>> + int rv = 0;
>> + struct ioctl_gntdev_map_grant_ref map;
>> + struct ioctl_gntdev_unmap_notify notify;
>> + void *addr;
>> +
>> + map.count = 1;
>> + map.refs[0].domid = domid;
>> + map.refs[0].ref = ref;
>> +
>> + if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) ) {
>> + PERROR("xc_gnttab_map_grant_ref: ioctl MAP_GRANT_REF failed");
>> + return NULL;
>> + }
>> +
>> + addr = mmap(NULL, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, map.index);
>> + if ( addr == MAP_FAILED )
>> + {
>> + int saved_errno = errno;
>> + struct ioctl_gntdev_unmap_grant_ref unmap_grant;
>> +
>> + PERROR("xc_gnttab_map_grant_ref: mmap failed");
>> + unmap_grant.index = map.index;
>> + unmap_grant.count = 1;
>> + ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
>> + errno = saved_errno;
>> + return NULL;
>> + }
>
> The non-notify variant handles EAGAIN, why doesn't this one need to do
> so?

The non-notify variant doesn't need to handle EAGAIN anymore (with modern
kernels, at least... perhaps it should remain for older kernels). Also,
do_gnttab_map_grant_refs does not handle EAGAIN.

>> +
>> + notify.index = map.index;
>> + notify.action = 0;
>> + if (notify_offset >= 0) {
>> + notify.index += notify_offset;
>> + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE;
>> + }
>> + if (notify_port >= 0) {
>> + notify.event_channel_port = notify_port;
>> + notify.action |= UNMAP_NOTIFY_SEND_EVENT;
>> + }
>> + if (notify.action)
>> + rv = ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
>
> Is there a race if the other end (or this process) dies between the MAP
> ioctl and here?
>
> Ian.
>

Technically it's a race, but it doesn't impact any reasonable use of the
notification. The local process can't actually be using the shared page
at this point, and the other side will not be certain that the map has
actually succeeded until after the function returns (and it is notified
in some way - libvchan changes the notify byte from 2->1 at this point).

If the domain whose memory we are mapping crashes, this ioctl will succeed
unless the event channel it refers to has already been invalidated - but
either way, the notifications are now irrelevant as there is nobody to
listen to them.

--
Daniel De Graaf
National Security Agency

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On Wed, 2011-09-21 at 16:02 +0100, Daniel De Graaf wrote:
> On 09/21/2011 06:03 AM, Ian Campbell wrote:
> > On Mon, 2011-09-19 at 23:43 +0100, Daniel De Graaf wrote:
> >> @@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
> >> uint32_t count;
> >> };
> >>
> >> +/*
> >> + * Sets up an unmap notification within the page, so that the other side can do
> >> + * cleanup if this side crashes. Required to implement cross-domain robust
> >> + * mutexes or close notification on communication channels.
> >> + *
> >> + * Each mapped page only supports one notification; multiple calls referring to
> >> + * the same page overwrite the previous notification. You must clear the
> >> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
> >> + * to occur.
> >> + */
> >> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
> >> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
> >> +struct ioctl_gntdev_unmap_notify {
> >> + /* IN parameters */
> >> + /* Offset in the file descriptor for a byte within the page (same as
> >> + * used in mmap).
> >
> > I'm probably being thick but I don't understand what this means, i.e.
> > what this thing is relative to.
>
> This is an offset that acts like a byte offset in the /dev/xen/gntdev device.
> Conceptually, if /dev/xen/evtchn implemented pwrite() then this offset would
> be the offset you would pass to modify your target byte.
>
> For example, if you use gntdev to map two pages, the first will be at offset
> 0 and the second at 4096; you would pass 4098 here to indicate the third byte
> of the second page. The offsets (0, 4096) are returned by the map-grant ioctls.

Hmm. I think I was confused because it was an offset into the file
rather than, say, a virtual address.

When I map a page how do I know what the offset of it is wrt the file
descriptor? DO I just have to remember how many pages I mapped an *=
4096?

>
> >> If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
> >> + * be cleared. Otherwise, it can be any byte in the page whose
> >> + * notification we are adjusting.
> >> + */
> >> + uint64_t index;
> >> + /* Action(s) to take on unmap */
> >> + uint32_t action;
> >> + /* Event channel to notify */
> >> + uint32_t event_channel_port;
> >
> > evtchn_port_t ?
>
> Using that would require an include dependency on event_channel.h which is
> not exposed as a userspace-visible header. Linux's evtchn.h also does not
> use evtchn_port_t (it uses unsigned int).
>
> Since this is just a direct copy of the header from the linux source tree,
> any changes really need to happen there first.

OK, that's fine as it is then.

>
> >> +};
> >> +
> >> +/* Clear (set to zero) the byte specified by index */
> >> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> >> +/* Send an interrupt on the indicated event channel */
> >> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> >> +
> >> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
> >> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
> >> index 4f55fce..3d3c63b 100644
> >> --- a/tools/libxc/xc_gnttab.c
> >> +++ b/tools/libxc/xc_gnttab.c
> >> @@ -18,6 +18,7 @@
> >> */
> >>
> >> #include "xc_private.h"
> >> +#include <errno.h>
> >>
> >> int xc_gnttab_op(xc_interface *xch, int cmd, void * op, int op_size, int count)
> >> {
> >> @@ -174,6 +175,28 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
> >> count, domid, refs, prot);
> >> }
> >>
> >> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
> >> + uint32_t domid,
> >> + uint32_t ref,
> >> + uint32_t notify_offset,
> >> + evtchn_port_t notify_port,
> >> + int *notify_result)
> >> +{
> >> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
> >> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
> >> + domid, ref, notify_offset, notify_port, notify_result);
> >> + else {
> >> + void* area = xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
> >> + domid, ref, PROT_READ|PROT_WRITE);
> >> + if (area && notify_result) {
> >> + *notify_result = -1;
> >> + errno = ENOSYS;
> >> + }
> >> + return area;
> >> + }
> >
> > I think the new public interface is fine but do we really need a new
> > internal interface here?
> >
> > I think you can just add the notify_* arguments to the existing OSDEP
> > function and have those OS backends which don't implement that feature
> > return ENOSYS if notify_offset != 0 (or ~0 or whatever invalid value
> > works).
> >
> > Why doesn't the *_notify variant take a prot argument?
>
> At least for the byte-clear portion of the notify, the page must be writable
> or the notify will not work. I suppose an event channel alone could be used
> for a read-only mapping, but normally the unmapping of a read-only mapping is
> not an important event - although I guess you could contrive a use for this
> type of notificaiton, so there's no reason not to allow it.

If you combine this the two functions then returning EINVAL for attempts
to map without PROT_WRITE (or whatever perm is necessary) would be
reasonable IMHO.

> > I'd be tempted to do away with notify_result too -- if the caller asked
> > for notification and we fail to give that then we can cleanup and return
> > an error. If they want to try again without the notification then that's
> > up to them.
>
> The source of the error might be unclear, but this would make the interface
> cleaner.
>
> >
> >> +}
> >> +
> >> +
> >> int xc_gnttab_munmap(xc_gnttab *xcg,
> >> void *start_address,
> >> uint32_t count)
> >> diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
> >> index dca6718..3040cb6 100644
> >> --- a/tools/libxc/xc_linux_osdep.c
> >> +++ b/tools/libxc/xc_linux_osdep.c
> >> @@ -613,6 +613,62 @@ static void *linux_gnttab_map_domain_grant_refs(xc_gnttab *xcg, xc_osdep_handle
> >> return do_gnttab_map_grant_refs(xcg, h, count, &domid, 0, refs, prot);
> >> }
> >>
> >> +static void *linux_gnttab_map_grant_ref_notify(xc_gnttab *xch, xc_osdep_handle h,
> >> + uint32_t domid, uint32_t ref,
> >> + uint32_t notify_offset,
> >> + evtchn_port_t notify_port,
> >> + int *notify_result)
> >> +{
> >> + int fd = (int)h;
> >> + int rv = 0;
> >> + struct ioctl_gntdev_map_grant_ref map;
> >> + struct ioctl_gntdev_unmap_notify notify;
> >> + void *addr;
> >> +
> >> + map.count = 1;
> >> + map.refs[0].domid = domid;
> >> + map.refs[0].ref = ref;
> >> +
> >> + if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) ) {
> >> + PERROR("xc_gnttab_map_grant_ref: ioctl MAP_GRANT_REF failed");
> >> + return NULL;
> >> + }
> >> +
> >> + addr = mmap(NULL, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, map.index);
> >> + if ( addr == MAP_FAILED )
> >> + {
> >> + int saved_errno = errno;
> >> + struct ioctl_gntdev_unmap_grant_ref unmap_grant;
> >> +
> >> + PERROR("xc_gnttab_map_grant_ref: mmap failed");
> >> + unmap_grant.index = map.index;
> >> + unmap_grant.count = 1;
> >> + ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
> >> + errno = saved_errno;
> >> + return NULL;
> >> + }
> >
> > The non-notify variant handles EAGAIN, why doesn't this one need to do
> > so?
>
> The non-notify variant doesn't need to handle EAGAIN anymore (with modern
> kernels, at least... perhaps it should remain for older kernels). Also,
> do_gnttab_map_grant_refs does not handle EAGAIN.

OK I guess that is fine (although if you combine them as I suggest it
comes back?)

I hadn't noticed that we have both map_gratn_ref and map_grant_refs,
that seems like an area which could be cleaned up. (not saying you
should, just noticing it)

>
> >> +
> >> + notify.index = map.index;
> >> + notify.action = 0;
> >> + if (notify_offset >= 0) {
> >> + notify.index += notify_offset;
> >> + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE;
> >> + }
> >> + if (notify_port >= 0) {
> >> + notify.event_channel_port = notify_port;
> >> + notify.action |= UNMAP_NOTIFY_SEND_EVENT;
> >> + }
> >> + if (notify.action)
> >> + rv = ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
> >
> > Is there a race if the other end (or this process) dies between the MAP
> > ioctl and here?
> >
> > Ian.
> >
>
> Technically it's a race, but it doesn't impact any reasonable use of the
> notification. The local process can't actually be using the shared page
> at this point, and the other side will not be certain that the map has
> actually succeeded until after the function returns (and it is notified
> in some way - libvchan changes the notify byte from 2->1 at this point).
>
> If the domain whose memory we are mapping crashes, this ioctl will succeed
> unless the event channel it refers to has already been invalidated - but
> either way, the notifications are now irrelevant as there is nobody to
> listen to them.

OK.

Thanks,
Ian.

>




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On 09/21/2011 11:25 AM, Ian Campbell wrote:
> On Wed, 2011-09-21 at 16:02 +0100, Daniel De Graaf wrote:
>> On 09/21/2011 06:03 AM, Ian Campbell wrote:
>>> On Mon, 2011-09-19 at 23:43 +0100, Daniel De Graaf wrote:
>>>> @@ -116,4 +116,35 @@ struct ioctl_gntdev_set_max_grants {
>>>> uint32_t count;
>>>> };
>>>>
>>>> +/*
>>>> + * Sets up an unmap notification within the page, so that the other side can do
>>>> + * cleanup if this side crashes. Required to implement cross-domain robust
>>>> + * mutexes or close notification on communication channels.
>>>> + *
>>>> + * Each mapped page only supports one notification; multiple calls referring to
>>>> + * the same page overwrite the previous notification. You must clear the
>>>> + * notification prior to the IOCTL_GNTALLOC_DEALLOC_GREF if you do not want it
>>>> + * to occur.
>>>> + */
>>>> +#define IOCTL_GNTDEV_SET_UNMAP_NOTIFY \
>>>> +_IOC(_IOC_NONE, 'G', 7, sizeof(struct ioctl_gntdev_unmap_notify))
>>>> +struct ioctl_gntdev_unmap_notify {
>>>> + /* IN parameters */
>>>> + /* Offset in the file descriptor for a byte within the page (same as
>>>> + * used in mmap).
>>>
>>> I'm probably being thick but I don't understand what this means, i.e.
>>> what this thing is relative to.
>>
>> This is an offset that acts like a byte offset in the /dev/xen/gntdev device.
>> Conceptually, if /dev/xen/evtchn implemented pwrite() then this offset would
>> be the offset you would pass to modify your target byte.
>>
>> For example, if you use gntdev to map two pages, the first will be at offset
>> 0 and the second at 4096; you would pass 4098 here to indicate the third byte
>> of the second page. The offsets (0, 4096) are returned by the map-grant ioctls.
>
> Hmm. I think I was confused because it was an offset into the file
> rather than, say, a virtual address.
>
> When I map a page how do I know what the offset of it is wrt the file
> descriptor? DO I just have to remember how many pages I mapped an *=
> 4096?

You had the offset at the time you mapped it, because that's what you
passed in the offset parameter to mmap(). Just don't lose the number :)

The xen interfaces do forget the number: for gntdev, they recover it
via IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR and for gntalloc, it is not needed
because the pages are unhooked as part of the map process so the offset
is no longer valid. This technique is now possible for gntdev, but older
kernels will fail the unmap ioctl and may crash if you close anyway.

>>
>>>> If using UNMAP_NOTIFY_CLEAR_BYTE, this is the byte to
>>>> + * be cleared. Otherwise, it can be any byte in the page whose
>>>> + * notification we are adjusting.
>>>> + */
>>>> + uint64_t index;
>>>> + /* Action(s) to take on unmap */
>>>> + uint32_t action;
>>>> + /* Event channel to notify */
>>>> + uint32_t event_channel_port;
>>>
>>> evtchn_port_t ?
>>
>> Using that would require an include dependency on event_channel.h which is
>> not exposed as a userspace-visible header. Linux's evtchn.h also does not
>> use evtchn_port_t (it uses unsigned int).
>>
>> Since this is just a direct copy of the header from the linux source tree,
>> any changes really need to happen there first.
>
> OK, that's fine as it is then.
>
>>
>>>> +};
>>>> +
>>>> +/* Clear (set to zero) the byte specified by index */
>>>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
>>>> +/* Send an interrupt on the indicated event channel */
>>>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
>>>> +
>>>> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
>>>> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
>>>> index 4f55fce..3d3c63b 100644
>>>> --- a/tools/libxc/xc_gnttab.c
>>>> +++ b/tools/libxc/xc_gnttab.c
>>>> @@ -18,6 +18,7 @@
>>>> */
>>>>
>>>> #include "xc_private.h"
>>>> +#include <errno.h>
>>>>
>>>> int xc_gnttab_op(xc_interface *xch, int cmd, void * op, int op_size, int count)
>>>> {
>>>> @@ -174,6 +175,28 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
>>>> count, domid, refs, prot);
>>>> }
>>>>
>>>> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
>>>> + uint32_t domid,
>>>> + uint32_t ref,
>>>> + uint32_t notify_offset,
>>>> + evtchn_port_t notify_port,
>>>> + int *notify_result)
>>>> +{
>>>> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
>>>> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
>>>> + domid, ref, notify_offset, notify_port, notify_result);
>>>> + else {
>>>> + void* area = xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
>>>> + domid, ref, PROT_READ|PROT_WRITE);
>>>> + if (area && notify_result) {
>>>> + *notify_result = -1;
>>>> + errno = ENOSYS;
>>>> + }
>>>> + return area;
>>>> + }
>>>
>>> I think the new public interface is fine but do we really need a new
>>> internal interface here?
>>>
>>> I think you can just add the notify_* arguments to the existing OSDEP
>>> function and have those OS backends which don't implement that feature
>>> return ENOSYS if notify_offset != 0 (or ~0 or whatever invalid value
>>> works).
>>>
>>> Why doesn't the *_notify variant take a prot argument?
>>
>> At least for the byte-clear portion of the notify, the page must be writable
>> or the notify will not work. I suppose an event channel alone could be used
>> for a read-only mapping, but normally the unmapping of a read-only mapping is
>> not an important event - although I guess you could contrive a use for this
>> type of notificaiton, so there's no reason not to allow it.
>
> If you combine this the two functions then returning EINVAL for attempts
> to map without PROT_WRITE (or whatever perm is necessary) would be
> reasonable IMHO.
>

The ioctl already prevents you from requesting the impossible, so this should
just work.

>>> I'd be tempted to do away with notify_result too -- if the caller asked
>>> for notification and we fail to give that then we can cleanup and return
>>> an error. If they want to try again without the notification then that's
>>> up to them.
>>
>> The source of the error might be unclear, but this would make the interface
>> cleaner.
>>
>>>
>>>> +}
>>>> +
>>>> +
>>>> int xc_gnttab_munmap(xc_gnttab *xcg,
>>>> void *start_address,
>>>> uint32_t count)
>>>> diff --git a/tools/libxc/xc_linux_osdep.c b/tools/libxc/xc_linux_osdep.c
>>>> index dca6718..3040cb6 100644
>>>> --- a/tools/libxc/xc_linux_osdep.c
>>>> +++ b/tools/libxc/xc_linux_osdep.c
>>>> @@ -613,6 +613,62 @@ static void *linux_gnttab_map_domain_grant_refs(xc_gnttab *xcg, xc_osdep_handle
>>>> return do_gnttab_map_grant_refs(xcg, h, count, &domid, 0, refs, prot);
>>>> }
>>>>
>>>> +static void *linux_gnttab_map_grant_ref_notify(xc_gnttab *xch, xc_osdep_handle h,
>>>> + uint32_t domid, uint32_t ref,
>>>> + uint32_t notify_offset,
>>>> + evtchn_port_t notify_port,
>>>> + int *notify_result)
>>>> +{
>>>> + int fd = (int)h;
>>>> + int rv = 0;
>>>> + struct ioctl_gntdev_map_grant_ref map;
>>>> + struct ioctl_gntdev_unmap_notify notify;
>>>> + void *addr;
>>>> +
>>>> + map.count = 1;
>>>> + map.refs[0].domid = domid;
>>>> + map.refs[0].ref = ref;
>>>> +
>>>> + if ( ioctl(fd, IOCTL_GNTDEV_MAP_GRANT_REF, &map) ) {
>>>> + PERROR("xc_gnttab_map_grant_ref: ioctl MAP_GRANT_REF failed");
>>>> + return NULL;
>>>> + }
>>>> +
>>>> + addr = mmap(NULL, XC_PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, map.index);
>>>> + if ( addr == MAP_FAILED )
>>>> + {
>>>> + int saved_errno = errno;
>>>> + struct ioctl_gntdev_unmap_grant_ref unmap_grant;
>>>> +
>>>> + PERROR("xc_gnttab_map_grant_ref: mmap failed");
>>>> + unmap_grant.index = map.index;
>>>> + unmap_grant.count = 1;
>>>> + ioctl(fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &unmap_grant);
>>>> + errno = saved_errno;
>>>> + return NULL;
>>>> + }
>>>
>>> The non-notify variant handles EAGAIN, why doesn't this one need to do
>>> so?
>>
>> The non-notify variant doesn't need to handle EAGAIN anymore (with modern
>> kernels, at least... perhaps it should remain for older kernels). Also,
>> do_gnttab_map_grant_refs does not handle EAGAIN.
>
> OK I guess that is fine (although if you combine them as I suggest it
> comes back?)
>
> I hadn't noticed that we have both map_gratn_ref and map_grant_refs,
> that seems like an area which could be cleaned up. (not saying you
> should, just noticing it)

Since I'm already rewriting the osdep layer functions, I think I can replace
all 3-4 existing map functions with a single function.

It looks like the current 2.6.32.x xen kernels also aren't returning EAGAIN,
so I'm unsure as to why this support was added. The commit in question is
20689:fe42b16855aa by Grzegorz Milos (committed by Keir Fraser), but I don't
see any discussion on xen-devel for it. It's also unclear why repeating the
request every millisecond in an infinite loop is better than letting the
caller handle an -EAGAIN.

>>
>>>> +
>>>> + notify.index = map.index;
>>>> + notify.action = 0;
>>>> + if (notify_offset >= 0) {
>>>> + notify.index += notify_offset;
>>>> + notify.action |= UNMAP_NOTIFY_CLEAR_BYTE;
>>>> + }
>>>> + if (notify_port >= 0) {
>>>> + notify.event_channel_port = notify_port;
>>>> + notify.action |= UNMAP_NOTIFY_SEND_EVENT;
>>>> + }
>>>> + if (notify.action)
>>>> + rv = ioctl(fd, IOCTL_GNTDEV_SET_UNMAP_NOTIFY, &notify);
>>>
>>> Is there a race if the other end (or this process) dies between the MAP
>>> ioctl and here?
>>>
>>> Ian.
>>>
>>
>> Technically it's a race, but it doesn't impact any reasonable use of the
>> notification. The local process can't actually be using the shared page
>> at this point, and the other side will not be certain that the map has
>> actually succeeded until after the function returns (and it is notified
>> in some way - libvchan changes the notify byte from 2->1 at this point).
>>
>> If the domain whose memory we are mapping crashes, this ioctl will succeed
>> unless the event channel it refers to has already been invalidated - but
>> either way, the notifications are now irrelevant as there is nobody to
>> listen to them.
>
> OK.
>
> Thanks,
> Ian.
>
>>
>
>
>


--
Daniel De Graaf
National Security Agency

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On Wed, 2011-09-21 at 18:07 +0100, Daniel De Graaf wrote:
> On 09/21/2011 11:25 AM, Ian Campbell wrote:

> > When I map a page how do I know what the offset of it is wrt the file
> > descriptor? DO I just have to remember how many pages I mapped an *=
> > 4096?
>
> You had the offset at the time you mapped it, because that's what you
> passed in the offset parameter to mmap(). Just don't lose the number :)

So I guess my followup question is where does the number I pass to mmap
come from...

/me scrobbles in the code.

Aha, so it is an output from the gntdev/gntalloc ioctls. So how about:

/* IN parameters */
/*
* Offset in the file descriptor for a byte within the page. This offset
* is the result of the IOCTL_GNTDEV_MAP_GRANT_REF and is the same as
* is used with mmap().
*/



> >
> >>
> >>>> +};
> >>>> +
> >>>> +/* Clear (set to zero) the byte specified by index */
> >>>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> >>>> +/* Send an interrupt on the indicated event channel */
> >>>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
> >>>> +
> >>>> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
> >>>> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
> >>>> index 4f55fce..3d3c63b 100644
> >>>> --- a/tools/libxc/xc_gnttab.c
> >>>> +++ b/tools/libxc/xc_gnttab.c
> >>>> @@ -18,6 +18,7 @@
> >>>> */
> >>>>
> >>>> #include "xc_private.h"
> >>>> +#include <errno.h>
> >>>>
> >>>> int xc_gnttab_op(xc_interface *xch, int cmd, void * op, int op_size, int count)
> >>>> {
> >>>> @@ -174,6 +175,28 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
> >>>> count, domid, refs, prot);
> >>>> }
> >>>>
> >>>> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
> >>>> + uint32_t domid,
> >>>> + uint32_t ref,
> >>>> + uint32_t notify_offset,
> >>>> + evtchn_port_t notify_port,
> >>>> + int *notify_result)
> >>>> +{
> >>>> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
> >>>> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
> >>>> + domid, ref, notify_offset, notify_port, notify_result);
> >>>> + else {
> >>>> + void* area = xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
> >>>> + domid, ref, PROT_READ|PROT_WRITE);
> >>>> + if (area && notify_result) {
> >>>> + *notify_result = -1;
> >>>> + errno = ENOSYS;
> >>>> + }
> >>>> + return area;
> >>>> + }
> >>>
> >>> I think the new public interface is fine but do we really need a new
> >>> internal interface here?
> >>>
> >>> I think you can just add the notify_* arguments to the existing OSDEP
> >>> function and have those OS backends which don't implement that feature
> >>> return ENOSYS if notify_offset != 0 (or ~0 or whatever invalid value
> >>> works).
> >>>
> >>> Why doesn't the *_notify variant take a prot argument?
> >>
> >> At least for the byte-clear portion of the notify, the page must be writable
> >> or the notify will not work. I suppose an event channel alone could be used
> >> for a read-only mapping, but normally the unmapping of a read-only mapping is
> >> not an important event - although I guess you could contrive a use for this
> >> type of notificaiton, so there's no reason not to allow it.
> >
> > If you combine this the two functions then returning EINVAL for attempts
> > to map without PROT_WRITE (or whatever perm is necessary) would be
> > reasonable IMHO.
> >
>
> The ioctl already prevents you from requesting the impossible, so this should
> just work.

Even better. I see the check in the gntdev driver but not in the
gntalloc one, is that right?


> > I hadn't noticed that we have both map_gratn_ref and map_grant_refs,
> > that seems like an area which could be cleaned up. (not saying you
> > should, just noticing it)
>
> Since I'm already rewriting the osdep layer functions, I think I can replace
> all 3-4 existing map functions with a single function.

Awesome, thanks!

> It looks like the current 2.6.32.x xen kernels also aren't returning EAGAIN,
> so I'm unsure as to why this support was added. The commit in question is
> 20689:fe42b16855aa by Grzegorz Milos (committed by Keir Fraser), but I don't
> see any discussion on xen-devel for it.

IIRC it was related to the page sharing patches which can cause grant
hypercalls to return EAGAIN if the granted page is swapped or shared. I
think this can only happen to backend/dom0 type operations.

I think the reason it needs to return to the guest is that the paging
daemon may be in the same domain and having the only vcpu block in the
hypercall would deadlock.

> It's also unclear why repeating the
> request every millisecond in an infinite loop is better than letting the
> caller handle an -EAGAIN.

Yeah, the millisecond thing is pretty gross, something like
sched_yield() might be a bit more palatable (if it's portable enough,
although sleep could be a fallback if not)

I suppose that hiding the EAGAIN handling in the library was just
thought to be convenient, compared with changing all the existing users.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On 09/22/2011 04:32 AM, Ian Campbell wrote:
> On Wed, 2011-09-21 at 18:07 +0100, Daniel De Graaf wrote:
>> On 09/21/2011 11:25 AM, Ian Campbell wrote:
>
>>> When I map a page how do I know what the offset of it is wrt the file
>>> descriptor? DO I just have to remember how many pages I mapped an *=
>>> 4096?
>>
>> You had the offset at the time you mapped it, because that's what you
>> passed in the offset parameter to mmap(). Just don't lose the number :)
>
> So I guess my followup question is where does the number I pass to mmap
> come from...
>
> /me scrobbles in the code.
>
> Aha, so it is an output from the gntdev/gntalloc ioctls. So how about:
>
> /* IN parameters */
> /*
> * Offset in the file descriptor for a byte within the page. This offset
> * is the result of the IOCTL_GNTDEV_MAP_GRANT_REF and is the same as
> * is used with mmap().
> */
>

Sounds good.

>>>>>> +};
>>>>>> +
>>>>>> +/* Clear (set to zero) the byte specified by index */
>>>>>> +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1
>>>>>> +/* Send an interrupt on the indicated event channel */
>>>>>> +#define UNMAP_NOTIFY_SEND_EVENT 0x2
>>>>>> +
>>>>>> #endif /* __LINUX_PUBLIC_GNTDEV_H__ */
>>>>>> diff --git a/tools/libxc/xc_gnttab.c b/tools/libxc/xc_gnttab.c
>>>>>> index 4f55fce..3d3c63b 100644
>>>>>> --- a/tools/libxc/xc_gnttab.c
>>>>>> +++ b/tools/libxc/xc_gnttab.c
>>>>>> @@ -18,6 +18,7 @@
>>>>>> */
>>>>>>
>>>>>> #include "xc_private.h"
>>>>>> +#include <errno.h>
>>>>>>
>>>>>> int xc_gnttab_op(xc_interface *xch, int cmd, void * op, int op_size, int count)
>>>>>> {
>>>>>> @@ -174,6 +175,28 @@ void *xc_gnttab_map_domain_grant_refs(xc_gnttab *xcg,
>>>>>> count, domid, refs, prot);
>>>>>> }
>>>>>>
>>>>>> +void *xc_gnttab_map_grant_ref_notify(xc_gnttab *xcg,
>>>>>> + uint32_t domid,
>>>>>> + uint32_t ref,
>>>>>> + uint32_t notify_offset,
>>>>>> + evtchn_port_t notify_port,
>>>>>> + int *notify_result)
>>>>>> +{
>>>>>> + if (xcg->ops->u.gnttab.map_grant_ref_notify)
>>>>>> + return xcg->ops->u.gnttab.map_grant_ref_notify(xcg, xcg->ops_handle,
>>>>>> + domid, ref, notify_offset, notify_port, notify_result);
>>>>>> + else {
>>>>>> + void* area = xcg->ops->u.gnttab.map_grant_ref(xcg, xcg->ops_handle,
>>>>>> + domid, ref, PROT_READ|PROT_WRITE);
>>>>>> + if (area && notify_result) {
>>>>>> + *notify_result = -1;
>>>>>> + errno = ENOSYS;
>>>>>> + }
>>>>>> + return area;
>>>>>> + }
>>>>>
>>>>> I think the new public interface is fine but do we really need a new
>>>>> internal interface here?
>>>>>
>>>>> I think you can just add the notify_* arguments to the existing OSDEP
>>>>> function and have those OS backends which don't implement that feature
>>>>> return ENOSYS if notify_offset != 0 (or ~0 or whatever invalid value
>>>>> works).
>>>>>
>>>>> Why doesn't the *_notify variant take a prot argument?
>>>>
>>>> At least for the byte-clear portion of the notify, the page must be writable
>>>> or the notify will not work. I suppose an event channel alone could be used
>>>> for a read-only mapping, but normally the unmapping of a read-only mapping is
>>>> not an important event - although I guess you could contrive a use for this
>>>> type of notificaiton, so there's no reason not to allow it.
>>>
>>> If you combine this the two functions then returning EINVAL for attempts
>>> to map without PROT_WRITE (or whatever perm is necessary) would be
>>> reasonable IMHO.
>>>
>>
>> The ioctl already prevents you from requesting the impossible, so this should
>> just work.
>
> Even better. I see the check in the gntdev driver but not in the
> gntalloc one, is that right?
>

Yes. The unmap notification will always work in gntalloc because the
shared page is owned locally, and is always writable there; read-only
refers to the mapping domain's ability to write.

>
>>> I hadn't noticed that we have both map_gratn_ref and map_grant_refs,
>>> that seems like an area which could be cleaned up. (not saying you
>>> should, just noticing it)
>>
>> Since I'm already rewriting the osdep layer functions, I think I can replace
>> all 3-4 existing map functions with a single function.
>
> Awesome, thanks!
>
>> It looks like the current 2.6.32.x xen kernels also aren't returning EAGAIN,
>> so I'm unsure as to why this support was added. The commit in question is
>> 20689:fe42b16855aa by Grzegorz Milos (committed by Keir Fraser), but I don't
>> see any discussion on xen-devel for it.
>
> IIRC it was related to the page sharing patches which can cause grant
> hypercalls to return EAGAIN if the granted page is swapped or shared. I
> think this can only happen to backend/dom0 type operations.
>
> I think the reason it needs to return to the guest is that the paging
> daemon may be in the same domain and having the only vcpu block in the
> hypercall would deadlock.
>
>> It's also unclear why repeating the
>> request every millisecond in an infinite loop is better than letting the
>> caller handle an -EAGAIN.
>
> Yeah, the millisecond thing is pretty gross, something like
> sched_yield() might be a bit more palatable (if it's portable enough,
> although sleep could be a fallback if not)
>
> I suppose that hiding the EAGAIN handling in the library was just
> thought to be convenient, compared with changing all the existing users.
>
> Ian.
>

It sounds to me like this is best solved in the kernel, although it would
still have to invoke some kind of yield operation since I assume the kernel
can't tell when the hypercall will not block (ideally you would be able to
put the invoking process to sleep pending the cross-domain page fault).

For now, it sounds like the best solution is to keep the usleep-based loop
in for all gntdev invocations. Using sched_yield will cause the CPU to spin
while the page is pending from disk or possibly while waiting for dom0 to
be scheduled (assuming a domU-domU vchan).

--
Daniel De Graaf
National Security Agency

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On Thu, 2011-09-22 at 23:14 +0100, Daniel De Graaf wrote:
> diff --git a/tools/libxc/xenctrlosdep.h b/tools/libxc/xenctrlosdep.h
> index bfe46e0..1c6317e 100644
> --- a/tools/libxc/xenctrlosdep.h
> +++ b/tools/libxc/xenctrlosdep.h
> @@ -105,20 +105,12 @@ struct xc_osdep_ops
> int (*unmask)(xc_evtchn *xce, xc_osdep_handle h, evtchn_port_t port);
> } evtchn;
> struct {
> - void *(*map_grant_ref)(xc_gnttab *xcg, xc_osdep_handle h,
> - uint32_t domid,
> - uint32_t ref,
> - int prot);
> - void *(*map_grant_refs)(xc_gnttab *xcg, xc_osdep_handle h,
> - uint32_t count,
> - uint32_t *domids,
> - uint32_t *refs,
> - int prot);
> - void *(*map_domain_grant_refs)(xc_gnttab *xcg, xc_osdep_handle h,
> - uint32_t count,
> - uint32_t domid,
> - uint32_t *refs,
> - int prot);
> +#define XC_GRANT_MAP_SINGLE_DOMAIN 0x1
> + void *(*grant_map)(xc_gnttab *xcg, xc_osdep_handle h,
> + uint32_t count, int flags, int prot,
> + uint32_t *domids, uint32_t *refs,
> + uint32_t notify_offset,
> + evtchn_port_t notify_port);
> int (*munmap)(xc_gnttab *xcg, xc_osdep_handle h,
> void *start_address,
> uint32_t count);

Not specifically to do with this patch but I wonder if we should try and
figure a way to version these shared libraries somehow, otherwise
changes like this lead to segfault at best and unexpected non-crashing
behaviour at worst (for out of tree backends that is).

Something as simple as checksumming the xenctrlosdep.h header and
including the value in the .so to be checked at load time would do the
trick.

Ian.



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
Ian Campbell writes ("Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify"):
> Not specifically to do with this patch but I wonder if we should try and
> figure a way to version these shared libraries somehow, otherwise
> changes like this lead to segfault at best and unexpected non-crashing
> behaviour at worst (for out of tree backends that is).

Our approach is to change the library major number at some point
between releases. People using the development tree can expect
instability.

> Something as simple as checksumming the xenctrlosdep.h header and
> including the value in the .so to be checked at load time would do the
> trick.

Urgh.

Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify [ In reply to ]
On Fri, 2011-09-30 at 15:12 +0100, Ian Jackson wrote:
> Ian Campbell writes ("Re: [PATCH 1/3] libxc: add xc_gnttab_map_grant_ref_notify"):
> > Not specifically to do with this patch but I wonder if we should try and
> > figure a way to version these shared libraries somehow, otherwise
> > changes like this lead to segfault at best and unexpected non-crashing
> > behaviour at worst (for out of tree backends that is).
>
> Our approach is to change the library major number at some point
> between releases. People using the development tree can expect
> instability.

These are "plugins" so they don't have an SONAME. Perhaps they should,
but I'm not sure what would check it when opening with dlopen() or how
one goes about doing it manually.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel