Re: [RFC PATCH 3/3] Implement 3-level event channel routines.
On 31/12/12 18:22, Wei Liu wrote:
> From: Wei Liu <liuw@liuw.name>
>
> Add some fields in struct domain to hold pointers to the 3-level shared arrays.
> Also add a per-cpu second level selector in struct vcpu.
>
> These structures are mapped by a new evtchn op. Guests should fall back to the
> 2-level event channel if the mapping fails.
>
> The routines are more or less the same as the 2-level ones.
>
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
[...]
> --- a/xen/common/event_channel.c
> +++ b/xen/common/event_channel.c
> @@ -27,6 +27,7 @@
> #include <xen/guest_access.h>
> #include <xen/keyhandler.h>
> #include <asm/current.h>
> +#include <xen/paging.h>
>
> #include <public/xen.h>
> #include <public/event_channel.h>
> @@ -413,6 +414,85 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
> return rc;
> }
>
> +static inline int __evtchn_is_masked_l2(struct domain *d, int port)
> +{
> +    return test_bit(port, &shared_info(d, evtchn_mask));
> +}
> +
> +static inline int __evtchn_is_masked_l3(struct domain *d, int port)
> +{
> +    return test_bit(port % EVTCHNS_PER_PAGE,
> +                    d->evtchn_mask[port / EVTCHNS_PER_PAGE]);
> +}
> +
> +int evtchn_is_masked(struct domain *d, int port)
> +{
> +    switch ( d->evtchn_level )
> +    {
> +    case 2:
> +        return __evtchn_is_masked_l2(d, port);
> +    case 3:
> +        return __evtchn_is_masked_l3(d, port);
> +    default:
> +        printk(" %s: unknown event channel level %d for domain %d \n",
> +               __FUNCTION__, d->evtchn_level, d->domain_id);
> +        return 1;
> +    }

Drop the printk? If d->evtchn_level is wrong this will spam the console
and it BUGs elsewhere anyway.
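
i.e. something like this (an untested sketch only; the default case just
reports the port as masked and leaves the bogus level to the BUG() elsewhere):

int evtchn_is_masked(struct domain *d, int port)
{
    switch ( d->evtchn_level )
    {
    case 2:
        return __evtchn_is_masked_l2(d, port);
    case 3:
        return __evtchn_is_masked_l3(d, port);
    default:
        return 1; /* unknown level: treat the port as masked */
    }
}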

There are a bunch of other similar places.

> +static long __evtchn_register_3level(struct evtchn_register_3level *r)
> +{
> +    struct domain *d = current->domain;
> +    unsigned long mfns[r->nr_vcpus];
> +    unsigned long offsets[r->nr_vcpus];
> +    unsigned char was_up[r->nr_vcpus];
> +    int rc, i;
> +    struct vcpu *v;
> +
> +    if ( d->evtchn_level == 3 )
> +        return -EINVAL;
> +
> +    if ( r->nr_vcpus > d->max_vcpus )
> +        return -EINVAL;
> +
> +    for ( i = 0; i < MAX_L3_PAGES; i++ )
> +        if ( d->evtchn_pending[i] || d->evtchn_mask[i] )
> +            return -EINVAL;
> +
> +    for_each_vcpu ( d, v )
> +        if ( v->evtchn_pending_sel_l2 )
> +            return -EINVAL;
> +
> +    if ( copy_from_user(mfns, r->l2sel_mfn,
> +                        sizeof(unsigned long)*r->nr_vcpus) )
> +        return -EFAULT;
> +
> +    if ( copy_from_user(offsets, r->l2sel_offset,
> +                        sizeof(unsigned long)*r->nr_vcpus) )
> +        return -EFAULT;
> +
> +    /* put cpu offline */
> +    for_each_vcpu ( d, v )
> +    {
> +        if ( v == current )
> +            was_up[v->vcpu_id] = 1;
> +        else
> +            was_up[v->vcpu_id] = !test_and_set_bit(_VPF_down,
> +                                                   &v->pause_flags);
> +    }

Why offline the VCPUs? I think there needs to be a comment explaining why.

> +    /* map evtchn pending array and evtchn mask array */
> +    rc = __map_l3_arrays(d, r->evtchn_pending, r->evtchn_mask);
> +    if ( rc )
> +        goto out;
> +
> +    for_each_vcpu ( d, v )
> +    {
> +        if ( (rc = __map_l2_sel(v, mfns[v->vcpu_id], offsets[v->vcpu_id])) )
> +        {
> +            struct vcpu *v1;
> +            for_each_vcpu ( d, v1 )
> +                __unmap_l2_sel(v1);
> +            __unmap_l3_arrays(d);
> +            goto out;
> +        }
> +    }
> +
> +    /* Scan current bitmap and migrate all outstanding events to new bitmap */
> +    __evtchn_migrate_bitmap_l3(d);

This migrate seems racy. Won't events raised after the migrate but before
the level is set below be lost?

Alternatively, if it's not racy because it's all protected with
d->event_lock, then the wmb() isn't required as the spin locks are
implicit barriers.
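
i.e., if every path that raises or delivers an event also takes
d->event_lock, then just (a sketch of the reasoning only, not a claim
about the current locking of those paths):

__evtchn_migrate_bitmap_l3(d);
d->evtchn_level = 3;

with the spin_unlock(&d->event_lock) in the caller acting as the barrier.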

> +
> +    /* make sure all writes take effect before switching to new routines */
> +    wmb();
> +    d->evtchn_level = 3;
> +
> + out:
> +    /* bring cpus back online */
> +    for_each_vcpu ( d, v )
> +        if ( was_up[v->vcpu_id] &&
> +             test_and_clear_bit(_VPF_down, &v->pause_flags) )
> +            vcpu_wake(v);
> +
> +    return rc;
> +}
> +
> +static long evtchn_register_nlevel(evtchn_register_nlevel_t *r)
> +{
> +    struct domain *d = current->domain;
> +    int rc;
> +
> +    spin_lock(&d->event_lock);
> +
> +    switch ( r->level )
> +    {
> +    case 3:
> +        rc = __evtchn_register_3level(&r->u.l3);
> +        break;
> +    default:
> +        rc = -EINVAL;
> +    }
> +
> +    spin_unlock(&d->event_lock);
> +
> +    return rc;
> +}
>
> long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
> {
[...]
> --- a/xen/include/public/event_channel.h
> +++ b/xen/include/public/event_channel.h
> @@ -71,6 +71,7 @@
> #define EVTCHNOP_bind_vcpu 8
> #define EVTCHNOP_unmask 9
> #define EVTCHNOP_reset 10
> +#define EVTCHNOP_register_nlevel 11
> /* ` } */
>
> typedef uint32_t evtchn_port_t;
> @@ -258,6 +259,29 @@ struct evtchn_reset {
> typedef struct evtchn_reset evtchn_reset_t;
>
> /*
> + * EVTCHNOP_register_nlevel: Register N level event channels.
> + * NOTES:
> + * 1. currently only 3-level is supported.
> + * 2. should fall back to basic 2-level if this call fails.
> + */
> +#define MAX_L3_PAGES 8

This is a public header, so this needs to be prefixed, e.g.:

#define EVTCHN_MAX_L3_PAGES 8

> +struct evtchn_register_3level {
> +    unsigned long evtchn_pending[MAX_L3_PAGES];
> +    unsigned long evtchn_mask[MAX_L3_PAGES];
> +    unsigned long *l2sel_mfn;
> +    unsigned long *l2sel_offset;

Should these unsigned longs be xen_pfn_t's?
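
e.g. (just a sketch; the offset array arguably wants to be xen_ulong_t
rather than xen_pfn_t):

xen_pfn_t *l2sel_mfn;
xen_ulong_t *l2sel_offset;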

> +    uint32_t nr_vcpus;
> +};
> +
> +struct evtchn_register_nlevel {
> +    uint32_t level;
> +    union {
> +        struct evtchn_register_3level l3;
> +    } u;
> +};
> +typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;

Does there need to be compat handling for these structures? As-is, with
the unsigned longs and bare pointers, they look like they need it.

> +
> +/*
> * ` enum neg_errnoval
> * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
> * `
> diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
> index 5593066..1d4ef2d 100644
> --- a/xen/include/public/xen.h
> +++ b/xen/include/public/xen.h
> @@ -554,9 +554,24 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
>
> /*
> * Event channel endpoints per domain:
> + * 2-level:
> * 1024 if a long is 32 bits; 4096 if a long is 64 bits.
> + * 3-level:
> + * 32k if a long is 32 bits; 256k if a long is 64 bits;
> */
> -#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
> +#define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64)
> +#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64)
> +#define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \
> +        switch (x) { \
> +        case 2: \
> +            __v = NR_EVENT_CHANNELS_L2; break; \
> +        case 3: \
> +            __v = NR_EVENT_CHANNELS_L3; break; \
> +        default: \
> +            BUG(); \
> +        } \
> +        __v;})
> +

You've not documented what 'x' is in this macro. Also name it 'level'.
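
e.g. something like (a sketch only):

/* Number of event channel ports for the given event channel 'level' (2 or 3). */
#define NR_EVENT_CHANNELS(level) ({ unsigned int __v = 0; \
        switch (level) { \
        case 2: __v = NR_EVENT_CHANNELS_L2; break; \
        case 3: __v = NR_EVENT_CHANNELS_L3; break; \
        default: BUG(); \
        } \
        __v;})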

David

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel