From: Wei Liu <liuw@liuw.name>
Add some fields in struct domain to hold pointer to 3-level shared arrays.
Also add per-cpu second level selector in struct vcpu.
These structures are mapped by a new evtchn op. Guests should fall back to
using the 2-level event channel if mapping fails.
The routines are more or less the same as the 2-level ones.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
xen/arch/x86/domain.c | 2 +
xen/arch/x86/irq.c | 2 +-
xen/common/event_channel.c | 520 ++++++++++++++++++++++++++++++++++--
xen/common/keyhandler.c | 12 +-
xen/common/schedule.c | 2 +-
xen/include/public/event_channel.h | 24 ++
xen/include/public/xen.h | 17 +-
xen/include/xen/event.h | 4 +
xen/include/xen/sched.h | 12 +-
9 files changed, 565 insertions(+), 30 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 7a07c06..b457b00 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2058,6 +2058,8 @@ int domain_relinquish_resources(struct domain *d)
}
}
+ event_channel_unmap_nlevel(d);
+
d->arch.relmem = RELMEM_shared;
/* fallthrough */
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 05cede5..d517e39 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1452,7 +1452,7 @@ int pirq_guest_unmask(struct domain *d)
{
pirq = pirqs[i]->pirq;
if ( pirqs[i]->masked &&
- !test_bit(pirqs[i]->evtchn, &shared_info(d, evtchn_mask)) )
+ !evtchn_is_masked(d, pirqs[i]->evtchn) )
pirq_guest_eoi(pirqs[i]);
}
} while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 9898f8e..fb3a7b4 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -27,6 +27,7 @@
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/current.h>
+#include <xen/paging.h>
#include <public/xen.h>
#include <public/event_channel.h>
@@ -413,6 +414,85 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
return rc;
}
+static inline int __evtchn_is_masked_l2(struct domain *d, int port)
+{
+ return test_bit(port, &shared_info(d, evtchn_mask));
+}
+
+static inline int __evtchn_is_masked_l3(struct domain *d, int port)
+{
+ return test_bit(port % EVTCHNS_PER_PAGE,
+ d->evtchn_mask[port / EVTCHNS_PER_PAGE]);
+}
+
+int evtchn_is_masked(struct domain *d, int port)
+{
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ return __evtchn_is_masked_l2(d, port);
+ case 3:
+ return __evtchn_is_masked_l3(d, port);
+ default:
+ printk(" %s: unknown event channel level %d for domain %d \n",
+ __FUNCTION__, d->evtchn_level, d->domain_id);
+ return 1;
+ }
+}
+
+static inline int __evtchn_is_pending_l2(struct domain *d, int port)
+{
+ return test_bit(port, &shared_info(d, evtchn_pending));
+}
+
+static inline int __evtchn_is_pending_l3(struct domain *d, int port)
+{
+ return test_bit(port % EVTCHNS_PER_PAGE,
+ d->evtchn_pending[port / EVTCHNS_PER_PAGE]);
+}
+
+int evtchn_is_pending(struct domain *d, int port)
+{
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ return __evtchn_is_pending_l2(d, port);
+ case 3:
+ return __evtchn_is_pending_l3(d, port);
+ default:
+ printk(" %s: unknown event channel level %d for domain %d\n",
+ __FUNCTION__, d->evtchn_level, d->domain_id);
+ return 0;
+ }
+}
+
+static inline void __evtchn_clear_pending_l2(struct domain *d, int port)
+{
+ clear_bit(port, &shared_info(d, evtchn_pending));
+}
+
+static inline void __evtchn_clear_pending_l3(struct domain *d, int port)
+{
+ clear_bit(port % EVTCHNS_PER_PAGE,
+ d->evtchn_pending[port / EVTCHNS_PER_PAGE]);
+}
+
+static void evtchn_clear_pending(struct domain *d, int port)
+{
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ __evtchn_clear_pending_l2(d, port);
+ break;
+ case 3:
+ __evtchn_clear_pending_l3(d, port);
+ break;
+ default:
+ printk("Cannot clear pending for %d level event channel"
+ " for domain %d, port %d\n",
+ d->evtchn_level, d->domain_id, port);
+ }
+}
static long __evtchn_close(struct domain *d1, int port1)
{
@@ -529,7 +609,7 @@ static long __evtchn_close(struct domain *d1, int port1)
}
/* Clear pending event to avoid unexpected behavior on re-bind. */
- clear_bit(port1, &shared_info(d1, evtchn_pending));
+ evtchn_clear_pending(d1, port1);
/* Reset binding to vcpu0 when the channel is freed. */
chn1->state = ECS_FREE;
@@ -606,16 +686,15 @@ int evtchn_send(struct domain *d, unsigned int lport)
ret = -EINVAL;
}
-out:
+ out:
spin_unlock(&ld->event_lock);
return ret;
}
-static void evtchn_set_pending(struct vcpu *v, int port)
+static void __evtchn_set_pending_l2(struct vcpu *v, int port)
{
struct domain *d = v->domain;
- int vcpuid;
/*
* The following bit operations must happen in strict order.
@@ -633,7 +712,50 @@ static void evtchn_set_pending(struct vcpu *v, int port)
{
vcpu_mark_events_pending(v);
}
-
+}
+
+static void __evtchn_set_pending_l3(struct vcpu *v, int port)
+{
+ struct domain *d = v->domain;
+
+ int page_no = port / EVTCHNS_PER_PAGE;
+ int offset = port % EVTCHNS_PER_PAGE;
+ int l1cb = BITS_PER_EVTCHN_WORD(d)*BITS_PER_EVTCHN_WORD(d);
+ int l2cb = BITS_PER_EVTCHN_WORD(d);
+
+ if ( test_and_set_bit(offset, d->evtchn_pending[page_no]) )
+ return;
+
+ if ( !test_bit(offset, d->evtchn_mask[page_no]) &&
+ !test_and_set_bit(port / l2cb,
+ v->evtchn_pending_sel_l2) &&
+ !test_and_set_bit(port / l1cb,
+ &vcpu_info(v, evtchn_pending_sel)) )
+ {
+ vcpu_mark_events_pending(v);
+ }
+}
+
+static void evtchn_set_pending(struct vcpu *v, int port)
+{
+ struct domain *d = v->domain;
+ int vcpuid;
+
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ __evtchn_set_pending_l2(v, port);
+ break;
+ case 3:
+ __evtchn_set_pending_l3(v, port);
+ break;
+ default:
+ printk("Cannot set pending for %d level event channel"
+ " for domain %d, port %d\n",
+ d->evtchn_level, d->domain_id, port);
+ return;
+ }
+
/* Check if some VCPU might be polling for this event. */
if ( likely(bitmap_empty(d->poll_mask, d->max_vcpus)) )
return;
@@ -916,21 +1038,16 @@ long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id)
}
-int evtchn_unmask(unsigned int port)
+static int __evtchn_unmask_l2(unsigned int port)
{
struct domain *d = current->domain;
struct vcpu *v;
- ASSERT(spin_is_locked(&d->event_lock));
-
- if ( unlikely(!port_is_valid(d, port)) )
- return -EINVAL;
-
v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];
/*
* These operations must happen in strict order. Based on
- * include/xen/event.h:evtchn_set_pending().
+ * __evtchn_set_pending_l2().
*/
if ( test_and_clear_bit(port, &shared_info(d, evtchn_mask)) &&
test_bit (port, &shared_info(d, evtchn_pending)) &&
@@ -943,6 +1060,58 @@ int evtchn_unmask(unsigned int port)
return 0;
}
+static int __evtchn_unmask_l3(unsigned int port)
+{
+ struct domain *d = current->domain;
+ struct vcpu *v;
+
+ int page_no = port / EVTCHNS_PER_PAGE;
+ int offset = port % EVTCHNS_PER_PAGE;
+ int l1cb = BITS_PER_EVTCHN_WORD(d)*BITS_PER_EVTCHN_WORD(d);
+ int l2cb = BITS_PER_EVTCHN_WORD(d);
+
+ v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];
+
+ if ( test_and_clear_bit(offset, d->evtchn_mask[page_no]) &&
+ test_bit(offset, d->evtchn_pending[page_no]) &&
+ !test_and_set_bit(port / l2cb,
+ v->evtchn_pending_sel_l2) &&
+ !test_and_set_bit(port / l1cb,
+ &vcpu_info(v, evtchn_pending_sel)) )
+ {
+ vcpu_mark_events_pending(v);
+ }
+
+ return 0;
+}
+
+int evtchn_unmask(unsigned int port)
+{
+ struct domain *d = current->domain;
+ int rc = -EINVAL;
+
+ ASSERT(spin_is_locked(&d->event_lock));
+
+ if ( unlikely(!port_is_valid(d, port)) )
+ return -EINVAL;
+
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ rc = __evtchn_unmask_l2(port);
+ break;
+ case 3:
+ rc = __evtchn_unmask_l3(port);
+ break;
+ default:
+ printk("Cannot unmask port %d for %d level event channel"
+ " for domain %d\n", port,
+ d->evtchn_level, d->domain_id);
+ }
+
+ return rc;
+}
+
static long evtchn_reset(evtchn_reset_t *r)
{
@@ -969,6 +1138,290 @@ out:
return rc;
}
+static void __unmap_l2_sel(struct vcpu *v)
+{
+ unsigned long mfn;
+
+ if ( v->evtchn_pending_sel_l2 != 0 )
+ {
+ unmap_domain_page_global(v->evtchn_pending_sel_l2);
+ mfn = virt_to_mfn(v->evtchn_pending_sel_l2);
+ put_page_and_type(mfn_to_page(mfn));
+
+ v->evtchn_pending_sel_l2 = 0;
+ }
+}
+
+static int __map_l2_sel(struct vcpu *v, unsigned long gfn, unsigned long off)
+{
+ void *mapping;
+ int rc = -EINVAL;
+ struct page_info *page;
+ struct domain *d = v->domain;
+
+ if ( off >= PAGE_SIZE )
+ return rc;
+
+ page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+ if ( !page )
+ goto out;
+
+ if ( !get_page_type(page, PGT_writable_page) )
+ {
+ put_page(page);
+ goto out;
+ }
+
+ mapping = __map_domain_page_global(page);
+ if ( mapping == NULL )
+ {
+ put_page_and_type(page);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ v->evtchn_pending_sel_l2 = (unsigned long *)(mapping + off);
+ rc = 0;
+
+ out:
+ return rc;
+}
+
+
+static void __unmap_l3_arrays(struct domain *d)
+{
+ int i;
+ unsigned long mfn;
+
+ for ( i = 0; i < MAX_L3_PAGES; i++ )
+ {
+ if ( d->evtchn_pending[i] != 0 )
+ {
+ unmap_domain_page_global(d->evtchn_pending[i]);
+ mfn = virt_to_mfn(d->evtchn_pending[i]);
+ put_page_and_type(mfn_to_page(mfn));
+ d->evtchn_pending[i] = 0;
+ }
+ if ( d->evtchn_mask[i] != 0 )
+ {
+ unmap_domain_page_global(d->evtchn_mask[i]);
+ mfn = virt_to_mfn(d->evtchn_mask[i]);
+ put_page_and_type(mfn_to_page(mfn));
+ d->evtchn_mask[i] = 0;
+ }
+ }
+}
+
+static int __map_l3_arrays(struct domain *d, unsigned long *pending,
+ unsigned long *mask)
+{
+ int i;
+ void *pending_mapping, *mask_mapping;
+ struct page_info *pending_page, *mask_page;
+ unsigned long pending_gfn, mask_gfn;
+ int rc = -EINVAL;
+
+ for ( i = 0;
+ i < MAX_L3_PAGES && pending[i] != 0 && mask[i] != 0;
+ i++ )
+ {
+ pending_gfn = pending[i];
+ mask_gfn = mask[i];
+
+ pending_page = get_page_from_gfn(d, pending_gfn, NULL, P2M_ALLOC);
+ if ( !pending_page )
+ goto err;
+
+ mask_page = get_page_from_gfn(d, mask_gfn, NULL, P2M_ALLOC);
+ if ( !mask_page )
+ {
+ put_page(pending_page);
+ goto err;
+ }
+
+ if ( !get_page_type(pending_page, PGT_writable_page) )
+ {
+ put_page(pending_page);
+ put_page(mask_page);
+ goto err;
+ }
+
+ if ( !get_page_type(mask_page, PGT_writable_page) )
+ {
+ put_page_and_type(pending_page);
+ put_page(mask_page);
+ goto err;
+ }
+
+ pending_mapping = __map_domain_page_global(pending_page);
+ if ( !pending_mapping )
+ {
+ put_page_and_type(pending_page);
+ put_page_and_type(mask_page);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ mask_mapping = __map_domain_page_global(mask_page);
+ if ( !mask_mapping )
+ {
+ unmap_domain_page_global(pending_mapping);
+ put_page_and_type(pending_page);
+ put_page_and_type(mask_page);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ d->evtchn_pending[i] = pending_mapping;
+ d->evtchn_mask[i] = mask_mapping;
+ }
+
+ rc = 0;
+
+err:
+ return rc;
+}
+
+static void __evtchn_unmap_all_3level(struct domain *d)
+{
+ struct vcpu *v;
+ /* This is called when destroying a domain, so no pausing... */
+ for_each_vcpu ( d, v )
+ __unmap_l2_sel(v);
+ __unmap_l3_arrays(d);
+}
+
+void event_channel_unmap_nlevel(struct domain *d)
+{
+ switch ( d->evtchn_level )
+ {
+ case 3:
+ __evtchn_unmap_all_3level(d);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __evtchn_migrate_bitmap_l3(struct domain *d)
+{
+ struct vcpu *v;
+
+ /* Easy way to migrate, just move existing selector down one level
+ * then copy the pending array and mask array */
+ for_each_vcpu ( d, v )
+ {
+ memcpy(&v->evtchn_pending_sel_l2[0],
+ &vcpu_info(v, evtchn_pending_sel),
+ sizeof(vcpu_info(v, evtchn_pending_sel)));
+
+ memset(&vcpu_info(v, evtchn_pending_sel), 0,
+ sizeof(vcpu_info(v, evtchn_pending_sel)));
+
+ set_bit(0, &vcpu_info(v, evtchn_pending_sel));
+ }
+
+ memcpy(d->evtchn_pending[0], &shared_info(d, evtchn_pending),
+ sizeof(shared_info(d, evtchn_pending)));
+ memcpy(d->evtchn_mask[0], &shared_info(d, evtchn_mask),
+ sizeof(shared_info(d, evtchn_mask)));
+}
+
+static long __evtchn_register_3level(struct evtchn_register_3level *r)
+{
+ struct domain *d = current->domain;
+ unsigned long mfns[r->nr_vcpus];
+ unsigned long offsets[r->nr_vcpus];
+ unsigned char was_up[r->nr_vcpus];
+ int rc, i;
+ struct vcpu *v;
+
+ if ( d->evtchn_level == 3 )
+ return -EINVAL;
+
+ if ( r->nr_vcpus > d->max_vcpus )
+ return -EINVAL;
+
+ for ( i = 0; i < MAX_L3_PAGES; i++ )
+ if ( d->evtchn_pending[i] || d->evtchn_mask[i] )
+ return -EINVAL;
+
+ for_each_vcpu ( d, v )
+ if ( v->evtchn_pending_sel_l2 )
+ return -EINVAL;
+
+ if ( copy_from_user(mfns, r->l2sel_mfn,
+ sizeof(unsigned long)*r->nr_vcpus) )
+ return -EFAULT;
+
+ if ( copy_from_user(offsets, r->l2sel_offset,
+ sizeof(unsigned long)*r->nr_vcpus) )
+ return -EFAULT;
+
+ /* put cpu offline */
+ for_each_vcpu ( d, v )
+ {
+ if ( v == current )
+ was_up[v->vcpu_id] = 1;
+ else
+ was_up[v->vcpu_id] = !test_and_set_bit(_VPF_down,
+ &v->pause_flags);
+ }
+
+ /* map evtchn pending array and evtchn mask array */
+ rc = __map_l3_arrays(d, r->evtchn_pending, r->evtchn_mask);
+ if ( rc )
+ goto out;
+
+ for_each_vcpu ( d, v )
+ {
+ if ( (rc = __map_l2_sel(v, mfns[v->vcpu_id], offsets[v->vcpu_id])) )
+ {
+ struct vcpu *v1;
+ for_each_vcpu ( d, v1 )
+ __unmap_l2_sel(v1);
+ __unmap_l3_arrays(d);
+ goto out;
+ }
+ }
+
+ /* Scan current bitmap and migrate all outstanding events to new bitmap */
+ __evtchn_migrate_bitmap_l3(d);
+
+ /* make sure all writes take effect before switching to new routines */
+ wmb();
+ d->evtchn_level = 3;
+
+ out:
+ /* bring cpus back online */
+ for_each_vcpu ( d, v )
+ if ( was_up[v->vcpu_id] &&
+ test_and_clear_bit(_VPF_down, &v->pause_flags) )
+ vcpu_wake(v);
+
+ return rc;
+}
+
+static long evtchn_register_nlevel(evtchn_register_nlevel_t *r)
+{
+ struct domain *d = current->domain;
+ int rc;
+
+ spin_lock(&d->event_lock);
+
+ switch ( r->level )
+ {
+ case 3:
+ rc = __evtchn_register_3level(&r->u.l3);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+
+ spin_unlock(&d->event_lock);
+
+ return rc;
+}
long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
@@ -1078,6 +1531,14 @@ long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
break;
}
+ case EVTCHNOP_register_nlevel: {
+ struct evtchn_register_nlevel reg;
+ if ( copy_from_guest(&reg, arg, 1) != 0 )
+ return -EFAULT;
+ rc = evtchn_register_nlevel(&reg);
+ break;
+ }
+
default:
rc = -ENOSYS;
break;
@@ -1251,7 +1712,6 @@ void evtchn_move_pirqs(struct vcpu *v)
spin_unlock(&d->event_lock);
}
-
static void domain_dump_evtchn_info(struct domain *d)
{
unsigned int port;
@@ -1260,8 +1720,10 @@ static void domain_dump_evtchn_info(struct domain *d)
bitmap_scnlistprintf(keyhandler_scratch, sizeof(keyhandler_scratch),
d->poll_mask, d->max_vcpus);
printk("Event channel information for domain %d:\n"
+ "using %d-level event channel\n"
"Polling vCPUs: {%s}\n"
- " port [p/m]\n", d->domain_id, keyhandler_scratch);
+ " port [p/m]\n", d->domain_id, d->evtchn_level,
+ keyhandler_scratch);
spin_lock(&d->event_lock);
@@ -1269,6 +1731,8 @@ static void domain_dump_evtchn_info(struct domain *d)
{
const struct evtchn *chn;
char *ssid;
+ int page_no = port / EVTCHNS_PER_PAGE;
+ int offset = port % EVTCHNS_PER_PAGE;
if ( !port_is_valid(d, port) )
continue;
@@ -1276,11 +1740,28 @@ static void domain_dump_evtchn_info(struct domain *d)
if ( chn->state == ECS_FREE )
continue;
- printk(" %4u [%d/%d]: s=%d n=%d x=%d",
- port,
- !!test_bit(port, &shared_info(d, evtchn_pending)),
- !!test_bit(port, &shared_info(d, evtchn_mask)),
- chn->state, chn->notify_vcpu_id, chn->xen_consumer);
+ printk(" %4u", port);
+
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ printk(" [%d/%d]:",
+ !!test_bit(port, &shared_info(d, evtchn_pending)),
+ !!test_bit(port, &shared_info(d, evtchn_mask)));
+ break;
+ case 3:
+ printk(" [%d/%d]:",
+ !!test_bit(offset, d->evtchn_pending[page_no]),
+ !!test_bit(offset, d->evtchn_mask[page_no]));
+ break;
+ default:
+ printk(" %s: unknown event channel level %d for domain %d \n",
+ __FUNCTION__, d->evtchn_level, d->domain_id);
+ goto out;
+ }
+
+ printk(" s=%d n=%d x=%d", chn->state, chn->notify_vcpu_id,
+ chn->xen_consumer);
switch ( chn->state )
{
@@ -1310,6 +1791,7 @@ static void domain_dump_evtchn_info(struct domain *d)
}
}
+ out:
spin_unlock(&d->event_lock);
}
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 2c5c230..294cca9 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -298,15 +298,15 @@ static void dump_domains(unsigned char key)
{
for_each_vcpu ( d, v )
{
+ unsigned int bits = BITS_PER_EVTCHN_WORD(d);
+ for (int i = 2; i < d->evtchn_level; i++)
+ bits *= BITS_PER_EVTCHN_WORD(d);
printk("Notifying guest %d:%d (virq %d, port %d, stat %d/%d/%d)\n",
d->domain_id, v->vcpu_id,
VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
- test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
- &shared_info(d, evtchn_pending)),
- test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
- &shared_info(d, evtchn_mask)),
- test_bit(v->virq_to_evtchn[VIRQ_DEBUG] /
- BITS_PER_EVTCHN_WORD(d),
+ evtchn_is_pending(d, v->virq_to_evtchn[VIRQ_DEBUG]),
+ evtchn_is_masked(d, v->virq_to_evtchn[VIRQ_DEBUG]),
+ test_bit(v->virq_to_evtchn[VIRQ_DEBUG] / bits,
&vcpu_info(v, evtchn_pending_sel)));
send_guest_vcpu_virq(v, VIRQ_DEBUG);
}
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index ae798c9..b676c9c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -693,7 +693,7 @@ static long do_poll(struct sched_poll *sched_poll)
goto out;
rc = 0;
- if ( test_bit(port, &shared_info(d, evtchn_pending)) )
+ if ( evtchn_is_pending(d, port) )
goto out;
}
diff --git a/xen/include/public/event_channel.h b/xen/include/public/event_channel.h
index 07ff321..29cd6e9 100644
--- a/xen/include/public/event_channel.h
+++ b/xen/include/public/event_channel.h
@@ -71,6 +71,7 @@
#define EVTCHNOP_bind_vcpu 8
#define EVTCHNOP_unmask 9
#define EVTCHNOP_reset 10
+#define EVTCHNOP_register_nlevel 11
/* ` } */
typedef uint32_t evtchn_port_t;
@@ -258,6 +259,29 @@ struct evtchn_reset {
typedef struct evtchn_reset evtchn_reset_t;
/*
+ * EVTCHNOP_register_nlevel: Register N level event channels.
+ * NOTES:
+ * 1. currently only 3-level is supported.
+ * 2. should fall back to basic 2-level if this call fails.
+ */
+#define MAX_L3_PAGES 8
+struct evtchn_register_3level {
+ unsigned long evtchn_pending[MAX_L3_PAGES];
+ unsigned long evtchn_mask[MAX_L3_PAGES];
+ unsigned long *l2sel_mfn;
+ unsigned long *l2sel_offset;
+ uint32_t nr_vcpus;
+};
+
+struct evtchn_register_nlevel {
+ uint32_t level;
+ union {
+ struct evtchn_register_3level l3;
+ } u;
+};
+typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;
+
+/*
* ` enum neg_errnoval
* ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
* `
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 5593066..1d4ef2d 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -554,9 +554,24 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
/*
* Event channel endpoints per domain:
+ * 2-level:
* 1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ * 3-level:
+ * 32k if a long is 32 bits; 256k if a long is 64 bits;
*/
-#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+#define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64)
+#define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \
+ switch (x) { \
+ case 2: \
+ __v = NR_EVENT_CHANNELS_L2; break; \
+ case 3: \
+ __v = NR_EVENT_CHANNELS_L3; break; \
+ default: \
+ BUG(); \
+ } \
+ __v;})
+
struct vcpu_time_info {
/*
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 71c3e92..e7cd6be 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -102,4 +102,8 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
mb(); /* set blocked status /then/ caller does his work */ \
} while ( 0 )
+extern void event_channel_unmap_nlevel(struct domain *d);
+int evtchn_is_masked(struct domain *d, int port);
+int evtchn_is_pending(struct domain *d, int port);
+
#endif /* __XEN_EVENT_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5f23213..ae78549 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -64,7 +64,8 @@ extern struct domain *dom0;
__v;})
#define EVTCHNS_PER_BUCKET 512
-#define NR_EVTCHN_BUCKETS (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
+#define NR_EVTCHN_BUCKETS (NR_EVENT_CHANNELS_L3 / EVTCHNS_PER_BUCKET)
+#define EVTCHNS_PER_PAGE (PAGE_SIZE * 8)
struct evtchn
{
@@ -104,7 +105,7 @@ void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */
struct waitqueue_vcpu;
-struct vcpu
+struct vcpu
{
int vcpu_id;
@@ -112,6 +113,9 @@ struct vcpu
vcpu_info_t *vcpu_info;
+ /* For 3-level event channel */
+ unsigned long *evtchn_pending_sel_l2;
+
struct domain *domain;
struct vcpu *next_in_list;
@@ -275,6 +279,10 @@ struct domain
struct evtchn **evtchn;
spinlock_t event_lock;
unsigned int evtchn_level;
+#define L3_PAGES (NR_EVENT_CHANNELS_L3/BITS_PER_LONG*sizeof(unsigned long)/PAGE_SIZE)
+ unsigned long *evtchn_pending[L3_PAGES];
+ unsigned long *evtchn_mask[L3_PAGES];
+#undef L3_PAGES
struct grant_table *grant_table;
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Add some fields in struct domain to hold pointer to 3-level shared arrays.
Also add per-cpu second level selector in struct vcpu.
These structures are mapped by a new evtchn op. Guests should fall back to
using the 2-level event channel if mapping fails.
The routines are more or less the same as the 2-level ones.
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
xen/arch/x86/domain.c | 2 +
xen/arch/x86/irq.c | 2 +-
xen/common/event_channel.c | 520 ++++++++++++++++++++++++++++++++++--
xen/common/keyhandler.c | 12 +-
xen/common/schedule.c | 2 +-
xen/include/public/event_channel.h | 24 ++
xen/include/public/xen.h | 17 +-
xen/include/xen/event.h | 4 +
xen/include/xen/sched.h | 12 +-
9 files changed, 565 insertions(+), 30 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 7a07c06..b457b00 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -2058,6 +2058,8 @@ int domain_relinquish_resources(struct domain *d)
}
}
+ event_channel_unmap_nlevel(d);
+
d->arch.relmem = RELMEM_shared;
/* fallthrough */
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index 05cede5..d517e39 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1452,7 +1452,7 @@ int pirq_guest_unmask(struct domain *d)
{
pirq = pirqs[i]->pirq;
if ( pirqs[i]->masked &&
- !test_bit(pirqs[i]->evtchn, &shared_info(d, evtchn_mask)) )
+ !evtchn_is_masked(d, pirqs[i]->evtchn) )
pirq_guest_eoi(pirqs[i]);
}
} while ( ++pirq < d->nr_pirqs && n == ARRAY_SIZE(pirqs) );
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 9898f8e..fb3a7b4 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -27,6 +27,7 @@
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/current.h>
+#include <xen/paging.h>
#include <public/xen.h>
#include <public/event_channel.h>
@@ -413,6 +414,85 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
return rc;
}
+static inline int __evtchn_is_masked_l2(struct domain *d, int port)
+{
+ return test_bit(port, &shared_info(d, evtchn_mask));
+}
+
+static inline int __evtchn_is_masked_l3(struct domain *d, int port)
+{
+ return test_bit(port % EVTCHNS_PER_PAGE,
+ d->evtchn_mask[port / EVTCHNS_PER_PAGE]);
+}
+
+int evtchn_is_masked(struct domain *d, int port)
+{
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ return __evtchn_is_masked_l2(d, port);
+ case 3:
+ return __evtchn_is_masked_l3(d, port);
+ default:
+ printk(" %s: unknown event channel level %d for domain %d \n",
+ __FUNCTION__, d->evtchn_level, d->domain_id);
+ return 1;
+ }
+}
+
+static inline int __evtchn_is_pending_l2(struct domain *d, int port)
+{
+ return test_bit(port, &shared_info(d, evtchn_pending));
+}
+
+static inline int __evtchn_is_pending_l3(struct domain *d, int port)
+{
+ return test_bit(port % EVTCHNS_PER_PAGE,
+ d->evtchn_pending[port / EVTCHNS_PER_PAGE]);
+}
+
+int evtchn_is_pending(struct domain *d, int port)
+{
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ return __evtchn_is_pending_l2(d, port);
+ case 3:
+ return __evtchn_is_pending_l3(d, port);
+ default:
+ printk(" %s: unknown event channel level %d for domain %d\n",
+ __FUNCTION__, d->evtchn_level, d->domain_id);
+ return 0;
+ }
+}
+
+static inline void __evtchn_clear_pending_l2(struct domain *d, int port)
+{
+ clear_bit(port, &shared_info(d, evtchn_pending));
+}
+
+static inline void __evtchn_clear_pending_l3(struct domain *d, int port)
+{
+ clear_bit(port % EVTCHNS_PER_PAGE,
+ d->evtchn_pending[port / EVTCHNS_PER_PAGE]);
+}
+
+static void evtchn_clear_pending(struct domain *d, int port)
+{
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ __evtchn_clear_pending_l2(d, port);
+ break;
+ case 3:
+ __evtchn_clear_pending_l3(d, port);
+ break;
+ default:
+ printk("Cannot clear pending for %d level event channel"
+ " for domain %d, port %d\n",
+ d->evtchn_level, d->domain_id, port);
+ }
+}
static long __evtchn_close(struct domain *d1, int port1)
{
@@ -529,7 +609,7 @@ static long __evtchn_close(struct domain *d1, int port1)
}
/* Clear pending event to avoid unexpected behavior on re-bind. */
- clear_bit(port1, &shared_info(d1, evtchn_pending));
+ evtchn_clear_pending(d1, port1);
/* Reset binding to vcpu0 when the channel is freed. */
chn1->state = ECS_FREE;
@@ -606,16 +686,15 @@ int evtchn_send(struct domain *d, unsigned int lport)
ret = -EINVAL;
}
-out:
+ out:
spin_unlock(&ld->event_lock);
return ret;
}
-static void evtchn_set_pending(struct vcpu *v, int port)
+static void __evtchn_set_pending_l2(struct vcpu *v, int port)
{
struct domain *d = v->domain;
- int vcpuid;
/*
* The following bit operations must happen in strict order.
@@ -633,7 +712,50 @@ static void evtchn_set_pending(struct vcpu *v, int port)
{
vcpu_mark_events_pending(v);
}
-
+}
+
+static void __evtchn_set_pending_l3(struct vcpu *v, int port)
+{
+ struct domain *d = v->domain;
+
+ int page_no = port / EVTCHNS_PER_PAGE;
+ int offset = port % EVTCHNS_PER_PAGE;
+ int l1cb = BITS_PER_EVTCHN_WORD(d)*BITS_PER_EVTCHN_WORD(d);
+ int l2cb = BITS_PER_EVTCHN_WORD(d);
+
+ if ( test_and_set_bit(offset, d->evtchn_pending[page_no]) )
+ return;
+
+ if ( !test_bit(offset, d->evtchn_mask[page_no]) &&
+ !test_and_set_bit(port / l2cb,
+ v->evtchn_pending_sel_l2) &&
+ !test_and_set_bit(port / l1cb,
+ &vcpu_info(v, evtchn_pending_sel)) )
+ {
+ vcpu_mark_events_pending(v);
+ }
+}
+
+static void evtchn_set_pending(struct vcpu *v, int port)
+{
+ struct domain *d = v->domain;
+ int vcpuid;
+
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ __evtchn_set_pending_l2(v, port);
+ break;
+ case 3:
+ __evtchn_set_pending_l3(v, port);
+ break;
+ default:
+ printk("Cannot set pending for %d level event channel"
+ " for domain %d, port %d\n",
+ d->evtchn_level, d->domain_id, port);
+ return;
+ }
+
/* Check if some VCPU might be polling for this event. */
if ( likely(bitmap_empty(d->poll_mask, d->max_vcpus)) )
return;
@@ -916,21 +1038,16 @@ long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id)
}
-int evtchn_unmask(unsigned int port)
+static int __evtchn_unmask_l2(unsigned int port)
{
struct domain *d = current->domain;
struct vcpu *v;
- ASSERT(spin_is_locked(&d->event_lock));
-
- if ( unlikely(!port_is_valid(d, port)) )
- return -EINVAL;
-
v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];
/*
* These operations must happen in strict order. Based on
- * include/xen/event.h:evtchn_set_pending().
+ * __evtchn_set_pending_l2().
*/
if ( test_and_clear_bit(port, &shared_info(d, evtchn_mask)) &&
test_bit (port, &shared_info(d, evtchn_pending)) &&
@@ -943,6 +1060,58 @@ int evtchn_unmask(unsigned int port)
return 0;
}
+static int __evtchn_unmask_l3(unsigned int port)
+{
+ struct domain *d = current->domain;
+ struct vcpu *v;
+
+ int page_no = port / EVTCHNS_PER_PAGE;
+ int offset = port % EVTCHNS_PER_PAGE;
+ int l1cb = BITS_PER_EVTCHN_WORD(d)*BITS_PER_EVTCHN_WORD(d);
+ int l2cb = BITS_PER_EVTCHN_WORD(d);
+
+ v = d->vcpu[evtchn_from_port(d, port)->notify_vcpu_id];
+
+ if ( test_and_clear_bit(offset, d->evtchn_mask[page_no]) &&
+ test_bit(offset, d->evtchn_pending[page_no]) &&
+ !test_and_set_bit(port / l2cb,
+ v->evtchn_pending_sel_l2) &&
+ !test_and_set_bit(port / l1cb,
+ &vcpu_info(v, evtchn_pending_sel)) )
+ {
+ vcpu_mark_events_pending(v);
+ }
+
+ return 0;
+}
+
+int evtchn_unmask(unsigned int port)
+{
+ struct domain *d = current->domain;
+ int rc = -EINVAL;
+
+ ASSERT(spin_is_locked(&d->event_lock));
+
+ if ( unlikely(!port_is_valid(d, port)) )
+ return -EINVAL;
+
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ rc = __evtchn_unmask_l2(port);
+ break;
+ case 3:
+ rc = __evtchn_unmask_l3(port);
+ break;
+ default:
+ printk("Cannot unmask port %d for %d level event channel"
+ " for domain %d\n", port,
+ d->evtchn_level, d->domain_id);
+ }
+
+ return rc;
+}
+
static long evtchn_reset(evtchn_reset_t *r)
{
@@ -969,6 +1138,290 @@ out:
return rc;
}
+static void __unmap_l2_sel(struct vcpu *v)
+{
+ unsigned long mfn;
+
+ if ( v->evtchn_pending_sel_l2 != 0 )
+ {
+ unmap_domain_page_global(v->evtchn_pending_sel_l2);
+ mfn = virt_to_mfn(v->evtchn_pending_sel_l2);
+ put_page_and_type(mfn_to_page(mfn));
+
+ v->evtchn_pending_sel_l2 = 0;
+ }
+}
+
+static int __map_l2_sel(struct vcpu *v, unsigned long gfn, unsigned long off)
+{
+ void *mapping;
+ int rc = -EINVAL;
+ struct page_info *page;
+ struct domain *d = v->domain;
+
+ if ( off >= PAGE_SIZE )
+ return rc;
+
+ page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+ if ( !page )
+ goto out;
+
+ if ( !get_page_type(page, PGT_writable_page) )
+ {
+ put_page(page);
+ goto out;
+ }
+
+ mapping = __map_domain_page_global(page);
+ if ( mapping == NULL )
+ {
+ put_page_and_type(page);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ v->evtchn_pending_sel_l2 = (unsigned long *)(mapping + off);
+ rc = 0;
+
+ out:
+ return rc;
+}
+
+
+static void __unmap_l3_arrays(struct domain *d)
+{
+ int i;
+ unsigned long mfn;
+
+ for ( i = 0; i < MAX_L3_PAGES; i++ )
+ {
+ if ( d->evtchn_pending[i] != 0 )
+ {
+ unmap_domain_page_global(d->evtchn_pending[i]);
+ mfn = virt_to_mfn(d->evtchn_pending[i]);
+ put_page_and_type(mfn_to_page(mfn));
+ d->evtchn_pending[i] = 0;
+ }
+ if ( d->evtchn_mask[i] != 0 )
+ {
+ unmap_domain_page_global(d->evtchn_mask[i]);
+ mfn = virt_to_mfn(d->evtchn_mask[i]);
+ put_page_and_type(mfn_to_page(mfn));
+ d->evtchn_mask[i] = 0;
+ }
+ }
+}
+
+static int __map_l3_arrays(struct domain *d, unsigned long *pending,
+ unsigned long *mask)
+{
+ int i;
+ void *pending_mapping, *mask_mapping;
+ struct page_info *pending_page, *mask_page;
+ unsigned long pending_gfn, mask_gfn;
+ int rc = -EINVAL;
+
+ for ( i = 0;
+ i < MAX_L3_PAGES && pending[i] != 0 && mask[i] != 0;
+ i++ )
+ {
+ pending_gfn = pending[i];
+ mask_gfn = mask[i];
+
+ pending_page = get_page_from_gfn(d, pending_gfn, NULL, P2M_ALLOC);
+ if ( !pending_page )
+ goto err;
+
+ mask_page = get_page_from_gfn(d, mask_gfn, NULL, P2M_ALLOC);
+ if ( !mask_page )
+ {
+ put_page(pending_page);
+ goto err;
+ }
+
+ if ( !get_page_type(pending_page, PGT_writable_page) )
+ {
+ put_page(pending_page);
+ put_page(mask_page);
+ goto err;
+ }
+
+ if ( !get_page_type(mask_page, PGT_writable_page) )
+ {
+ put_page_and_type(pending_page);
+ put_page(mask_page);
+ goto err;
+ }
+
+ pending_mapping = __map_domain_page_global(pending_page);
+ if ( !pending_mapping )
+ {
+ put_page_and_type(pending_page);
+ put_page_and_type(mask_page);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ mask_mapping = __map_domain_page_global(mask_page);
+ if ( !mask_mapping )
+ {
+ unmap_domain_page_global(pending_mapping);
+ put_page_and_type(pending_page);
+ put_page_and_type(mask_page);
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ d->evtchn_pending[i] = pending_mapping;
+ d->evtchn_mask[i] = mask_mapping;
+ }
+
+ rc = 0;
+
+err:
+ return rc;
+}
+
+static void __evtchn_unmap_all_3level(struct domain *d)
+{
+ struct vcpu *v;
+ /* This is called when destroying a domain, so no pausing... */
+ for_each_vcpu ( d, v )
+ __unmap_l2_sel(v);
+ __unmap_l3_arrays(d);
+}
+
+void event_channel_unmap_nlevel(struct domain *d)
+{
+ switch ( d->evtchn_level )
+ {
+ case 3:
+ __evtchn_unmap_all_3level(d);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __evtchn_migrate_bitmap_l3(struct domain *d)
+{
+ struct vcpu *v;
+
+ /* Easy way to migrate, just move existing selector down one level
+ * then copy the pending array and mask array */
+ for_each_vcpu ( d, v )
+ {
+ memcpy(&v->evtchn_pending_sel_l2[0],
+ &vcpu_info(v, evtchn_pending_sel),
+ sizeof(vcpu_info(v, evtchn_pending_sel)));
+
+ memset(&vcpu_info(v, evtchn_pending_sel), 0,
+ sizeof(vcpu_info(v, evtchn_pending_sel)));
+
+ set_bit(0, &vcpu_info(v, evtchn_pending_sel));
+ }
+
+ memcpy(d->evtchn_pending[0], &shared_info(d, evtchn_pending),
+ sizeof(shared_info(d, evtchn_pending)));
+ memcpy(d->evtchn_mask[0], &shared_info(d, evtchn_mask),
+ sizeof(shared_info(d, evtchn_mask)));
+}
+
+static long __evtchn_register_3level(struct evtchn_register_3level *r)
+{
+ struct domain *d = current->domain;
+ unsigned long mfns[r->nr_vcpus];
+ unsigned long offsets[r->nr_vcpus];
+ unsigned char was_up[r->nr_vcpus];
+ int rc, i;
+ struct vcpu *v;
+
+ if ( d->evtchn_level == 3 )
+ return -EINVAL;
+
+ if ( r->nr_vcpus > d->max_vcpus )
+ return -EINVAL;
+
+ for ( i = 0; i < MAX_L3_PAGES; i++ )
+ if ( d->evtchn_pending[i] || d->evtchn_mask[i] )
+ return -EINVAL;
+
+ for_each_vcpu ( d, v )
+ if ( v->evtchn_pending_sel_l2 )
+ return -EINVAL;
+
+ if ( copy_from_user(mfns, r->l2sel_mfn,
+ sizeof(unsigned long)*r->nr_vcpus) )
+ return -EFAULT;
+
+ if ( copy_from_user(offsets, r->l2sel_offset,
+ sizeof(unsigned long)*r->nr_vcpus) )
+ return -EFAULT;
+
+ /* put cpu offline */
+ for_each_vcpu ( d, v )
+ {
+ if ( v == current )
+ was_up[v->vcpu_id] = 1;
+ else
+ was_up[v->vcpu_id] = !test_and_set_bit(_VPF_down,
+ &v->pause_flags);
+ }
+
+ /* map evtchn pending array and evtchn mask array */
+ rc = __map_l3_arrays(d, r->evtchn_pending, r->evtchn_mask);
+ if ( rc )
+ goto out;
+
+ for_each_vcpu ( d, v )
+ {
+ if ( (rc = __map_l2_sel(v, mfns[v->vcpu_id], offsets[v->vcpu_id])) )
+ {
+ struct vcpu *v1;
+ for_each_vcpu ( d, v1 )
+ __unmap_l2_sel(v1);
+ __unmap_l3_arrays(d);
+ goto out;
+ }
+ }
+
+ /* Scan current bitmap and migrate all outstanding events to new bitmap */
+ __evtchn_migrate_bitmap_l3(d);
+
+ /* make sure all writes take effect before switching to new routines */
+ wmb();
+ d->evtchn_level = 3;
+
+ out:
+ /* bring cpus back online */
+ for_each_vcpu ( d, v )
+ if ( was_up[v->vcpu_id] &&
+ test_and_clear_bit(_VPF_down, &v->pause_flags) )
+ vcpu_wake(v);
+
+ return rc;
+}
+
+static long evtchn_register_nlevel(evtchn_register_nlevel_t *r)
+{
+ struct domain *d = current->domain;
+ int rc;
+
+ spin_lock(&d->event_lock);
+
+ switch ( r->level )
+ {
+ case 3:
+ rc = __evtchn_register_3level(&r->u.l3);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+
+ spin_unlock(&d->event_lock);
+
+ return rc;
+}
long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
@@ -1078,6 +1531,14 @@ long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
break;
}
+ case EVTCHNOP_register_nlevel: {
+ struct evtchn_register_nlevel reg;
+ if ( copy_from_guest(&reg, arg, 1) != 0 )
+ return -EFAULT;
+ rc = evtchn_register_nlevel(&reg);
+ break;
+ }
+
default:
rc = -ENOSYS;
break;
@@ -1251,7 +1712,6 @@ void evtchn_move_pirqs(struct vcpu *v)
spin_unlock(&d->event_lock);
}
-
static void domain_dump_evtchn_info(struct domain *d)
{
unsigned int port;
@@ -1260,8 +1720,10 @@ static void domain_dump_evtchn_info(struct domain *d)
bitmap_scnlistprintf(keyhandler_scratch, sizeof(keyhandler_scratch),
d->poll_mask, d->max_vcpus);
printk("Event channel information for domain %d:\n"
+ "using %d-level event channel\n"
"Polling vCPUs: {%s}\n"
- " port [p/m]\n", d->domain_id, keyhandler_scratch);
+ " port [p/m]\n", d->domain_id, d->evtchn_level,
+ keyhandler_scratch);
spin_lock(&d->event_lock);
@@ -1269,6 +1731,8 @@ static void domain_dump_evtchn_info(struct domain *d)
{
const struct evtchn *chn;
char *ssid;
+ int page_no = port / EVTCHNS_PER_PAGE;
+ int offset = port % EVTCHNS_PER_PAGE;
if ( !port_is_valid(d, port) )
continue;
@@ -1276,11 +1740,28 @@ static void domain_dump_evtchn_info(struct domain *d)
if ( chn->state == ECS_FREE )
continue;
- printk(" %4u [%d/%d]: s=%d n=%d x=%d",
- port,
- !!test_bit(port, &shared_info(d, evtchn_pending)),
- !!test_bit(port, &shared_info(d, evtchn_mask)),
- chn->state, chn->notify_vcpu_id, chn->xen_consumer);
+ printk(" %4u", port);
+
+ switch ( d->evtchn_level )
+ {
+ case 2:
+ printk(" [%d/%d]:",
+ !!test_bit(port, &shared_info(d, evtchn_pending)),
+ !!test_bit(port, &shared_info(d, evtchn_mask)));
+ break;
+ case 3:
+ printk(" [%d/%d]:",
+ !!test_bit(offset, d->evtchn_pending[page_no]),
+ !!test_bit(offset, d->evtchn_mask[page_no]));
+ break;
+ default:
+ printk(" %s: unknown event channel level %d for domain %d \n",
+ __FUNCTION__, d->evtchn_level, d->domain_id);
+ goto out;
+ }
+
+ printk(" s=%d n=%d x=%d", chn->state, chn->notify_vcpu_id,
+ chn->xen_consumer);
switch ( chn->state )
{
@@ -1310,6 +1791,7 @@ static void domain_dump_evtchn_info(struct domain *d)
}
}
+ out:
spin_unlock(&d->event_lock);
}
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index 2c5c230..294cca9 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -298,15 +298,15 @@ static void dump_domains(unsigned char key)
{
for_each_vcpu ( d, v )
{
+ unsigned int bits = BITS_PER_EVTCHN_WORD(d);
+ for (int i = 2; i < d->evtchn_level; i++)
+ bits *= BITS_PER_EVTCHN_WORD(d);
printk("Notifying guest %d:%d (virq %d, port %d, stat %d/%d/%d)\n",
d->domain_id, v->vcpu_id,
VIRQ_DEBUG, v->virq_to_evtchn[VIRQ_DEBUG],
- test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
- &shared_info(d, evtchn_pending)),
- test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
- &shared_info(d, evtchn_mask)),
- test_bit(v->virq_to_evtchn[VIRQ_DEBUG] /
- BITS_PER_EVTCHN_WORD(d),
+ evtchn_is_pending(d, v->virq_to_evtchn[VIRQ_DEBUG]),
+ evtchn_is_masked(d, v->virq_to_evtchn[VIRQ_DEBUG]),
+ test_bit(v->virq_to_evtchn[VIRQ_DEBUG] / bits,
&vcpu_info(v, evtchn_pending_sel)));
send_guest_vcpu_virq(v, VIRQ_DEBUG);
}
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index ae798c9..b676c9c 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -693,7 +693,7 @@ static long do_poll(struct sched_poll *sched_poll)
goto out;
rc = 0;
- if ( test_bit(port, &shared_info(d, evtchn_pending)) )
+ if ( evtchn_is_pending(d, port) )
goto out;
}
diff --git a/xen/include/public/event_channel.h b/xen/include/public/event_channel.h
index 07ff321..29cd6e9 100644
--- a/xen/include/public/event_channel.h
+++ b/xen/include/public/event_channel.h
@@ -71,6 +71,7 @@
#define EVTCHNOP_bind_vcpu 8
#define EVTCHNOP_unmask 9
#define EVTCHNOP_reset 10
+#define EVTCHNOP_register_nlevel 11
/* ` } */
typedef uint32_t evtchn_port_t;
@@ -258,6 +259,29 @@ struct evtchn_reset {
typedef struct evtchn_reset evtchn_reset_t;
/*
+ * EVTCHNOP_register_nlevel: Register N level event channels.
+ * NOTES:
+ * 1. currently only 3-level is supported.
+ * 2. should fall back to basic 2-level if this call fails.
+ */
+#define MAX_L3_PAGES 8
+struct evtchn_register_3level {
+ unsigned long evtchn_pending[MAX_L3_PAGES];
+ unsigned long evtchn_mask[MAX_L3_PAGES];
+ unsigned long *l2sel_mfn;
+ unsigned long *l2sel_offset;
+ uint32_t nr_vcpus;
+};
+
+struct evtchn_register_nlevel {
+ uint32_t level;
+ union {
+ struct evtchn_register_3level l3;
+ } u;
+};
+typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;
+
+/*
* ` enum neg_errnoval
* ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op)
* `
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 5593066..1d4ef2d 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -554,9 +554,24 @@ DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
/*
* Event channel endpoints per domain:
+ * 2-level:
* 1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ * 3-level:
+ * 32k if a long is 32 bits; 256k if a long is 64 bits;
*/
-#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+#define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64)
+#define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0; \
+ switch (x) { \
+ case 2: \
+ __v = NR_EVENT_CHANNELS_L2; break; \
+ case 3: \
+ __v = NR_EVENT_CHANNELS_L3; break; \
+ default: \
+ BUG(); \
+ } \
+ __v;})
+
struct vcpu_time_info {
/*
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index 71c3e92..e7cd6be 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -102,4 +102,8 @@ void notify_via_xen_event_channel(struct domain *ld, int lport);
mb(); /* set blocked status /then/ caller does his work */ \
} while ( 0 )
+extern void event_channel_unmap_nlevel(struct domain *d);
+int evtchn_is_masked(struct domain *d, int port);
+int evtchn_is_pending(struct domain *d, int port);
+
#endif /* __XEN_EVENT_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 5f23213..ae78549 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -64,7 +64,8 @@ extern struct domain *dom0;
__v;})
#define EVTCHNS_PER_BUCKET 512
-#define NR_EVTCHN_BUCKETS (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
+#define NR_EVTCHN_BUCKETS (NR_EVENT_CHANNELS_L3 / EVTCHNS_PER_BUCKET)
+#define EVTCHNS_PER_PAGE (PAGE_SIZE * 8)
struct evtchn
{
@@ -104,7 +105,7 @@ void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */
struct waitqueue_vcpu;
-struct vcpu
+struct vcpu
{
int vcpu_id;
@@ -112,6 +113,9 @@ struct vcpu
vcpu_info_t *vcpu_info;
+ /* For 3-level event channel */
+ unsigned long *evtchn_pending_sel_l2;
+
struct domain *domain;
struct vcpu *next_in_list;
@@ -275,6 +279,10 @@ struct domain
struct evtchn **evtchn;
spinlock_t event_lock;
unsigned int evtchn_level;
+#define L3_PAGES (NR_EVENT_CHANNELS_L3/BITS_PER_LONG*sizeof(unsigned long)/PAGE_SIZE)
+ unsigned long *evtchn_pending[L3_PAGES];
+ unsigned long *evtchn_mask[L3_PAGES];
+#undef L3_PAGES
struct grant_table *grant_table;
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel