Mailing List Archive

[PATCH] Switch from select() to poll() in xenconsoled's IO loop
In Linux select() typically supports up to 1024 file descriptors. This can be
a problem when user tries to boot up many guests. Switching to poll() has
minimum impact on existing code and has better scalibility.

pollfd array is dynamically allocated / reallocated. If the array fails to
expand, we just ignore the incoming fd.

Change from V2:
* remove unnecessary malloc in initialize_pollfd_arrays
* use ROUND_UP to get new size of arrays

Change from V3:
* remove initialize and destroy function for array
* embedded tracking structure in struct domain, eliminate fd_to_pollfd

Change from V4:
* make xs_pollfd local to io.c
* add back the 5 ms fuzz
* handle POLLERR and POLLHUP
* treat POLLPRI as error if set in revents
* abort if xenconsoled's own fds get screwed
* handle broken tty if tty's fds get screwed

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
tools/console/daemon/io.c | 189 +++++++++++++++++++++++++++++++--------------
1 file changed, 131 insertions(+), 58 deletions(-)

diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
index 48fe151..8d16cac 100644
--- a/tools/console/daemon/io.c
+++ b/tools/console/daemon/io.c
@@ -28,7 +28,7 @@
#include <stdlib.h>
#include <errno.h>
#include <string.h>
-#include <sys/select.h>
+#include <poll.h>
#include <fcntl.h>
#include <unistd.h>
#include <termios.h>
@@ -69,6 +69,7 @@ static int log_hv_fd = -1;
static evtchn_port_or_error_t log_hv_evtchn = -1;
static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
static xc_evtchn *xce_handle = NULL;
+static struct pollfd *xce_pollfd = NULL;

struct buffer {
char *data;
@@ -81,7 +82,9 @@ struct buffer {
struct domain {
int domid;
int master_fd;
+ struct pollfd *master_pollfd;
int slave_fd;
+ struct pollfd *slave_pollfd;
int log_fd;
bool is_dead;
unsigned last_seen;
@@ -92,6 +95,7 @@ struct domain {
evtchn_port_or_error_t local_port;
evtchn_port_or_error_t remote_port;
xc_evtchn *xce_handle;
+ struct pollfd *xce_pollfd;
struct xencons_interface *interface;
int event_count;
long long next_period;
@@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
return (sizeof(intf->in) - space);
}

+static void domain_handle_broken_tty(struct domain *dom, int recreate)
+{
+ domain_close_tty(dom);
+
+ if (recreate) {
+ domain_create_tty(dom);
+ } else {
+ shutdown_domain(dom);
+ }
+}
+
static void handle_tty_read(struct domain *dom)
{
ssize_t len = 0;
@@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
* keep the slave open for the duration.
*/
if (len < 0) {
- domain_close_tty(dom);
-
- if (domain_is_valid(dom->domid)) {
- domain_create_tty(dom);
- } else {
- shutdown_domain(dom);
- }
+ domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
} else if (domain_is_valid(dom->domid)) {
prod = intf->in_prod;
for (i = 0; i < len; i++) {
@@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
if (len < 1) {
dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
dom->domid, len, errno);
-
- domain_close_tty(dom);
-
- if (domain_is_valid(dom->domid)) {
- domain_create_tty(dom);
- } else {
- shutdown_domain(dom);
- }
+ domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
} else {
buffer_advance(&dom->buffer, len);
}
@@ -928,9 +930,53 @@ static void handle_log_reload(void)
}
}

+struct pollfd *xs_pollfd;
+static struct pollfd *fds;
+static unsigned int current_array_size;
+static unsigned int nr_fds;
+
+#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
+static struct pollfd *set_fds(int fd, short events)
+{
+ struct pollfd *ret;
+ if (current_array_size < nr_fds + 1) {
+ struct pollfd *new_fds = NULL;
+ unsigned long newsize;
+
+ /* Round up to 2^8 boundary, in practice this just
+ * make newsize larger than current_array_size.
+ */
+ newsize = ROUNDUP(nr_fds + 1, 8);
+
+ new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
+ if (!new_fds)
+ goto fail;
+ fds = new_fds;
+
+ memset(&fds[0] + current_array_size, 0,
+ sizeof(struct pollfd) * (newsize-current_array_size));
+ current_array_size = newsize;
+ }
+
+ fds[nr_fds].fd = fd;
+ fds[nr_fds].events = events;
+ ret = &fds[nr_fds];
+ nr_fds++;
+
+ return ret;
+fail:
+ dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
+ return NULL;
+}
+
+static void reset_fds(void)
+{
+ nr_fds = 0;
+ memset(fds, 0, sizeof(struct pollfd) * current_array_size);
+}
+
void handle_io(void)
{
- fd_set readfds, writefds;
int ret;

if (log_hv) {
@@ -959,21 +1005,17 @@ void handle_io(void)

for (;;) {
struct domain *d, *n;
- int max_fd = -1;
- struct timeval timeout;
+ int poll_timeout; /* timeout in milliseconds */
struct timespec ts;
long long now, next_timeout = 0;

- FD_ZERO(&readfds);
- FD_ZERO(&writefds);
+ reset_fds();

- FD_SET(xs_fileno(xs), &readfds);
- max_fd = MAX(xs_fileno(xs), max_fd);
+ xs_pollfd = set_fds(xs_fileno(xs), POLLIN|POLLPRI);

- if (log_hv) {
- FD_SET(xc_evtchn_fd(xce_handle), &readfds);
- max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
- }
+ if (log_hv)
+ xce_pollfd = set_fds(xc_evtchn_fd(xce_handle),
+ POLLIN|POLLPRI);

if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
return;
@@ -982,10 +1024,12 @@ void handle_io(void)
/* Re-calculate any event counter allowances & unblock
domains with new allowance */
for (d = dom_head; d; d = d->next) {
- /* Add 5ms of fuzz since select() often returns
- a couple of ms sooner than requested. Without
- the fuzz we typically do an extra spin in select()
- with a 1/2 ms timeout every other iteration */
+ /* CS 16257:955ee4fa1345 introduces a 5ms fuzz
+ * for select(), it is not clear poll() has
+ * similar behavior (returning a couple of ms
+ * sooner than requested) as well. Just leave
+ * the fuzz here. Remove it with a separate
+ * patch if necessary */
if ((now+5) > d->next_period) {
d->next_period = now + RATE_LIMIT_PERIOD;
if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
@@ -1006,75 +1050,101 @@ void handle_io(void)
!d->buffer.max_capacity ||
d->buffer.size < d->buffer.max_capacity) {
int evtchn_fd = xc_evtchn_fd(d->xce_handle);
- FD_SET(evtchn_fd, &readfds);
- max_fd = MAX(evtchn_fd, max_fd);
+ d->xce_pollfd = set_fds(evtchn_fd,
+ POLLIN|POLLPRI);
}
}

if (d->master_fd != -1) {
+ short events = 0;
if (!d->is_dead && ring_free_bytes(d))
- FD_SET(d->master_fd, &readfds);
+ events |= POLLIN;

if (!buffer_empty(&d->buffer))
- FD_SET(d->master_fd, &writefds);
- max_fd = MAX(d->master_fd, max_fd);
+ events |= POLLOUT;
+
+ if (events)
+ d->master_pollfd =
+ set_fds(d->master_fd,
+ events|POLLPRI);
}
}

/* If any domain has been rate limited, we need to work
- out what timeout to supply to select */
+ out what timeout to supply to poll */
if (next_timeout) {
long long duration = (next_timeout - now);
if (duration <= 0) /* sanity check */
duration = 1;
- timeout.tv_sec = duration / 1000;
- timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
- * 1000);
+ poll_timeout = (int)duration;
}

- ret = select(max_fd + 1, &readfds, &writefds, 0,
- next_timeout ? &timeout : NULL);
+ ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);

if (log_reload) {
handle_log_reload();
log_reload = 0;
}

- /* Abort if select failed, except for EINTR cases
+ /* Abort if poll failed, except for EINTR cases
which indicate a possible log reload */
if (ret == -1) {
if (errno == EINTR)
continue;
- dolog(LOG_ERR, "Failure in select: %d (%s)",
+ dolog(LOG_ERR, "Failure in poll: %d (%s)",
errno, strerror(errno));
break;
}

- if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
- handle_hv_logs();
+ if (log_hv && xce_pollfd) {
+ if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ dolog(LOG_ERR,
+ "Failure in poll xce_handle: %d (%s)",
+ errno, strerror(errno));
+ break;
+ } else if (xce_pollfd->revents & POLLIN)
+ handle_hv_logs();
+ }

if (ret <= 0)
continue;

- if (FD_ISSET(xs_fileno(xs), &readfds))
- handle_xs();
+ if (xs_pollfd) {
+ if (xs_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
+ dolog(LOG_ERR,
+ "Failure in poll xs_handle: %d (%s)",
+ errno, strerror(errno));
+ break;
+ } else if (xs_pollfd->revents & POLLIN)
+ handle_xs();
+ }

for (d = dom_head; d; d = n) {
n = d->next;
if (d->event_count < RATE_LIMIT_ALLOWANCE) {
if (d->xce_handle != NULL &&
- FD_ISSET(xc_evtchn_fd(d->xce_handle),
- &readfds))
- handle_ring_read(d);
+ d->xce_pollfd &&
+ !(d->xce_pollfd->revents &
+ ~(POLLIN|POLLOUT|POLLPRI)) &&
+ (d->xce_pollfd->revents &
+ POLLIN))
+ handle_ring_read(d);
}

- if (d->master_fd != -1 && FD_ISSET(d->master_fd,
- &readfds))
- handle_tty_read(d);
-
- if (d->master_fd != -1 && FD_ISSET(d->master_fd,
- &writefds))
- handle_tty_write(d);
+ if (d->master_fd != -1 && d->master_pollfd) {
+ if (d->master_pollfd->revents &
+ ~(POLLIN|POLLOUT|POLLPRI))
+ domain_handle_broken_tty(d,
+ domain_is_valid(d->domid));
+ else {
+ if (d->master_pollfd->revents &
+ POLLIN)
+ handle_tty_read(d);
+ if (d->master_pollfd->revents &
+ POLLOUT)
+ handle_tty_write(d);
+ }
+ }

if (d->last_seen != enum_pass)
shutdown_domain(d);
@@ -1084,6 +1154,9 @@ void handle_io(void)
}
}

+ free(fds);
+ current_array_size = 0;
+
out:
if (log_hv_fd != -1) {
close(log_hv_fd);
--
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop [ In reply to ]
Just to be clear, this is version 5 of the patch.

Git send-email mysteriously dropped my subject prefix.


Wei.

On Tue, 2013-01-08 at 11:50 +0000, Wei Liu wrote:
> In Linux select() typically supports up to 1024 file descriptors. This can be
> a problem when user tries to boot up many guests. Switching to poll() has
> minimum impact on existing code and has better scalibility.
>
> pollfd array is dynamically allocated / reallocated. If the array fails to
> expand, we just ignore the incoming fd.
>
> Change from V2:
> * remove unnecessary malloc in initialize_pollfd_arrays
> * use ROUND_UP to get new size of arrays
>
> Change from V3:
> * remove initialize and destroy function for array
> * embedded tracking structure in struct domain, eliminate fd_to_pollfd
>
> Change from V4:
> * make xs_pollfd local to io.c
> * add back the 5 ms fuzz
> * handle POLLERR and POLLHUP
> * treat POLLPRI as error if set in revents
> * abort if xenconsoled's own fds get screwed
> * handle broken tty if tty's fds get screwed
>
> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
> ---
> tools/console/daemon/io.c | 189 +++++++++++++++++++++++++++++++--------------
> 1 file changed, 131 insertions(+), 58 deletions(-)
>
> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
> index 48fe151..8d16cac 100644
> --- a/tools/console/daemon/io.c
> +++ b/tools/console/daemon/io.c
> @@ -28,7 +28,7 @@
> #include <stdlib.h>
> #include <errno.h>
> #include <string.h>
> -#include <sys/select.h>
> +#include <poll.h>
> #include <fcntl.h>
> #include <unistd.h>
> #include <termios.h>
> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
> static evtchn_port_or_error_t log_hv_evtchn = -1;
> static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
> static xc_evtchn *xce_handle = NULL;
> +static struct pollfd *xce_pollfd = NULL;
>
> struct buffer {
> char *data;
> @@ -81,7 +82,9 @@ struct buffer {
> struct domain {
> int domid;
> int master_fd;
> + struct pollfd *master_pollfd;
> int slave_fd;
> + struct pollfd *slave_pollfd;
> int log_fd;
> bool is_dead;
> unsigned last_seen;
> @@ -92,6 +95,7 @@ struct domain {
> evtchn_port_or_error_t local_port;
> evtchn_port_or_error_t remote_port;
> xc_evtchn *xce_handle;
> + struct pollfd *xce_pollfd;
> struct xencons_interface *interface;
> int event_count;
> long long next_period;
> @@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
> return (sizeof(intf->in) - space);
> }
>
> +static void domain_handle_broken_tty(struct domain *dom, int recreate)
> +{
> + domain_close_tty(dom);
> +
> + if (recreate) {
> + domain_create_tty(dom);
> + } else {
> + shutdown_domain(dom);
> + }
> +}
> +
> static void handle_tty_read(struct domain *dom)
> {
> ssize_t len = 0;
> @@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
> * keep the slave open for the duration.
> */
> if (len < 0) {
> - domain_close_tty(dom);
> -
> - if (domain_is_valid(dom->domid)) {
> - domain_create_tty(dom);
> - } else {
> - shutdown_domain(dom);
> - }
> + domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
> } else if (domain_is_valid(dom->domid)) {
> prod = intf->in_prod;
> for (i = 0; i < len; i++) {
> @@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
> if (len < 1) {
> dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
> dom->domid, len, errno);
> -
> - domain_close_tty(dom);
> -
> - if (domain_is_valid(dom->domid)) {
> - domain_create_tty(dom);
> - } else {
> - shutdown_domain(dom);
> - }
> + domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
> } else {
> buffer_advance(&dom->buffer, len);
> }
> @@ -928,9 +930,53 @@ static void handle_log_reload(void)
> }
> }
>
> +struct pollfd *xs_pollfd;
> +static struct pollfd *fds;
> +static unsigned int current_array_size;
> +static unsigned int nr_fds;
> +
> +#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
> +static struct pollfd *set_fds(int fd, short events)
> +{
> + struct pollfd *ret;
> + if (current_array_size < nr_fds + 1) {
> + struct pollfd *new_fds = NULL;
> + unsigned long newsize;
> +
> + /* Round up to 2^8 boundary, in practice this just
> + * make newsize larger than current_array_size.
> + */
> + newsize = ROUNDUP(nr_fds + 1, 8);
> +
> + new_fds = realloc(fds, sizeof(struct pollfd)*newsize);
> + if (!new_fds)
> + goto fail;
> + fds = new_fds;
> +
> + memset(&fds[0] + current_array_size, 0,
> + sizeof(struct pollfd) * (newsize-current_array_size));
> + current_array_size = newsize;
> + }
> +
> + fds[nr_fds].fd = fd;
> + fds[nr_fds].events = events;
> + ret = &fds[nr_fds];
> + nr_fds++;
> +
> + return ret;
> +fail:
> + dolog(LOG_ERR, "realloc failed, ignoring fd %d\n", fd);
> + return NULL;
> +}
> +
> +static void reset_fds(void)
> +{
> + nr_fds = 0;
> + memset(fds, 0, sizeof(struct pollfd) * current_array_size);
> +}
> +
> void handle_io(void)
> {
> - fd_set readfds, writefds;
> int ret;
>
> if (log_hv) {
> @@ -959,21 +1005,17 @@ void handle_io(void)
>
> for (;;) {
> struct domain *d, *n;
> - int max_fd = -1;
> - struct timeval timeout;
> + int poll_timeout; /* timeout in milliseconds */
> struct timespec ts;
> long long now, next_timeout = 0;
>
> - FD_ZERO(&readfds);
> - FD_ZERO(&writefds);
> + reset_fds();
>
> - FD_SET(xs_fileno(xs), &readfds);
> - max_fd = MAX(xs_fileno(xs), max_fd);
> + xs_pollfd = set_fds(xs_fileno(xs), POLLIN|POLLPRI);
>
> - if (log_hv) {
> - FD_SET(xc_evtchn_fd(xce_handle), &readfds);
> - max_fd = MAX(xc_evtchn_fd(xce_handle), max_fd);
> - }
> + if (log_hv)
> + xce_pollfd = set_fds(xc_evtchn_fd(xce_handle),
> + POLLIN|POLLPRI);
>
> if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
> return;
> @@ -982,10 +1024,12 @@ void handle_io(void)
> /* Re-calculate any event counter allowances & unblock
> domains with new allowance */
> for (d = dom_head; d; d = d->next) {
> - /* Add 5ms of fuzz since select() often returns
> - a couple of ms sooner than requested. Without
> - the fuzz we typically do an extra spin in select()
> - with a 1/2 ms timeout every other iteration */
> + /* CS 16257:955ee4fa1345 introduces a 5ms fuzz
> + * for select(), it is not clear poll() has
> + * similar behavior (returning a couple of ms
> + * sooner than requested) as well. Just leave
> + * the fuzz here. Remove it with a separate
> + * patch if necessary */
> if ((now+5) > d->next_period) {
> d->next_period = now + RATE_LIMIT_PERIOD;
> if (d->event_count >= RATE_LIMIT_ALLOWANCE) {
> @@ -1006,75 +1050,101 @@ void handle_io(void)
> !d->buffer.max_capacity ||
> d->buffer.size < d->buffer.max_capacity) {
> int evtchn_fd = xc_evtchn_fd(d->xce_handle);
> - FD_SET(evtchn_fd, &readfds);
> - max_fd = MAX(evtchn_fd, max_fd);
> + d->xce_pollfd = set_fds(evtchn_fd,
> + POLLIN|POLLPRI);
> }
> }
>
> if (d->master_fd != -1) {
> + short events = 0;
> if (!d->is_dead && ring_free_bytes(d))
> - FD_SET(d->master_fd, &readfds);
> + events |= POLLIN;
>
> if (!buffer_empty(&d->buffer))
> - FD_SET(d->master_fd, &writefds);
> - max_fd = MAX(d->master_fd, max_fd);
> + events |= POLLOUT;
> +
> + if (events)
> + d->master_pollfd =
> + set_fds(d->master_fd,
> + events|POLLPRI);
> }
> }
>
> /* If any domain has been rate limited, we need to work
> - out what timeout to supply to select */
> + out what timeout to supply to poll */
> if (next_timeout) {
> long long duration = (next_timeout - now);
> if (duration <= 0) /* sanity check */
> duration = 1;
> - timeout.tv_sec = duration / 1000;
> - timeout.tv_usec = ((duration - (timeout.tv_sec * 1000))
> - * 1000);
> + poll_timeout = (int)duration;
> }
>
> - ret = select(max_fd + 1, &readfds, &writefds, 0,
> - next_timeout ? &timeout : NULL);
> + ret = poll(fds, nr_fds, next_timeout ? poll_timeout : -1);
>
> if (log_reload) {
> handle_log_reload();
> log_reload = 0;
> }
>
> - /* Abort if select failed, except for EINTR cases
> + /* Abort if poll failed, except for EINTR cases
> which indicate a possible log reload */
> if (ret == -1) {
> if (errno == EINTR)
> continue;
> - dolog(LOG_ERR, "Failure in select: %d (%s)",
> + dolog(LOG_ERR, "Failure in poll: %d (%s)",
> errno, strerror(errno));
> break;
> }
>
> - if (log_hv && FD_ISSET(xc_evtchn_fd(xce_handle), &readfds))
> - handle_hv_logs();
> + if (log_hv && xce_pollfd) {
> + if (xce_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + dolog(LOG_ERR,
> + "Failure in poll xce_handle: %d (%s)",
> + errno, strerror(errno));
> + break;
> + } else if (xce_pollfd->revents & POLLIN)
> + handle_hv_logs();
> + }
>
> if (ret <= 0)
> continue;
>
> - if (FD_ISSET(xs_fileno(xs), &readfds))
> - handle_xs();
> + if (xs_pollfd) {
> + if (xs_pollfd->revents & ~(POLLIN|POLLOUT|POLLPRI)) {
> + dolog(LOG_ERR,
> + "Failure in poll xs_handle: %d (%s)",
> + errno, strerror(errno));
> + break;
> + } else if (xs_pollfd->revents & POLLIN)
> + handle_xs();
> + }
>
> for (d = dom_head; d; d = n) {
> n = d->next;
> if (d->event_count < RATE_LIMIT_ALLOWANCE) {
> if (d->xce_handle != NULL &&
> - FD_ISSET(xc_evtchn_fd(d->xce_handle),
> - &readfds))
> - handle_ring_read(d);
> + d->xce_pollfd &&
> + !(d->xce_pollfd->revents &
> + ~(POLLIN|POLLOUT|POLLPRI)) &&
> + (d->xce_pollfd->revents &
> + POLLIN))
> + handle_ring_read(d);
> }
>
> - if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> - &readfds))
> - handle_tty_read(d);
> -
> - if (d->master_fd != -1 && FD_ISSET(d->master_fd,
> - &writefds))
> - handle_tty_write(d);
> + if (d->master_fd != -1 && d->master_pollfd) {
> + if (d->master_pollfd->revents &
> + ~(POLLIN|POLLOUT|POLLPRI))
> + domain_handle_broken_tty(d,
> + domain_is_valid(d->domid));
> + else {
> + if (d->master_pollfd->revents &
> + POLLIN)
> + handle_tty_read(d);
> + if (d->master_pollfd->revents &
> + POLLOUT)
> + handle_tty_write(d);
> + }
> + }
>
> if (d->last_seen != enum_pass)
> shutdown_domain(d);
> @@ -1084,6 +1154,9 @@ void handle_io(void)
> }
> }
>
> + free(fds);
> + current_array_size = 0;
> +
> out:
> if (log_hv_fd != -1) {
> close(log_hv_fd);



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop [ In reply to ]
On 08/01/13 12:52, Wei Liu wrote:
> Just to be clear, this is version 5 of the patch.
>
> Git send-email mysteriously dropped my subject prefix.
>
>
> Wei.
>
> On Tue, 2013-01-08 at 11:50 +0000, Wei Liu wrote:
>> In Linux select() typically supports up to 1024 file descriptors. This can be
>> a problem when user tries to boot up many guests. Switching to poll() has
>> minimum impact on existing code and has better scalibility.
>>
>> pollfd array is dynamically allocated / reallocated. If the array fails to
>> expand, we just ignore the incoming fd.
>>
>> Change from V2:
>> * remove unnecessary malloc in initialize_pollfd_arrays
>> * use ROUND_UP to get new size of arrays
>>
>> Change from V3:
>> * remove initialize and destroy function for array
>> * embedded tracking structure in struct domain, eliminate fd_to_pollfd
>>
>> Change from V4:
>> * make xs_pollfd local to io.c
>> * add back the 5 ms fuzz
>> * handle POLLERR and POLLHUP
>> * treat POLLPRI as error if set in revents
>> * abort if xenconsoled's own fds get screwed
>> * handle broken tty if tty's fds get screwed
>>
>> Signed-off-by: Wei Liu <wei.liu2@citrix.com>
>> ---
>> tools/console/daemon/io.c | 189 +++++++++++++++++++++++++++++++--------------
>> 1 file changed, 131 insertions(+), 58 deletions(-)
>>
>> diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c
>> index 48fe151..8d16cac 100644
>> --- a/tools/console/daemon/io.c
>> +++ b/tools/console/daemon/io.c
>> @@ -28,7 +28,7 @@
>> #include <stdlib.h>
>> #include <errno.h>
>> #include <string.h>
>> -#include <sys/select.h>
>> +#include <poll.h>
>> #include <fcntl.h>
>> #include <unistd.h>
>> #include <termios.h>
>> @@ -69,6 +69,7 @@ static int log_hv_fd = -1;
>> static evtchn_port_or_error_t log_hv_evtchn = -1;
>> static xc_interface *xch; /* why does xenconsoled have two xc handles ? */
>> static xc_evtchn *xce_handle = NULL;
>> +static struct pollfd *xce_pollfd = NULL;
>>
>> struct buffer {
>> char *data;
>> @@ -81,7 +82,9 @@ struct buffer {
>> struct domain {
>> int domid;
>> int master_fd;
>> + struct pollfd *master_pollfd;
>> int slave_fd;
>> + struct pollfd *slave_pollfd;
>> int log_fd;
>> bool is_dead;
>> unsigned last_seen;
>> @@ -92,6 +95,7 @@ struct domain {
>> evtchn_port_or_error_t local_port;
>> evtchn_port_or_error_t remote_port;
>> xc_evtchn *xce_handle;
>> + struct pollfd *xce_pollfd;
>> struct xencons_interface *interface;
>> int event_count;
>> long long next_period;
>> @@ -769,6 +773,17 @@ static int ring_free_bytes(struct domain *dom)
>> return (sizeof(intf->in) - space);
>> }
>>
>> +static void domain_handle_broken_tty(struct domain *dom, int recreate)
>> +{
>> + domain_close_tty(dom);
>> +
>> + if (recreate) {
>> + domain_create_tty(dom);
>> + } else {
>> + shutdown_domain(dom);
>> + }
>> +}
>> +
>> static void handle_tty_read(struct domain *dom)
>> {
>> ssize_t len = 0;
>> @@ -794,13 +809,7 @@ static void handle_tty_read(struct domain *dom)
>> * keep the slave open for the duration.
>> */
>> if (len < 0) {
>> - domain_close_tty(dom);
>> -
>> - if (domain_is_valid(dom->domid)) {
>> - domain_create_tty(dom);
>> - } else {
>> - shutdown_domain(dom);
>> - }
>> + domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>> } else if (domain_is_valid(dom->domid)) {
>> prod = intf->in_prod;
>> for (i = 0; i < len; i++) {
>> @@ -828,14 +837,7 @@ static void handle_tty_write(struct domain *dom)
>> if (len < 1) {
>> dolog(LOG_DEBUG, "Write failed on domain %d: %zd, %d\n",
>> dom->domid, len, errno);
>> -
>> - domain_close_tty(dom);
>> -
>> - if (domain_is_valid(dom->domid)) {
>> - domain_create_tty(dom);
>> - } else {
>> - shutdown_domain(dom);
>> - }
>> + domain_handle_broken_tty(dom, domain_is_valid(dom->domid));
>> } else {
>> buffer_advance(&dom->buffer, len);
>> }
>> @@ -928,9 +930,53 @@ static void handle_log_reload(void)
>> }
>> }
>>
>> +struct pollfd *xs_pollfd;
Why is this not "static"?
>> +static struct pollfd *fds;
>> +static unsigned int current_array_size;
>> +static unsigned int nr_fds;
There seems to be no particular reason why these variables are here, and
the other ones up the top of the file. For example xce_handle is not
used "above" here, but it's declared at the top of the file. I think
keeping all variables together makes more sense than scattering them around.

Otherwise, looks good to me.

--
Mats

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [PATCH] Switch from select() to poll() in xenconsoled's IO loop [ In reply to ]
On Tue, 2013-01-08 at 14:01 +0000, Mats Petersson wrote:
> >> +struct pollfd *xs_pollfd;
> Why is this not "static"?
> >> +static struct pollfd *fds;
> >> +static unsigned int current_array_size;
> >> +static unsigned int nr_fds;
> There seems to be no particular reason why these variables are here, and
> the other ones up the top of the file. For example xce_handle is not
> used "above" here, but it's declared at the top of the file. I think
> keeping all variables together makes more sense than scattering them around.

I'd prefer if those which can be were local to the handle_io function
and not global at all. I think this includes both xs_pollfd and
xce_pollfd.

I think it also includes the existing xce_handle and log_hv_evtchn.

Ian.



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel