Mailing List Archive

[xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User Alex Williamson <alex.williamson@hp.com>
# Date 1182365367 21600
# Node ID 810885428743660169e7382ec9596373ca6ce48f
# Parent c20bc60f9243d08199cb0a9a837cbe11c6b3dcdc
# Parent 005dd6b1cf8e0008aba7984b828274a40e8d7d95
merge with xen-unstable.hg
---
docs/src/user.tex | 1
tools/blktap/drivers/Makefile | 1
tools/blktap/drivers/block-aio.c | 49 ++++-----
tools/blktap/drivers/block-qcow.c | 48 ++++-----
tools/blktap/drivers/tapaio.c | 164 ++++++++++++++++++++++++++++++++
tools/blktap/drivers/tapaio.h | 58 +++++++++++
tools/examples/init.d/xendomains | 33 ++++--
tools/ioemu/block-raw.c | 2
tools/ioemu/target-i386-dm/exec-dm.c | 42 ++++++--
tools/ioemu/vl.c | 12 ++
tools/libxc/xc_core.c | 2
tools/python/xen/xend/XendDomainInfo.py | 2
tools/python/xen/xend/server/blkif.py | 5
xen/arch/ia64/xen/domain.c | 9 -
xen/arch/ia64/xen/xenmem.c | 2
xen/arch/x86/apic.c | 4
xen/arch/x86/boot/cmdline.S | 40 +++++--
xen/arch/x86/boot/trampoline.S | 11 --
xen/arch/x86/boot/video.S | 59 ++++++-----
xen/arch/x86/boot/video.h | 9 -
xen/arch/x86/boot/x86_32.S | 4
xen/arch/x86/boot/x86_64.S | 2
xen/arch/x86/domain.c | 37 +++++--
xen/arch/x86/domain_build.c | 6 -
xen/arch/x86/flushtlb.c | 4
xen/arch/x86/hvm/hvm.c | 21 ++--
xen/arch/x86/hvm/irq.c | 81 ++++++++-------
xen/arch/x86/hvm/svm/asid.c | 72 +++++++-------
xen/arch/x86/hvm/svm/intr.c | 146 ++++++++++++++++------------
xen/arch/x86/hvm/svm/svm.c | 60 +++++------
xen/arch/x86/hvm/svm/vmcb.c | 6 -
xen/arch/x86/hvm/vioapic.c | 34 +++---
xen/arch/x86/hvm/vlapic.c | 9 -
xen/arch/x86/hvm/vmx/intr.c | 106 ++++++++++----------
xen/arch/x86/hvm/vmx/vmcs.c | 2
xen/arch/x86/hvm/vmx/vmx.c | 59 ++++++++---
xen/arch/x86/hvm/vpic.c | 3
xen/arch/x86/hvm/vpt.c | 40 ++++---
xen/arch/x86/mm.c | 10 -
xen/arch/x86/setup.c | 10 +
xen/arch/x86/traps.c | 14 ++
xen/arch/x86/x86_32/traps.c | 1
xen/arch/x86/x86_64/compat_kexec.S | 65 +++++++++++-
xen/arch/x86/x86_64/traps.c | 1
xen/common/compat/memory.c | 7 +
xen/common/domctl.c | 4
xen/common/grant_table.c | 12 +-
xen/common/kernel.c | 10 -
xen/common/kexec.c | 4
xen/common/perfc.c | 2
xen/drivers/char/console.c | 2
xen/drivers/video/vga.c | 3
xen/include/asm-ia64/guest_access.h | 25 ++--
xen/include/asm-x86/event.h | 1
xen/include/asm-x86/guest_access.h | 68 +++++++------
xen/include/asm-x86/hvm/hvm.h | 33 +++++-
xen/include/asm-x86/hvm/irq.h | 12 +-
xen/include/asm-x86/hvm/support.h | 1
xen/include/asm-x86/hvm/svm/asid.h | 1
xen/include/asm-x86/hvm/vcpu.h | 4
xen/include/asm-x86/hvm/vlapic.h | 2
xen/include/asm-x86/hvm/vmx/vmx.h | 13 +-
xen/include/asm-x86/hvm/vpic.h | 2
xen/include/asm-x86/hvm/vpt.h | 3
xen/include/xen/compat.h | 62 +++++++-----
xen/include/xen/xencomm.h | 43 ++++----
66 files changed, 1080 insertions(+), 580 deletions(-)

diff -r c20bc60f9243 -r 810885428743 docs/src/user.tex
--- a/docs/src/user.tex Wed Jun 20 12:47:52 2007 -0600
+++ b/docs/src/user.tex Wed Jun 20 12:49:27 2007 -0600
@@ -3178,6 +3178,7 @@ editing \path{grub.conf}.
\begin{description}
\item[ ask ] Display a vga menu allowing manual selection of video
mode.
+ \item[ current ] Use existing vga mode without modification.
\item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is
one of 80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
\item[ gfx-$<$mode$>$ ] Select VESA graphics mode
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/Makefile Wed Jun 20 12:49:27 2007 -0600
@@ -35,6 +35,7 @@ BLK-OBJS += block-ram.o
BLK-OBJS += block-ram.o
BLK-OBJS += block-qcow.o
BLK-OBJS += aes.o
+BLK-OBJS += tapaio.o

all: $(IBIN) qcow-util

diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-aio.c
--- a/tools/blktap/drivers/block-aio.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-aio.c Wed Jun 20 12:49:27 2007 -0600
@@ -43,14 +43,7 @@
#include <sys/ioctl.h>
#include <linux/fs.h>
#include "tapdisk.h"
-
-
-/**
- * We used a kernel patch to return an fd associated with the AIO context
- * so that we can concurrently poll on synchronous and async descriptors.
- * This is signalled by passing 1 as the io context to io_setup.
- */
-#define REQUEST_ASYNC_FD 1
+#include "tapaio.h"

#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)

@@ -65,14 +58,13 @@ struct tdaio_state {
int fd;

/* libaio state */
- io_context_t aio_ctx;
+ tap_aio_context_t aio_ctx;
struct iocb iocb_list [MAX_AIO_REQS];
struct iocb *iocb_free [MAX_AIO_REQS];
struct pending_aio pending_aio[MAX_AIO_REQS];
int iocb_free_count;
struct iocb *iocb_queue[MAX_AIO_REQS];
int iocb_queued;
- int poll_fd; /* NB: we require aio_poll support */
struct io_event aio_events[MAX_AIO_REQS];
};

@@ -148,7 +140,7 @@ static inline void init_fds(struct disk_
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;

- dd->io_fd[0] = prv->poll_fd;
+ dd->io_fd[0] = prv->aio_ctx.pollfd;
}

/* Open the disk file and initialize aio state. */
@@ -162,12 +154,9 @@ int tdaio_open (struct disk_driver *dd,
/* Initialize AIO */
prv->iocb_free_count = MAX_AIO_REQS;
prv->iocb_queued = 0;
-
- prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
- prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
-
- if (prv->poll_fd < 0) {
- ret = prv->poll_fd;
+
+ ret = tap_aio_setup(&prv->aio_ctx, prv->aio_events, MAX_AIO_REQS);
+ if (ret < 0) {
if (ret == -EAGAIN) {
DPRINTF("Couldn't setup AIO context. If you are "
"trying to concurrently use a large number "
@@ -176,9 +165,7 @@ int tdaio_open (struct disk_driver *dd,
"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
"aio-max-nr')\n");
} else {
- DPRINTF("Couldn't get fd for AIO poll support. This "
- "is probably because your kernel does not "
- "have the aio-poll patch applied.\n");
+ DPRINTF("Couldn't setup AIO context.\n");
}
goto done;
}
@@ -286,7 +273,7 @@ int tdaio_submit(struct disk_driver *dd)
if (!prv->iocb_queued)
return 0;

- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);

/* XXX: TODO: Handle error conditions here. */

@@ -300,7 +287,7 @@ int tdaio_close(struct disk_driver *dd)
{
struct tdaio_state *prv = (struct tdaio_state *)dd->private;

- io_destroy(prv->aio_ctx);
+ io_destroy(prv->aio_ctx.aio_ctx);
close(prv->fd);

return 0;
@@ -308,15 +295,13 @@ int tdaio_close(struct disk_driver *dd)

int tdaio_do_callbacks(struct disk_driver *dd, int sid)
{
- int ret, i, rsp = 0;
+ int i, nr_events, rsp = 0;
struct io_event *ep;
struct tdaio_state *prv = (struct tdaio_state *)dd->private;

- /* Non-blocking test for completed io. */
- ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
- NULL);
-
- for (ep=prv->aio_events,i=ret; i-->0; ep++) {
+ nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+ for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;

@@ -327,6 +312,14 @@ int tdaio_do_callbacks(struct disk_drive

prv->iocb_free[prv->iocb_free_count++] = io;
}
+
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&prv->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&prv->aio_ctx);
+
return rsp;
}

diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/block-qcow.c
--- a/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/blktap/drivers/block-qcow.c Wed Jun 20 12:49:27 2007 -0600
@@ -38,6 +38,7 @@
#include "bswap.h"
#include "aes.h"
#include "tapdisk.h"
+#include "tapaio.h"

#if 1
#define ASSERT(_p) \
@@ -52,9 +53,6 @@
(uint64_t)( \
(l + (s - 1)) - ((l + (s - 1)) % s)); \
})
-
-/******AIO DEFINES******/
-#define REQUEST_ASYNC_FD 1

struct pending_aio {
td_callback_t cb;
@@ -145,7 +143,7 @@ struct tdqcow_state {
AES_KEY aes_encrypt_key; /*AES key*/
AES_KEY aes_decrypt_key; /*AES key*/
/* libaio state */
- io_context_t aio_ctx;
+ tap_aio_context_t aio_ctx;
int max_aio_reqs;
struct iocb *iocb_list;
struct iocb **iocb_free;
@@ -153,7 +151,6 @@ struct tdqcow_state {
int iocb_free_count;
struct iocb **iocb_queue;
int iocb_queued;
- int poll_fd; /* NB: we require aio_poll support */
struct io_event *aio_events;
};

@@ -179,7 +176,7 @@ static void free_aio_state(struct disk_d

static int init_aio_state(struct disk_driver *dd)
{
- int i;
+ int i, ret;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
long ioidx;
@@ -216,12 +213,9 @@ static int init_aio_state(struct disk_dr
goto fail;
}

- /*Signal kernel to create Poll FD for Asyc completion events*/
- s->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
- s->poll_fd = io_setup(s->max_aio_reqs, &s->aio_ctx);
-
- if (s->poll_fd < 0) {
- if (s->poll_fd == -EAGAIN) {
+ ret = tap_aio_setup(&s->aio_ctx, s->aio_events, s->max_aio_reqs);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
DPRINTF("Couldn't setup AIO context. If you are "
"trying to concurrently use a large number "
"of blktap-based disks, you may need to "
@@ -229,9 +223,7 @@ static int init_aio_state(struct disk_dr
"(e.g. 'echo echo 1048576 > /proc/sys/fs/"
"aio-max-nr')\n");
} else {
- DPRINTF("Couldn't get fd for AIO poll support. This "
- "is probably because your kernel does not "
- "have the aio-poll patch applied.\n");
+ DPRINTF("Couldn't setup AIO context.\n");
}
goto fail;
}
@@ -845,7 +837,7 @@ static inline void init_fds(struct disk_
for(i = 0; i < MAX_IOFD; i++)
dd->io_fd[i] = 0;

- dd->io_fd[0] = s->poll_fd;
+ dd->io_fd[0] = s->aio_ctx.pollfd;
}

/* Open the disk file and initialize qcow state. */
@@ -1144,7 +1136,7 @@ int tdqcow_submit(struct disk_driver *dd
if (!prv->iocb_queued)
return 0;

- ret = io_submit(prv->aio_ctx, prv->iocb_queued, prv->iocb_queue);
+ ret = io_submit(prv->aio_ctx.aio_ctx, prv->iocb_queued, prv->iocb_queue);

/* XXX: TODO: Handle error conditions here. */

@@ -1172,7 +1164,7 @@ int tdqcow_close(struct disk_driver *dd)
close(fd);
}

- io_destroy(s->aio_ctx);
+ io_destroy(s->aio_ctx.aio_ctx);
free(s->name);
free(s->l1_table);
free(s->l2_cache);
@@ -1184,17 +1176,15 @@ int tdqcow_close(struct disk_driver *dd)

int tdqcow_do_callbacks(struct disk_driver *dd, int sid)
{
- int ret, i, rsp = 0,*ptr;
+ int ret, i, nr_events, rsp = 0,*ptr;
struct io_event *ep;
struct tdqcow_state *prv = (struct tdqcow_state *)dd->private;

if (sid > MAX_IOFD) return 1;
-
- /* Non-blocking test for completed io. */
- ret = io_getevents(prv->aio_ctx, 0, prv->max_aio_reqs, prv->aio_events,
- NULL);
-
- for (ep = prv->aio_events, i = ret; i-- > 0; ep++) {
+
+ nr_events = tap_aio_get_events(&prv->aio_ctx);
+repeat:
+ for (ep = prv->aio_events, i = nr_events; i-- > 0; ep++) {
struct iocb *io = ep->obj;
struct pending_aio *pio;

@@ -1215,6 +1205,14 @@ int tdqcow_do_callbacks(struct disk_driv

prv->iocb_free[prv->iocb_free_count++] = io;
}
+
+ if (nr_events) {
+ nr_events = tap_aio_more_events(&prv->aio_ctx);
+ goto repeat;
+ }
+
+ tap_aio_continue(&prv->aio_ctx);
+
return rsp;
}

diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.c Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "tapaio.h"
+#include "tapdisk.h"
+#include <errno.h>
+#include <unistd.h>
+
+/**
+ * We used a kernel patch to return an fd associated with the AIO context
+ * so that we can concurrently poll on synchronous and async descriptors.
+ * This is signalled by passing 1 as the io context to io_setup.
+ */
+#define REQUEST_ASYNC_FD 1
+
+/*
+ * If we don't have any way to do epoll on aio events in a normal kernel,
+ * wait for aio events in a separate thread and return completion status
+ * via a pipe that can be waited on normally.
+ *
+ * To keep locking problems between the completion thread and the submit
+ * thread to a minimum, there's a handshake which allows only one thread
+ * to be doing work on the completion queue at a time:
+ *
+ * 1) main thread sends completion thread a command via the command pipe;
+ * 2) completion thread waits for aio events and returns the number
+ * received on the completion pipe
+ * 3) main thread processes the received ctx->aio_events events
+ * 4) loop back to 1) to let the completion thread refill the aio_events
+ * buffer.
+ *
+ * This workaround needs to disappear once the kernel provides a single
+ * mechanism for waiting on both aio and normal fd wakeups.
+ */
+/*
+ * Entry point of the completion thread started by tap_aio_setup().
+ *
+ * arg is the tap_aio_context_t handed to pthread_create().  Each round the
+ * thread waits for one command word on ctx->command_fd, then blocks in
+ * io_getevents() until at least one event has been stored in
+ * ctx->aio_events, and finally writes the result to ctx->completion_fd.
+ *
+ * Returns NULL once the command pipe can no longer be read (EOF or a
+ * read error other than EINTR).
+ */
+static void *
+tap_aio_completion_thread(void *arg)
+{
+        tap_aio_context_t *ctx = (tap_aio_context_t *) arg;
+        int command;
+        int nr_events;
+        int rc;
+
+        while (1) {
+                /*
+                 * The event buffer belongs to the main thread until it
+                 * sends a command, so never go on without a complete one.
+                 */
+                rc = read(ctx->command_fd[0], &command, sizeof(command));
+                if (rc < 0 && errno == EINTR)
+                        continue;
+                if (rc != sizeof(command))
+                        break;
+
+                do {
+                        rc = io_getevents(ctx->aio_ctx, 1,
+                                          ctx->max_aio_events, ctx->aio_events,
+                                          NULL);
+                        if (rc == -EINTR) {
+                                /* Interrupted wait: not a result, try again. */
+                                rc = 0;
+                        } else if (rc) {
+                                nr_events = rc;
+                                rc = write(ctx->completion_fd[1], &nr_events,
+                                           sizeof(nr_events));
+                        }
+                } while (!rc);
+        }
+
+        return NULL;
+}
+
+/*
+ * Hand the event buffer back to the completion thread by writing one
+ * command word to the command pipe.  Does nothing when the context is
+ * not using the completion thread.
+ */
+void
+tap_aio_continue(tap_aio_context_t *ctx)
+{
+        int cmd = 0;
+
+        if (ctx->poll_in_thread &&
+            write(ctx->command_fd[1], &cmd, sizeof(cmd)) < 0)
+                DPRINTF("Cannot write to command pipe\n");
+}
+
+/*
+ * Initialise an AIO context and select how completions are signalled.
+ *
+ * ctx            context to fill in
+ * aio_events     caller-owned buffer that receives completed events
+ * max_aio_events capacity of aio_events; also passed to io_setup()
+ *
+ * First asks io_setup() for a pollable fd by passing REQUEST_ASYNC_FD as
+ * the initial context value.  A positive result is stored in ctx->pollfd
+ * and returned.  A negative result other than -EINVAL is returned as is.
+ * Any other result (-EINVAL or 0) falls through to the fallback: a plain
+ * io_setup() plus a completion thread that reports through a pipe, in
+ * which case ctx->pollfd is the read end of that pipe and 0 is returned.
+ *
+ * Returns a negative value on failure: the io_setup() result, or -1 when
+ * the pipes or the thread cannot be created.  On those late failures the
+ * pipes and the AIO context created here are released again.
+ */
+int
+tap_aio_setup(tap_aio_context_t *ctx,
+              struct io_event *aio_events,
+              int max_aio_events)
+{
+        int ret;
+
+        ctx->aio_events = aio_events;
+        ctx->max_aio_events = max_aio_events;
+        ctx->poll_in_thread = 0;
+
+        ctx->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0 && ret != -EINVAL)
+                return ret;
+        else if (ret > 0) {
+                ctx->pollfd = ret;
+                return ctx->pollfd;
+        }
+
+        ctx->aio_ctx = (io_context_t) 0;
+        ret = io_setup(ctx->max_aio_events, &ctx->aio_ctx);
+        if (ret < 0)
+                return ret;
+
+        if (pipe(ctx->command_fd) < 0) {
+                DPRINTF("Unable to create command pipe\n");
+                goto fail_ctx;
+        }
+        if (pipe(ctx->completion_fd) < 0) {
+                DPRINTF("Unable to create completion pipe\n");
+                goto fail_command;
+        }
+
+        if (pthread_create(&ctx->aio_thread, NULL,
+                           tap_aio_completion_thread, ctx) != 0) {
+                DPRINTF("Unable to create completion thread\n");
+                goto fail_completion;
+        }
+
+        ctx->pollfd = ctx->completion_fd[0];
+        ctx->poll_in_thread = 1;
+
+        /* Send the first command so the thread starts collecting events. */
+        tap_aio_continue(ctx);
+
+        return 0;
+
+fail_completion:
+        close(ctx->completion_fd[0]);
+        close(ctx->completion_fd[1]);
+fail_command:
+        close(ctx->command_fd[0]);
+        close(ctx->command_fd[1]);
+fail_ctx:
+        io_destroy(ctx->aio_ctx);
+        /* Do not leave a stale handle behind for the caller's error path. */
+        ctx->aio_ctx = (io_context_t) 0;
+        return -1;
+}
+
+/*
+ * Fetch the first batch of completions into ctx->aio_events and return
+ * the io_getevents() result or the count sent by the completion thread.
+ *
+ * With the completion thread active, the count is read from the
+ * completion pipe; a failed read leaves it at 0.  Otherwise
+ * io_getevents() is called directly with a minimum of one event.
+ */
+int
+tap_aio_get_events(tap_aio_context_t *ctx)
+{
+        int nr_events = 0;
+
+        if (ctx->poll_in_thread) {
+                read(ctx->completion_fd[0], &nr_events, sizeof(nr_events));
+                return nr_events;
+        }
+
+        nr_events = io_getevents(ctx->aio_ctx, 1,
+                                 ctx->max_aio_events, ctx->aio_events, NULL);
+        return nr_events;
+}
+
+/*
+ * Collect any further completions into ctx->aio_events, asking
+ * io_getevents() for a minimum of zero events.  Returns the
+ * io_getevents() result unchanged.
+ */
+int tap_aio_more_events(tap_aio_context_t *ctx)
+{
+        int nr_events;
+
+        nr_events = io_getevents(ctx->aio_ctx, 0,
+                                 ctx->max_aio_events, ctx->aio_events, NULL);
+        return nr_events;
+}
+
+
diff -r c20bc60f9243 -r 810885428743 tools/blktap/drivers/tapaio.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/blktap/drivers/tapaio.h Wed Jun 20 12:49:27 2007 -0600
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2006 Andrew Warfield and Julian Chesterfield
+ * Copyright (c) 2007 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __TAPAIO_H__
+#define __TAPAIO_H__
+
+#include <pthread.h>
+#include <libaio.h>
+
+struct tap_aio_context { /* per-disk AIO state filled in by tap_aio_setup() */
+ io_context_t aio_ctx; /* libaio context created with io_setup() */
+
+ struct io_event *aio_events; /* caller-owned buffer that receives completed events */
+ int max_aio_events; /* capacity of aio_events; passed to io_setup()/io_getevents() */
+
+ pthread_t aio_thread; /* completion thread; created only on the fallback path */
+ int command_fd[2]; /* pipe: main thread -> completion thread */
+ int completion_fd[2]; /* pipe: completion thread -> main thread, carries the event count */
+ int pollfd; /* fd the driver polls: the fd returned by io_setup() or completion_fd[0] */
+ unsigned int poll_in_thread : 1; /* 1 when the completion thread is in use */
+};
+
+typedef struct tap_aio_context tap_aio_context_t;
+
+int tap_aio_setup (tap_aio_context_t *ctx,
+ struct io_event *aio_events,
+ int max_aio_events);
+void tap_aio_continue (tap_aio_context_t *ctx);
+int tap_aio_get_events (tap_aio_context_t *ctx);
+int tap_aio_more_events(tap_aio_context_t *ctx);
+
+#endif /* __TAPAIO_H__ */
diff -r c20bc60f9243 -r 810885428743 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/examples/init.d/xendomains Wed Jun 20 12:49:27 2007 -0600
@@ -182,25 +182,31 @@ rdnames()

parseln()
{
- name=`echo "$1" | cut -c0-17`
- name=${name%% *}
- rest=`echo "$1" | cut -c18- `
- read id mem cpu vcpu state tm < <(echo "$rest")
+ if [[ "$1" =~ "\(domain" ]]; then
+ name=;id=
+ else if [[ "$1" =~ "\(name" ]]; then
+ name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/')
+ else if [[ "$1" =~ "\(domid" ]]; then
+ id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/')
+ fi; fi; fi
+
+ [ -n "$name" -a -n "$id" ] && return 0 || return 1
}

is_running()
{
rdname $1
RC=1
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
case $name in
($NM)
RC=0
;;
esac
- done < <(xm list | grep -v '^Name')
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
return $RC
}

@@ -267,13 +273,14 @@ start()

all_zombies()
{
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
if test "$state" != "-b---d" -a "$state" != "-----d"; then
return 1;
fi
- done < <(xm list | grep -v '^Name')
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
return 0
}

@@ -309,8 +316,9 @@ stop()
rdnames
fi
echo -n "Shutting down Xen domains:"
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
echo -n " $name"
if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
@@ -384,7 +392,7 @@ stop()
fi
kill $WDOG_PID >/dev/null 2>&1
fi
- done < <(xm list | grep -v '^Name')
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')

# NB. this shuts down ALL Xen domains (politely), not just the ones in
# AUTODIR/*
@@ -409,15 +417,16 @@ stop()

check_domain_up()
{
+ name=;id=
while read LN; do
- parseln "$LN"
+ parseln "$LN" || continue
if test $id = 0; then continue; fi
case $name in
($1)
return 0
;;
esac
- done < <(xm list | grep -v "^Name")
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
return 1
}

diff -r c20bc60f9243 -r 810885428743 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/block-raw.c Wed Jun 20 12:49:27 2007 -0600
@@ -166,7 +166,7 @@ typedef struct RawAIOCB {
struct RawAIOCB *next;
} RawAIOCB;

-static int aio_sig_num = SIGUSR2;
+const int aio_sig_num = SIGUSR2;
static RawAIOCB *first_aio; /* AIO issued */
static int aio_initialized = 0;

diff -r c20bc60f9243 -r 810885428743 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c Wed Jun 20 12:49:27 2007 -0600
@@ -443,19 +443,40 @@ extern unsigned long logdirty_bitmap_siz
* Forcing a word-sized read/write prevents the guest from seeing a partially
* written word-sized atom.
*/
-void memcpy_words(void *dst, void *src, size_t n)
-{
- while (n >= sizeof(long)) {
- *((long *)dst) = *((long *)src);
- dst = ((long *)dst) + 1;
- src = ((long *)src) + 1;
- n -= sizeof(long);
- }
-
- if (n & 4) {
+#if defined(__x86_64__) || defined(__i386__)
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+    /*
+     * Copy n bytes from src to dst using the widest string moves first
+     * (8 bytes on x86_64, 4 on i386), then a 4-, 2- and 1-byte tail as
+     * selected by the low bits of the count.
+     *
+     * The string instructions advance ESI/EDI and the count in EDX is
+     * masked in place, so all three are read-write operands.  The
+     * "memory" clobber tells the compiler that the asm reads *src and
+     * writes *dst; "cc" covers the flags changed by shr/and/test.
+     */
+    asm volatile (
+        " movl %%edx,%%ecx \n"
+#ifdef __x86_64
+        " shrl $3,%%ecx \n"
+        " andl $7,%%edx \n"
+        " rep movsq \n"
+        " test $4,%%edx \n"
+        " jz 1f \n"
+        " movsl \n"
+#else /* __i386__ */
+        " shrl $2,%%ecx \n"
+        " andl $3,%%edx \n"
+        " rep movsl \n"
+#endif
+        "1: test $2,%%edx \n"
+        " jz 1f \n"
+        " movsw \n"
+        "1: test $1,%%edx \n"
+        " jz 1f \n"
+        " movsb \n"
+        "1: \n"
+        : "+S" (src), "+D" (dst), "+d" (n)
+        :
+        : "ecx", "cc", "memory" );
+}
+#else
+static void memcpy_words(void *dst, void *src, size_t n)
+{
+ while (n >= sizeof(uint32_t)) {
*((uint32_t *)dst) = *((uint32_t *)src);
dst = ((uint32_t *)dst) + 1;
src = ((uint32_t *)src) + 1;
+ n -= sizeof(uint32_t);
}

if (n & 2) {
@@ -470,6 +491,7 @@ void memcpy_words(void *dst, void *src,
src = ((uint8_t *)src) + 1;
}
}
+#endif

void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
int len, int is_write)
diff -r c20bc60f9243 -r 810885428743 tools/ioemu/vl.c
--- a/tools/ioemu/vl.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/ioemu/vl.c Wed Jun 20 12:49:27 2007 -0600
@@ -7059,6 +7059,18 @@ int main(int argc, char **argv)
#endif

char qemu_dm_logfilename[128];
+
+ /* Ensure that SIGUSR2 is blocked by default when a new thread is created,
+ then only the threads that use the signal unblock it -- this fixes a
+ race condition in Qcow support where the AIO signal is misdelivered. */
+ {
+ extern const int aio_sig_num;
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, aio_sig_num);
+ sigprocmask(SIG_BLOCK, &set, NULL);
+ }

LIST_INIT (&vm_change_state_head);
#ifndef _WIN32
diff -r c20bc60f9243 -r 810885428743 tools/libxc/xc_core.c
--- a/tools/libxc/xc_core.c Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/libxc/xc_core.c Wed Jun 20 12:49:27 2007 -0600
@@ -156,7 +156,7 @@ struct xc_core_section_headers {
Elf64_Shdr *shdrs;
};
#define SHDR_INIT 16
-#define SHDR_INC 4
+#define SHDR_INC 4U

static struct xc_core_section_headers*
xc_core_shdr_init(void)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Jun 20 12:49:27 2007 -0600
@@ -983,7 +983,7 @@ class XendDomainInfo:
self.info['VCPUs_live'] = vcpus
self._writeDom(self._vcpuDomDetails())
else:
- self.info['VCPUs_live'] = vcpus
+ self.info['VCPUs_max'] = vcpus
xen.xend.XendDomain.instance().managed_config_save(self)
log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
vcpus)
diff -r c20bc60f9243 -r 810885428743 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Wed Jun 20 12:47:52 2007 -0600
+++ b/tools/python/xen/xend/server/blkif.py Wed Jun 20 12:49:27 2007 -0600
@@ -98,6 +98,11 @@ class BlkifController(DevController):

if (dev_type == 'cdrom' and new_front['device-type'] == 'cdrom' and
dev == new_back['dev'] and mode == 'r'):
+ # dummy device
+ self.writeBackend(devid,
+ 'type', new_back['type'],
+ 'params', '')
+ # new backend-device
self.writeBackend(devid,
'type', new_back['type'],
'params', new_back['params'])
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c Wed Jun 20 12:49:27 2007 -0600
@@ -1146,9 +1146,8 @@ static void __init loaddomainelfimage(st
dom_imva = __va_ul(page_to_maddr(p));
if (filesz > 0) {
if (filesz >= PAGE_SIZE)
- memcpy((void *) dom_imva,
- (void *) elfaddr,
- PAGE_SIZE);
+ copy_page((void *) dom_imva,
+ (void *) elfaddr);
else {
// copy partial page
memcpy((void *) dom_imva,
@@ -1166,7 +1165,7 @@ static void __init loaddomainelfimage(st
}
else if (memsz > 0) {
/* always zero out entire page */
- memset((void *) dom_imva, 0, PAGE_SIZE);
+ clear_page((void *) dom_imva);
}
memsz -= PAGE_SIZE;
filesz -= PAGE_SIZE;
@@ -1367,7 +1366,7 @@ int __init construct_dom0(struct domain
if (start_info_page == NULL)
panic("can't allocate start info page");
si = page_to_virt(start_info_page);
- memset(si, 0, PAGE_SIZE);
+ clear_page(si);
snprintf(si->magic, sizeof(si->magic), "xen-%i.%i-ia64",
xen_major_version(), xen_minor_version());
si->nr_pages = max_pages;
diff -r c20bc60f9243 -r 810885428743 xen/arch/ia64/xen/xenmem.c
--- a/xen/arch/ia64/xen/xenmem.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/ia64/xen/xenmem.c Wed Jun 20 12:49:27 2007 -0600
@@ -90,7 +90,7 @@ alloc_dir_page(void)
panic("Not enough memory for virtual frame table!\n");
++table_size;
dir = mfn << PAGE_SHIFT;
- memset(__va(dir), 0, PAGE_SIZE);
+ clear_page(__va(dir));
return dir;
}

diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/apic.c Wed Jun 20 12:49:27 2007 -0600
@@ -817,7 +817,7 @@ void __init init_apic_mappings(void)
*/
if (!smp_found_config && detect_init_APIC()) {
apic_phys = __pa(alloc_xenheap_page());
- memset(__va(apic_phys), 0, PAGE_SIZE);
+ clear_page(__va(apic_phys));
} else
apic_phys = mp_lapic_addr;

@@ -852,7 +852,7 @@ void __init init_apic_mappings(void)
} else {
fake_ioapic_page:
ioapic_phys = __pa(alloc_xenheap_page());
- memset(__va(ioapic_phys), 0, PAGE_SIZE);
+ clear_page(__va(ioapic_phys));
}
set_fixmap_nocache(idx, ioapic_phys);
apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/cmdline.S
--- a/xen/arch/x86/boot/cmdline.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/cmdline.S Wed Jun 20 12:49:27 2007 -0600
@@ -119,30 +119,31 @@ 3: pop %edi
ret

.Lfind_option:
- push %ebx
- push 4+8(%esp)
- push 4+8(%esp)
+ mov 4(%esp),%eax
+ dec %eax
+ push %ebx
+1: pushl 4+8(%esp)
+ inc %eax
+ push %eax
call .Lstrstr
add $8,%esp
test %eax,%eax
jz 3f
cmp %eax,4+4(%esp)
- je 1f
+ je 2f
cmpb $' ',-1(%eax)
- jne 2f
-1: mov %eax,%ebx
- push 4+8(%esp)
+ jne 1b
+2: mov %eax,%ebx
+ pushl 4+8(%esp)
call .Lstrlen
add $4,%esp
- xchg %eax,%ebx
- add %eax,%ebx
+ xadd %eax,%ebx
cmpb $'\0',(%ebx)
je 3f
cmpb $' ',(%ebx)
je 3f
cmpb $'=',(%ebx)
- je 3f
-2: xor %eax,%eax
+ jne 1b
3: pop %ebx
ret

@@ -297,7 +298,7 @@ 1: lodsw
call .Lstr_prefix
add $8,%esp
test %eax,%eax
- jnz .Lcmdline_exit
+ jnz .Lparse_vga_current

/* We have 'vga=mode-<mode>'. */
add $5,%ebx
@@ -305,6 +306,19 @@ 1: lodsw
call .Latoi
add $4,%esp
mov %ax,bootsym_phys(boot_vid_mode)
+ jmp .Lcmdline_exit
+
+.Lparse_vga_current:
+ /* Check for 'vga=current'. */
+ push %ebx
+ pushl $sym_phys(.Lvga_current)
+ call .Lstr_prefix
+ add $8,%esp
+ test %eax,%eax
+ jnz .Lcmdline_exit
+
+ /* We have 'vga=current'. */
+ movw $VIDEO_CURRENT_MODE,bootsym_phys(boot_vid_mode)

.Lcmdline_exit:
popa
@@ -328,6 +342,8 @@ 1: lodsw
.asciz "gfx-"
.Lvga_mode:
.asciz "mode-"
+.Lvga_current:
+ .asciz "current"
.Lno_rm_opt:
.asciz "no-real-mode"
.Ledid_opt:
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/trampoline.S
--- a/xen/arch/x86/boot/trampoline.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/trampoline.S Wed Jun 20 12:49:27 2007 -0600
@@ -13,12 +13,11 @@ trampoline_realmode_entry:
cli
lidt bootsym(idt_48)
lgdt bootsym(gdt_48)
+ mov $1,%bl # EBX != 0 indicates we are an AP
xor %ax, %ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- mov $1,%bl # EBX != 0 indicates we are an AP
- jmp 1f
-1: ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+ ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)

idt_48: .word 0, 0, 0 # base = limit = 0
gdt_48: .word 6*8-1
@@ -135,10 +134,9 @@ trampoline_boot_cpu_entry:
ljmp $BOOT_PSEUDORM_CS,$bootsym(1f)
.code16
1: mov %eax,%cr0 # CR0.PE = 0 (leave protected mode)
- jmp 1f

/* Load proper real-mode values into %cs, %ds, %es and %ss. */
-1: ljmp $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
+ ljmp $(BOOT_TRAMPOLINE>>4),$bootsym(1f)
1: mov $(BOOT_TRAMPOLINE>>4),%ax
mov %ax,%ds
mov %ax,%es
@@ -166,10 +164,9 @@ 1: mov $(BOOT_TRAMPOLINE>>4),%a
xor %ax,%ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- jmp 1f

/* Load proper protected-mode values into all segment registers. */
-1: ljmpl $BOOT_CS32,$bootsym_phys(1f)
+ ljmpl $BOOT_CS32,$bootsym_phys(1f)
.code32
1: mov $BOOT_DS,%eax
mov %eax,%ds
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.S
--- a/xen/arch/x86/boot/video.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.S Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,10 @@

#include "video.h"

-#define modelist (0x3000)
+/* Scratch space layout. */
+#define modelist (0x3000)
+#define vesa_glob_info (modelist + 1024)
+#define vesa_mode_info (vesa_glob_info + 1024)

/* Retrieve Extended Display Identification Data. */
#define CONFIG_FIRMWARE_EDID
@@ -109,7 +112,7 @@ mopar2: movb %al, _param(PARAM_VIDEO_

# Fetching of VESA frame buffer parameters
mopar_gr:
- leaw modelist+1024, %di
+ leaw vesa_mode_info, %di
movb $0x23, _param(PARAM_HAVE_VGA)
movw 16(%di), %ax
movw %ax, _param(PARAM_LFB_LINELENGTH)
@@ -128,9 +131,7 @@ mopar_gr:
movl %eax, _param(PARAM_LFB_COLORS+4)

# get video mem size
- leaw modelist+1024, %di
- movw $0x4f00, %ax
- int $0x10
+ leaw vesa_glob_info, %di
xorl %eax, %eax
movw 18(%di), %ax
movl %eax, _param(PARAM_LFB_SIZE)
@@ -183,7 +184,10 @@ dac_done:

movw %es, _param(PARAM_VESAPM_SEG)
movw %di, _param(PARAM_VESAPM_OFF)
-no_pm: ret
+
+no_pm: pushw %ds
+ popw %es
+ ret

# The video mode menu
mode_menu:
@@ -428,17 +432,13 @@ setmenu:
jmp mode_set

check_vesa:
-#ifdef CONFIG_FIRMWARE_EDID
- leaw modelist+1024, %di
+ leaw vesa_glob_info, %di
movw $0x4f00, %ax
int $0x10
cmpw $0x004f, %ax
jnz setbad

- movw 4(%di), %ax
- movw %ax, bootsym(vbe_version)
-#endif
- leaw modelist+1024, %di
+ leaw vesa_mode_info, %di
subb $VIDEO_FIRST_VESA>>8, %bh
movw %bx, %cx # Get mode information structure
movw $0x4f01, %ax
@@ -447,7 +447,7 @@ check_vesa:
cmpw $0x004f, %ax
jnz setbad

- movb (%di), %al # Check capabilities.
+ movb (%di), %al # Check mode attributes.
andb $0x99, %al
cmpb $0x99, %al
jnz _setbad # Doh! No linear frame buffer.
@@ -530,6 +530,7 @@ spec_inits:
.word bootsym(set_8pixel)
.word bootsym(set_80x43)
.word bootsym(set_80x28)
+ .word bootsym(set_current)
.word bootsym(set_80x30)
.word bootsym(set_80x34)
.word bootsym(set_80x60)
@@ -575,6 +576,7 @@ set14: movw $0x1111, %ax
movb $0x01, %ah # Define cursor scan lines 11-12
movw $0x0b0c, %cx
int $0x10
+set_current:
stc
ret

@@ -695,33 +697,34 @@ vga_modes_end:
# Detect VESA modes.
vesa_modes:
movw %di, %bp # BP=original mode table end
- addw $0x200, %di # Buffer space
+ leaw vesa_glob_info, %di
movw $0x4f00, %ax # VESA Get card info call
int $0x10
+ movw %di, %si
movw %bp, %di
cmpw $0x004f, %ax # Successful?
jnz ret0

- cmpw $0x4556, 0x200(%di) # 'VE'
+ cmpw $0x4556, (%si) # 'VE'
jnz ret0

- cmpw $0x4153, 0x202(%di) # 'SA'
+ cmpw $0x4153, 2(%si) # 'SA'
jnz ret0

movw $bootsym(vesa_name), bootsym(card_name) # Set name to "VESA VGA"
pushw %gs
- lgsw 0x20e(%di), %si # GS:SI=mode list
+ lgsw 0xe(%si), %si # GS:SI=mode list
movw $128, %cx # Iteration limit
vesa1:
gs; lodsw
- cmpw $0xffff, %ax # End of the table?
+ cmpw $0xffff, %ax # End of the table?
jz vesar

- cmpw $0x0080, %ax # Check validity of mode ID
+ cmpw $0x0080, %ax # Check validity of mode ID
jc vesa2

- orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
- jz vesan # Certain BIOSes report 0x80-0xff!
+ orb %ah, %ah # Valid IDs 0x0000-0x007f/0x0100-0x07ff
+ jz vesan # Certain BIOSes report 0x80-0xff!

cmpw $0x0800, %ax
jnc vesae
@@ -891,8 +894,13 @@ store_edid:
cmpb $1, bootsym(opt_edid) # EDID disabled on cmdline (edid=no)?
je .Lno_edid

- cmpw $0x0200, bootsym(vbe_version) # only do EDID on >= VBE2.0
- jl .Lno_edid
+ leaw vesa_glob_info, %di
+ movw $0x4f00, %ax
+ int $0x10
+ cmpw $0x004f, %ax
+ jne .Lno_edid
+ cmpw $0x0200, 4(%di) # only do EDID on >= VBE2.0
+ jb .Lno_edid

xorw %di, %di # Report Capability
pushw %di
@@ -901,6 +909,8 @@ store_edid:
xorw %bx, %bx
xorw %cx, %cx
int $0x10
+ pushw %ds
+ popw %es
cmpw $0x004f, %ax # Call failed?
jne .Lno_edid

@@ -920,8 +930,6 @@ store_edid:
movw $0x01, %bx
movw $0x00, %cx
movw $0x00, %dx
- pushw %ds
- popw %es
movw $bootsym(boot_edid_info), %di
int $0x10

@@ -940,7 +948,6 @@ card_name: .word 0 # Pointe
card_name: .word 0 # Pointer to adapter name
graphic_mode: .byte 0 # Graphic mode with a linear frame buffer
dac_size: .byte 6 # DAC bit depth
-vbe_version: .word 0 # VBE bios version

# Status messages
keymsg: .ascii "Press <RETURN> to see video modes available,"
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/video.h
--- a/xen/arch/x86/boot/video.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/video.h Wed Jun 20 12:49:27 2007 -0600
@@ -16,10 +16,11 @@
#define VIDEO_80x50 0x0f01
#define VIDEO_80x43 0x0f02
#define VIDEO_80x28 0x0f03
-#define VIDEO_80x30 0x0f04
-#define VIDEO_80x34 0x0f05
-#define VIDEO_80x60 0x0f06
-#define VIDEO_LAST_SPECIAL 0x0f07
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_LAST_SPECIAL 0x0f08

#define ASK_VGA 0xfffd
#define VIDEO_VESA_BY_SIZE 0xffff
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S Wed Jun 20 12:49:27 2007 -0600
@@ -30,9 +30,7 @@ 1: mov %eax,(%edi)
loop 1b

/* Pass off the Multiboot info structure to C land. */
- mov multiboot_ptr,%eax
- add $__PAGE_OFFSET,%eax
- push %eax
+ pushl multiboot_ptr
call __start_xen
ud2 /* Force a panic (invalid opcode). */

diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S Wed Jun 20 12:49:27 2007 -0600
@@ -51,8 +51,6 @@ 1: movq %rax,(%rdi)

/* Pass off the Multiboot info structure to C land. */
mov multiboot_ptr(%rip),%edi
- lea start-0x100000(%rip),%rax
- add %rax,%rdi
call __start_xen
ud2 /* Force a panic (invalid opcode). */

diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain.c Wed Jun 20 12:49:27 2007 -0600
@@ -232,26 +232,28 @@ static int setup_compat_l4(struct vcpu *
l4_pgentry_t *l4tab;
int rc;

- if ( !pg )
+ if ( pg == NULL )
return -ENOMEM;

/* This page needs to look like a pagetable so that it can be shadowed */
pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated;

l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+ l4tab[0] = l4e_empty();
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_page(pg, __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3),
__PAGE_HYPERVISOR);
+
+ if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
+ {
+ free_domheap_page(pg);
+ return rc;
+ }
+
v->arch.guest_table = pagetable_from_page(pg);
v->arch.guest_table_user = v->arch.guest_table;
-
- if ( (rc = setup_arg_xlat_area(v, l4tab)) < 0 )
- {
- free_domheap_page(pg);
- return rc;
- }

return 0;
}
@@ -318,11 +320,11 @@ int switch_compat(struct domain *d)
gdt_l1e = l1e_from_page(virt_to_page(compat_gdt_table), PAGE_HYPERVISOR);
for ( vcpuid = 0; vcpuid < MAX_VIRT_CPUS; vcpuid++ )
{
+ if ( (d->vcpu[vcpuid] != NULL) &&
+ (setup_compat_l4(d->vcpu[vcpuid]) != 0) )
+ goto undo_and_fail;
d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
- if (d->vcpu[vcpuid]
- && setup_compat_l4(d->vcpu[vcpuid]) != 0)
- return -ENOMEM;
}

d->arch.physaddr_bitsize =
@@ -330,6 +332,19 @@ int switch_compat(struct domain *d)
+ (PAGE_SIZE - 2);

return 0;
+
+ undo_and_fail:
+ d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
+ release_arg_xlat_area(d);
+ gdt_l1e = l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
+ while ( vcpuid-- != 0 )
+ {
+ if ( d->vcpu[vcpuid] != NULL )
+ release_compat_l4(d->vcpu[vcpuid]);
+ d->arch.mm_perdomain_pt[((vcpuid << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
+ }
+ return -ENOMEM;
}

#else
@@ -461,7 +476,7 @@ int arch_domain_create(struct domain *d)
if ( (d->shared_info = alloc_xenheap_page()) == NULL )
goto fail;

- memset(d->shared_info, 0, PAGE_SIZE);
+ clear_page(d->shared_info);
share_xen_page_with_guest(
virt_to_page(d->shared_info), d, XENSHARE_writable);
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/domain_build.c Wed Jun 20 12:49:27 2007 -0600
@@ -505,7 +505,7 @@ int __init construct_dom0(
v->arch.guest_table = pagetable_from_paddr((unsigned long)l3start);
#else
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
- memcpy(l2tab, idle_pg_table, PAGE_SIZE);
+ copy_page(l2tab, idle_pg_table);
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
l2e_from_paddr((unsigned long)l2start, __PAGE_HYPERVISOR);
v->arch.guest_table = pagetable_from_paddr((unsigned long)l2start);
@@ -645,7 +645,7 @@ int __init construct_dom0(
panic("Not enough RAM for domain 0 PML4.\n");
l4start = l4tab = page_to_virt(page);
}
- memcpy(l4tab, idle_pg_table, PAGE_SIZE);
+ copy_page(l4tab, idle_pg_table);
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
@@ -823,7 +823,7 @@ int __init construct_dom0(

/* Set up start info area. */
si = (start_info_t *)vstartinfo_start;
- memset(si, 0, PAGE_SIZE);
+ clear_page(si);
si->nr_pages = nr_pages;

si->shared_info = virt_to_maddr(d->shared_info);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/flushtlb.c
--- a/xen/arch/x86/flushtlb.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/flushtlb.c Wed Jun 20 12:49:27 2007 -0600
@@ -80,6 +80,8 @@ void write_cr3(unsigned long cr3)

t = pre_flush();

+ hvm_flush_guest_tlbs();
+
#ifdef USER_MAPPINGS_ARE_GLOBAL
__pge_off();
__asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
@@ -103,6 +105,8 @@ void local_flush_tlb(void)

t = pre_flush();

+ hvm_flush_guest_tlbs();
+
#ifdef USER_MAPPINGS_ARE_GLOBAL
__pge_off();
__pge_on();
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Wed Jun 20 12:49:27 2007 -0600
@@ -831,11 +831,24 @@ void hvm_update_guest_cr3(struct vcpu *v
hvm_funcs.update_guest_cr3(v);
}

+static void hvm_latch_shinfo_size(struct domain *d)
+{
+ /*
+ * Called from operations which are among the very first executed by
+ * PV drivers on initialisation or after save/restore. These are sensible
+ * points at which to sample the execution mode of the guest and latch
+ * 32- or 64-bit format for shared state.
+ */
+ if ( current->domain == d )
+ d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+}
+
/* Initialise a hypercall transfer page for a VMX domain using
paravirtualised drivers. */
void hvm_hypercall_page_initialise(struct domain *d,
void *hypercall_page)
{
+ hvm_latch_shinfo_size(d);
hvm_funcs.init_hypercall_page(d, hypercall_page);
}

@@ -1065,13 +1078,7 @@ long do_hvm_op(unsigned long op, XEN_GUE
break;
case HVM_PARAM_CALLBACK_IRQ:
hvm_set_callback_via(d, a.value);
- /*
- * Since this operation is one of the very first executed
- * by PV drivers on initialisation or after save/restore, it
- * is a sensible point at which to sample the execution mode of
- * the guest and latch 32- or 64-bit format for shared state.
- */
- d->arch.has_32bit_shinfo = (hvm_guest_x86_mode(current) != 8);
+ hvm_latch_shinfo_size(d);
break;
}
d->arch.hvm_domain.params[a.index] = a.value;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/irq.c
--- a/xen/arch/x86/hvm/irq.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/irq.c Wed Jun 20 12:49:27 2007 -0600
@@ -285,43 +285,49 @@ void hvm_set_callback_via(struct domain
}
}

-int cpu_has_pending_irq(struct vcpu *v)
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v)
{
struct hvm_domain *plat = &v->domain->arch.hvm_domain;

- /* APIC */
+ if ( unlikely(v->arch.hvm_vcpu.nmi_pending) )
+ return hvm_intack_nmi;
+
if ( vlapic_has_interrupt(v) != -1 )
- return 1;
-
- /* PIC */
+ return hvm_intack_lapic;
+
if ( !vlapic_accept_pic_intr(v) )
- return 0;
-
- return plat->vpic[0].int_output;
-}
-
-int cpu_get_interrupt(struct vcpu *v, int *type)
-{
- int vector;
-
- if ( (vector = cpu_get_apic_interrupt(v, type)) != -1 )
- return vector;
-
- if ( (v->vcpu_id == 0) &&
- ((vector = cpu_get_pic_interrupt(v, type)) != -1) )
- return vector;
-
- return -1;
-}
-
-int get_isa_irq_vector(struct vcpu *v, int isa_irq, int type)
+ return hvm_intack_none;
+
+ return plat->vpic[0].int_output ? hvm_intack_pic : hvm_intack_none;
+}
+
+int hvm_vcpu_ack_pending_irq(struct vcpu *v, enum hvm_intack type, int *vector)
+{
+ switch ( type )
+ {
+ case hvm_intack_nmi:
+ return test_and_clear_bool(v->arch.hvm_vcpu.nmi_pending);
+ case hvm_intack_lapic:
+ return ((*vector = cpu_get_apic_interrupt(v)) != -1);
+ case hvm_intack_pic:
+ ASSERT(v->vcpu_id == 0);
+ return ((*vector = cpu_get_pic_interrupt(v)) != -1);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int get_isa_irq_vector(struct vcpu *v, int isa_irq, enum hvm_intack src)
{
unsigned int gsi = hvm_isa_irq_to_gsi(isa_irq);

- if ( type == APIC_DM_EXTINT )
+ if ( src == hvm_intack_pic )
return (v->domain->arch.hvm_domain.vpic[isa_irq >> 3].irq_base
+ (isa_irq & 7));

+ ASSERT(src == hvm_intack_lapic);
return domain_vioapic(v->domain)->redirtbl[gsi].fields.vector;
}

@@ -337,19 +343,20 @@ int is_isa_irq_masked(struct vcpu *v, in
domain_vioapic(v->domain)->redirtbl[gsi].fields.mask);
}

-/*
- * TODO: 1. Should not need special treatment of event-channel events.
- * 2. Should take notice of interrupt shadows (or clear them).
- */
int hvm_local_events_need_delivery(struct vcpu *v)
{
- int pending;
-
- pending = (vcpu_info(v, evtchn_upcall_pending) || cpu_has_pending_irq(v));
- if ( unlikely(pending) )
- pending = hvm_interrupts_enabled(v);
-
- return pending;
+ enum hvm_intack type;
+
+ /* TODO: Get rid of event-channel special case. */
+ if ( vcpu_info(v, evtchn_upcall_pending) )
+ type = hvm_intack_pic;
+ else
+ type = hvm_vcpu_has_pending_irq(v);
+
+ if ( likely(type == hvm_intack_none) )
+ return 0;
+
+ return hvm_interrupts_enabled(v, type);
}

#if 0 /* Keep for debugging */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/asid.c
--- a/xen/arch/x86/hvm/svm/asid.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/asid.c Wed Jun 20 12:49:27 2007 -0600
@@ -60,7 +60,7 @@ struct svm_asid_data {
u64 core_asid_generation;
u32 next_asid;
u32 max_asid;
- u32 erratum170;
+ u32 erratum170:1;
};

static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data);
@@ -140,25 +140,21 @@ void svm_asid_init_vcpu(struct vcpu *v)
}

/*
- * Increase the Generation to make free ASIDs. Flush physical TLB and give
- * ASID.
- */
-static void svm_asid_handle_inc_generation(struct vcpu *v)
-{
- struct svm_asid_data *data = svm_asid_core_data();
-
- if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
- {
- /* Handle ASID overflow. */
+ * Increase the Generation to make free ASIDs, and indirectly cause a
+ * TLB flush of all ASIDs on the next vmrun.
+ */
+void svm_asid_inc_generation(void)
+{
+ struct svm_asid_data *data = svm_asid_core_data();
+
+ if ( likely(data->core_asid_generation < SVM_ASID_LAST_GENERATION) )
+ {
+ /* Move to the next generation. We can't flush the TLB now
+ * because you need to vmrun to do that, and current might not
+ * be an HVM vcpu, but the first HVM vcpu that runs after this
+ * will pick up ASID 1 and flush the TLBs. */
data->core_asid_generation++;
- data->next_asid = SVM_ASID_FIRST_GUEST_ASID + 1;
-
- /* Handle VCPU. */
- v->arch.hvm_svm.vmcb->guest_asid = SVM_ASID_FIRST_GUEST_ASID;
- v->arch.hvm_svm.asid_generation = data->core_asid_generation;
-
- /* Trigger flush of physical TLB. */
- v->arch.hvm_svm.vmcb->tlb_control = 1;
+ data->next_asid = SVM_ASID_FIRST_GUEST_ASID;
return;
}

@@ -168,11 +164,12 @@ static void svm_asid_handle_inc_generati
* this core (flushing TLB always). So correctness is established; it
* only runs a bit slower.
*/
- printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
- data->erratum170 = 1;
- data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
-
- svm_asid_init_vcpu(v);
+ if ( !data->erratum170 )
+ {
+ printk("AMD SVM: ASID generation overrun. Disabling ASIDs.\n");
+ data->erratum170 = 1;
+ data->core_asid_generation = SVM_ASID_INVALID_GENERATION;
+ }
}

/*
@@ -202,18 +199,21 @@ asmlinkage void svm_asid_handle_vmrun(vo
return;
}

- /* Different ASID generations trigger fetching of a fresh ASID. */
- if ( likely(data->next_asid <= data->max_asid) )
- {
- /* There is a free ASID. */
- v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
- v->arch.hvm_svm.asid_generation = data->core_asid_generation;
- v->arch.hvm_svm.vmcb->tlb_control = 0;
- return;
- }
-
- /* Slow path, may cause TLB flush. */
- svm_asid_handle_inc_generation(v);
+ /* If there are no free ASIDs, we need to move to a new generation. */
+ if ( unlikely(data->next_asid > data->max_asid) )
+ svm_asid_inc_generation();
+
+ /* Now guaranteed to be a free ASID. */
+ v->arch.hvm_svm.vmcb->guest_asid = data->next_asid++;
+ v->arch.hvm_svm.asid_generation = data->core_asid_generation;
+
+ /* When we assign ASID 1, flush all TLB entries. We need to do it
+ * here because svm_asid_inc_generation() can be called at any time,
+ * but the TLB flush can only happen on vmrun. */
+ if ( v->arch.hvm_svm.vmcb->guest_asid == SVM_ASID_FIRST_GUEST_ASID )
+ v->arch.hvm_svm.vmcb->tlb_control = 1;
+ else
+ v->arch.hvm_svm.vmcb->tlb_control = 0;
}

void svm_asid_inv_asid(struct vcpu *v)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c Wed Jun 20 12:49:27 2007 -0600
@@ -15,7 +15,6 @@
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
- *
*/

#include <xen/config.h>
@@ -39,100 +38,119 @@
#include <xen/domain_page.h>
#include <asm/hvm/trace.h>

-/*
- * Most of this code is copied from vmx_io.c and modified
- * to be suitable for SVM.
- */
-
-static inline int svm_inject_extint(struct vcpu *v, int trap)
+static void svm_inject_dummy_vintr(struct vcpu *v)
{
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
vintr_t intr = vmcb->vintr;

- /* Update only relevant fields */
intr.fields.irq = 1;
intr.fields.intr_masking = 1;
- intr.fields.vector = trap;
+ intr.fields.vector = 0;
intr.fields.prio = 0xF;
intr.fields.ign_tpr = 1;
vmcb->vintr = intr;
+}
+
+static void svm_inject_nmi(struct vcpu *v)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ eventinj_t event;

- return 0;
+ event.bytes = 0;
+ event.fields.v = 1;
+ event.fields.type = EVENTTYPE_NMI;
+ event.fields.vector = 2;
+
+ ASSERT(vmcb->eventinj.fields.v == 0);
+ vmcb->eventinj = event;
+}
+
+static void svm_inject_extint(struct vcpu *v, int vector)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ eventinj_t event;
+
+ event.bytes = 0;
+ event.fields.v = 1;
+ event.fields.type = EVENTTYPE_INTR;
+ event.fields.vector = vector;
+
+ ASSERT(vmcb->eventinj.fields.v == 0);
+ vmcb->eventinj = event;
}

asmlinkage void svm_intr_assist(void)
{
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- int intr_type = APIC_DM_EXTINT;
- int intr_vector = -1;
+ enum hvm_intack intr_source;
+ int intr_vector;

/*
- * Previous Interrupt delivery caused this intercept?
+ * Previous event delivery caused this intercept?
* This will happen if the injection is latched by the processor (hence
- * clearing vintr.fields.irq) but then subsequently a fault occurs (e.g.,
- * due to lack of shadow mapping of guest IDT or guest-kernel stack).
- *
- * NB. Exceptions that fault during delivery are lost. This needs to be
- * fixed but we'll usually get away with it since faults are usually
- * idempotent. But this isn't the case for e.g. software interrupts!
+ * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
+ * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
+ * stack).
*/
- if ( vmcb->exitintinfo.fields.v && (vmcb->exitintinfo.fields.type == 0) )
+ if ( vmcb->exitintinfo.fields.v )
{
- intr_vector = vmcb->exitintinfo.fields.vector;
+ vmcb->eventinj = vmcb->exitintinfo;
vmcb->exitintinfo.bytes = 0;
HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
- svm_inject_extint(v, intr_vector);
return;
}

- /*
- * Previous interrupt still pending? This occurs if we return from VMRUN
- * very early in the entry-to-guest process. Usually this is because an
- * external physical interrupt was pending when we executed VMRUN.
- */
- if ( vmcb->vintr.fields.irq )
- return;
-
- /* Crank the handle on interrupt state and check for new interrrupts. */
+ /* Crank the handle on interrupt state. */
pt_update_irq(v);
hvm_set_callback_irq_level();
- if ( !cpu_has_pending_irq(v) )
- return;

- /*
- * If the guest can't take an interrupt right now, create a 'fake'
- * virtual interrupt on to intercept as soon as the guest _can_ take
- * interrupts. Do not obtain the next interrupt from the vlapic/pic
- * if unable to inject.
- *
- * Also do this if there is an exception pending. This is because
- * the delivery of the exception can arbitrarily delay the injection
- * of the vintr (for example, if the exception is handled via an
- * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
- * - the vTPR could be modified upwards, so we need to wait until the
- * exception is delivered before we can safely decide that an
- * interrupt is deliverable; and
- * - the guest might look at the APIC/PIC state, so we ought not to have
- * cleared the interrupt out of the IRR.
- */
- if ( irq_masked(vmcb->rflags) || vmcb->interrupt_shadow
- || vmcb->eventinj.fields.v )
+ do {
+ intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( likely(intr_source == hvm_intack_none) )
+ return;
+
+ /*
+ * If the guest can't take an interrupt right now, create a 'fake'
+ * virtual interrupt so that we intercept as soon as the guest _can_ take
+ * interrupts. Do not obtain the next interrupt from the vlapic/pic
+ * if unable to inject.
+ *
+ * Also do this if there is an injection already pending. This is
+ * because the event delivery can arbitrarily delay the injection
+ * of the vintr (for example, if the exception is handled via an
+ * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
+ * - the vTPR could be modified upwards, so we need to wait until the
+ * exception is delivered before we can safely decide that an
+ * interrupt is deliverable; and
+ * - the guest might look at the APIC/PIC state, so we ought not to
+ * have cleared the interrupt out of the IRR.
+ *
+ * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
+ * shadow. This is hard to do without hardware support. We should also
+ * track 'NMI blocking' from NMI injection until IRET. This can be done
+ * quite easily in software by intercepting the unblocking IRET.
+ */
+ if ( !hvm_interrupts_enabled(v, intr_source) ||
+ vmcb->eventinj.fields.v )
+ {
+ vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
+ HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+ svm_inject_dummy_vintr(v);
+ return;
+ }
+ } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
+
+ if ( intr_source == hvm_intack_nmi )
{
- vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
- HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
- svm_inject_extint(v, 0x0); /* actual vector doesn't matter */
- return;
+ svm_inject_nmi(v);
}
-
- /* Okay, we can deliver the interrupt: grab it and update PIC state. */
- intr_vector = cpu_get_interrupt(v, &intr_type);
- BUG_ON(intr_vector < 0);
-
- HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
- svm_inject_extint(v, intr_vector);
-
- pt_intr_post(v, intr_vector, intr_type);
+ else
+ {
+ HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+ svm_inject_extint(v, intr_vector);
+ pt_intr_post(v, intr_vector, intr_source);
+ }
}

/*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Wed Jun 20 12:49:27 2007 -0600
@@ -312,26 +312,8 @@ int svm_vmcb_save(struct vcpu *v, struct
c->sysenter_esp = vmcb->sysenter_esp;
c->sysenter_eip = vmcb->sysenter_eip;

- /* Save any event/interrupt that was being injected when we last
- * exited. Although there are three(!) VMCB fields that can contain
- * active events, we only need to save at most one: because the
- * intr_assist logic never delivers an IRQ when any other event is
- * active, we know that the only possible collision is if we inject
- * a fault while exitintinfo contains a valid event (the delivery of
- * which caused the last exit). In that case replaying just the
- * first event should cause the same behaviour when we restore. */
- if ( vmcb->vintr.fields.irq
- && /* Check it's not a fake interrupt (see svm_intr_assist()) */
- !(vmcb->general1_intercepts & GENERAL1_INTERCEPT_VINTR) )
- {
- c->pending_vector = vmcb->vintr.fields.vector;
- c->pending_type = 0; /* External interrupt */
- c->pending_error_valid = 0;
- c->pending_reserved = 0;
- c->pending_valid = 1;
- c->error_code = 0;
- }
- else if ( vmcb->exitintinfo.fields.v )
+ /* Save any event/interrupt that was being injected when we last exited. */
+ if ( vmcb->exitintinfo.fields.v )
{
c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
c->error_code = vmcb->exitintinfo.fields.errorcode;
@@ -569,10 +551,15 @@ static inline void svm_restore_dr(struct
__restore_debug_registers(v);
}

-static int svm_interrupts_enabled(struct vcpu *v)
-{
- unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
- return !irq_masked(eflags);
+static int svm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ if ( type == hvm_intack_nmi )
+ return !vmcb->interrupt_shadow;
+
+ ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+ return !irq_masked(vmcb->rflags) && !vmcb->interrupt_shadow;
}

static int svm_guest_x86_mode(struct vcpu *v)
@@ -596,6 +583,14 @@ static void svm_update_guest_cr3(struct
static void svm_update_guest_cr3(struct vcpu *v)
{
v->arch.hvm_svm.vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
+}
+
+static void svm_flush_guest_tlbs(void)
+{
+ /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
+ * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
+ * VMRUN anyway). */
+ svm_asid_inc_generation();
}

static void svm_update_vtpr(struct vcpu *v, unsigned long value)
@@ -770,8 +765,6 @@ static void svm_init_hypercall_page(stru
{
char *p;
int i;
-
- memset(hypercall_page, 0, PAGE_SIZE);

for ( i = 0; i < (PAGE_SIZE / 32); i++ )
{
@@ -948,6 +941,7 @@ static struct hvm_function_table svm_fun
.get_segment_register = svm_get_segment_register,
.update_host_cr3 = svm_update_host_cr3,
.update_guest_cr3 = svm_update_guest_cr3,
+ .flush_guest_tlbs = svm_flush_guest_tlbs,
.update_vtpr = svm_update_vtpr,
.stts = svm_stts,
.set_tsc_offset = svm_set_tsc_offset,
@@ -957,7 +951,7 @@ static struct hvm_function_table svm_fun
.event_injection_faulted = svm_event_injection_faulted
};

-void svm_npt_detect(void)
+static void svm_npt_detect(void)
{
u32 eax, ebx, ecx, edx;

@@ -1017,6 +1011,9 @@ int start_svm(struct cpuinfo_x86 *c)

hvm_enable(&svm_function_table);

+ if ( opt_hap_enabled )
+ printk("SVM: Nested paging enabled.\n");
+
return 1;
}

@@ -1477,7 +1474,7 @@ static void svm_io_instruction(struct vc

/* Copy current guest state into io instruction state structure. */
memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
- hvm_store_cpu_guest_regs(v, regs, NULL);
+ svm_store_cpu_guest_regs(v, regs, NULL);

info.bytes = vmcb->exitinfo1;

@@ -2148,11 +2145,14 @@ static inline void svm_do_msr_access(

static inline void svm_vmexit_do_hlt(struct vmcb_struct *vmcb)
{
+ enum hvm_intack type = hvm_vcpu_has_pending_irq(current);
+
__update_guest_eip(vmcb, 1);

/* Check for interrupt not handled or new interrupt. */
- if ( (vmcb->rflags & X86_EFLAGS_IF) &&
- (vmcb->vintr.fields.irq || cpu_has_pending_irq(current)) ) {
+ if ( vmcb->eventinj.fields.v ||
+ ((type != hvm_intack_none) && svm_interrupts_enabled(current, type)) )
+ {
HVMTRACE_1D(HLT, current, /*int pending=*/ 1);
return;
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Wed Jun 20 12:49:27 2007 -0600
@@ -56,7 +56,7 @@ struct vmcb_struct *alloc_vmcb(void)
return NULL;
}

- memset(vmcb, 0, PAGE_SIZE);
+ clear_page(vmcb);
return vmcb;
}

@@ -72,11 +72,11 @@ struct host_save_area *alloc_host_save_a
hsa = alloc_xenheap_page();
if ( hsa == NULL )
{
- printk(XENLOG_WARNING "Warning: failed to allocate vmcb.\n");
+ printk(XENLOG_WARNING "Warning: failed to allocate hsa.\n");
return NULL;
}

- memset(hsa, 0, PAGE_SIZE);
+ clear_page(hsa);
return hsa;
}

diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vioapic.c
--- a/xen/arch/x86/hvm/vioapic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vioapic.c Wed Jun 20 12:49:27 2007 -0600
@@ -254,17 +254,11 @@ static void ioapic_inj_irq(
HVM_DBG_LOG(DBG_LEVEL_IOAPIC, "irq %d trig %d deliv %d",
vector, trig_mode, delivery_mode);

- switch ( delivery_mode )
- {
- case dest_Fixed:
- case dest_LowestPrio:
- if ( vlapic_set_irq(target, vector, trig_mode) )
- vcpu_kick(vlapic_vcpu(target));
- break;
- default:
- gdprintk(XENLOG_WARNING, "error delivery mode %d\n", delivery_mode);
- break;
- }
+ ASSERT((delivery_mode == dest_Fixed) ||
+ (delivery_mode == dest_LowestPrio));
+
+ if ( vlapic_set_irq(target, vector, trig_mode) )
+ vcpu_kick(vlapic_vcpu(target));
}

static uint32_t ioapic_get_delivery_bitmask(
@@ -368,7 +362,6 @@ static void vioapic_deliver(struct hvm_h
}

case dest_Fixed:
- case dest_ExtINT:
{
uint8_t bit;
for ( bit = 0; deliver_bitmask != 0; bit++ )
@@ -393,10 +386,21 @@ static void vioapic_deliver(struct hvm_h
break;
}

- case dest_SMI:
case dest_NMI:
- case dest_INIT:
- case dest__reserved_2:
+ {
+ uint8_t bit;
+ for ( bit = 0; deliver_bitmask != 0; bit++ )
+ {
+ if ( !(deliver_bitmask & (1 << bit)) )
+ continue;
+ deliver_bitmask &= ~(1 << bit);
+ if ( ((v = vioapic_domain(vioapic)->vcpu[bit]) != NULL) &&
+ !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+ vcpu_kick(v);
+ }
+ break;
+ }
+
default:
gdprintk(XENLOG_WARNING, "Unsupported delivery mode %d\n",
delivery_mode);
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vlapic.c Wed Jun 20 12:49:27 2007 -0600
@@ -294,7 +294,8 @@ static int vlapic_accept_irq(struct vcpu
break;

case APIC_DM_NMI:
- gdprintk(XENLOG_WARNING, "Ignoring guest NMI\n");
+ if ( !test_and_set_bool(v->arch.hvm_vcpu.nmi_pending) )
+ vcpu_kick(v);
break;

case APIC_DM_INIT:
@@ -747,7 +748,7 @@ int vlapic_has_interrupt(struct vcpu *v)
return highest_irr;
}

-int cpu_get_apic_interrupt(struct vcpu *v, int *mode)
+int cpu_get_apic_interrupt(struct vcpu *v)
{
int vector = vlapic_has_interrupt(v);
struct vlapic *vlapic = vcpu_vlapic(v);
@@ -757,8 +758,6 @@ int cpu_get_apic_interrupt(struct vcpu *

vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
vlapic_clear_irr(vector, vlapic);
-
- *mode = APIC_DM_FIXED;
return vector;
}

@@ -935,7 +934,7 @@ int vlapic_init(struct vcpu *v)
return -ENOMEM;
}

- memset(vlapic->regs, 0, PAGE_SIZE);
+ clear_page(vlapic->regs);

vlapic_reset(vlapic);

diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c Wed Jun 20 12:49:27 2007 -0600
@@ -102,8 +102,8 @@ static void update_tpr_threshold(struct

asmlinkage void vmx_intr_assist(void)
{
- int has_ext_irq, intr_vector, intr_type = 0;
- unsigned long eflags, intr_shadow;
+ int intr_vector;
+ enum hvm_intack intr_source;
struct vcpu *v = current;
unsigned int idtv_info_field;
unsigned long inst_len;
@@ -114,65 +114,67 @@ asmlinkage void vmx_intr_assist(void)

update_tpr_threshold(vcpu_vlapic(v));

- has_ext_irq = cpu_has_pending_irq(v);
+ do {
+ intr_source = hvm_vcpu_has_pending_irq(v);

- if ( unlikely(v->arch.hvm_vmx.vector_injected) )
- {
- v->arch.hvm_vmx.vector_injected = 0;
- if ( unlikely(has_ext_irq) )
- enable_irq_window(v);
- return;
- }
+ if ( unlikely(v->arch.hvm_vmx.vector_injected) )
+ {
+ v->arch.hvm_vmx.vector_injected = 0;
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_irq_window(v);
+ return;
+ }

- /* This could be moved earlier in the VMX resume sequence. */
- idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
- if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
- {
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+ /* This could be moved earlier in the VMX resume sequence. */
+ idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
+ if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
+ {
+ __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+ /*
+ * Safe: the length will only be interpreted for software
+ * exceptions and interrupts. If we get here then delivery of some
+ * event caused a fault, and this always results in defined
+ * VM_EXIT_INSTRUCTION_LEN.
+ */
+ inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
+ __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+
+ if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ __vmread(IDT_VECTORING_ERROR_CODE));
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_irq_window(v);
+
+ HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
+ return;
+ }
+
+ if ( likely(intr_source == hvm_intack_none) )
+ return;

/*
- * Safe: the length will only be interpreted for software exceptions
- * and interrupts. If we get here then delivery of some event caused a
- * fault, and this always results in defined VM_EXIT_INSTRUCTION_LEN.
+ * TODO: Better NMI handling. Shouldn't wait for EFLAGS.IF==1, but
+ * should wait for exit from 'NMI blocking' window (NMI injection to
+ * next IRET). This requires us to use the new 'virtual NMI' support.
*/
- inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
+ if ( !hvm_interrupts_enabled(v, intr_source) )
+ {
+ enable_irq_window(v);
+ return;
+ }
+ } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );

- if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
- __vmread(IDT_VECTORING_ERROR_CODE));
- if ( unlikely(has_ext_irq) )
- enable_irq_window(v);
-
- HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
- return;
+ if ( intr_source == hvm_intack_nmi )
+ {
+ vmx_inject_nmi(v);
}
-
- if ( likely(!has_ext_irq) )
- return;
-
- intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
- if ( unlikely(intr_shadow & (VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS)) )
+ else
{
- enable_irq_window(v);
- HVM_DBG_LOG(DBG_LEVEL_1, "interruptibility");
- return;
+ HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
+ vmx_inject_extint(v, intr_vector);
+ pt_intr_post(v, intr_vector, intr_source);
}
-
- eflags = __vmread(GUEST_RFLAGS);
- if ( irq_masked(eflags) )
- {
- enable_irq_window(v);
- return;
- }
-
- intr_vector = cpu_get_interrupt(v, &intr_type);
- BUG_ON(intr_vector < 0);
-
- HVMTRACE_2D(INJ_VIRQ, v, intr_vector, /*fake=*/ 0);
- vmx_inject_extint(v, intr_vector, VMX_DELIVER_NO_ERROR_CODE);
-
- pt_intr_post(v, intr_vector, intr_type);
}

/*
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Wed Jun 20 12:49:27 2007 -0600
@@ -158,7 +158,7 @@ static struct vmcs_struct *vmx_alloc_vmc
return NULL;
}

- memset(vmcs, 0, PAGE_SIZE);
+ clear_page(vmcs);
vmcs->vmcs_revision_id = vmcs_revision_id;

return vmcs;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Jun 20 12:49:27 2007 -0600
@@ -1070,8 +1070,6 @@ static void vmx_init_hypercall_page(stru
char *p;
int i;

- memset(hypercall_page, 0, PAGE_SIZE);
-
for ( i = 0; i < (PAGE_SIZE / 32); i++ )
{
p = (char *)(hypercall_page + (i * 32));
@@ -1115,16 +1113,26 @@ static int vmx_nx_enabled(struct vcpu *v
return v->arch.hvm_vmx.efer & EFER_NX;
}

-static int vmx_interrupts_enabled(struct vcpu *v)
-{
- unsigned long eflags = __vmread(GUEST_RFLAGS);
- return !irq_masked(eflags);
-}
-
+static int vmx_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+ unsigned long intr_shadow, eflags;
+
+ ASSERT(v == current);
+
+ intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+ intr_shadow &= VMX_INTR_SHADOW_STI|VMX_INTR_SHADOW_MOV_SS;
+
+ if ( type == hvm_intack_nmi )
+ return !intr_shadow;
+
+ ASSERT((type == hvm_intack_pic) || (type == hvm_intack_lapic));
+ eflags = __vmread(GUEST_RFLAGS);
+ return !irq_masked(eflags) && !intr_shadow;
+}

static void vmx_update_host_cr3(struct vcpu *v)
{
- ASSERT( (v == current) || !vcpu_runnable(v) );
+ ASSERT((v == current) || !vcpu_runnable(v));
vmx_vmcs_enter(v);
__vmwrite(HOST_CR3, v->arch.cr3);
vmx_vmcs_exit(v);
@@ -1132,12 +1140,18 @@ static void vmx_update_host_cr3(struct v

static void vmx_update_guest_cr3(struct vcpu *v)
{
- ASSERT( (v == current) || !vcpu_runnable(v) );
+ ASSERT((v == current) || !vcpu_runnable(v));
vmx_vmcs_enter(v);
__vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr3);
vmx_vmcs_exit(v);
}

+static void vmx_flush_guest_tlbs(void)
+{
+ /* No tagged TLB support on VMX yet. The fact that we're in Xen
+ * at all means any guest will have a clean TLB when it's next run,
+ * because VMRESUME will flush it for us. */
+}

static void vmx_inject_exception(
unsigned int trapnr, int errcode, unsigned long cr2)
@@ -1205,6 +1219,7 @@ static struct hvm_function_table vmx_fun
.get_segment_register = vmx_get_segment_register,
.update_host_cr3 = vmx_update_host_cr3,
.update_guest_cr3 = vmx_update_guest_cr3,
+ .flush_guest_tlbs = vmx_flush_guest_tlbs,
.update_vtpr = vmx_update_vtpr,
.stts = vmx_stts,
.set_tsc_offset = vmx_set_tsc_offset,
@@ -1837,7 +1852,7 @@ static void vmx_io_instruction(unsigned

/* Copy current guest state into io instruction state structure. */
memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
- hvm_store_cpu_guest_regs(current, regs, NULL);
+ vmx_store_cpu_guest_regs(current, regs, NULL);

HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
"exit_qualification = %lx",
@@ -2549,7 +2564,8 @@ static inline int vmx_do_msr_read(struct

HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);

- switch (ecx) {
+ switch ( ecx )
+ {
case MSR_IA32_TIME_STAMP_COUNTER:
msr_content = hvm_get_guest_time(v);
break;
@@ -2565,6 +2581,8 @@ static inline int vmx_do_msr_read(struct
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ goto gp_fault;
default:
if ( long_mode_do_msr_read(regs) )
goto done;
@@ -2576,8 +2594,8 @@ static inline int vmx_do_msr_read(struct
regs->edx = edx;
goto done;
}
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
- return 0;
+
+ goto gp_fault;
}

regs->eax = msr_content & 0xFFFFFFFF;
@@ -2589,6 +2607,10 @@ done:
ecx, (unsigned long)regs->eax,
(unsigned long)regs->edx);
return 1;
+
+gp_fault:
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ return 0;
}

static int vmx_alloc_vlapic_mapping(struct domain *d)
@@ -2667,7 +2689,8 @@ static inline int vmx_do_msr_write(struc
msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
HVMTRACE_2D(MSR_WRITE, v, ecx, msr_content);

- switch (ecx) {
+ switch ( ecx )
+ {
case MSR_IA32_TIME_STAMP_COUNTER:
hvm_set_guest_time(v, msr_content);
pt_reset(v);
@@ -2684,6 +2707,8 @@ static inline int vmx_do_msr_write(struc
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ goto gp_fault;
default:
if ( !long_mode_do_msr_write(regs) )
wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2691,6 +2716,10 @@ static inline int vmx_do_msr_write(struc
}

return 1;
+
+gp_fault:
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ return 0;
}

static void vmx_do_hlt(void)
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpic.c
--- a/xen/arch/x86/hvm/vpic.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpic.c Wed Jun 20 12:49:27 2007 -0600
@@ -499,7 +499,7 @@ void vpic_irq_negative_edge(struct domai
vpic_update_int_output(vpic);
}

-int cpu_get_pic_interrupt(struct vcpu *v, int *type)
+int cpu_get_pic_interrupt(struct vcpu *v)
{
int irq, vector;
struct hvm_hw_vpic *vpic = &v->domain->arch.hvm_domain.vpic[0];
@@ -512,6 +512,5 @@ int cpu_get_pic_interrupt(struct vcpu *v
return -1;

vector = vpic[irq >> 3].irq_base + (irq & 7);
- *type = APIC_DM_EXTINT;
return vector;
}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/hvm/vpt.c
--- a/xen/arch/x86/hvm/vpt.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/hvm/vpt.c Wed Jun 20 12:49:27 2007 -0600
@@ -155,7 +155,8 @@ void pt_update_irq(struct vcpu *v)
}
}

-static struct periodic_time *is_pt_irq(struct vcpu *v, int vector, int type)
+static struct periodic_time *is_pt_irq(
+ struct vcpu *v, int vector, enum hvm_intack src)
{
struct list_head *head = &v->arch.hvm_vcpu.tm_list;
struct periodic_time *pt;
@@ -174,7 +175,7 @@ static struct periodic_time *is_pt_irq(s
return pt;
}

- vec = get_isa_irq_vector(v, pt->irq, type);
+ vec = get_isa_irq_vector(v, pt->irq, src);

/* RTC irq need special care */
if ( (vector != vec) || (pt->irq == 8 && !is_rtc_periodic_irq(rtc)) )
@@ -186,7 +187,7 @@ static struct periodic_time *is_pt_irq(s
return NULL;
}

-void pt_intr_post(struct vcpu *v, int vector, int type)
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src)
{
struct periodic_time *pt;
time_cb *cb;
@@ -194,7 +195,7 @@ void pt_intr_post(struct vcpu *v, int ve

spin_lock(&v->arch.hvm_vcpu.tm_lock);

- pt = is_pt_irq(v, vector, type);
+ pt = is_pt_irq(v, vector, src);
if ( pt == NULL )
{
spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -227,13 +228,10 @@ void pt_reset(struct vcpu *v)

list_for_each_entry ( pt, head, list )
{
- if ( pt->enabled )
- {
- pt->pending_intr_nr = 0;
- pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
- pt->scheduled = NOW() + pt->period;
- set_timer(&pt->timer, pt->scheduled);
- }
+ pt->pending_intr_nr = 0;
+ pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
+ pt->scheduled = NOW() + pt->period;
+ set_timer(&pt->timer, pt->scheduled);
}

spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -247,10 +245,7 @@ void pt_migrate(struct vcpu *v)
spin_lock(&v->arch.hvm_vcpu.tm_lock);

list_for_each_entry ( pt, head, list )
- {
- if ( pt->enabled )
- migrate_timer(&pt->timer, v->processor);
- }
+ migrate_timer(&pt->timer, v->processor);

spin_unlock(&v->arch.hvm_vcpu.tm_lock);
}
@@ -263,8 +258,9 @@ void create_periodic_time(

spin_lock(&v->arch.hvm_vcpu.tm_lock);

- init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
pt->enabled = 1;
+ pt->pending_intr_nr = 0;
+
if ( period < 900000 ) /* < 0.9 ms */
{
gdprintk(XENLOG_WARNING,
@@ -283,6 +279,8 @@ void create_periodic_time(
pt->priv = data;

list_add(&pt->list, &v->arch.hvm_vcpu.tm_list);
+
+ init_timer(&pt->timer, pt_timer_fn, pt, v->processor);
set_timer(&pt->timer, pt->scheduled);

spin_unlock(&v->arch.hvm_vcpu.tm_lock);
@@ -295,8 +293,12 @@ void destroy_periodic_time(struct period

pt_lock(pt);
pt->enabled = 0;
- pt->pending_intr_nr = 0;
list_del(&pt->list);
+ pt_unlock(pt);
+
+ /*
+ * pt_timer_fn() can run until this kill_timer() returns. We must do this
+ * outside pt_lock(), otherwise we can deadlock with pt_timer_fn().
+ */
kill_timer(&pt->timer);
- pt_unlock(pt);
-}
+}
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/mm.c Wed Jun 20 12:49:27 2007 -0600
@@ -2942,7 +2942,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong)
if ( entries > FIRST_RESERVED_GDT_ENTRY )
return -EINVAL;

- if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
+ if ( copy_from_guest(frames, frame_list, nr_pages) )
return -EFAULT;

LOCK_BIGLOCK(current->domain);
@@ -3123,7 +3123,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
return -ESRCH;

- rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
+ rc = copy_from_guest(d->arch.e820, fmap.map.buffer,
fmap.map.nr_entries) ? -EFAULT : 0;
d->arch.nr_e820 = fmap.map.nr_entries;

@@ -3144,7 +3144,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
return -EFAULT;

map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
- if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
+ if ( copy_to_guest(map.buffer, d->arch.e820, map.nr_entries) ||
copy_to_guest(arg, &map, 1) )
return -EFAULT;

@@ -3168,7 +3168,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
buffer = guest_handle_cast(memmap.buffer, e820entry_t);

count = min((unsigned int)e820.nr_map, memmap.nr_entries);
- if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
+ if ( copy_to_guest(buffer, e820.map, count) < 0 )
return -EFAULT;

memmap.nr_entries = count;
@@ -3181,7 +3181,7 @@ long arch_memory_op(int op, XEN_GUEST_HA

case XENMEM_machphys_mapping:
{
- struct xen_machphys_mapping mapping = {
+ static const struct xen_machphys_mapping mapping = {
.v_start = MACH2PHYS_VIRT_START,
.v_end = MACH2PHYS_VIRT_END,
.max_mfn = MACH2PHYS_NR_ENTRIES - 1
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/setup.c Wed Jun 20 12:49:27 2007 -0600
@@ -295,14 +295,14 @@ static struct e820map __initdata boot_e8
/* Reserve area (@s,@e) in the temporary bootstrap e820 map. */
static void __init reserve_in_boot_e820(unsigned long s, unsigned long e)
{
- unsigned long rs, re;
+ uint64_t rs, re;
int i;

for ( i = 0; i < boot_e820.nr_map; i++ )
{
/* Have we found the e820 region that includes the specified range? */
rs = boot_e820.map[i].addr;
- re = boot_e820.map[i].addr + boot_e820.map[i].size;
+ re = rs + boot_e820.map[i].size;
if ( (s < rs) || (e > re) )
continue;

@@ -402,7 +402,7 @@ void init_done(void)
startup_cpu_idle_loop();
}

-void __init __start_xen(multiboot_info_t *mbi)
+void __init __start_xen(unsigned long mbi_p)
{
char *memmap_type = NULL;
char __cmdline[] = "", *cmdline = __cmdline;
@@ -410,6 +410,7 @@ void __init __start_xen(multiboot_info_t
unsigned int initrdidx = 1;
char *_policy_start = NULL;
unsigned long _policy_len = 0;
+ multiboot_info_t *mbi = __va(mbi_p);
module_t *mod = (module_t *)__va(mbi->mods_addr);
unsigned long nr_pages, modules_length;
int i, e820_warn = 0, bytes = 0;
@@ -678,6 +679,9 @@ void __init __start_xen(multiboot_info_t
barrier();
move_memory(e, 0, __pa(&_end) - xen_phys_start);

+ /* Poison low 1MB to detect stray pointers to physical 0-1MB. */
+ memset(maddr_to_bootstrap_virt(e), 0x55, 1U<<20);
+
/* Walk initial pagetables, relocating page directory entries. */
pl4e = __va(__pa(idle_pg_table));
for ( i = 0 ; i < L4_PAGETABLE_ENTRIES; i++, pl4e++ )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/traps.c Wed Jun 20 12:49:27 2007 -0600
@@ -462,7 +462,17 @@ int rdmsr_hypervisor_regs(
if ( idx > 0 )
return 0;

- *eax = *edx = 0;
+ switch ( idx )
+ {
+ case 0:
+ {
+ *eax = *edx = 0;
+ break;
+ }
+ default:
+ BUG();
+ }
+
return 1;
}

@@ -1130,7 +1140,7 @@ static inline int guest_io_okay(
* read as 0xff (no access allowed).
*/
TOGGLE_MODE();
- switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
+ switch ( __copy_from_guest_offset(x.bytes, v->arch.iobmp,
port>>3, 2) )
{
default: x.bytes[0] = ~0;
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c Wed Jun 20 12:49:27 2007 -0600
@@ -513,6 +513,7 @@ static void hypercall_page_initialise_ri

void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
+ memset(hypercall_page, 0xCC, PAGE_SIZE);
if ( is_hvm_domain(d) )
hvm_hypercall_page_initialise(d, hypercall_page);
else if ( supervisor_mode_kernel )
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/compat_kexec.S
--- a/xen/arch/x86/x86_64/compat_kexec.S Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/compat_kexec.S Wed Jun 20 12:49:27 2007 -0600
@@ -1,5 +1,11 @@
/*
* Compatibility kexec handler.
+ */
+
+/*
+ * NOTE: We rely on Xen not relocating itself above the 4G boundary. This is
+ * currently true but if it ever changes then compat_pg_table will
+ * need to be moved back below 4G at run time.
*/

#include <xen/config.h>
@@ -8,7 +14,20 @@
#include <asm/msr.h>
#include <asm/page.h>

-#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
+/* The unrelocated physical address of a symbol. */
+#define SYM_PHYS(sym) ((sym) - __XEN_VIRT_START)
+
+/* Load physical address of symbol into register and relocate it. */
+#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
+ add xen_phys_start(%rip), reg
+
+/*
+ * Relocate a physical address in memory. Size of temporary register
+ * determines size of the value to relocate.
+ */
+#define RELOCATE_MEM(addr,reg) mov addr(%rip), reg ; \
+ add xen_phys_start(%rip), reg ; \
+ mov reg, addr(%rip)

.text

@@ -31,20 +50,35 @@ 1: dec %r9
test %r9,%r9
jnz 1b

- mov $SYM_PHYS(compat_page_list),%rdx
+ RELOCATE_SYM(compat_page_list,%rdx)
+
+ /* Relocate compatibility mode entry point address. */
+ RELOCATE_MEM(compatibility_mode_far,%eax)
+
+ /* Relocate compat_pg_table. */
+ RELOCATE_MEM(compat_pg_table, %rax)
+ RELOCATE_MEM(compat_pg_table+0x8, %rax)
+ RELOCATE_MEM(compat_pg_table+0x10,%rax)
+ RELOCATE_MEM(compat_pg_table+0x18,%rax)

/*
* Setup an identity mapped region in PML4[0] of idle page
* table.
*/
- lea l3_identmap(%rip),%rax
- sub %rbx,%rax
+ RELOCATE_SYM(l3_identmap,%rax)
or $0x63,%rax
mov %rax, idle_pg_table(%rip)

/* Switch to idle page table. */
- movq $SYM_PHYS(idle_pg_table), %rax
+ RELOCATE_SYM(idle_pg_table,%rax)
movq %rax, %cr3
+
+ /* Switch to identity mapped compatibility stack. */
+ RELOCATE_SYM(compat_stack,%rax)
+ movq %rax, %rsp
+
+ /* Save xen_phys_start for 32 bit code. */
+ movq xen_phys_start(%rip), %rbx

/* Jump to low identity mapping in compatibility mode. */
ljmp *compatibility_mode_far(%rip)
@@ -54,7 +88,26 @@ compatibility_mode_far:
.long SYM_PHYS(compatibility_mode)
.long __HYPERVISOR_CS32

+ /*
+ * We use 5 words of stack for the arguments passed to the kernel. The
+ * kernel only uses 1 word before switching to its own stack. Allocate
+ * 16 words to give "plenty" of room.
+ */
+ .fill 16,4,0
+compat_stack:
+
.code32
+
+#undef RELOCATE_SYM
+#undef RELOCATE_MEM
+
+/*
+ * Load physical address of symbol into register and relocate it. %rbx
+ * contains xen_phys_start(%rip) saved before jump to compatibility
+ * mode.
+ */
+#define RELOCATE_SYM(sym,reg) mov $SYM_PHYS(sym), reg ; \
+ add %ebx, reg

compatibility_mode:
/* Setup some sane segments. */
@@ -78,7 +131,7 @@ compatibility_mode:
movl %eax, %cr0

/* Switch to 32 bit page table. */
- movl $SYM_PHYS(compat_pg_table), %eax
+ RELOCATE_SYM(compat_pg_table, %eax)
movl %eax, %cr3

/* Clear MSR_EFER[LME], disabling long mode */
diff -r c20bc60f9243 -r 810885428743 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c Wed Jun 20 12:49:27 2007 -0600
@@ -510,6 +510,7 @@ static void hypercall_page_initialise_ri

void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
+ memset(hypercall_page, 0xCC, PAGE_SIZE);
if ( is_hvm_domain(d) )
hvm_hypercall_page_initialise(d, hypercall_page);
else if ( !is_pv_32bit_domain(d) )
diff -r c20bc60f9243 -r 810885428743 xen/common/compat/memory.c
--- a/xen/common/compat/memory.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/compat/memory.c Wed Jun 20 12:49:27 2007 -0600
@@ -258,7 +258,8 @@ int compat_memory_op(unsigned int cmd, X
compat_pfn_t pfn = nat.rsrv->extent_start.p[start_extent];

BUG_ON(pfn != nat.rsrv->extent_start.p[start_extent]);
- if ( __copy_to_compat_offset(cmp.rsrv.extent_start, start_extent, &pfn, 1) )
+ if ( __copy_to_compat_offset(cmp.rsrv.extent_start,
+ start_extent, &pfn, 1) )
{
if ( split >= 0 )
{
@@ -275,6 +276,10 @@ int compat_memory_op(unsigned int cmd, X
break;
}
}
+
+ /* Bail if there was an error. */
+ if ( (split >= 0) && (end_extent != nat.rsrv->nr_extents) )
+ split = 0;
}
else
start_extent = end_extent;
diff -r c20bc60f9243 -r 810885428743 xen/common/domctl.c
--- a/xen/common/domctl.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/domctl.c Wed Jun 20 12:49:27 2007 -0600
@@ -43,7 +43,7 @@ void cpumask_to_xenctl_cpumap(

bitmap_long_to_byte(bytemap, cpus_addr(*cpumask), NR_CPUS);

- copy_to_guest(xenctl_cpumap->bitmap, &bytemap[0], copy_bytes);
+ copy_to_guest(xenctl_cpumap->bitmap, bytemap, copy_bytes);

for ( i = copy_bytes; i < guest_bytes; i++ )
copy_to_guest_offset(xenctl_cpumap->bitmap, i, &zero, 1);
@@ -63,7 +63,7 @@ void xenctl_cpumap_to_cpumask(
if ( guest_handle_is_null(xenctl_cpumap->bitmap) )
return;

- copy_from_guest(&bytemap[0], xenctl_cpumap->bitmap, copy_bytes);
+ copy_from_guest(bytemap, xenctl_cpumap->bitmap, copy_bytes);

bitmap_byte_to_long(cpus_addr(*cpumask), bytemap, NR_CPUS);
}
diff -r c20bc60f9243 -r 810885428743 xen/common/grant_table.c
--- a/xen/common/grant_table.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/grant_table.c Wed Jun 20 12:49:27 2007 -0600
@@ -148,7 +148,7 @@ get_maptrack_handle(
return -1;
}

- memset(new_mt, 0, PAGE_SIZE);
+ clear_page(new_mt);

new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;

@@ -624,7 +624,7 @@ gnttab_grow_table(struct domain *d, unsi
{
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
- memset(gt->active[i], 0, PAGE_SIZE);
+ clear_page(gt->active[i]);
}

/* Shared */
@@ -632,7 +632,7 @@ gnttab_grow_table(struct domain *d, unsi
{
if ( (gt->shared[i] = alloc_xenheap_page()) == NULL )
goto shared_alloc_failed;
- memset(gt->shared[i], 0, PAGE_SIZE);
+ clear_page(gt->shared[i]);
}

/* Share the new shared frames with the recipient domain */
@@ -1365,7 +1365,7 @@ grant_table_create(
{
if ( (t->active[i] = alloc_xenheap_page()) == NULL )
goto no_mem_2;
- memset(t->active[i], 0, PAGE_SIZE);
+ clear_page(t->active[i]);
}

/* Tracking of mapped foreign frames table */
@@ -1375,7 +1375,7 @@ grant_table_create(
memset(t->maptrack, 0, max_nr_maptrack_frames() * sizeof(t->maptrack[0]));
if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
goto no_mem_3;
- memset(t->maptrack[0], 0, PAGE_SIZE);
+ clear_page(t->maptrack[0]);
t->maptrack_limit = PAGE_SIZE / sizeof(struct grant_mapping);
for ( i = 0; i < t->maptrack_limit; i++ )
t->maptrack[0][i].ref = i+1;
@@ -1389,7 +1389,7 @@ grant_table_create(
{
if ( (t->shared[i] = alloc_xenheap_page()) == NULL )
goto no_mem_4;
- memset(t->shared[i], 0, PAGE_SIZE);
+ clear_page(t->shared[i]);
}

for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
diff -r c20bc60f9243 -r 810885428743 xen/common/kernel.c
--- a/xen/common/kernel.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kernel.c Wed Jun 20 12:49:27 2007 -0600
@@ -142,7 +142,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
{
xen_extraversion_t extraversion;
safe_strcpy(extraversion, xen_extra_version());
- if ( copy_to_guest(arg, (char *)extraversion, sizeof(extraversion)) )
+ if ( copy_to_guest(arg, extraversion, ARRAY_SIZE(extraversion)) )
return -EFAULT;
return 0;
}
@@ -167,7 +167,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
memset(info, 0, sizeof(info));
arch_get_xen_caps(&info);

- if ( copy_to_guest(arg, (char *)info, sizeof(info)) )
+ if ( copy_to_guest(arg, info, ARRAY_SIZE(info)) )
return -EFAULT;
return 0;
}
@@ -187,7 +187,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
{
xen_changeset_info_t chgset;
safe_strcpy(chgset, xen_changeset());
- if ( copy_to_guest(arg, (char *)chgset, sizeof(chgset)) )
+ if ( copy_to_guest(arg, chgset, ARRAY_SIZE(chgset)) )
return -EFAULT;
return 0;
}
@@ -229,8 +229,8 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL

case XENVER_guest_handle:
{
- if ( copy_to_guest(arg, (char *)current->domain->handle,
- sizeof(current->domain->handle)) )
+ if ( copy_to_guest(arg, current->domain->handle,
+ ARRAY_SIZE(current->domain->handle)) )
return -EFAULT;
return 0;
}
diff -r c20bc60f9243 -r 810885428743 xen/common/kexec.c
--- a/xen/common/kexec.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/kexec.c Wed Jun 20 12:49:27 2007 -0600
@@ -169,7 +169,11 @@ static int kexec_get(reserve)(xen_kexec_

static int kexec_get(xen)(xen_kexec_range_t *range)
{
+#ifdef CONFIG_X86_64
+ range->start = xenheap_phys_start;
+#else
range->start = virt_to_maddr(_start);
+#endif
range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
return 0;
}
diff -r c20bc60f9243 -r 810885428743 xen/common/perfc.c
--- a/xen/common/perfc.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/common/perfc.c Wed Jun 20 12:49:27 2007 -0600
@@ -227,7 +227,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
}
BUG_ON(v != perfc_nbr_vals);

- if ( copy_to_guest(desc, (xen_sysctl_perfc_desc_t *)perfc_d, NR_PERFCTRS) )
+ if ( copy_to_guest(desc, perfc_d, NR_PERFCTRS) )
return -EFAULT;
if ( copy_to_guest(val, perfc_vals, perfc_nbr_vals) )
return -EFAULT;
diff -r c20bc60f9243 -r 810885428743 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/char/console.c Wed Jun 20 12:49:27 2007 -0600
@@ -326,7 +326,7 @@ static long guest_console_write(XEN_GUES
CONSOLEIO_write, count, buffer);

kcount = min_t(int, count, sizeof(kbuf)-1);
- if ( copy_from_guest((char *)kbuf, buffer, kcount) )
+ if ( copy_from_guest(kbuf, buffer, kcount) )
return -EFAULT;
kbuf[kcount] = '\0';

diff -r c20bc60f9243 -r 810885428743 xen/drivers/video/vga.c
--- a/xen/drivers/video/vga.c Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/drivers/video/vga.c Wed Jun 20 12:49:27 2007 -0600
@@ -32,6 +32,9 @@ static unsigned char *video;
*
* 'vga=ask':
* display a vga menu of available modes
+ *
+ * 'vga=current':
+ * use the current vga mode without modification
*
* 'vga=text-80x<rows>':
* text mode, where <rows> is one of {25,28,30,34,43,50,60}
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-ia64/guest_access.h
--- a/xen/include/asm-ia64/guest_access.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-ia64/guest_access.h Wed Jun 20 12:49:27 2007 -0600
@@ -76,28 +76,31 @@ extern int xencomm_handle_is_null(void *
__copy_field_from_guest(ptr, hnd, field)

#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \
- const typeof(ptr) _d = (hnd).p; \
- const typeof(ptr) _s = (ptr); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ void *_d = (hnd).p; \
+ ((void)((hnd).p == (ptr))); \
xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
})

#define __copy_field_to_guest(hnd, ptr, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(ptr) _d = (hnd).p; \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = (hnd).p; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \
})

-#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \
- const typeof(ptr) _s = (hnd).p; \
- const typeof(ptr) _d = (ptr); \
- xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
+#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
+ xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
})

#define __copy_field_from_guest(ptr, hnd, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(ptr) _s = (hnd).p; \
- const typeof(&(ptr)->field) _d = &(ptr)->field; \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
+ const void *_s = (hnd).p; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \
})

diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/event.h
--- a/xen/include/asm-x86/event.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/event.h Wed Jun 20 12:49:27 2007 -0600
@@ -10,7 +10,6 @@
#define __ASM_EVENT_H__

#include <xen/shared.h>
-#include <asm/hvm/irq.h> /* cpu_has_pending_irq() */

static inline void vcpu_kick(struct vcpu *v)
{
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/guest_access.h
--- a/xen/include/asm-x86/guest_access.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/guest_access.h Wed Jun 20 12:49:27 2007 -0600
@@ -32,11 +32,12 @@
* specifying an offset into the guest array.
*/
#define copy_to_guest_offset(hnd, off, ptr, nr) ({ \
- typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(hnd).p; \
+ ((void)((hnd).p == (ptr))); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \
- copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \
+ copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) : \
+ copy_to_user(_d+(off), _s, sizeof(*_s)*(nr)); \
})

/*
@@ -44,29 +45,30 @@
* specifying an offset into the guest array.
*/
#define copy_from_guest_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x+(off), sizeof(*_x)*(nr)) :\
- copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \
+ copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+ copy_from_user(_d, _s+(off), sizeof(*_d)*(nr)); \
})

/* Copy sub-field of a structure to guest context via a guest handle. */
#define copy_field_to_guest(hnd, ptr, field) ({ \
- typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &(hnd).p->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x, _y, sizeof(*_x)) : \
- copy_to_user(_x, _y, sizeof(*_x)); \
+ copy_to_user_hvm(_d, _s, sizeof(*_s)) : \
+ copy_to_user(_d, _s, sizeof(*_s)); \
})

/* Copy sub-field of a structure from guest context via a guest handle. */
#define copy_field_from_guest(ptr, hnd, field) ({ \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(hnd).p->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x, sizeof(*_x)) : \
- copy_from_user(_y, _x, sizeof(*_x)); \
+ copy_from_user_hvm(_d, _s, sizeof(*_d)) : \
+ copy_from_user(_d, _s, sizeof(*_d)); \
})

/*
@@ -78,35 +80,37 @@
array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))

#define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \
- typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(hnd).p; \
+ ((void)((hnd).p == (ptr))); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x+(off), _y, sizeof(*_x)*(nr)) : \
- __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \
+ copy_to_user_hvm(_d+(off), _s, sizeof(*_s)*(nr)) : \
+ __copy_to_user(_d+(off), _s, sizeof(*_s)*(nr)); \
})

#define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- typeof(ptr) _y = (ptr); \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x+(off),sizeof(*_x)*(nr)) : \
- __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \
+ copy_from_user_hvm(_d, _s+(off), sizeof(*_d)*(nr)) :\
+ __copy_from_user(_d, _s+(off), sizeof(*_d)*(nr)); \
})

#define __copy_field_to_guest(hnd, ptr, field) ({ \
- typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &(hnd).p->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
is_hvm_vcpu(current) ? \
- copy_to_user_hvm(_x, _y, sizeof(*_x)) : \
- __copy_to_user(_x, _y, sizeof(*_x)); \
+ copy_to_user_hvm(_d, _s, sizeof(*_s)) : \
+ __copy_to_user(_d, _s, sizeof(*_s)); \
})

#define __copy_field_from_guest(ptr, hnd, field) ({ \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- typeof(&(ptr)->field) _y = &(ptr)->field; \
+ const typeof(&(ptr)->field) _s = &(hnd).p->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
is_hvm_vcpu(current) ? \
- copy_from_user_hvm(_y, _x, sizeof(*_x)) : \
- __copy_from_user(_y, _x, sizeof(*_x)); \
+ copy_from_user_hvm(_d, _s, sizeof(*_d)) : \
+ __copy_from_user(_d, _s, sizeof(*_d)); \
})

#endif /* __ASM_X86_GUEST_ACCESS_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h Wed Jun 20 12:49:27 2007 -0600
@@ -55,6 +55,14 @@ typedef struct segment_register {
u64 base;
} __attribute__ ((packed)) segment_register_t;

+/* Interrupt acknowledgement sources. */
+enum hvm_intack {
+ hvm_intack_none,
+ hvm_intack_pic,
+ hvm_intack_lapic,
+ hvm_intack_nmi
+};
+
/*
* The hardware virtual machine (HVM) interface abstracts away from the
* x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -106,7 +114,7 @@ struct hvm_function_table {
int (*long_mode_enabled)(struct vcpu *v);
int (*pae_enabled)(struct vcpu *v);
int (*nx_enabled)(struct vcpu *v);
- int (*interrupts_enabled)(struct vcpu *v);
+ int (*interrupts_enabled)(struct vcpu *v, enum hvm_intack);
int (*guest_x86_mode)(struct vcpu *v);
unsigned long (*get_guest_ctrl_reg)(struct vcpu *v, unsigned int num);
unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
@@ -124,6 +132,13 @@ struct hvm_function_table {
void (*update_guest_cr3)(struct vcpu *v);

/*
+ * Called to ensure that all guest-specific mappings in a tagged TLB
+ * are flushed; does *not* flush Xen's TLB entries, and on
+ * processors without a tagged TLB it will be a noop.
+ */
+ void (*flush_guest_tlbs)(void);
+
+ /*
* Reflect the virtual APIC's value in the guest's V_TPR register
*/
void (*update_vtpr)(struct vcpu *v, unsigned long value);
@@ -148,6 +163,7 @@ struct hvm_function_table {
};

extern struct hvm_function_table hvm_funcs;
+extern int hvm_enabled;

int hvm_domain_initialise(struct domain *d);
void hvm_domain_relinquish_resources(struct domain *d);
@@ -191,16 +207,16 @@ hvm_long_mode_enabled(struct vcpu *v)
#define hvm_long_mode_enabled(v) (v,0)
#endif

- static inline int
+static inline int
hvm_pae_enabled(struct vcpu *v)
{
return hvm_funcs.pae_enabled(v);
}

static inline int
-hvm_interrupts_enabled(struct vcpu *v)
-{
- return hvm_funcs.interrupts_enabled(v);
+hvm_interrupts_enabled(struct vcpu *v, enum hvm_intack type)
+{
+ return hvm_funcs.interrupts_enabled(v, type);
}

static inline int
@@ -230,6 +246,13 @@ hvm_update_vtpr(struct vcpu *v, unsigned
}

void hvm_update_guest_cr3(struct vcpu *v, unsigned long guest_cr3);
+
+static inline void
+hvm_flush_guest_tlbs(void)
+{
+ if ( hvm_enabled )
+ hvm_funcs.flush_guest_tlbs();
+}

void hvm_hypercall_page_initialise(struct domain *d,
void *hypercall_page);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/irq.h
--- a/xen/include/asm-x86/hvm/irq.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/irq.h Wed Jun 20 12:49:27 2007 -0600
@@ -24,10 +24,10 @@

#include <xen/types.h>
#include <xen/spinlock.h>
+#include <asm/hvm/hvm.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vioapic.h>
#include <public/hvm/save.h>
-

struct hvm_irq {
/*
@@ -58,7 +58,6 @@ struct hvm_irq {
HVMIRQ_callback_gsi,
HVMIRQ_callback_pci_intx
} callback_via_type;
- uint32_t pad; /* So the next field will be aligned */
};
union {
uint32_t gsi;
@@ -115,9 +114,12 @@ void hvm_set_callback_irq_level(void);
void hvm_set_callback_irq_level(void);
void hvm_set_callback_via(struct domain *d, uint64_t via);

-int cpu_get_interrupt(struct vcpu *v, int *type);
-int cpu_has_pending_irq(struct vcpu *v);
-int get_isa_irq_vector(struct vcpu *vcpu, int irq, int type);
+/* Check/Acknowledge next pending interrupt. */
+enum hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
+int hvm_vcpu_ack_pending_irq(
+ struct vcpu *v, enum hvm_intack type, int *vector);
+
+int get_isa_irq_vector(struct vcpu *vcpu, int irq, enum hvm_intack src);
int is_isa_irq_masked(struct vcpu *v, int isa_irq);

#endif /* __ASM_X86_HVM_IRQ_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/support.h Wed Jun 20 12:49:27 2007 -0600
@@ -215,7 +215,6 @@ int hvm_load(struct domain *d, hvm_domai
/* End of save/restore */

extern char hvm_io_bitmap[];
-extern int hvm_enabled;

void hvm_enable(struct hvm_function_table *);
void hvm_disable(void);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/svm/asid.h
--- a/xen/include/asm-x86/hvm/svm/asid.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/asid.h Wed Jun 20 12:49:27 2007 -0600
@@ -30,6 +30,7 @@ void svm_asid_init(struct cpuinfo_x86 *c
void svm_asid_init(struct cpuinfo_x86 *c);
void svm_asid_init_vcpu(struct vcpu *v);
void svm_asid_inv_asid(struct vcpu *v);
+void svm_asid_inc_generation(void);

/*
* ASID related, guest triggered events.
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vcpu.h
--- a/xen/include/asm-x86/hvm/vcpu.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vcpu.h Wed Jun 20 12:49:27 2007 -0600
@@ -30,11 +30,13 @@

struct hvm_vcpu {
unsigned long hw_cr3; /* value we give to HW to use */
- unsigned long ioflags;
struct hvm_io_op io_op;
struct vlapic vlapic;
s64 cache_tsc_offset;
u64 guest_time;
+
+ /* Is an NMI pending for delivery to this VCPU core? */
+ bool_t nmi_pending; /* NB. integrate flag with save/restore */

/* Lock and list for virtual platform timers. */
spinlock_t tm_lock;
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vlapic.h Wed Jun 20 12:49:27 2007 -0600
@@ -76,7 +76,7 @@ int vlapic_find_highest_irr(struct vlapi
int vlapic_find_highest_irr(struct vlapic *vlapic);

int vlapic_has_interrupt(struct vcpu *v);
-int cpu_get_apic_interrupt(struct vcpu *v, int *mode);
+int cpu_get_apic_interrupt(struct vcpu *v);

int vlapic_init(struct vcpu *v);
void vlapic_destroy(struct vcpu *v);
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Wed Jun 20 12:49:27 2007 -0600
@@ -336,9 +336,16 @@ static inline void vmx_inject_sw_excepti
instruction_len);
}

-static inline void vmx_inject_extint(struct vcpu *v, int trap, int error_code)
-{
- __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR, error_code, 0);
+static inline void vmx_inject_extint(struct vcpu *v, int trap)
+{
+ __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
+ VMX_DELIVER_NO_ERROR_CODE, 0);
+}
+
+static inline void vmx_inject_nmi(struct vcpu *v)
+{
+ __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
+ VMX_DELIVER_NO_ERROR_CODE, 0);
}

#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpic.h Wed Jun 20 12:49:27 2007 -0600
@@ -32,7 +32,7 @@ void vpic_irq_positive_edge(struct domai
void vpic_irq_positive_edge(struct domain *d, int irq);
void vpic_irq_negative_edge(struct domain *d, int irq);
void vpic_init(struct domain *d);
-int cpu_get_pic_interrupt(struct vcpu *v, int *type);
+int cpu_get_pic_interrupt(struct vcpu *v);
int is_periodic_irq(struct vcpu *v, int irq, int type);

#endif /* __ASM_X86_HVM_VPIC_H__ */
diff -r c20bc60f9243 -r 810885428743 xen/include/asm-x86/hvm/vpt.h
--- a/xen/include/asm-x86/hvm/vpt.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/asm-x86/hvm/vpt.h Wed Jun 20 12:49:27 2007 -0600
@@ -29,6 +29,7 @@
#include <xen/timer.h>
#include <xen/list.h>
#include <asm/hvm/vpic.h>
+#include <asm/hvm/irq.h>
#include <public/hvm/save.h>

struct HPETState;
@@ -119,7 +120,7 @@ void pt_freeze_time(struct vcpu *v);
void pt_freeze_time(struct vcpu *v);
void pt_thaw_time(struct vcpu *v);
void pt_update_irq(struct vcpu *v);
-void pt_intr_post(struct vcpu *v, int vector, int type);
+void pt_intr_post(struct vcpu *v, int vector, enum hvm_intack src);
void pt_reset(struct vcpu *v);
void pt_migrate(struct vcpu *v);
void create_periodic_time(
diff -r c20bc60f9243 -r 810885428743 xen/include/xen/compat.h
--- a/xen/include/xen/compat.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/compat.h Wed Jun 20 12:49:27 2007 -0600
@@ -44,9 +44,10 @@
* specifying an offset into the guest array.
*/
#define copy_to_compat_offset(hnd, off, ptr, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(*(ptr)) *const _y = (ptr); \
- copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c; \
+ ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr))); \
+ copy_to_user(_d + (off), _s, sizeof(*_s) * (nr)); \
})

/*
@@ -54,9 +55,9 @@
* specifying an offset into the guest array.
*/
#define copy_from_compat_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(ptr) _y = (ptr); \
- copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+ typeof(*(ptr)) *_d = (ptr); \
+ copy_from_user(_d, _s + (off), sizeof(*_d) * (nr)); \
})

#define copy_to_compat(hnd, ptr, nr) \
@@ -67,16 +68,19 @@

/* Copy sub-field of a structure to guest context via a compat handle. */
#define copy_field_to_compat(hnd, ptr, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- const typeof((ptr)->field) *const _y = &(ptr)->field; \
- copy_to_user(_x, _y, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field == \
+ &(ptr)->field)); \
+ copy_to_user(_d, _s, sizeof(*_s)); \
})

/* Copy sub-field of a structure from guest context via a compat handle. */
#define copy_field_from_compat(ptr, hnd, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- typeof((ptr)->field) *const _y = &(ptr)->field; \
- copy_from_user(_y, _x, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = \
+ &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ copy_from_user(_d, _s, sizeof(*_d)); \
})

/*
@@ -84,18 +88,20 @@
* Allows use of faster __copy_* functions.
*/
#define compat_handle_okay(hnd, nr) \
- compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), sizeof(**(hnd)._))
+ compat_array_access_ok((void *)(full_ptr_t)(hnd).c, (nr), \
+ sizeof(**(hnd)._))

#define __copy_to_compat_offset(hnd, off, ptr, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(*(ptr)) *const _y = (ptr); \
- __copy_to_user(_x + (off), _y, sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ char (*_d)[sizeof(*_s)] = (void *)(full_ptr_t)(hnd).c; \
+ ((void)((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c == (ptr))); \
+ __copy_to_user(_d + (off), _s, sizeof(*_s) * (nr)); \
})

#define __copy_from_compat_offset(ptr, hnd, off, nr) ({ \
- const typeof(ptr) _x = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
- const typeof(ptr) _y = (ptr); \
- __copy_from_user(_y, _x + (off), sizeof(*_x) * (nr)); \
+ const typeof(*(ptr)) *_s = (typeof(**(hnd)._) *)(full_ptr_t)(hnd).c; \
+ typeof(*(ptr)) *_d = (ptr); \
+ __copy_from_user(_d, _s + (off), sizeof(*_d) * (nr)); \
})

#define __copy_to_compat(hnd, ptr, nr) \
@@ -105,15 +111,18 @@
__copy_from_compat_offset(ptr, hnd, 0, nr)

#define __copy_field_to_compat(hnd, ptr, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- const typeof((ptr)->field) *const _y = &(ptr)->field; \
- __copy_to_user(_x, _y, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ ((void)(&((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field == \
+ &(ptr)->field)); \
+ __copy_to_user(_d, _s, sizeof(*_s)); \
})

#define __copy_field_from_compat(ptr, hnd, field) ({ \
- typeof((ptr)->field) *const _x = &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
- typeof((ptr)->field) *const _y = &(ptr)->field; \
- __copy_from_user(_y, _x, sizeof(*_x)); \
+ const typeof(&(ptr)->field) _s = \
+ &((typeof(**(hnd)._) *)(full_ptr_t)(hnd).c)->field; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ __copy_from_user(_d, _s, sizeof(*_d)); \
})


@@ -169,7 +178,8 @@ int switch_compat(struct domain *);
int switch_compat(struct domain *);
int switch_native(struct domain *);

-#define BITS_PER_GUEST_LONG(d) (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
+#define BITS_PER_GUEST_LONG(d) \
+ (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)

#else

diff -r c20bc60f9243 -r 810885428743 xen/include/xen/xencomm.h
--- a/xen/include/xen/xencomm.h Wed Jun 20 12:47:52 2007 -0600
+++ b/xen/include/xen/xencomm.h Wed Jun 20 12:49:27 2007 -0600
@@ -47,17 +47,17 @@ static inline unsigned long xencomm_inli
((hnd).p == NULL || xencomm_handle_is_null((hnd).p))

/* Offset the given guest handle into the array it refers to. */
-#define guest_handle_add_offset(hnd, nr) ({ \
- const typeof((hnd).p) _ptr; \
- xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr)); \
+#define guest_handle_add_offset(hnd, nr) ({ \
+ const typeof((hnd).p) _ptr; \
+ xencomm_add_offset((void **)&((hnd).p), nr * sizeof(*_ptr)); \
})

/* Cast a guest handle to the specified type of handle. */
#define guest_handle_cast(hnd, type) ({ \
type *_x = (hnd).p; \
- XEN_GUEST_HANDLE(type) _y; \
- set_xen_guest_handle(_y, _x); \
- _y; \
+ XEN_GUEST_HANDLE(type) _y; \
+ set_xen_guest_handle(_y, _x); \
+ _y; \
})

/* Since we run in real mode, we can safely access all addresses. That also
@@ -87,29 +87,32 @@ static inline unsigned long xencomm_inli
__copy_field_from_guest(ptr, hnd, field)

#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
- xencomm_copy_to_guest(_x, _y, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
+ const typeof(*(ptr)) *_s = (ptr); \
+ void *_d = (hnd).p; \
+ ((void)((hnd).p == (ptr))); \
+ xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \
})

#define __copy_field_to_guest(hnd, ptr, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
- xencomm_copy_to_guest(_x, _y, sizeof(*_x), sizeof(*_x)*(_off)); \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
+ const typeof(&(ptr)->field) _s = &(ptr)->field; \
+ void *_d = (hnd).p; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
+ xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \
})

#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \
- const typeof(ptr) _x = (hnd).p; \
- const typeof(ptr) _y = (ptr); \
- xencomm_copy_from_guest(_y, _x, sizeof(*_x)*(nr), sizeof(*_x)*(idx)); \
+ const typeof(*(ptr)) *_s = (hnd).p; \
+ typeof(*(ptr)) *_d = (ptr); \
+ xencomm_copy_from_guest(_d, _s, sizeof(*_d)*(nr), sizeof(*_d)*(idx)); \
})

#define __copy_field_from_guest(ptr, hnd, field) ({ \
- const int _off = offsetof(typeof(*ptr), field); \
- const typeof(&(ptr)->field) _x = &(hnd).p->field; \
- const typeof(&(ptr)->field) _y = &(ptr)->field; \
- xencomm_copy_to_guest(_y, _x, sizeof(*_x), sizeof(*_x)*(_off)); \
+ unsigned int _off = offsetof(typeof(*(hnd).p), field); \
+ const void *_s = (hnd).p; \
+ typeof(&(ptr)->field) _d = &(ptr)->field; \
+ ((void)(&(hnd).p->field == &(ptr)->field)); \
+ xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \
})

#endif /* __XENCOMM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog
[xen-unstable] merge with xen-unstable.hg [ In reply to ]
# HG changeset patch
# User Alex Williamson <alex.williamson@hp.com>
# Date 1183400366 21600
# Node ID d146700adf714cdc13f924ab0de1dc895b6927f8
# Parent 443ce7edad0e8a3a640960890a72ce530887b38e
# Parent 182446677b6b56d58523050a6225a73d87a86ab7
merge with xen-unstable.hg
---
buildconfigs/mk.linux-2.6-xen | 5
tools/Makefile | 1
tools/console/daemon/io.c | 38 +++--
tools/examples/init.d/xendomains | 6
tools/examples/network-bridge | 12 +
tools/firmware/vmxassist/head.S | 76 -----------
tools/firmware/vmxassist/machine.h | 15 --
tools/firmware/vmxassist/setup.c | 58 ---------
tools/firmware/vmxassist/vm86.c | 75 +++++++----
tools/ioemu/target-i386-dm/exec-dm.c | 2
tools/libxc/xc_domain_restore.c | 11 -
tools/libxc/xc_misc.c | 28 ++++
tools/libxc/xenctrl.h | 4
tools/misc/xen-python-path | 9 +
tools/python/xen/util/auxbin.py | 9 +
tools/python/xen/xend/XendCheckpoint.py | 2
tools/python/xen/xend/XendConfig.py | 105 +++++++++++++---
tools/python/xen/xend/XendDomain.py | 4
tools/python/xen/xend/server/irqif.py | 2
tools/python/xen/xend/server/pciif.py | 3
tools/python/xen/xm/main.py | 14 +-
xen/acm/acm_core.c | 2
xen/arch/x86/Makefile | 1
xen/arch/x86/boot/edd.S | 24 +--
xen/arch/x86/boot/x86_32.S | 21 ++-
xen/arch/x86/boot/x86_64.S | 15 ++
xen/arch/x86/clear_page.S | 26 ++++
xen/arch/x86/domain.c | 9 -
xen/arch/x86/hvm/hvm.c | 16 --
xen/arch/x86/hvm/io.c | 1
xen/arch/x86/hvm/platform.c | 3
xen/arch/x86/hvm/svm/svm.c | 56 +++++++-
xen/arch/x86/hvm/svm/vmcb.c | 8 -
xen/arch/x86/hvm/vmx/vmcs.c | 5
xen/arch/x86/hvm/vmx/vmx.c | 154 ++++++++++++++----------
xen/arch/x86/io_apic.c | 4
xen/arch/x86/mm.c | 3
xen/arch/x86/platform_hypercall.c | 96 ++++++++++++++
xen/arch/x86/setup.c | 34 ++++-
xen/arch/x86/traps.c | 13 +-
xen/arch/x86/x86_32/entry.S | 173 +++++++++++++--------------
xen/arch/x86/x86_32/supervisor_mode_kernel.S | 27 ++--
xen/arch/x86/x86_32/traps.c | 11 -
xen/arch/x86/x86_64/Makefile | 2
xen/arch/x86/x86_64/compat/entry.S | 10 -
xen/arch/x86/x86_64/entry.S | 68 +++++-----
xen/arch/x86/x86_64/mm.c | 9 -
xen/arch/x86/x86_64/traps.c | 10 +
xen/arch/x86/x86_emulate.c | 1
xen/common/sysctl.c | 33 +++++
xen/include/asm-x86/edd.h | 18 ++
xen/include/asm-x86/hvm/hvm.h | 14 ++
xen/include/asm-x86/hvm/svm/emulate.h | 1
xen/include/asm-x86/hvm/svm/vmcb.h | 8 -
xen/include/asm-x86/hvm/trace.h | 1
xen/include/asm-x86/hvm/vmx/vmcs.h | 2
xen/include/asm-x86/hvm/vmx/vmx.h | 7 -
xen/include/asm-x86/page.h | 13 +-
xen/include/asm-x86/processor.h | 4
xen/include/asm-x86/x86_32/asm_defns.h | 76 +++++++----
xen/include/public/platform.h | 40 ++++++
xen/include/public/sysctl.h | 22 ++-
xen/include/public/trace.h | 1
63 files changed, 961 insertions(+), 560 deletions(-)

diff -r 443ce7edad0e -r d146700adf71 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Mon Jul 02 10:31:03 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-xen Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,11 @@ _build: build
_build: build

include buildconfigs/src.$(XEN_LINUX_SOURCE)
+
+# Default to allowing interface mismatch
+ifndef XEN_LINUX_ALLOW_INTERFACE_MISMATCH
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+endif

# The real action starts here!
.PHONY: build
diff -r 443ce7edad0e -r d146700adf71 tools/Makefile
--- a/tools/Makefile Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/Makefile Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ install: check
$(MAKE) ioemuinstall
$(INSTALL_DIR) $(DESTDIR)/var/xen/dump
$(INSTALL_DIR) $(DESTDIR)/var/log/xen
+ $(INSTALL_DIR) $(DESTDIR)/var/lib/xen

.PHONY: clean
clean: check_clean
diff -r 443ce7edad0e -r d146700adf71 tools/console/daemon/io.c
--- a/tools/console/daemon/io.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/console/daemon/io.c Mon Jul 02 12:19:26 2007 -0600
@@ -764,27 +764,31 @@ void handle_io(void)
/* XXX I wish we didn't have to busy wait for hypervisor logs
* but there's no obvious way to get event channel notifications
* for new HV log data as we can with guest */
- ret = select(max_fd + 1, &readfds, &writefds, 0, log_hv_fd != -1 ? &timeout : NULL);
-
+ ret = select(max_fd + 1, &readfds, &writefds, 0,
+ log_hv_fd != -1 ? &timeout : NULL);
+
+ if (log_reload) {
+ handle_log_reload();
+ log_reload = 0;
+ }
+
+ /* Abort if select failed, except for EINTR cases
+ which indicate a possible log reload */
if (ret == -1) {
- if (errno == EINTR) {
- if (log_reload) {
- handle_log_reload();
- log_reload = 0;
- }
+ if (errno == EINTR)
continue;
- }
dolog(LOG_ERR, "Failure in select: %d (%s)",
errno, strerror(errno));
break;
}

- /* Check for timeout */
- if (ret == 0) {
- if (log_hv_fd != -1)
- handle_hv_logs();
+ /* Always process HV logs even if not a timeout */
+ if (log_hv_fd != -1)
+ handle_hv_logs();
+
+ /* Must not check returned FDSET if it was a timeout */
+ if (ret == 0)
continue;
- }

if (FD_ISSET(xs_fileno(xs), &readfds))
handle_xs();
@@ -806,10 +810,14 @@ void handle_io(void)
}
}

- if (log_hv_fd != -1)
+ if (log_hv_fd != -1) {
close(log_hv_fd);
- if (xc_handle != -1)
+ log_hv_fd = -1;
+ }
+ if (xc_handle != -1) {
xc_interface_close(xc_handle);
+ xc_handle = -1;
+ }
}

/*
diff -r 443ce7edad0e -r d146700adf71 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/init.d/xendomains Mon Jul 02 12:19:26 2007 -0600
@@ -221,11 +221,12 @@ start()
if [ "$XENDOMAINS_RESTORE" = "true" ] &&
contains_something "$XENDOMAINS_SAVE"
then
- mkdir -p $(dirname "$LOCKFILE")
+ XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'`
+ mkdir -p $(dirname "$LOCKFILE")
touch $LOCKFILE
echo -n "Restoring Xen domains:"
saved_domains=`ls $XENDOMAINS_SAVE`
- for dom in $XENDOMAINS_SAVE/*; do
+ for dom in $XENDOMAINS_SAVED; do
echo -n " ${dom##*/}"
xm restore $dom
if [ $? -ne 0 ]; then
@@ -259,6 +260,7 @@ start()
if [ $? -eq 0 ] || is_running $dom; then
echo -n "(skip)"
else
+ echo "(booting)"
xm create --quiet --defconfig $dom
if [ $? -ne 0 ]; then
rc_failed $?
diff -r 443ce7edad0e -r d146700adf71 tools/examples/network-bridge
--- a/tools/examples/network-bridge Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/examples/network-bridge Mon Jul 02 12:19:26 2007 -0600
@@ -172,9 +172,21 @@ show_status () {
echo '============================================================'
}

+is_network_root () { # succeeds (0) when / is an nfs* filesystem or is mounted with _netdev
+ local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' /etc/mtab)
+ local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab)
+ # Patterns are left unquoted: bash >= 3.2 matches a quoted =~ operand as a literal string.
+ [[ "$rootfs" =~ ^nfs ]] || [[ "$rootopts" =~ _netdev ]] && return 0 || return 1
+}
+
op_start () {
if [ "${bridge}" = "null" ] ; then
return
+ fi
+
+ if is_network_root ; then
+ [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging not supported on network root; not starting"
+ return
fi

if link_exists "$pdev"; then
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/head.S
--- a/tools/firmware/vmxassist/head.S Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/head.S Mon Jul 02 12:19:26 2007 -0600
@@ -25,80 +25,12 @@
* switch happens to the environment below. The magic indicates
* that this is a valid context.
*/
-#ifdef TEST
- .byte 0x55, 0xaa
- .byte 0x80
- .code16
- jmp _start16
-#else
jmp _start
-#endif

.align 8
.long VMXASSIST_MAGIC
.long newctx /* new context */
.long oldctx /* old context */
-
-#ifdef TEST
-/*
- * We are running in 16-bit. Get into the protected mode as soon as
- * possible. We use our own (minimal) GDT to get started.
- *
- * ROM is a misnomer as this code isn't really rommable (although it
- * only requires a few changes) but it does live in a BIOS ROM segment.
- * This code allows me to debug vmxassists under (a modified version of)
- * Bochs and load it as a "optromimage1".
- */
- .code16
- .globl _start16
-_start16:
- cli
-
- /* load our own global descriptor table */
- data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
-
- /* go to protected mode */
- movl %cr0, %eax
- orl $(CR0_PE), %eax
- movl %eax, %cr0
- data32 ljmp $0x08, $1f
-
- .align 32
- .globl rom_gdt
-rom_gdt:
- .word 0, 0 /* 0x00: reserved */
- .byte 0, 0, 0, 0
-
- .word 0xFFFF, 0 /* 0x08: CS 32-bit */
- .byte 0, 0x9A, 0xCF, 0
-
- .word 0xFFFF, 0 /* 0x10: CS 32-bit */
- .byte 0, 0x92, 0xCF, 0
-rom_gdt_end:
-
- .align 4
- .globl rom_gdtr
-rom_gdtr:
- .word rom_gdt_end - rom_gdt - 1
- .long rom_gdt
-
- .code32
-1:
- /* welcome to the 32-bit world */
- movw $0x10, %ax
- movw %ax, %ds
- movw %ax, %es
- movw %ax, %ss
- movw %ax, %fs
- movw %ax, %gs
-
- /* enable Bochs debug facilities */
- movw $0x8A00, %dx
- movw $0x8A00, %ax
- outw %ax, (%dx)
-
- jmp _start
-#endif /* TEST */

/*
* This is the real start. Control was transfered to this point
@@ -111,9 +43,6 @@ _start:
cli

/* save register parameters to C land */
-#ifdef TEST
- xorl %edx, %edx
-#endif

/* clear bss */
cld
@@ -145,11 +74,6 @@ halt:
halt:
push $halt_msg
call printf
-#ifdef TEST
- movw $0x8A00, %dx
- movw $0x8AE0, %ax
- outw %ax, (%dx)
-#endif
cli
jmp .

diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/machine.h
--- a/tools/firmware/vmxassist/machine.h Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/machine.h Mon Jul 02 12:19:26 2007 -0600
@@ -55,13 +55,6 @@
#define PGMASK (~(PGSIZE - 1)) /* page mask */
#define LPGSIZE (1 << LOG_PDSIZE) /* large page size */
#define LPGMASK (~(LPGSIZE - 1)) /* large page mask */
-
-#ifdef TEST
-#define PTE_P (1 << 0) /* Present */
-#define PTE_RW (1 << 1) /* Read/Write */
-#define PTE_US (1 << 2) /* User/Supervisor */
-#define PTE_PS (1 << 7) /* Page Size */
-#endif

/* Programmable Interrupt Contoller (PIC) defines */
#define PIC_MASTER 0x20
@@ -195,14 +188,6 @@ set_cr4(unsigned value)
__asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
}

-#ifdef TEST
-static inline void
-breakpoint(void)
-{
- outw(0x8A00, 0x8AE0);
-}
-#endif /* TEST */
-
#endif /* __ASSEMBLY__ */

#endif /* __MACHINE_H__ */
diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/setup.c
--- a/tools/firmware/vmxassist/setup.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/setup.c Mon Jul 02 12:19:26 2007 -0600
@@ -46,19 +46,6 @@ unsigned long long idt[NR_TRAPS] __attri
unsigned long long idt[NR_TRAPS] __attribute__ ((aligned(32)));

struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
-
-#ifdef TEST
-unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
-
-struct e820entry e820map[] = {
- { 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
- { 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
- { 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
- { 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
- { 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
- { 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
-};
-#endif /* TEST */

struct vmx_assist_context oldctx;
struct vmx_assist_context newctx;
@@ -84,38 +71,11 @@ banner(void)
(((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
memory_size += 0x400 << 10; /* + 1MB */

-#ifdef TEST
- /* Create an SMAP for our debug environment */
- e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
- e820map[5].addr = memory_size - PGSIZE;
- e820map[6].addr = memory_size;
- e820map[7].addr += memory_size;
-
- *HVM_E820_NR = sizeof(e820map)/sizeof(e820map[0]);
- memcpy(HVM_E820, e820map, sizeof(e820map));
-#endif
-
printf("Memory size %ld MB\n", memory_size >> 20);
printf("E820 map:\n");
print_e820_map(HVM_E820, *HVM_E820_NR);
printf("\n");
}
-
-#ifdef TEST
-void
-setup_paging(void)
-{
- unsigned long i;
-
- if (((unsigned)pgd & ~PGMASK) != 0)
- panic("PGD not page aligned");
- set_cr4(get_cr4() | CR4_PSE);
- for (i = 0; i < NR_PGD; i++)
- pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
- set_cr3((unsigned) pgd);
- set_cr0(get_cr0() | (CR0_PE|CR0_PG));
-}
-#endif /* TEST */

void
setup_gdt(void)
@@ -211,11 +171,7 @@ enter_real_mode(struct regs *regs)
regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
if (booting_cpu == 0) {
regs->cs = 0xF000; /* ROM BIOS POST entry point */
-#ifdef TEST
- regs->eip = 0xFFE0;
-#else
regs->eip = 0xFFF0;
-#endif
} else {
regs->cs = booting_vector << 8; /* AP entry point */
regs->eip = 0;
@@ -242,9 +198,10 @@ enter_real_mode(struct regs *regs)
}

/* go from protected to real mode */
- regs->eflags |= EFLAGS_VM;
set_mode(regs, VM86_PROTECTED_TO_REAL);
emulate(regs);
+ if (mode != VM86_REAL)
+ panic("failed to emulate between clear PE and long jump.\n");
}

/*
@@ -269,13 +226,8 @@ setup_ctx(void)
* more natural to enable CR0.PE to cause a world switch to
* protected mode rather than disabling it.
*/
-#ifdef TEST
- c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
- c->cr3 = (unsigned long) pgd;
-#else
c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
c->cr3 = 0;
-#endif
c->cr4 = get_cr4();

c->idtr_limit = sizeof(idt)-1;
@@ -369,16 +321,10 @@ main(void)
if (booting_cpu == 0)
banner();

-#ifdef TEST
- setup_paging();
-#endif
-
setup_gdt();
setup_idt();

-#ifndef TEST
set_cr4(get_cr4() | CR4_VME);
-#endif

setup_ctx();

diff -r 443ce7edad0e -r d146700adf71 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/firmware/vmxassist/vm86.c Mon Jul 02 12:19:26 2007 -0600
@@ -561,11 +561,7 @@ lmsw(struct regs *regs, unsigned prefix,
unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;

TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
-#ifndef TEST
oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
-#else
- oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
-#endif
if (cr0 & CR0_PE)
set_mode(regs, VM86_REAL_TO_PROTECTED);

@@ -584,8 +580,13 @@ movr(struct regs *regs, unsigned prefix,
unsigned addr = operand(prefix, regs, modrm);
unsigned val, r = (modrm >> 3) & 7;

- if ((modrm & 0xC0) == 0xC0) /* no registers */
- return 0;
+ if ((modrm & 0xC0) == 0xC0) {
+ /*
+ * Emulate all guest instructions in protected to real mode.
+ */
+ if (mode != VM86_PROTECTED_TO_REAL)
+ return 0;
+ }

switch (opc) {
case 0x88: /* addr32 mov r8, r/m8 */
@@ -656,13 +657,8 @@ movcr(struct regs *regs, unsigned prefix
TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
switch (cr) {
case 0:
-#ifndef TEST
setreg32(regs, modrm,
oldctx.cr0 & ~(CR0_PE | CR0_NE));
-#else
- setreg32(regs, modrm,
- oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
-#endif
break;
case 2:
setreg32(regs, modrm, get_cr2());
@@ -680,9 +676,6 @@ movcr(struct regs *regs, unsigned prefix
switch (cr) {
case 0:
oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
-#ifdef TEST
- oldctx.cr0 |= CR0_PG;
-#endif
if (getreg32(regs, modrm) & CR0_PE)
set_mode(regs, VM86_REAL_TO_PROTECTED);
else
@@ -818,8 +811,13 @@ mov_to_seg(struct regs *regs, unsigned p
{
unsigned modrm = fetch8(regs);

- /* Only need to emulate segment loads in real->protected mode. */
- if (mode != VM86_REAL_TO_PROTECTED)
+ /*
+ * Emulate segment loads in:
+ * 1) real->protected mode.
+ * 2) protected->real mode.
+ */
+ if ((mode != VM86_REAL_TO_PROTECTED) &&
+ (mode != VM86_PROTECTED_TO_REAL))
return 0;

/* Register source only. */
@@ -829,6 +827,8 @@ mov_to_seg(struct regs *regs, unsigned p
switch ((modrm & 0x38) >> 3) {
case 0: /* es */
regs->ves = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.ves = 0;
oldctx.es_sel = regs->ves;
return 1;
@@ -837,21 +837,29 @@ mov_to_seg(struct regs *regs, unsigned p

case 2: /* ss */
regs->uss = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.uss = 0;
oldctx.ss_sel = regs->uss;
return 1;
case 3: /* ds */
regs->vds = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.vds = 0;
oldctx.ds_sel = regs->vds;
return 1;
case 4: /* fs */
regs->vfs = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.vfs = 0;
oldctx.fs_sel = regs->vfs;
return 1;
case 5: /* gs */
regs->vgs = getreg16(regs, modrm);
+ if (mode == VM86_PROTECTED_TO_REAL)
+ return 1;
saved_rm_regs.vgs = 0;
oldctx.gs_sel = regs->vgs;
return 1;
@@ -1067,7 +1075,8 @@ set_mode(struct regs *regs, enum vm86_mo
}

mode = newmode;
- TRACE((regs, 0, states[mode]));
+ if (mode != VM86_PROTECTED)
+ TRACE((regs, 0, states[mode]));
}

static void
@@ -1086,7 +1095,7 @@ jmpl(struct regs *regs, int prefix)

if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */
set_mode(regs, VM86_PROTECTED);
- else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
+ else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */
set_mode(regs, VM86_REAL);
else
panic("jmpl");
@@ -1280,6 +1289,12 @@ opcode(struct regs *regs)
unsigned eip = regs->eip;
unsigned opc, modrm, disp;
unsigned prefix = 0;
+
+ if (mode == VM86_PROTECTED_TO_REAL &&
+ oldctx.cs_arbytes.fields.default_ops_size) {
+ prefix |= DATA32;
+ prefix |= ADDR32;
+ }

for (;;) {
switch ((opc = fetch8(regs))) {
@@ -1391,17 +1406,29 @@ opcode(struct regs *regs)
continue;

case 0x66:
- TRACE((regs, regs->eip - eip, "data32"));
- prefix |= DATA32;
+ if (mode == VM86_PROTECTED_TO_REAL &&
+ oldctx.cs_arbytes.fields.default_ops_size) {
+ TRACE((regs, regs->eip - eip, "data16"));
+ prefix &= ~DATA32;
+ } else {
+ TRACE((regs, regs->eip - eip, "data32"));
+ prefix |= DATA32;
+ }
continue;

case 0x67:
- TRACE((regs, regs->eip - eip, "addr32"));
- prefix |= ADDR32;
+ if (mode == VM86_PROTECTED_TO_REAL &&
+ oldctx.cs_arbytes.fields.default_ops_size) {
+ TRACE((regs, regs->eip - eip, "addr16"));
+ prefix &= ~ADDR32;
+ } else {
+ TRACE((regs, regs->eip - eip, "addr32"));
+ prefix |= ADDR32;
+ }
continue;

- case 0x88: /* addr32 mov r8, r/m8 */
- case 0x8A: /* addr32 mov r/m8, r8 */
+ case 0x88: /* mov r8, r/m8 */
+ case 0x8A: /* mov r/m8, r8 */
if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
goto invalid;
if ((prefix & ADDR32) == 0)
diff -r 443ce7edad0e -r d146700adf71 tools/ioemu/target-i386-dm/exec-dm.c
--- a/tools/ioemu/target-i386-dm/exec-dm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/ioemu/target-i386-dm/exec-dm.c Mon Jul 02 12:19:26 2007 -0600
@@ -448,7 +448,7 @@ static void memcpy_words(void *dst, void
{
asm (
" movl %%edx,%%ecx \n"
-#ifdef __x86_64
+#ifdef __x86_64__
" shrl $3,%%ecx \n"
" andl $7,%%edx \n"
" rep movsq \n"
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_domain_restore.c
--- a/tools/libxc/xc_domain_restore.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_domain_restore.c Mon Jul 02 12:19:26 2007 -0600
@@ -465,7 +465,7 @@ int xc_domain_restore(int xc_handle, int
if ( j == 0 )
break; /* our work here is done */

- if ( j > MAX_BATCH_SIZE )
+ if ( (j > MAX_BATCH_SIZE) || (j < 0) )
{
ERROR("Max batch size exceeded. Giving up.");
goto out;
@@ -903,13 +903,14 @@ int xc_domain_restore(int xc_handle, int

/* Get the list of PFNs that are not in the psuedo-phys map */
{
- unsigned int count;
+ unsigned int count = 0;
unsigned long *pfntab;
int nr_frees, rc;

- if ( !read_exact(io_fd, &count, sizeof(count)) )
- {
- ERROR("Error when reading pfn count");
+ if ( !read_exact(io_fd, &count, sizeof(count)) ||
+ (count > (1U << 28)) ) /* up to 1TB of address space */
+ {
+ ERROR("Error when reading pfn count (= %u)", count);
goto out;
}

diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xc_misc.c Mon Jul 02 12:19:26 2007 -0600
@@ -101,13 +101,37 @@ int xc_perfc_control(int xc_handle,

rc = do_sysctl(xc_handle, &sysctl);

- if (nbr_desc)
+ if ( nbr_desc )
*nbr_desc = sysctl.u.perfc_op.nr_counters;
- if (nbr_val)
+ if ( nbr_val )
*nbr_val = sysctl.u.perfc_op.nr_vals;

return rc;
}
+
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+ xc_cpuinfo_t *info, int *nr_cpus)
+{
+ int rc;
+ DECLARE_SYSCTL;
+
+ sysctl.cmd = XEN_SYSCTL_getcpuinfo;
+ sysctl.u.getcpuinfo.max_cpus = max_cpus;
+ set_xen_guest_handle(sysctl.u.getcpuinfo.info, info);
+
+ if ( (rc = lock_pages(info, max_cpus*sizeof(*info))) != 0 )
+ return rc;
+
+ rc = do_sysctl(xc_handle, &sysctl);
+
+ unlock_pages(info, max_cpus*sizeof(*info));
+
+ if ( nr_cpus )
+ *nr_cpus = sysctl.u.getcpuinfo.nr_cpus;
+
+ return rc;
+}
+

int xc_hvm_set_pci_intx_level(
int xc_handle, domid_t dom,
diff -r 443ce7edad0e -r d146700adf71 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/libxc/xenctrl.h Mon Jul 02 12:19:26 2007 -0600
@@ -491,6 +491,10 @@ int xc_sched_id(int xc_handle,
int xc_sched_id(int xc_handle,
int *sched_id);

+typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t;
+int xc_getcpuinfo(int xc_handle, int max_cpus,
+ xc_cpuinfo_t *info, int *nr_cpus);
+
int xc_domain_setmaxmem(int xc_handle,
uint32_t domid,
unsigned int max_memkb);
diff -r 443ce7edad0e -r d146700adf71 tools/misc/xen-python-path
--- a/tools/misc/xen-python-path Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/misc/xen-python-path Mon Jul 02 12:19:26 2007 -0600
@@ -28,8 +28,13 @@ import os.path
import os.path
import sys

-for p in ['python%s' % sys.version[:3], 'python']:
- for l in ['/usr/lib64', '/usr/lib']:
+usr = os.path.dirname(os.path.dirname(sys.argv[0]))
+list = [ os.path.join(usr,'lib64') ]
+list += [ os.path.join(usr,'lib') ]
+list += ['/usr/lib64', '/usr/lib']
+
+for l in list:
+ for p in ['python%s' % sys.version[:3], 'python']:
for k in ['', 'site-packages/']:
d = os.path.join(l, p, k)
if os.path.exists(os.path.join(d, AUXBIN)):
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/util/auxbin.py
--- a/tools/python/xen/util/auxbin.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/util/auxbin.py Mon Jul 02 12:19:26 2007 -0600
@@ -27,6 +27,7 @@ LIB_64_ARCHS = [. 'x86_64', 's390x', 'spa

import os
import os.path
+import sys


def execute(exe, args = None):
@@ -47,6 +48,14 @@ def path():

def libpath():
machine = os.uname()[4]
+ if sys.argv[0] != '-c':
+ prefix = os.path.dirname(os.path.dirname(sys.argv[0]))
+ path = os.path.join(prefix, os.path.basename(LIB_64))
+ if machine in LIB_64_ARCHS and os.path.exists(path):
+ return path
+ path = os.path.join(prefix, os.path.basename(LIB_32))
+ if os.path.exists(path):
+ return path
if machine in LIB_64_ARCHS and os.path.exists(LIB_64):
return LIB_64
else:
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendCheckpoint.py Mon Jul 02 12:19:26 2007 -0600
@@ -148,6 +148,8 @@ def save(fd, dominfo, network, live, dst
except:
log.exception("Failed to reset the migrating domain's name")

+ raise exn
+

def restore(xd, fd, dominfo = None, paused = False):
signature = read_exact(fd, len(SIGNATURE),
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py Mon Jul 02 12:19:26 2007 -0600
@@ -28,6 +28,7 @@ from xen.xend.PrettyPrint import prettyp
from xen.xend.PrettyPrint import prettyprintstring
from xen.xend.XendConstants import DOM_STATE_HALTED
from xen.xend.server.netif import randomMAC
+from xen.util.blkif import blkdev_name_to_number

log = logging.getLogger("xend.XendConfig")
log.setLevel(logging.WARN)
@@ -934,6 +935,62 @@ class XendConfig(dict):

return sxpr

+ def _blkdev_name_to_number(self, dev):
+ if 'ioemu:' in dev:
+ _, dev = dev.split(':', 1)
+ try:
+ dev, _ = dev.split(':', 1)
+ except ValueError:
+ pass
+
+ try:
+ devid = int(dev)
+ except ValueError:
+ # devid is not a number but a string containing either device
+ # name (e.g. xvda) or device_type/device_id (e.g. vbd/51728)
+ dev2 = type(dev) is str and dev.split('/')[-1] or None
+ if dev2 == None:
+ log.debug("Could not check the device %s", dev)
+ return None
+ try:
+ devid = int(dev2)
+ except ValueError:
+ devid = blkdev_name_to_number(dev2)
+ if devid == None:
+ log.debug("The device %s is not device name", dev2)
+ return None
+ return devid
+
+ def device_duplicate_check(self, dev_type, dev_info, defined_config):
+ defined_devices_sxpr = self.all_devices_sxpr(target = defined_config)
+
+ if dev_type == 'vbd':
+ dev_uname = dev_info.get('uname')
+ blkdev_name = dev_info.get('dev')
+ devid = self._blkdev_name_to_number(blkdev_name)
+ if devid == None:
+ return
+
+ for o_dev_type, o_dev_info in defined_devices_sxpr:
+ if dev_type == o_dev_type:
+ if dev_uname == sxp.child_value(o_dev_info, 'uname'):
+ raise XendConfigError('The uname "%s" is already defined' %
+ dev_uname)
+ o_blkdev_name = sxp.child_value(o_dev_info, 'dev')
+ o_devid = self._blkdev_name_to_number(o_blkdev_name)
+ if o_devid != None and devid == o_devid:
+ raise XendConfigError('The device "%s" is already defined' %
+ blkdev_name)
+
+ elif dev_type == 'vif':
+ dev_mac = dev_info.get('mac')
+
+ for o_dev_type, o_dev_info in defined_devices_sxpr:
+ if dev_type == o_dev_type:
+ if dev_mac == sxp.child_value(o_dev_info, 'mac'):
+ raise XendConfigError('The mac "%s" is already defined' %
+ dev_mac)
+
def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None,
target = None):
"""Add a device configuration in SXP format or XenAPI struct format.
@@ -997,6 +1054,8 @@ class XendConfig(dict):
if dev_type == 'vif':
if not dev_info.get('mac'):
dev_info['mac'] = randomMAC()
+
+ self.device_duplicate_check(dev_type, dev_info, target)

# create uuid if it doesn't exist
dev_uuid = dev_info.get('uuid', None)
@@ -1275,15 +1334,19 @@ class XendConfig(dict):
return False


- def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None):
+ def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None, target = None):
"""Get Device SXPR by either giving the device UUID or (type, config).

@rtype: list of lists
@return: device config sxpr
"""
sxpr = []
- if dev_uuid != None and dev_uuid in self['devices']:
- dev_type, dev_info = self['devices'][dev_uuid]
+
+ if target == None:
+ target = self
+
+ if dev_uuid != None and dev_uuid in target['devices']:
+ dev_type, dev_info = target['devices'][dev_uuid]

if dev_type == None or dev_info == None:
raise XendConfigError("Required either UUID or device type and "
@@ -1300,8 +1363,12 @@ class XendConfig(dict):

return sxpr

- def ordered_device_refs(self):
+ def ordered_device_refs(self, target = None):
result = []
+
+ if target == None:
+ target = self
+
# vkbd devices *must* be before vfb devices, otherwise
# there is a race condition when setting up devices
# where the daemon spawned for the vfb may write stuff
@@ -1309,27 +1376,30 @@ class XendConfig(dict):
# setup permissions on the vkbd backend path. This race
# results in domain creation failing with 'device already
# connected' messages
- result.extend([u for u in self['devices'].keys() if self['devices'][u][0] == 'vkbd'])
-
- result.extend(self['console_refs'] +
- self['vbd_refs'] +
- self['vif_refs'] +
- self['vtpm_refs'])
-
- result.extend([u for u in self['devices'].keys() if u not in result])
+ result.extend([u for u in target['devices'].keys() if target['devices'][u][0] == 'vkbd'])
+
+ result.extend(target.get('console_refs', []) +
+ target.get('vbd_refs', []) +
+ target.get('vif_refs', []) +
+ target.get('vtpm_refs', []))
+
+ result.extend([u for u in target['devices'].keys() if u not in result])
return result

- def all_devices_sxpr(self):
+ def all_devices_sxpr(self, target = None):
"""Returns the SXPR for all devices in the current configuration."""
sxprs = []
pci_devs = []

- if 'devices' not in self:
+ if target == None:
+ target = self
+
+ if 'devices' not in target:
return sxprs

- ordered_refs = self.ordered_device_refs()
+ ordered_refs = self.ordered_device_refs(target = target)
for dev_uuid in ordered_refs:
- dev_type, dev_info = self['devices'][dev_uuid]
+ dev_type, dev_info = target['devices'][dev_uuid]
if dev_type == 'pci': # special case for pci devices
sxpr = [['uuid', dev_info['uuid']]]
for pci_dev_info in dev_info['devs']:
@@ -1340,7 +1410,8 @@ class XendConfig(dict):
sxprs.append((dev_type, sxpr))
else:
sxpr = self.device_sxpr(dev_type = dev_type,
- dev_info = dev_info)
+ dev_info = dev_info,
+ target = target)
sxprs.append((dev_type, sxpr))

return sxprs
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/XendDomain.py Mon Jul 02 12:19:26 2007 -0600
@@ -1262,8 +1262,10 @@ class XendDomain:
try:
XendCheckpoint.save(fd, dominfo, False, False, dst,
checkpoint=checkpoint)
- finally:
+ except Exception, e:
os.close(fd)
+ raise e
+ os.close(fd)
except OSError, ex:
raise XendError("can't write guest state file %s: %s" %
(dst, ex[1]))
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/irqif.py
--- a/tools/python/xen/xend/server/irqif.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/irqif.py Mon Jul 02 12:19:26 2007 -0600
@@ -61,7 +61,7 @@ class IRQController(DevController):

pirq = get_param('irq')

- rc = xc.domain_irq_permission(dom = self.getDomid(),
+ rc = xc.domain_irq_permission(domid = self.getDomid(),
pirq = pirq,
allow_access = True)

diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xend/server/pciif.py
--- a/tools/python/xen/xend/server/pciif.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xend/server/pciif.py Mon Jul 02 12:19:26 2007 -0600
@@ -185,3 +185,6 @@ class PciController(DevController):

def waitForBackend(self,devid):
return (0, "ok - no hotplug")
+
+ def migrate(self, config, network, dst, step, domName):
+ raise XendError('Migration not permitted with assigned PCI device.')
diff -r 443ce7edad0e -r d146700adf71 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Mon Jul 02 10:31:03 2007 -0600
+++ b/tools/python/xen/xm/main.py Mon Jul 02 12:19:26 2007 -0600
@@ -2168,9 +2168,7 @@ def xm_network_attach(args):
server.xend.domain.device_create(dom, vif)


-def detach(args, command, deviceClass):
- arg_check(args, command, 2, 3)
-
+def detach(args, deviceClass):
dom = args[0]
dev = args[1]
try:
@@ -2204,16 +2202,17 @@ def xm_block_detach(args):
raise OptionError("Cannot find device '%s' in domain '%s'"
% (dev,dom))
else:
+ arg_check(args, 'block-detach', 2, 3)
try:
- detach(args, 'block-detach', 'vbd')
+ detach(args, 'vbd')
return
except:
pass
- detach(args, 'block-detach', 'tap')
+ detach(args, 'tap')

def xm_network_detach(args):
if serverType == SERVER_XEN_API:
- arg_check(args, "xm_block_detach", 2, 3)
+ arg_check(args, "xm_network_detach", 2, 3)
dom = args[0]
devid = args[1]
vif_refs = server.xenapi.VM.get_VIFs(get_single_vm(dom))
@@ -2227,7 +2226,8 @@ def xm_network_detach(args):
else:
print "Cannot find device '%s' in domain '%s'" % (devid,dom)
else:
- detach(args, 'network-detach', 'vif')
+ arg_check(args, 'network-detach', 2, 3)
+ detach(args, 'vif')


def xm_vnet_list(args):
diff -r 443ce7edad0e -r d146700adf71 xen/acm/acm_core.c
--- a/xen/acm/acm_core.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/acm/acm_core.c Mon Jul 02 12:19:26 2007 -0600
@@ -89,7 +89,7 @@ static void __init set_dom0_ssidref(cons
if (hi < ACM_MAX_NUM_TYPES && hi >= 1)
dom0_ste_ssidref = hi;
for (i = 0; i < sizeof(polname); i++) {
- polname[i] = c[7+i];
+ polname[i] = c[5+i];
if (polname[i] == '\0' || polname[i] == '\t' ||
polname[i] == '\n' || polname[i] == ' ' ||
polname[i] == ':') {
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/Makefile Mon Jul 02 12:19:26 2007 -0600
@@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64

obj-y += apic.o
obj-y += bitops.o
+obj-y += clear_page.o
obj-y += compat.o
obj-y += delay.o
obj-y += dmi_scan.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/edd.S
--- a/xen/arch/x86/boot/edd.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/edd.S Mon Jul 02 12:19:26 2007 -0600
@@ -24,7 +24,7 @@
/* Maximum number of EDD information structures at boot_edd_info. */
#define EDD_INFO_MAX 6

-/* Maximum number of MBR signatures at boot_edd_signature. */
+/* Maximum number of MBR signatures at boot_mbr_signature. */
#define EDD_MBR_SIG_MAX 16

/* Size of components of EDD information structure. */
@@ -40,10 +40,8 @@ get_edd:
# Read the first sector of each BIOS disk device and store the 4-byte signature
edd_mbr_sig_start:
movb $0x80, %dl # from device 80
- movw $bootsym(boot_edd_signature),%bx # store buffer ptr in bx
+ movw $bootsym(boot_mbr_signature),%bx # store buffer ptr in bx
edd_mbr_sig_read:
- movl $0xFFFFFFFF, %eax
- movl %eax, (%bx) # assume failure
pushw %bx
movb $0x02, %ah # 0x02 Read Sectors
movb $1, %al # read 1 sector
@@ -64,11 +62,12 @@ edd_mbr_sig_read:
cmpb $0, %ah # some BIOSes do not set CF
jne edd_mbr_sig_done # on failure, we're done.
movl bootsym(boot_edd_info)+EDD_MBR_SIG_OFFSET,%eax
- movl %eax, (%bx) # store signature from MBR
- incb bootsym(boot_edd_signature_nr) # note that we stored something
+ movb %dl, (%bx) # store BIOS drive number
+ movl %eax, 4(%bx) # store signature from MBR
+ incb bootsym(boot_mbr_signature_nr) # note that we stored something
incb %dl # increment to next device
- addw $4, %bx # increment sig buffer ptr
- cmpb $EDD_MBR_SIG_MAX,bootsym(boot_edd_signature_nr)
+ addw $8, %bx # increment sig buffer ptr
+ cmpb $EDD_MBR_SIG_MAX,bootsym(boot_mbr_signature_nr)
jb edd_mbr_sig_read
edd_mbr_sig_done:

@@ -150,12 +149,13 @@ opt_edd:
opt_edd:
.byte 0 # edd=on/off/skipmbr

-.globl boot_edd_info_nr, boot_edd_signature_nr
+.globl boot_edd_info, boot_edd_info_nr
+.globl boot_mbr_signature, boot_mbr_signature_nr
boot_edd_info_nr:
.byte 0
-boot_edd_signature_nr:
+boot_mbr_signature_nr:
.byte 0
-boot_edd_signature:
- .fill EDD_MBR_SIG_MAX*4,1,0
+boot_mbr_signature:
+ .fill EDD_MBR_SIG_MAX*8,1,0
boot_edd_info:
.fill 512,1,0 # big enough for a disc sector
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_32.S
--- a/xen/arch/x86/boot/x86_32.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_32.S Mon Jul 02 12:19:26 2007 -0600
@@ -36,15 +36,29 @@ 1: mov %eax,(%edi)

/* This is the default interrupt handler. */
int_msg:
- .asciz "Unknown interrupt\n"
+ .asciz "Unknown interrupt (cr2=%08x)\n"
+hex_msg:
+ .asciz " %08x"
ALIGN
ignore_int:
+ pusha
cld
mov $(__HYPERVISOR_DS),%eax
mov %eax,%ds
mov %eax,%es
+ mov %cr2,%eax
+ push %eax
pushl $int_msg
call printk
+ add $8,%esp
+ mov %esp,%ebp
+0: pushl (%ebp)
+ add $4,%ebp
+ pushl $hex_msg
+ call printk
+ add $8,%esp
+ test $0xffc,%ebp
+ jnz 0b
1: jmp 1b

ENTRY(stack_start)
@@ -65,11 +79,6 @@ gdt_descr:
gdt_descr:
.word LAST_RESERVED_GDT_BYTE
.long gdt_table - FIRST_RESERVED_GDT_BYTE
-
- .word 0
-nopaging_gdt_descr:
- .word LAST_RESERVED_GDT_BYTE
- .long sym_phys(gdt_table) - FIRST_RESERVED_GDT_BYTE

.align PAGE_SIZE, 0
/* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/boot/x86_64.S
--- a/xen/arch/x86/boot/x86_64.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/boot/x86_64.S Mon Jul 02 12:19:26 2007 -0600
@@ -56,12 +56,23 @@ 1: movq %rax,(%rdi)

/* This is the default interrupt handler. */
int_msg:
- .asciz "Unknown interrupt\n"
+ .asciz "Unknown interrupt (cr2=%016lx)\n"
+hex_msg:
+ .asciz " %016lx"
ignore_int:
- cld
+ SAVE_ALL
+ movq %cr2,%rsi
leaq int_msg(%rip),%rdi
xorl %eax,%eax
call printk
+ movq %rsp,%rbp
+0: movq (%rbp),%rsi
+ addq $8,%rbp
+ leaq hex_msg(%rip),%rdi
+ xorl %eax,%eax
+ call printk
+ testq $0xff8,%rbp
+ jnz 0b
1: jmp 1b


diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/clear_page.S
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/clear_page.S Mon Jul 02 12:19:26 2007 -0600
@@ -0,0 +1,26 @@
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define ptr_reg %edx
+#else
+#define ptr_reg %rdi
+#endif
+
+ENTRY(clear_page_sse2)
+#ifdef __i386__
+ mov 4(%esp), ptr_reg
+#endif
+ mov $PAGE_SIZE/16, %ecx
+ xor %eax,%eax
+
+0: dec %ecx
+ movnti %eax, (ptr_reg)
+ movnti %eax, 4(ptr_reg)
+ movnti %eax, 8(ptr_reg)
+ movnti %eax, 12(ptr_reg)
+ lea 16(ptr_reg), ptr_reg
+ jnz 0b
+
+ sfence
+ ret
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/domain.c Mon Jul 02 12:19:26 2007 -0600
@@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v,
pg = alloc_domheap_page(NULL);
if ( !pg )
return -ENOMEM;
- d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
+ d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
+ clear_page(d->arch.mm_arg_xlat_l3);
}

l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
@@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d)

if ( (pg = alloc_domheap_page(NULL)) == NULL )
goto fail;
- d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
+ d->arch.mm_perdomain_l2 = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l2);
for ( i = 0; i < (1 << pdpt_order); i++ )
d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
@@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d)

if ( (pg = alloc_domheap_page(NULL)) == NULL )
goto fail;
- d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
+ d->arch.mm_perdomain_l3 = page_to_virt(pg);
+ clear_page(d->arch.mm_perdomain_l3);
d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
__PAGE_HYPERVISOR);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c Mon Jul 02 12:19:26 2007 -0600
@@ -242,6 +242,11 @@ void hvm_domain_relinquish_resources(str
{
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+
+ pit_deinit(d);
+ rtc_deinit(d);
+ pmtimer_deinit(d);
+ hpet_deinit(d);
}

void hvm_domain_destroy(struct domain *d)
@@ -421,22 +426,11 @@ int hvm_vcpu_initialise(struct vcpu *v)

void hvm_vcpu_destroy(struct vcpu *v)
{
- struct domain *d = v->domain;
-
vlapic_destroy(v);
hvm_funcs.vcpu_destroy(v);

/* Event channel is already freed by evtchn_destroy(). */
/*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
-
- if ( v->vcpu_id == 0 )
- {
- /* NB. All these really belong in hvm_domain_destroy(). */
- pit_deinit(d);
- rtc_deinit(d);
- pmtimer_deinit(d);
- hpet_deinit(d);
- }
}


diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/io.c Mon Jul 02 12:19:26 2007 -0600
@@ -858,6 +858,7 @@ void hvm_io_assist(void)
}

/* Copy register changes back into current guest state. */
+ regs->eflags &= ~X86_EFLAGS_RF;
hvm_load_cpu_guest_regs(v, regs);
memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);

diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/platform.c Mon Jul 02 12:19:26 2007 -0600
@@ -1065,6 +1065,7 @@ void handle_mmio(unsigned long gpa)
}

regs->eip += inst_len; /* advance %eip */
+ regs->eflags &= ~X86_EFLAGS_RF;

switch ( mmio_op->instr ) {
case INSTR_MOV:
@@ -1122,6 +1123,7 @@ void handle_mmio(unsigned long gpa)
/* IO read --> memory write */
if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
regs->eip -= inst_len; /* do not advance %eip */
+ regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
hvm_inject_exception(TRAP_page_fault, errcode, addr);
return;
}
@@ -1150,6 +1152,7 @@ void handle_mmio(unsigned long gpa)
/* Failed on the page-spanning copy. Inject PF into
* the guest for the address where we failed */
regs->eip -= inst_len; /* do not advance %eip */
+ regs->eflags |= X86_EFLAGS_RF; /* RF was set by #PF */
/* Must set CR2 at the failing address */
addr += size - rv;
gdprintk(XENLOG_DEBUG, "Pagefault on non-io side of a "
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c Mon Jul 02 12:19:26 2007 -0600
@@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str
}

skip_cr3:
- vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;

vmcb->idtr.limit = c->idtr_limit;
@@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str
/* update VMCB for nested paging restore */
if ( paging_mode_hap(v->domain) ) {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
vmcb->cr3 = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
@@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct
: : "a" (__pa(root_vmcb[cpu])) );

#ifdef __x86_64__
- /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
- idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
+ /* Resume use of ISTs now that the host TR is reinstated. */
+ idt_tables[cpu][TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
#endif
}

@@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc
set_segment_register(ss, 0);

/*
- * Cannot use IST2 for NMIs while we are running with the guest TR. But
- * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
+ * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
+ * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
*/
- idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
+ idt_tables[cpu][TRAP_double_fault].a &= ~(3UL << 32);
+ idt_tables[cpu][TRAP_nmi].a &= ~(3UL << 32);
+ idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
#endif

svm_restore_dr(v);
@@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr,
break;

case 4: /* CR4 */
+ if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set reserved bit in CR4: %lx",
+ value);
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ break;
+ }
+
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
paging_update_paging_modes(v);
/* signal paging update to ASID handler */
svm_asid_g_update_paging (v);
@@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr,
}

v->arch.hvm_svm.cpu_shadow_cr4 = value;
- vmcb->cr4 = value | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = value | HVM_CR4_HOST_MASK;

/*
* Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
@@ -2071,9 +2086,11 @@ static inline void svm_do_msr_access(
case MSR_IA32_TIME_STAMP_COUNTER:
msr_content = hvm_get_guest_time(v);
break;
+
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
+
case MSR_EFER:
msr_content = v->arch.hvm_svm.cpu_shadow_efer;
break;
@@ -2093,6 +2110,10 @@ static inline void svm_do_msr_access(
* particularly meaningful, but at least avoids the guest crashing!
*/
msr_content = 0;
+ break;
+
+ case MSR_K8_VM_HSAVE_PA:
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
break;

default:
@@ -2128,9 +2149,15 @@ static inline void svm_do_msr_access(
hvm_set_guest_time(v, msr_content);
pt_reset(v);
break;
+
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
+
+ case MSR_K8_VM_HSAVE_PA:
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ break;
+
default:
if ( !long_mode_do_msr_write(regs) )
wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
@@ -2265,12 +2292,13 @@ static int svm_reset_to_realmode(struct
vmcb->cr2 = 0;
vmcb->efer = EFER_SVME;

- vmcb->cr4 = SVM_CR4_HOST_MASK;
+ vmcb->cr4 = HVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = 0;

if ( paging_mode_hap(v->domain) ) {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
}

/* This will jump to ROMBIOS */
@@ -2411,6 +2439,12 @@ asmlinkage void svm_vmexit_handler(struc
break;
}

+ case VMEXIT_EXCEPTION_MC:
+ HVMTRACE_0D(MCE, v);
+ svm_store_cpu_guest_regs(v, regs, NULL);
+ do_machine_check(regs);
+ break;
+
case VMEXIT_VINTR:
vmcb->vintr.fields.irq = 0;
vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/svm/vmcb.c
--- a/xen/arch/x86/hvm/svm/vmcb.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c Mon Jul 02 12:19:26 2007 -0600
@@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v
/* Guest CR4. */
arch_svm->cpu_shadow_cr4 =
read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;

paging_update_paging_modes(v);
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
@@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v
vmcb->np_enable = 1; /* enable nested paging */
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
+ vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+ vmcb->exception_intercepts = HVM_TRAP_MASK;

/* No point in intercepting CR0/3/4 reads, because the hardware
* will return the guest versions anyway. */
@@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v
}
else
{
- vmcb->exception_intercepts = 1U << TRAP_page_fault;
+ vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
}

return 0;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Mon Jul 02 12:19:26 2007 -0600
@@ -43,6 +43,7 @@ u32 vmx_secondary_exec_control __read_mo
u32 vmx_secondary_exec_control __read_mostly;
u32 vmx_vmexit_control __read_mostly;
u32 vmx_vmentry_control __read_mostly;
+bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;

static u32 vmcs_revision_id __read_mostly;

@@ -133,6 +134,7 @@ void vmx_init_vmcs_config(void)
vmx_secondary_exec_control = _vmx_secondary_exec_control;
vmx_vmexit_control = _vmx_vmexit_control;
vmx_vmentry_control = _vmx_vmentry_control;
+ cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22));
}
else
{
@@ -142,6 +144,7 @@ void vmx_init_vmcs_config(void)
BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
+ BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22)));
}

/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -421,7 +424,7 @@ static void construct_vmcs(struct vcpu *
__vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif

- __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
+ __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));

/* Guest CR0. */
cr0 = read_cr0();
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c Mon Jul 02 12:19:26 2007 -0600
@@ -560,6 +560,9 @@ int vmx_vmcs_restore(struct vcpu *v, str
__vmwrite(GUEST_RSP, c->rsp);
__vmwrite(GUEST_RFLAGS, c->rflags);

+ v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG
+ | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
+ __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
__vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);

@@ -577,33 +580,17 @@ int vmx_vmcs_restore(struct vcpu *v, str
goto skip_cr3;
}

- if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
- /*
- * This is simple TLB flush, implying the guest has
- * removed some translation or changed page attributes.
- * We simply invalidate the shadow.
- */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
- goto bad_cr3;
- }
- } else {
- /*
- * If different, make a shadow. Check if the PDBR is valid
- * first.
- */
- HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
- /* current!=vcpu as not called by arch_vmx_do_launch */
- mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
- if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
- goto bad_cr3;
- }
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- if (old_base_mfn)
- put_page(mfn_to_page(old_base_mfn));
- v->arch.hvm_vmx.cpu_cr3 = c->cr3;
- }
+ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
+ /* current!=vcpu as not called by arch_vmx_do_launch */
+ mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
+ if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
+ goto bad_cr3;
+ }
+ old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ v->arch.guest_table = pagetable_from_pfn(mfn);
+ if (old_base_mfn)
+ put_page(mfn_to_page(old_base_mfn));
+ v->arch.hvm_vmx.cpu_cr3 = c->cr3;

skip_cr3:
#if defined(__x86_64__)
@@ -615,7 +602,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
}
#endif

- __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);

@@ -1315,16 +1302,20 @@ static int __get_instruction_length(void

static void inline __update_guest_eip(unsigned long inst_len)
{
- unsigned long current_eip, intr_shadow;
-
- current_eip = __vmread(GUEST_RIP);
- __vmwrite(GUEST_RIP, current_eip + inst_len);
-
- intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
- if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
- {
- intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
- __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+ unsigned long x;
+
+ x = __vmread(GUEST_RIP);
+ __vmwrite(GUEST_RIP, x + inst_len);
+
+ x = __vmread(GUEST_RFLAGS);
+ if ( x & X86_EFLAGS_RF )
+ __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
+
+ x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+ if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
+ {
+ x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
}
}

@@ -1475,16 +1466,34 @@ static void vmx_do_invlpg(unsigned long
paging_invlpg(v, va);
}

-/*
- * get segment for string pio according to guest instruction
- */
-static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
- int inst_len, enum x86_segment *seg)
+/* Get segment for OUTS according to guest instruction. */
+static enum x86_segment vmx_outs_get_segment(
+ int long_mode, unsigned long eip, int inst_len)
{
unsigned char inst[MAX_INST_LEN];
+ enum x86_segment seg = x86_seg_ds;
int i;
extern int inst_copy_from_guest(unsigned char *, unsigned long, int);

+ if ( likely(cpu_has_vmx_ins_outs_instr_info) )
+ {
+ unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
+
+ /* Get segment register according to bits 17:15. */
+ switch ( (instr_info >> 15) & 7 )
+ {
+ case 0: seg = x86_seg_es; break;
+ case 1: seg = x86_seg_cs; break;
+ case 2: seg = x86_seg_ss; break;
+ case 3: seg = x86_seg_ds; break;
+ case 4: seg = x86_seg_fs; break;
+ case 5: seg = x86_seg_gs; break;
+ default: BUG();
+ }
+
+ goto out;
+ }
+
if ( !long_mode )
eip += __vmread(GUEST_CS_BASE);

@@ -1493,7 +1502,7 @@ static void vmx_str_pio_get_segment(int
{
gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
domain_crash(current->domain);
- return;
+ goto out;
}

for ( i = 0; i < inst_len; i++ )
@@ -1510,25 +1519,28 @@ static void vmx_str_pio_get_segment(int
#endif
continue;
case 0x2e: /* CS */
- *seg = x86_seg_cs;
+ seg = x86_seg_cs;
continue;
case 0x36: /* SS */
- *seg = x86_seg_ss;
+ seg = x86_seg_ss;
continue;
case 0x26: /* ES */
- *seg = x86_seg_es;
+ seg = x86_seg_es;
continue;
case 0x64: /* FS */
- *seg = x86_seg_fs;
+ seg = x86_seg_fs;
continue;
case 0x65: /* GS */
- *seg = x86_seg_gs;
+ seg = x86_seg_gs;
continue;
case 0x3e: /* DS */
- *seg = x86_seg_ds;
+ seg = x86_seg_ds;
continue;
}
}
+
+ out:
+ return seg;
}

static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
@@ -1541,7 +1553,7 @@ static int vmx_str_pio_check_descriptor(
*base = 0;
*limit = 0;
if ( seg != x86_seg_es )
- vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
+ seg = vmx_outs_get_segment(long_mode, eip, inst_len);

switch ( seg )
{
@@ -1587,7 +1599,7 @@ static int vmx_str_pio_check_descriptor(
}
*ar_bytes = __vmread(ar_field);

- return !(*ar_bytes & 0x10000);
+ return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
}


@@ -1896,7 +1908,7 @@ static void vmx_world_save(struct vcpu *
c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */

c->esp = __vmread(GUEST_RSP);
- c->eflags = __vmread(GUEST_RFLAGS);
+ c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;

c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
c->cr3 = v->arch.hvm_vmx.cpu_cr3;
@@ -1997,7 +2009,7 @@ static int vmx_world_restore(struct vcpu
else
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);

- __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);

@@ -2272,7 +2284,6 @@ static int vmx_set_cr0(unsigned long val
"Enabling CR0.PE at %%eip 0x%lx", eip);
if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
{
- eip = __vmread(GUEST_RIP);
HVM_DBG_LOG(DBG_LEVEL_1,
"Restoring to %%eip 0x%lx", eip);
return 0; /* do not update eip! */
@@ -2397,6 +2408,15 @@ static int mov_to_cr(int gp, int cr, str
case 4: /* CR4 */
old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;

+ if ( value & HVM_CR4_GUEST_RESERVED_BITS )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set reserved bit in CR4: %lx",
+ value);
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ break;
+ }
+
if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
{
if ( vmx_pgbit_test(v) )
@@ -2437,7 +2457,7 @@ static int mov_to_cr(int gp, int cr, str
}
}

- __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
+ __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
v->arch.hvm_vmx.cpu_shadow_cr4 = value;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);

@@ -2581,7 +2601,7 @@ static inline int vmx_do_msr_read(struct
case MSR_IA32_APICBASE:
msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
break;
- case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
default:
if ( long_mode_do_msr_read(regs) )
@@ -2707,7 +2727,7 @@ static inline int vmx_do_msr_write(struc
case MSR_IA32_APICBASE:
vlapic_msr_set(vcpu_vlapic(v), msr_content);
break;
- case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
+ case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
goto gp_fault;
default:
if ( !long_mode_do_msr_write(regs) )
@@ -2823,7 +2843,8 @@ static void vmx_reflect_exception(struct
}
}

-static void vmx_failed_vmentry(unsigned int exit_reason)
+static void vmx_failed_vmentry(unsigned int exit_reason,
+ struct cpu_user_regs *regs)
{
unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
unsigned long exit_qualification;
@@ -2840,6 +2861,9 @@ static void vmx_failed_vmentry(unsigned
break;
case EXIT_REASON_MACHINE_CHECK:
printk("caused by machine check.\n");
+ HVMTRACE_0D(MCE, current);
+ vmx_store_cpu_guest_regs(current, regs, NULL);
+ do_machine_check(regs);
break;
default:
printk("reason not known yet!");
@@ -2869,7 +2893,7 @@ asmlinkage void vmx_vmexit_handler(struc
local_irq_enable();

if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
- return vmx_failed_vmentry(exit_reason);
+ return vmx_failed_vmentry(exit_reason, regs);

switch ( exit_reason )
{
@@ -2920,11 +2944,19 @@ asmlinkage void vmx_vmexit_handler(struc
vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
- HVMTRACE_0D(NMI, v);
if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
+ {
+ HVMTRACE_0D(NMI, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
do_nmi(regs); /* Real NMI, vector 2: normal processing. */
+ }
else
vmx_reflect_exception(v);
+ break;
+ case TRAP_machine_check:
+ HVMTRACE_0D(MCE, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
+ do_machine_check(regs);
break;
default:
goto exit_and_crash;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/io_apic.c Mon Jul 02 12:19:26 2007 -0600
@@ -371,7 +371,7 @@ static int pin_2_irq(int idx, int apic,
* so mask in all cases should simply be TARGET_CPUS
*/
#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
+void /*__init*/ setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;

@@ -849,7 +849,7 @@ static inline void UNEXPECTED_IO_APIC(vo
{
}

-void __init __print_IO_APIC(void)
+void /*__init*/ __print_IO_APIC(void)
{
int apic, i;
union IO_APIC_reg_00 reg_00;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/mm.c Mon Jul 02 12:19:26 2007 -0600
@@ -3240,6 +3240,7 @@ static int ptwr_emulated_update(
struct ptwr_emulate_ctxt *ptwr_ctxt)
{
unsigned long mfn;
+ unsigned long unaligned_addr = addr;
struct page_info *page;
l1_pgentry_t pte, ol1e, nl1e, *pl1e;
struct vcpu *v = current;
@@ -3294,7 +3295,7 @@ static int ptwr_emulated_update(
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
{
if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
- (bytes == 4) && (addr & 4) && !do_cmpxchg &&
+ (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
(l1e_get_flags(nl1e) & _PAGE_PRESENT) )
{
/*
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/platform_hypercall.c Mon Jul 02 12:19:26 2007 -0600
@@ -20,12 +20,20 @@
#include <xen/guest_access.h>
#include <asm/current.h>
#include <public/platform.h>
+#include <asm/edd.h>
#include <asm/mtrr.h>
#include "cpu/mtrr/mtrr.h"
+
+extern uint16_t boot_edid_caps;
+extern uint8_t boot_edid_info[];

#ifndef COMPAT
typedef long ret_t;
DEFINE_SPINLOCK(xenpf_lock);
+# undef copy_from_compat
+# define copy_from_compat copy_from_guest
+# undef copy_to_compat
+# define copy_to_compat copy_to_guest
#else
extern spinlock_t xenpf_lock;
#endif
@@ -150,6 +158,94 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
}
}
break;
+
+ case XENPF_firmware_info:
+ switch ( op->u.firmware_info.type )
+ {
+ case XEN_FW_DISK_INFO: {
+ const struct edd_info *info;
+ u16 length;
+
+ ret = -ESRCH;
+ if ( op->u.firmware_info.index >= bootsym(boot_edd_info_nr) )
+ break;
+
+ info = bootsym(boot_edd_info) + op->u.firmware_info.index;
+
+ /* Transfer the EDD info block. */
+ ret = -EFAULT;
+ if ( copy_from_compat(&length, op->u.firmware_info.u.
+ disk_info.edd_params, 1) )
+ break;
+ if ( length > info->edd_device_params.length )
+ length = info->edd_device_params.length;
+ if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+ (u8 *)&info->edd_device_params,
+ length) )
+ break;
+ if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
+ &length, 1) )
+ break;
+
+ /* Transfer miscellaneous other information values. */
+#define C(x) op->u.firmware_info.u.disk_info.x = info->x
+ C(device);
+ C(version);
+ C(interface_support);
+ C(legacy_max_cylinder);
+ C(legacy_max_head);
+ C(legacy_sectors_per_track);
+#undef C
+
+ ret = (copy_field_to_guest(u_xenpf_op, op,
+ u.firmware_info.u.disk_info)
+ ? -EFAULT : 0);
+ break;
+ }
+ case XEN_FW_DISK_MBR_SIGNATURE: {
+ const struct mbr_signature *sig;
+
+ ret = -ESRCH;
+ if ( op->u.firmware_info.index >= bootsym(boot_mbr_signature_nr) )
+ break;
+
+ sig = bootsym(boot_mbr_signature) + op->u.firmware_info.index;
+
+ op->u.firmware_info.u.disk_mbr_signature.device = sig->device;
+ op->u.firmware_info.u.disk_mbr_signature.mbr_signature =
+ sig->signature;
+
+ ret = (copy_field_to_guest(u_xenpf_op, op,
+ u.firmware_info.u.disk_mbr_signature)
+ ? -EFAULT : 0);
+ break;
+ }
+ case XEN_FW_VBEDDC_INFO:
+ ret = -ESRCH;
+ if ( op->u.firmware_info.index != 0 )
+ break;
+ if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
+ break;
+
+ op->u.firmware_info.u.vbeddc_info.capabilities =
+ bootsym(boot_edid_caps);
+ op->u.firmware_info.u.vbeddc_info.edid_transfer_time =
+ bootsym(boot_edid_caps) >> 8;
+
+ ret = 0;
+ if ( copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+ u.vbeddc_info.capabilities) ||
+ copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
+ u.vbeddc_info.edid_transfer_time) ||
+ copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid,
+ bootsym(boot_edid_info), 128) )
+ ret = -EFAULT;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;

default:
ret = -ENOSYS;
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/setup.c Mon Jul 02 12:19:26 2007 -0600
@@ -405,7 +405,7 @@ void __init __start_xen(unsigned long mb
void __init __start_xen(unsigned long mbi_p)
{
char *memmap_type = NULL;
- char __cmdline[] = "", *cmdline = __cmdline;
+ char __cmdline[] = "", *cmdline = __cmdline, *kextra;
unsigned long _initrd_start = 0, _initrd_len = 0;
unsigned int initrdidx = 1;
char *_policy_start = NULL;
@@ -426,6 +426,17 @@ void __init __start_xen(unsigned long mb
/* Parse the command-line options. */
if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
cmdline = __va(mbi->cmdline);
+ if ( (kextra = strstr(cmdline, " -- ")) != NULL )
+ {
+ /*
+ * Options after ' -- ' separator belong to dom0.
+ * 1. Orphan dom0's options from Xen's command line.
+ * 2. Skip all but final leading space from dom0's options.
+ */
+ *kextra = '\0';
+ kextra += 3;
+ while ( kextra[1] == ' ' ) kextra++;
+ }
cmdline_parse(cmdline);

parse_video_info();
@@ -494,7 +505,7 @@ void __init __start_xen(unsigned long mb

printk("Disc information:\n");
printk(" Found %d MBR signatures\n",
- bootsym(boot_edd_signature_nr));
+ bootsym(boot_mbr_signature_nr));
printk(" Found %d EDD information structures\n",
bootsym(boot_edd_info_nr));

@@ -1009,17 +1020,26 @@ void __init __start_xen(unsigned long mb

/* Grab the DOM0 command line. */
cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
- if ( cmdline != NULL )
+ if ( (cmdline != NULL) || (kextra != NULL) )
{
static char dom0_cmdline[MAX_GUEST_CMDLINE];

- /* Skip past the image name and copy to a local buffer. */
- while ( *cmdline == ' ' ) cmdline++;
- if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+ dom0_cmdline[0] = '\0';
+
+ if ( cmdline != NULL )
{
+ /* Skip past the image name and copy to a local buffer. */
while ( *cmdline == ' ' ) cmdline++;
- safe_strcpy(dom0_cmdline, cmdline);
+ if ( (cmdline = strchr(cmdline, ' ')) != NULL )
+ {
+ while ( *cmdline == ' ' ) cmdline++;
+ safe_strcpy(dom0_cmdline, cmdline);
+ }
}
+
+ if ( kextra != NULL )
+ /* kextra always includes exactly one leading space. */
+ safe_strcat(dom0_cmdline, kextra);

/* Append any extra parameters. */
if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/traps.c Mon Jul 02 12:19:26 2007 -0600
@@ -86,6 +86,7 @@ asmlinkage int do_ ## _name(struct cpu_u
asmlinkage int do_ ## _name(struct cpu_user_regs *regs)

asmlinkage void nmi(void);
+asmlinkage void machine_check(void);
DECLARE_TRAP_HANDLER(divide_error);
DECLARE_TRAP_HANDLER(debug);
DECLARE_TRAP_HANDLER(int3);
@@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(simd_coprocessor_er
DECLARE_TRAP_HANDLER(simd_coprocessor_error);
DECLARE_TRAP_HANDLER(alignment_check);
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
-DECLARE_TRAP_HANDLER(machine_check);

long do_set_debugreg(int reg, unsigned long value);
unsigned long do_get_debugreg(int reg);
@@ -631,6 +631,7 @@ static int emulate_forced_invalid_op(str
regs->ecx = c;
regs->edx = d;
regs->eip = eip;
+ regs->eflags &= ~X86_EFLAGS_RF;

return EXCRET_fault_fixed;
}
@@ -730,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r
return do_guest_trap(TRAP_int3, regs, 0);
}

-asmlinkage int do_machine_check(struct cpu_user_regs *regs)
-{
- fatal_trap(TRAP_machine_check, regs);
- return 0;
+asmlinkage void do_machine_check(struct cpu_user_regs *regs)
+{
+ extern fastcall void (*machine_check_vector)(
+ struct cpu_user_regs *, long error_code);
+ machine_check_vector(regs, regs->error_code);
}

void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1787,6 +1789,7 @@ static int emulate_privileged_op(struct

done:
regs->eip = eip;
+ regs->eflags &= ~X86_EFLAGS_RF;
return EXCRET_fault_fixed;

fail:
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/entry.S Mon Jul 02 12:19:26 2007 -0600
@@ -72,48 +72,36 @@
andl $~3,reg; \
movl (reg),reg;

-
ALIGN
restore_all_guest:
ASSERT_INTERRUPTS_DISABLED
testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
- jnz restore_all_vm86
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ leal 4(%esp),%esp
+ jnz .Lrestore_iret_guest
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- testl $2,UREGS_cs(%esp)
- jnz 1f
+ testb $2,UREGS_cs-UREGS_eip(%esp)
+ jnz .Lrestore_sregs_guest
call restore_ring0_guest
- jmp restore_all_vm86
-1:
+ jmp .Lrestore_iret_guest
#endif
-.Lft1: mov UREGS_ds(%esp),%ds
-.Lft2: mov UREGS_es(%esp),%es
-.Lft3: mov UREGS_fs(%esp),%fs
-.Lft4: mov UREGS_gs(%esp),%gs
-restore_all_vm86:
- popl %ebx
- popl %ecx
- popl %edx
- popl %esi
- popl %edi
- popl %ebp
- popl %eax
- addl $4,%esp
+.Lrestore_sregs_guest:
+.Lft1: mov UREGS_ds-UREGS_eip(%esp),%ds
+.Lft2: mov UREGS_es-UREGS_eip(%esp),%es
+.Lft3: mov UREGS_fs-UREGS_eip(%esp),%fs
+.Lft4: mov UREGS_gs-UREGS_eip(%esp),%gs
+.Lrestore_iret_guest:
.Lft5: iret
.section .fixup,"ax"
-.Lfx5: subl $28,%esp
- pushl 28(%esp) # error_code/entry_vector
- movl %eax,UREGS_eax+4(%esp)
- movl %ebp,UREGS_ebp+4(%esp)
- movl %edi,UREGS_edi+4(%esp)
- movl %esi,UREGS_esi+4(%esp)
- movl %edx,UREGS_edx+4(%esp)
- movl %ecx,UREGS_ecx+4(%esp)
- movl %ebx,UREGS_ebx+4(%esp)
-.Lfx1: SET_XEN_SEGMENTS(a)
- movl %eax,%fs
- movl %eax,%gs
- sti
- popl %esi
+.Lfx1: sti
+ SAVE_ALL_GPRS
+ mov UREGS_error_code(%esp),%esi
pushfl # EFLAGS
movl $__HYPERVISOR_CS,%eax
pushl %eax # CS
@@ -147,7 +135,7 @@ 1: call create_bounce_frame
.long .Lft2,.Lfx1
.long .Lft3,.Lfx1
.long .Lft4,.Lfx1
- .long .Lft5,.Lfx5
+ .long .Lft5,.Lfx1
.previous
.section __ex_table,"a"
.long .Ldf1,failsafe_callback
@@ -169,8 +157,8 @@ ENTRY(hypercall)
ENTRY(hypercall)
subl $4,%esp
FIXUP_RING0_GUEST_STACK
- SAVE_ALL(b)
- sti
+ SAVE_ALL(1f,1f)
+1: sti
GET_CURRENT(%ebx)
cmpl $NR_hypercalls,%eax
jae bad_hypercall
@@ -420,9 +408,14 @@ ENTRY(divide_error)
ALIGN
handle_exception:
FIXUP_RING0_GUEST_STACK
- SAVE_ALL_NOSEGREGS(a)
- SET_XEN_SEGMENTS(a)
- testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
+ SAVE_ALL(1f,2f)
+ .text 1
+ /* Exception within Xen: make sure we have valid %ds,%es. */
+1: mov %ecx,%ds
+ mov %ecx,%es
+ jmp 2f
+ .previous
+2: testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
jz exception_with_ints_disabled
sti # re-enable interrupts
1: xorl %eax,%eax
@@ -533,71 +526,81 @@ ENTRY(page_fault)
movw $TRAP_page_fault,2(%esp)
jmp handle_exception

-ENTRY(machine_check)
- pushl $TRAP_machine_check<<16
- jmp handle_exception
-
ENTRY(spurious_interrupt_bug)
pushl $TRAP_spurious_int<<16
jmp handle_exception

ENTRY(early_page_fault)
- SAVE_ALL_NOSEGREGS(a)
- movl %esp,%edx
- pushl %edx
+ SAVE_ALL(1f,1f)
+1: movl %esp,%eax
+ pushl %eax
call do_early_page_fault
addl $4,%esp
jmp restore_all_xen

-ENTRY(nmi)
+handle_nmi_mce:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- # NMI entry protocol is incompatible with guest kernel in ring 0.
+ # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
+ addl $4,%esp
iret
#else
# Save state but do not trash the segment registers!
- # We may otherwise be unable to reload them or copy them to ring 1.
- pushl %eax
- SAVE_ALL_NOSEGREGS(a)
-
- # We can only process the NMI if:
- # A. We are the outermost Xen activation (in which case we have
- # the selectors safely saved on our stack)
- # B. DS and ES contain sane Xen values.
- # In all other cases we bail without touching DS-GS, as we have
- # interrupted an enclosing Xen activation in tricky prologue or
- # epilogue code.
- movl UREGS_eflags(%esp),%eax
- movb UREGS_cs(%esp),%al
- testl $(3|X86_EFLAGS_VM),%eax
- jnz continue_nmi
- movl %ds,%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne defer_nmi
- movl %es,%eax
- cmpw $(__HYPERVISOR_DS),%ax
- jne defer_nmi
-
-continue_nmi:
- SET_XEN_SEGMENTS(d)
+ SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
+.Lnmi_mce_common:
+ xorl %eax,%eax
+ movw UREGS_entry_vector(%esp),%ax
movl %esp,%edx
pushl %edx
- call do_nmi
+ call *exception_table(,%eax,4)
addl $4,%esp
+ /*
+ * NB. We may return to Xen context with polluted %ds/%es. But in such
+ * cases we have put guest DS/ES on the guest stack frame, which will
+ * be detected by SAVE_ALL(), or we have rolled back restore_guest.
+ */
jmp ret_from_intr
-
-defer_nmi:
- movl $FIXMAP_apic_base,%eax
- # apic_wait_icr_idle()
-1: movl %ss:APIC_ICR(%eax),%ebx
- testl $APIC_ICR_BUSY,%ebx
- jnz 1b
- # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi)
- movl $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \
- TRAP_deferred_nmi),%ss:APIC_ICR(%eax)
- jmp restore_all_xen
+.Lnmi_mce_xen:
+ /* Check the outer (guest) context for %ds/%es state validity. */
+ GET_GUEST_REGS(%ebx)
+ testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
+ mov %ds,%eax
+ mov %es,%edx
+ jnz .Lnmi_mce_vm86
+ /* We may have interrupted Xen while messing with %ds/%es... */
+ cmpw %ax,%cx
+ mov %ecx,%ds /* Ensure %ds is valid */
+ cmove UREGS_ds(%ebx),%eax /* Grab guest DS if it wasn't in %ds */
+ cmpw %dx,%cx
+ movl %eax,UREGS_ds(%ebx) /* Ensure guest frame contains guest DS */
+ cmove UREGS_es(%ebx),%edx /* Grab guest ES if it wasn't in %es */
+ mov %ecx,%es /* Ensure %es is valid */
+ movl $.Lrestore_sregs_guest,%ecx
+ movl %edx,UREGS_es(%ebx) /* Ensure guest frame contains guest ES */
+ cmpl %ecx,UREGS_eip(%esp)
+ jbe .Lnmi_mce_common
+ cmpl $.Lrestore_iret_guest,UREGS_eip(%esp)
+ ja .Lnmi_mce_common
+ /* Roll outer context restore_guest back to restoring %ds/%es. */
+ movl %ecx,UREGS_eip(%esp)
+ jmp .Lnmi_mce_common
+.Lnmi_mce_vm86:
+ /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
+ mov %ecx,%ds
+ mov %ecx,%es
+ jmp .Lnmi_mce_common
#endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */

+ENTRY(nmi)
+ pushl $TRAP_nmi<<16
+ jmp handle_nmi_mce
+
+ENTRY(machine_check)
+ pushl $TRAP_machine_check<<16
+ jmp handle_nmi_mce
+
ENTRY(setup_vm86_frame)
+ mov %ecx,%ds
+ mov %ecx,%es
# Copies the entire stack frame forwards by 16 bytes.
.macro copy_vm86_words count=18
.if \count
@@ -615,7 +618,7 @@ ENTRY(exception_table)
ENTRY(exception_table)
.long do_divide_error
.long do_debug
- .long 0 # nmi
+ .long do_nmi
.long do_int3
.long do_overflow
.long do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/supervisor_mode_kernel.S
--- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S Mon Jul 02 12:19:26 2007 -0600
@@ -20,40 +20,45 @@
#include <asm/asm_defns.h>
#include <public/xen.h>

+#define guestreg(field) ((field)-UREGS_eip+36)
+
# Upon entry the stack should be the Xen stack and contain:
- # %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN
+ # %ss, %esp, EFLAGS, %cs|1, %eip, RETURN
# On exit the stack should be %ss:%esp (i.e. the guest stack)
# and contain:
- # EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN
+ # EFLAGS, %cs, %eip, RETURN
ALIGN
ENTRY(restore_ring0_guest)
+ pusha
+
# Point %gs:%esi to guest stack.
-RRG0: movw UREGS_ss+4(%esp),%gs
- movl UREGS_esp+4(%esp),%esi
+RRG0: movw guestreg(UREGS_ss)(%esp),%gs
+ movl guestreg(UREGS_esp)(%esp),%esi

- # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack.
- movl $(UREGS_kernel_sizeof>>2)+1,%ecx
+ # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack.
+ movl $12,%ecx /* 12 32-bit values */

1: subl $4,%esi
movl -4(%esp,%ecx,4),%eax
RRG1: movl %eax,%gs:(%esi)
loop 1b

-RRG2: andl $~3,%gs:UREGS_cs+4(%esi)
+RRG2: andl $~3,%gs:guestreg(UREGS_cs)(%esi)

movl %gs,%eax

# We need to do this because these registers are not present
# on the guest stack so they cannot be restored by the code in
# restore_all_guest.
-RRG3: mov UREGS_ds+4(%esp),%ds
-RRG4: mov UREGS_es+4(%esp),%es
-RRG5: mov UREGS_fs+4(%esp),%fs
-RRG6: mov UREGS_gs+4(%esp),%gs
+RRG3: mov guestreg(UREGS_ds)(%esp),%ds
+RRG4: mov guestreg(UREGS_es)(%esp),%es
+RRG5: mov guestreg(UREGS_fs)(%esp),%fs
+RRG6: mov guestreg(UREGS_gs)(%esp),%gs

RRG7: movl %eax,%ss
movl %esi,%esp

+ popa
ret
.section __ex_table,"a"
.long RRG0,domain_crash_synchronous
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_32/traps.c
--- a/xen/arch/x86/x86_32/traps.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_32/traps.c Mon Jul 02 12:19:26 2007 -0600
@@ -232,15 +232,6 @@ unsigned long do_iret(void)
return 0;
}

-#include <asm/asm_defns.h>
-BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
-fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
-{
- asmlinkage void do_nmi(struct cpu_user_regs *);
- ack_APIC_irq();
- do_nmi(regs);
-}
-
void __init percpu_traps_init(void)
{
struct tss_struct *tss = &doublefault_tss;
@@ -251,8 +242,6 @@ void __init percpu_traps_init(void)

/* The hypercall entry vector is only accessible from ring 1. */
_set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
-
- set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);

/*
* Make a separate task for double faults. This will get us debug output if
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/Makefile Mon Jul 02 12:19:26 2007 -0600
@@ -1,12 +1,12 @@ subdir-y += compat
subdir-y += compat

obj-y += entry.o
-obj-y += compat_kexec.o
obj-y += gpr_switch.o
obj-y += mm.o
obj-y += traps.o

obj-$(CONFIG_COMPAT) += compat.o
+obj-$(CONFIG_COMPAT) += compat_kexec.o
obj-$(CONFIG_COMPAT) += domain.o
obj-$(CONFIG_COMPAT) += physdev.o
obj-$(CONFIG_COMPAT) += platform_hypercall.o
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/compat/entry.S Mon Jul 02 12:19:26 2007 -0600
@@ -143,12 +143,12 @@ compat_restore_all_guest:
.Lft0: iretq

.section .fixup,"ax"
-.Lfx0: popq -15*8-8(%rsp) # error_code/entry_vector
- SAVE_ALL # 15*8 bytes pushed
- movq -8(%rsp),%rsi # error_code/entry_vector
- sti # after stack abuse (-1024(%rsp))
+.Lfx0: sti
+ SAVE_ALL
+ movq UREGS_error_code(%rsp),%rsi
+ movq %rsp,%rax
+ andq $~0xf,%rsp
pushq $__HYPERVISOR_DS # SS
- leaq 8(%rsp),%rax
pushq %rax # RSP
pushfq # RFLAGS
pushq $__HYPERVISOR_CS # CS
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/entry.S Mon Jul 02 12:19:26 2007 -0600
@@ -57,23 +57,23 @@ 1: sysretl
/* No special register assumptions. */
iret_exit_to_guest:
addq $8,%rsp
-.Lft1: iretq
+.Lft0: iretq

.section .fixup,"ax"
-.Lfx1: popq -15*8-8(%rsp) # error_code/entry_vector
- SAVE_ALL # 15*8 bytes pushed
- movq -8(%rsp),%rsi # error_code/entry_vector
- sti # after stack abuse (-1024(%rsp))
+.Lfx0: sti
+ SAVE_ALL
+ movq UREGS_error_code(%rsp),%rsi
+ movq %rsp,%rax
+ andq $~0xf,%rsp
pushq $__HYPERVISOR_DS # SS
- leaq 8(%rsp),%rax
pushq %rax # RSP
- pushf # RFLAGS
+ pushfq # RFLAGS
pushq $__HYPERVISOR_CS # CS
- leaq .Ldf1(%rip),%rax
+ leaq .Ldf0(%rip),%rax
pushq %rax # RIP
pushq %rsi # error_code/entry_vector
jmp handle_exception
-.Ldf1: GET_CURRENT(%rbx)
+.Ldf0: GET_CURRENT(%rbx)
jmp test_all_events
failsafe_callback:
GET_CURRENT(%rbx)
@@ -88,10 +88,10 @@ 1: call create_bounce_frame
jmp test_all_events
.previous
.section __pre_ex_table,"a"
- .quad .Lft1,.Lfx1
+ .quad .Lft0,.Lfx0
.previous
.section __ex_table,"a"
- .quad .Ldf1,failsafe_callback
+ .quad .Ldf0,failsafe_callback
.previous

ALIGN
@@ -505,11 +505,6 @@ ENTRY(page_fault)
movl $TRAP_page_fault,4(%rsp)
jmp handle_exception

-ENTRY(machine_check)
- pushq $0
- movl $TRAP_machine_check,4(%rsp)
- jmp handle_exception
-
ENTRY(spurious_interrupt_bug)
pushq $0
movl $TRAP_spurious_int,4(%rsp)
@@ -527,31 +522,38 @@ ENTRY(early_page_fault)
call do_early_page_fault
jmp restore_all_xen

+handle_ist_exception:
+ SAVE_ALL
+ testb $3,UREGS_cs(%rsp)
+ jz 1f
+ /* Interrupted guest context. Copy the context to stack bottom. */
+ GET_GUEST_REGS(%rdi)
+ movq %rsp,%rsi
+ movl $UREGS_kernel_sizeof/8,%ecx
+ movq %rdi,%rsp
+ rep movsq
+1: movq %rsp,%rdi
+ movl UREGS_entry_vector(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ callq *(%rdx,%rax,8)
+ jmp ret_from_intr
+
ENTRY(nmi)
pushq $0
- SAVE_ALL
- testb $3,UREGS_cs(%rsp)
- jz nmi_in_hypervisor_mode
- /* Interrupted guest context. Copy the context to stack bottom. */
- GET_GUEST_REGS(%rbx)
- movl $UREGS_kernel_sizeof/8,%ecx
-1: popq %rax
- movq %rax,(%rbx)
- addq $8,%rbx
- loop 1b
- subq $UREGS_kernel_sizeof,%rbx
- movq %rbx,%rsp
-nmi_in_hypervisor_mode:
- movq %rsp,%rdi
- call do_nmi
- jmp ret_from_intr
+ movl $TRAP_nmi,4(%rsp)
+ jmp handle_ist_exception
+
+ENTRY(machine_check)
+ pushq $0
+ movl $TRAP_machine_check,4(%rsp)
+ jmp handle_ist_exception

.data

ENTRY(exception_table)
.quad do_divide_error
.quad do_debug
- .quad 0 # nmi
+ .quad do_nmi
.quad do_int3
.quad do_overflow
.quad do_bounds
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/mm.c Mon Jul 02 12:19:26 2007 -0600
@@ -106,7 +106,8 @@ void __init paging_init(void)
/* Create user-accessible L2 directory to map the MPT for guests. */
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
- l3_ro_mpt = clear_page(page_to_virt(l2_pg));
+ l3_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l3_ro_mpt);
l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));

@@ -132,7 +133,8 @@ void __init paging_init(void)
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
- l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+ l2_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(va)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
l2_ro_mpt += l2_table_offset(va);
@@ -152,7 +154,8 @@ void __init paging_init(void)
l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
goto nomem;
- compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
+ compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
+ clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_64/traps.c Mon Jul 02 12:19:26 2007 -0600
@@ -294,8 +294,9 @@ void __init percpu_traps_init(void)
{
/* Specify dedicated interrupt stacks for NMIs and double faults. */
set_intr_gate(TRAP_double_fault, &double_fault);
- idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
- idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */

/*
* The 32-on-64 hypercall entry vector is only accessible from ring 1.
@@ -310,7 +311,10 @@ void __init percpu_traps_init(void)
stack_bottom = (char *)get_stack_bottom();
stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));

- /* Double-fault handler has its own per-CPU 2kB stack. */
+ /* Machine Check handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
+
+ /* Double-fault handler has its own per-CPU 1kB stack. */
init_tss[cpu].ist[0] = (unsigned long)&stack[2048];

/* NMI handler has its own per-CPU 1kB stack. */
diff -r 443ce7edad0e -r d146700adf71 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/arch/x86/x86_emulate.c Mon Jul 02 12:19:26 2007 -0600
@@ -1630,6 +1630,7 @@ x86_emulate(
}

/* Commit shadow register state. */
+ _regs.eflags &= ~EF_RF;
*ctxt->regs = _regs;

done:
diff -r 443ce7edad0e -r d146700adf71 xen/common/sysctl.c
--- a/xen/common/sysctl.c Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/common/sysctl.c Mon Jul 02 12:19:26 2007 -0600
@@ -136,6 +136,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
}
break;

+ case XEN_SYSCTL_getcpuinfo:
+ {
+ uint32_t i, nr_cpus;
+ struct xen_sysctl_cpuinfo cpuinfo;
+ struct vcpu *v;
+
+ nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
+
+ for ( i = 0; i < nr_cpus; i++ )
+ {
+ /* Assume no holes in idle-vcpu map. */
+ if ( (v = idle_vcpu[i]) == NULL )
+ break;
+
+ cpuinfo.idletime = v->runstate.time[RUNSTATE_running];
+ if ( v->is_running )
+ cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
+
+ if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
+ {
+ ret = -EFAULT;
+ break;
+ }
+ }
+
+ op->u.getcpuinfo.nr_cpus = i;
+ ret = 0;
+
+ if ( copy_to_guest(u_sysctl, op, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = arch_do_sysctl(op, u_sysctl);
break;
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/edd.h
--- a/xen/include/asm-x86/edd.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/edd.h Mon Jul 02 12:19:26 2007 -0600
@@ -32,12 +32,22 @@ struct edd_info {
u16 legacy_max_cylinder; /* %cl[7:6]:%ch: maximum cylinder number */
u8 legacy_max_head; /* %dh: maximum head number */
u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
- /* Int13, Fn41: Get Device Parameters */
- u8 edd_device_params[74]; /* as filled into %ds:%si */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ struct {
+ u16 length;
+ u8 data[72];
+ } edd_device_params;
} __attribute__ ((packed));

-extern u32 boot_edd_signature[];
-extern u8 boot_edd_signature_nr;
+struct mbr_signature {
+ u8 device;
+ u8 pad[3];
+ u32 signature;
+} __attribute__ ((packed));
+
+/* These all reside in the boot trampoline. Access via bootsym(). */
+extern struct mbr_signature boot_mbr_signature[];
+extern u8 boot_mbr_signature_nr;
extern struct edd_info boot_edd_info[];
extern u8 boot_edd_info_nr;

diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/hvm.h Mon Jul 02 12:19:26 2007 -0600
@@ -302,4 +302,18 @@ static inline int hvm_event_injection_fa
return hvm_funcs.event_injection_faulted(v);
}

+/* These bits in CR4 are owned by the host. */
+#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
+ (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
+
+/* These bits in CR4 cannot be set by the guest. */
+#define HVM_CR4_GUEST_RESERVED_BITS \
+ ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
+/* These exceptions must always be intercepted. */
+#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
#endif /* __ASM_X86_HVM_HVM_H__ */
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/emulate.h
--- a/xen/include/asm-x86/hvm/svm/emulate.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/emulate.h Mon Jul 02 12:19:26 2007 -0600
@@ -138,6 +138,7 @@ static void inline __update_guest_eip(
{
ASSERT(inst_len > 0);
vmcb->rip += inst_len;
+ vmcb->rflags &= ~X86_EFLAGS_RF;
}

#endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Mon Jul 02 12:19:26 2007 -0600
@@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v);

void setup_vmcb_dump(void);

-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
-#else
-#define SVM_CR4_HOST_MASK 0
-#endif
-
-
#endif /* ASM_X86_HVM_SVM_VMCS_H__ */

/*
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/trace.h
--- a/xen/include/asm-x86/hvm/trace.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/trace.h Mon Jul 02 12:19:26 2007 -0600
@@ -21,6 +21,7 @@
#define DO_TRC_HVM_CPUID 1
#define DO_TRC_HVM_INTR 1
#define DO_TRC_HVM_NMI 1
+#define DO_TRC_HVM_MCE 1
#define DO_TRC_HVM_SMI 1
#define DO_TRC_HVM_VMMCALL 1
#define DO_TRC_HVM_HLT 1
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Mon Jul 02 12:19:26 2007 -0600
@@ -130,6 +130,8 @@ extern u32 vmx_vmentry_control;

#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
extern u32 vmx_secondary_exec_control;
+
+extern bool_t cpu_has_vmx_ins_outs_instr_info;

#define cpu_has_vmx_virtualize_apic_accesses \
(vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Mon Jul 02 12:19:26 2007 -0600
@@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu
#define X86_SEG_AR_GRANULARITY (1u << 15) /* 15, granularity */
#define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */

-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#endif
-
#define VMCALL_OPCODE ".byte 0x0f,0x01,0xc1\n"
#define VMCLEAR_OPCODE ".byte 0x66,0x0f,0xc7\n" /* reg/opcode: /6 */
#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/page.h
--- a/xen/include/asm-x86/page.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/page.h Mon Jul 02 12:19:26 2007 -0600
@@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd
#define pgentry_ptr_to_slot(_p) \
(((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))

+#ifndef __ASSEMBLY__
+
/* Page-table type. */
-#ifndef __ASSEMBLY__
#if CONFIG_PAGING_LEVELS == 2
/* x86_32 default */
typedef struct { u32 pfn; } pagetable_t;
@@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t;
#define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
#define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
#define pagetable_null() pagetable_from_pfn(0)
-#endif
-
-#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE)
+
+void clear_page_sse2(void *);
+#define clear_page(_p) (cpu_has_xmm2 ? \
+ clear_page_sse2((void *)(_p)) : \
+ (void)memset((void *)(_p), 0, PAGE_SIZE))
#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)

#define mfn_valid(mfn) ((mfn) < max_page)
@@ -244,6 +247,8 @@ typedef struct { u64 pfn; } pagetable_t;
/* Convert between frame number and address formats. */
#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+
+#endif /* !defined(__ASSEMBLY__) */

/* High table entries are reserved by the hypervisor. */
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/processor.h Mon Jul 02 12:19:26 2007 -0600
@@ -104,7 +104,6 @@
#define TRAP_alignment_check 17
#define TRAP_machine_check 18
#define TRAP_simd_error 19
-#define TRAP_deferred_nmi 31

/* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
/* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
@@ -567,7 +566,8 @@ extern void mtrr_ap_init(void);
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);

-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
+asmlinkage void do_machine_check(struct cpu_user_regs *regs);

int cpuid_hypervisor_leaves(
uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
diff -r 443ce7edad0e -r d146700adf71 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/asm-x86/x86_32/asm_defns.h Mon Jul 02 12:19:26 2007 -0600
@@ -26,7 +26,7 @@ 1: addl $4,%esp;
#define ASSERT_INTERRUPTS_ENABLED ASSERT_INTERRUPT_STATUS(nz)
#define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)

-#define __SAVE_ALL_PRE \
+#define SAVE_ALL_GPRS \
cld; \
pushl %eax; \
pushl %ebp; \
@@ -35,32 +35,48 @@ 1: addl $4,%esp;
pushl %esi; \
pushl %edx; \
pushl %ecx; \
- pushl %ebx; \
+ pushl %ebx
+
+/*
+ * Saves all register state into an exception/interrupt stack frame.
+ * Returns to the caller at <xen_lbl> if the interrupted context is within
+ * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through
+ * if the interrupted context is an ordinary guest protected-mode context.
+ * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to
+ * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case
+ * the caller is responsible for validity of %ds/%es.
+ */
+#define SAVE_ALL(xen_lbl, vm86_lbl) \
+ SAVE_ALL_GPRS; \
testl $(X86_EFLAGS_VM),UREGS_eflags(%esp); \
- jz 2f; \
- call setup_vm86_frame; \
- jmp 3f; \
- 2:testb $3,UREGS_cs(%esp); \
- jz 1f; \
- mov %ds,UREGS_ds(%esp); \
- mov %es,UREGS_es(%esp); \
- mov %fs,UREGS_fs(%esp); \
- mov %gs,UREGS_gs(%esp); \
- 3:
-
-#define SAVE_ALL_NOSEGREGS(_reg) \
- __SAVE_ALL_PRE \
- 1:
-
-#define SET_XEN_SEGMENTS(_reg) \
- movl $(__HYPERVISOR_DS),%e ## _reg ## x; \
- mov %e ## _reg ## x,%ds; \
- mov %e ## _reg ## x,%es;
-
-#define SAVE_ALL(_reg) \
- __SAVE_ALL_PRE \
- SET_XEN_SEGMENTS(_reg) \
- 1:
+ mov %ds,%edi; \
+ mov %es,%esi; \
+ mov $(__HYPERVISOR_DS),%ecx; \
+ jnz 86f; \
+ .text 1; \
+ 86: call setup_vm86_frame; \
+ jmp vm86_lbl; \
+ .previous; \
+ testb $3,UREGS_cs(%esp); \
+ jz xen_lbl; \
+ /* \
+ * We are the outermost Xen context, but our \
+ * life is complicated by NMIs and MCEs. These \
+ * could occur in our critical section and \
+ * pollute %ds and %es. We have to detect that \
+ * this has occurred and avoid saving Xen DS/ES \
+ * values to the guest stack frame. \
+ */ \
+ cmpw %cx,%di; \
+ mov %ecx,%ds; \
+ mov %fs,UREGS_fs(%esp); \
+ cmove UREGS_ds(%esp),%edi; \
+ cmpw %cx,%si; \
+ mov %edi,UREGS_ds(%esp); \
+ cmove UREGS_es(%esp),%esi; \
+ mov %ecx,%es; \
+ mov %gs,UREGS_gs(%esp); \
+ mov %esi,UREGS_es(%esp)

#ifdef PERF_COUNTERS
#define PERFC_INCR(_name,_idx,_cur) \
@@ -97,8 +113,8 @@ __asm__(
STR(x) ":\n\t" \
"pushl $"#v"<<16\n\t" \
STR(FIXUP_RING0_GUEST_STACK) \
- STR(SAVE_ALL(a)) \
- "movl %esp,%eax\n\t" \
+ STR(SAVE_ALL(1f,1f)) "\n\t" \
+ "1:movl %esp,%eax\n\t" \
"pushl %eax\n\t" \
"call "STR(smp_##x)"\n\t" \
"addl $4,%esp\n\t" \
@@ -109,8 +125,8 @@ __asm__(
"\n" __ALIGN_STR"\n" \
"common_interrupt:\n\t" \
STR(FIXUP_RING0_GUEST_STACK) \
- STR(SAVE_ALL(a)) \
- "movl %esp,%eax\n\t" \
+ STR(SAVE_ALL(1f,1f)) "\n\t" \
+ "1:movl %esp,%eax\n\t" \
"pushl %eax\n\t" \
"call " STR(do_IRQ) "\n\t" \
"addl $4,%esp\n\t" \
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/platform.h
--- a/xen/include/public/platform.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/platform.h Mon Jul 02 12:19:26 2007 -0600
@@ -114,6 +114,45 @@ typedef struct xenpf_platform_quirk xenp
typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);

+#define XENPF_firmware_info 50
+#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+ /* IN variables. */
+ uint32_t type;
+ uint32_t index;
+ /* OUT variables. */
+ union {
+ struct {
+ /* Int13, Fn48: Check Extensions Present. */
+ uint8_t device; /* %dl: bios device number */
+ uint8_t version; /* %ah: major version */
+ uint16_t interface_support; /* %cx: support bitmap */
+ /* Int13, Fn08: Legacy Get Device Parameters. */
+ uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */
+ uint8_t legacy_max_head; /* %dh: max head # */
+ uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */
+ /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
+ /* NB. First uint16_t of buffer must be set to buffer size. */
+ XEN_GUEST_HANDLE(void) edd_params;
+ } disk_info; /* XEN_FW_DISK_INFO */
+ struct {
+ uint8_t device; /* bios device number */
+ uint32_t mbr_signature; /* offset 0x1b8 in mbr */
+ } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+ struct {
+ /* Int10, AX=4F15: Get EDID info. */
+ uint8_t capabilities;
+ uint8_t edid_transfer_time;
+ /* must refer to 128-byte buffer */
+ XEN_GUEST_HANDLE(uint8_t) edid;
+ } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+ } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
struct xen_platform_op {
uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -124,6 +163,7 @@ struct xen_platform_op {
struct xenpf_read_memtype read_memtype;
struct xenpf_microcode_update microcode;
struct xenpf_platform_quirk platform_quirk;
+ struct xenpf_firmware_info firmware_info;
uint8_t pad[128];
} u;
};
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/sysctl.h Mon Jul 02 12:19:26 2007 -0600
@@ -140,9 +140,7 @@ typedef struct xen_sysctl_getdomaininfol
typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);

-/*
- * Inject debug keys into Xen.
- */
+/* Inject debug keys into Xen. */
#define XEN_SYSCTL_debug_keys 7
struct xen_sysctl_debug_keys {
/* IN variables. */
@@ -151,6 +149,23 @@ struct xen_sysctl_debug_keys {
};
typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo 8
+struct xen_sysctl_cpuinfo {
+ uint64_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t);
+struct xen_sysctl_getcpuinfo {
+ /* IN variables. */
+ uint32_t max_cpus;
+ XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+ /* OUT variables. */
+ uint32_t nr_cpus;
+};
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t);

struct xen_sysctl {
uint32_t cmd;
@@ -163,6 +178,7 @@ struct xen_sysctl {
struct xen_sysctl_perfc_op perfc_op;
struct xen_sysctl_getdomaininfolist getdomaininfolist;
struct xen_sysctl_debug_keys debug_keys;
+ struct xen_sysctl_getcpuinfo getcpuinfo;
uint8_t pad[128];
} u;
};
diff -r 443ce7edad0e -r d146700adf71 xen/include/public/trace.h
--- a/xen/include/public/trace.h Mon Jul 02 10:31:03 2007 -0600
+++ b/xen/include/public/trace.h Mon Jul 02 12:19:26 2007 -0600
@@ -88,6 +88,7 @@
#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12)
#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13)
#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)

/* This structure represents a single trace buffer record. */
struct t_rec {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog
[xen-unstable] merge with xen-unstable.hg [ In reply to ]
# HG changeset patch
# User Alex Williamson <alex.williamson@hp.com>
# Date 1184078366 21600
# Node ID 42586a0f4407528a32ba9da003d14a8ff49193bf
# Parent 87b0b6a08dbdf5882c6223b0b6b7b189a15b0482
# Parent 80099a459d7a2f9c2d7f895ffc8854ca9435206d
merge with xen-unstable.hg
---
tools/xenstore/fake_libxc.c | 143 -
tools/xenstore/speedtest.c | 130 -
tools/xenstore/testsuite/01simple.test | 4
tools/xenstore/testsuite/02directory.test | 45
tools/xenstore/testsuite/03write.test | 28
tools/xenstore/testsuite/04rm.test | 20
tools/xenstore/testsuite/05filepermissions.test | 81 -
tools/xenstore/testsuite/06dirpermissions.test | 119 -
tools/xenstore/testsuite/07watch.test | 176 --
tools/xenstore/testsuite/08transaction.slowtest | 43
tools/xenstore/testsuite/08transaction.test | 92 -
tools/xenstore/testsuite/09domain.test | 19
tools/xenstore/testsuite/10domain-homedir.test | 18
tools/xenstore/testsuite/11domain-watch.test | 50
tools/xenstore/testsuite/12readonly.test | 38
tools/xenstore/testsuite/13watch-ack.test | 21
tools/xenstore/testsuite/14complexperms.test | 68
tools/xenstore/testsuite/test.sh | 64
tools/xenstore/testsuite/vg-suppressions | 9
tools/xenstore/xenstored_test.h | 37
tools/xenstore/xs_crashme.c | 393 ----
tools/xenstore/xs_random.c | 1590 --------------------
tools/xenstore/xs_stress.c | 207 --
tools/xenstore/xs_test.c | 812 ----------
xen/arch/x86/genapic/es7000.h | 120 -
.hgignore | 4
Config.mk | 7
buildconfigs/enable-xen-config | 36
buildconfigs/ketchup | 742 +++++++++
buildconfigs/mk.linux-2.6-mm | 14
buildconfigs/mk.linux-2.6-paravirt | 4
buildconfigs/mk.linux-2.6-rc | 14
buildconfigs/mk.linux-2.6-tip | 14
buildconfigs/mk.linux-2.6-xen | 4
buildconfigs/src.tarball | 12
docs/man/xm.pod.1 | 331 ++--
tools/blktap/drivers/Makefile | 1
tools/blktap/lib/Makefile | 2
tools/examples/init.d/xendomains | 29
tools/examples/xend-config.sxp | 6
tools/firmware/etherboot/README | 5
tools/firmware/rombios/rombios.c | 4
tools/firmware/vmxassist/vm86.c | 190 +-
tools/ioemu/keymaps/ja | 1
tools/ioemu/keymaps/modifiers | 4
tools/ioemu/vnc_keysym.h | 4
tools/libxc/Makefile | 1
tools/libxc/xc_domain.c | 1
tools/libxc/xc_misc.c | 2
tools/libxc/xenctrl.h | 3
tools/python/xen/lowlevel/xc/xc.c | 69
tools/python/xen/util/acmpolicy.py | 1199 +++++++++++++++
tools/python/xen/util/bootloader.py | 521 ++++++
tools/python/xen/util/security.py | 791 ++++++++-
tools/python/xen/util/xsconstants.py | 104 +
tools/python/xen/util/xspolicy.py | 66
tools/python/xen/xend/XendAPI.py | 66
tools/python/xen/xend/XendConfig.py | 40
tools/python/xen/xend/XendDomain.py | 35
tools/python/xen/xend/XendDomainInfo.py | 198 ++
tools/python/xen/xend/XendError.py | 18
tools/python/xen/xend/XendNode.py | 61
tools/python/xen/xend/XendOptions.py | 3
tools/python/xen/xend/XendVDI.py | 12
tools/python/xen/xend/XendXSPolicy.py | 222 ++
tools/python/xen/xend/XendXSPolicyAdmin.py | 314 +++
tools/python/xen/xend/server/SrvDomain.py | 3
tools/python/xen/xend/server/blkif.py | 15
tools/python/xen/xend/server/netif.py | 9
tools/python/xen/xend/server/vfbif.py | 5
tools/python/xen/xm/create.py | 8
tools/python/xen/xm/main.py | 42
tools/security/policies/security_policy.xsd | 29
tools/security/xensec_ezpolicy | 16
tools/xcutils/Makefile | 2
tools/xenmon/xenbaked.c | 2
tools/xenstat/libxenstat/src/xenstat.c | 2
tools/xenstore/Makefile | 94 -
tools/xenstore/xenstored_core.c | 105 -
tools/xenstore/xenstored_domain.c | 4
tools/xenstore/xenstored_transaction.c | 1
tools/xenstore/xenstored_watch.c | 12
tools/xentrace/xentrace.c | 2
tools/xm-test/tests/info/02_info_compiledata_pos.py | 4
xen/arch/ia64/Rules.mk | 2
xen/arch/ia64/xen/dom0_ops.c | 46
xen/arch/powerpc/Makefile | 16
xen/arch/powerpc/Rules.mk | 2
xen/arch/powerpc/boot_of.c | 27
xen/arch/powerpc/domain.c | 2
xen/arch/powerpc/domain_build.c | 136 -
xen/arch/powerpc/external.c | 85 -
xen/arch/powerpc/mm.c | 2
xen/arch/powerpc/mpic_init.c | 58
xen/arch/powerpc/mpic_init.h | 4
xen/arch/powerpc/of_handler/Makefile | 1
xen/arch/powerpc/of_handler/head.S | 26
xen/arch/powerpc/of_handler/ofh.c | 1
xen/arch/powerpc/of_handler/rtas.c | 82 +
xen/arch/powerpc/of_handler/vdevice.c | 2
xen/arch/powerpc/of_handler/xen_hvcall.S | 26
xen/arch/powerpc/ofd_fixup.c | 37
xen/arch/powerpc/oftree.h | 4
xen/arch/powerpc/powerpc64/hypercall_table.S | 2
xen/arch/powerpc/rtas.c | 151 +
xen/arch/powerpc/rtas.h | 31
xen/arch/powerpc/rtas_flash.c | 182 ++
xen/arch/powerpc/rtas_nvram.c | 129 +
xen/arch/powerpc/sysctl.c | 6
xen/arch/powerpc/time.c | 2
xen/arch/x86/Rules.mk | 2
xen/arch/x86/dmi_scan.c | 2
xen/arch/x86/domain.c | 2
xen/arch/x86/domain_build.c | 3
xen/arch/x86/e820.c | 34
xen/arch/x86/genapic/es7000plat.c | 214 --
xen/arch/x86/hvm/svm/emulate.c | 4
xen/arch/x86/hvm/svm/svm.c | 135 +
xen/arch/x86/hvm/vlapic.c | 4
xen/arch/x86/hvm/vmx/intr.c | 67
xen/arch/x86/hvm/vmx/vmcs.c | 15
xen/arch/x86/hvm/vmx/vmx.c | 630 ++++---
xen/arch/x86/mm.c | 34
xen/arch/x86/mm/hap/hap.c | 2
xen/arch/x86/mm/p2m.c | 23
xen/arch/x86/mm/shadow/common.c | 15
xen/arch/x86/mm/shadow/multi.c | 13
xen/arch/x86/mm/shadow/private.h | 4
xen/arch/x86/mpparse.c | 6
xen/arch/x86/setup.c | 126 -
xen/arch/x86/sysctl.c | 32
xen/arch/x86/traps.c | 2
xen/arch/x86/x86_32/seg_fixup.c | 84 +
xen/arch/x86/x86_32/traps.c | 10
xen/arch/x86/x86_32/xen.lds.S | 4
xen/arch/x86/x86_64/compat/traps.c | 25
xen/arch/x86/x86_64/compat_kexec.S | 2
xen/arch/x86/x86_64/entry.S | 1
xen/arch/x86/x86_64/mm.c | 113 -
xen/arch/x86/x86_64/traps.c | 29
xen/arch/x86/x86_64/xen.lds.S | 4
xen/common/compat/kernel.c | 3
xen/common/domain.c | 41
xen/common/domctl.c | 1
xen/common/event_channel.c | 21
xen/common/kernel.c | 21
xen/common/page_alloc.c | 9
xen/include/asm-x86/config.h | 8
xen/include/asm-x86/hvm/svm/svm.h | 40
xen/include/asm-x86/hvm/vmx/vmcs.h | 4
xen/include/asm-x86/hvm/vmx/vmx.h | 24
xen/include/asm-x86/mach-es7000/mach_mpparse.h | 7
xen/include/asm-x86/mach-generic/mach_apic.h | 5
xen/include/asm-x86/mm.h | 3
xen/include/asm-x86/paging.h | 16
xen/include/asm-x86/processor.h | 7
xen/include/asm-x86/regs.h | 5
xen/include/asm-x86/system.h | 4
xen/include/public/domctl.h | 3
xen/include/public/sysctl.h | 20
xen/include/xen/compat.h | 2
xen/include/xen/cpumask.h | 9
xen/include/xen/init.h | 6
firmware/etherboot/eb-rtl8139.zrom | 0
164 files changed, 6505 insertions(+), 6293 deletions(-)

diff -r 87b0b6a08dbd -r 42586a0f4407 .hgignore
--- a/.hgignore Mon Jul 09 09:22:58 2007 -0600
+++ b/.hgignore Tue Jul 10 08:39:26 2007 -0600
@@ -67,6 +67,9 @@
^linux-[^/]*-xen0/.*$
^linux-[^/]*-xenU/.*$
^linux-[^/]*-paravirt/.*$
+^linux-[^/]*-mm/.*$
+^linux-[^/]*-rc/.*$
+^linux-[^/]*-tip/.*$
^linux-[^/]*\.patch$
^mkddbxen$
^netbsd-[^/]*-tools/.*$
@@ -241,6 +244,7 @@
^xen/arch/powerpc/dom0\.bin$
^xen/arch/powerpc/asm-offsets\.s$
^xen/arch/powerpc/firmware$
+^xen/arch/powerpc/firmware.dbg$
^xen/arch/powerpc/firmware_image.bin$
^xen/arch/powerpc/xen\.lds$
^xen/arch/powerpc/\.xen-syms$
diff -r 87b0b6a08dbd -r 42586a0f4407 Config.mk
--- a/Config.mk Mon Jul 09 09:22:58 2007 -0600
+++ b/Config.mk Tue Jul 10 08:39:26 2007 -0600
@@ -17,8 +17,9 @@ SHELL ?= /bin/sh
SHELL ?= /bin/sh

# Tools to run on system hosting the build
-HOSTCC = gcc
-HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer
+HOSTCC = gcc
+HOSTCFLAGS = -Wall -Werror -Wstrict-prototypes -O2 -fomit-frame-pointer
+HOSTCFLAGS += -fno-strict-aliasing

DISTDIR ?= $(XEN_ROOT)/dist
DESTDIR ?= /
@@ -58,6 +59,8 @@ CFLAGS += -g
CFLAGS += -g
endif

+CFLAGS += -fno-strict-aliasing
+
CFLAGS += -std=gnu99

CFLAGS += -Wall -Wstrict-prototypes
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/enable-xen-config
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/buildconfigs/enable-xen-config Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+set -ex
+
+if [ $# -ne 1 ] ; then
+ echo "Usage $(basename $0) <config-file>" 1>&2
+ exit 1
+fi
+
+CONFIG=$1
+
+setopt()
+{
+    OPTION=$1
+    VALUE=$2
+
+    # First blank out any existing line for this option ("is not set" marker or OPTION=value)
+    sed -e "s/^# ${OPTION} is not set$//g ; s/^${OPTION}=.*$//g" -i "${CONFIG}"
+
+    # Then append the new value (y/m enable the option, n records it as unset)
+    case ${VALUE} in
+        y|m) echo "${OPTION}=${VALUE}" >> "${CONFIG}" ;;
+        n) echo "# ${OPTION} is not set" >> "${CONFIG}" ;;
+        *) echo "Invalid value ${VALUE} for ${OPTION}" 1>&2 ; exit 1 ;;
+    esac
+}
+
+setopt CONFIG_PARAVIRT y
+setopt CONFIG_XEN y
+setopt CONFIG_VMI y
+setopt CONFIG_LGUEST n
+setopt CONFIG_XEN_BLKDEV_FRONTEND y
+setopt CONFIG_XEN_NETDEV_FRONTEND y
+setopt CONFIG_HVC_XEN y
+
+exit 0
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/ketchup
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/buildconfigs/ketchup Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,742 @@
+#!/usr/bin/python
+#
+# ketchup 0.9.8
+# http://selenic.com/ketchup/wiki
+#
+# Copyright 2004 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+#
+# Usage:
+#
+# in an existing kernel directory, run:
+#
+# ketchup <version>
+#
+# where version is a complete kernel version, or a branch name to grab
+# the latest version
+#
+# You can override some variables by creating a ~/.ketchuprc file.
+# The ~/.ketchuprc is just a Python script, eg. it might look like this:
+#
+# kernel_url = 'http://kernel.localdomain/pub/linux/kernel'
+# archive = os.environ["HOME"] + '/tmp/ketchup-archive'
+# gpg = '/weird/path/to/gpg'
+#
+
+import re, sys, urllib, os, getopt, glob, shutil
+
+def error(*args):
+ sys.stderr.write("ketchup: ")
+ for a in args:
+ sys.stderr.write(str(a))
+ sys.stderr.write("\n")
+
+def qprint(*args):
+ if not options["quiet"]:
+ sys.stdout.write(" ".join(map(str, args)))
+ sys.stdout.write("\n")
+
+def lprint(*args):
+ sys.stdout.write(" ".join(map(str, args)))
+ sys.stdout.write("\n")
+
+
+def fancyopts(args, options, state, syntax=''):
+ long = []
+ short = ''
+ map = {}
+ dt = {}
+
+ def help(state, opt, arg, options = options, syntax = syntax):
+ lprint("Usage: ", syntax)
+
+ for s, l, d, c in options:
+ opt = ' '
+ if s: opt = opt + '-' + s + ' '
+ if l: opt = opt + '--' + l + ' '
+ if d: opt = opt + '(' + str(d) + ')'
+ lprint(opt)
+ if c: lprint(' %s' % c)
+ sys.exit(0)
+
+ options = [('h', 'help', help, 'Show usage info')] + options
+
+ for s, l, d, c in options:
+ map['-'+s] = map['--'+l]=l
+ state[l] = d
+ dt[l] = type(d)
+ if not d is None and not type(d) is type(help): s, l = s + ':', l + '='
+ if s: short = short + s
+ if l: long.append(l)
+
+ if os.environ.has_key("KETCHUP_OPTS"):
+ args = os.environ["KETCHUP_OPTS"].split() + args
+
+ try:
+ opts, args = getopt.getopt(args, short, long)
+ except getopt.GetoptError:
+ help(state, None, args)
+ sys.exit(-1)
+
+ for opt, arg in opts:
+ if dt[map[opt]] is type(help): state[map[opt]](state,map[opt],arg)
+ elif dt[map[opt]] is type(1): state[map[opt]] = int(arg)
+ elif dt[map[opt]] is type(''): state[map[opt]] = arg
+ elif dt[map[opt]] is type([]): state[map[opt]].append(arg)
+ elif dt[map[opt]] is type(None): state[map[opt]] = 1
+
+ return args
+
+# Default values
+kernel_url = 'http://www.kernel.org/pub/linux/kernel'
+archive = os.environ["HOME"] + "/.ketchup"
+rename_prefix = 'linux-'
+rename_with_localversion = False
+wget = "/usr/bin/wget"
+gpg = "/usr/bin/gpg"
+precommand = postcommand = None
+default_tree = None
+local_trees = {}
+
+# Functions to parse version strings
+
+def tree(ver):
+ return float(re.match(r'(\d+\.\d+)', ver).group(1))
+
+def rev(ver):
+ p = pre(ver)
+ r = int(re.match(r'\d+\.\d+\.(\d+)', ver).group(1))
+ if p: r = r - 1
+ return r
+
+def pre(ver):
+ try: return re.match(r'\d+\.\d+\.\d+(\.\d+)?-((rc|pre)\d+)', ver).group(2)
+ except: return None
+
+def post(ver):
+ try: return re.match(r'\d+\.\d+\.\d+\.(\d+)', ver).group(1)
+ except: return None
+
+def pretype(ver):
+ try: return re.match(r'\d+\.\d+\.\d+(\.\d+)?-((rc|pre)\d+)', ver).group(3)
+ except: return None
+
+def prenum(ver):
+ try: return int(re.match(r'\d+\.\d+\.\d+-((rc|pre)(\d+))', ver).group(3))
+ except: return None
+
+def prebase(ver):
+ return re.match(r'(\d+\.\d+\.\d+((-(rc|pre)|\.)\d+)?)', ver).group(1)
+
+def revbase(ver):
+ return "%s.%s" % (tree(ver), rev(ver))
+
+def base(ver):
+ v = revbase(ver)
+ if post(ver): v += "." + post(ver)
+ return v
+
+def forkname(ver):
+ try: return re.match(r'\d+.\d+.\d+(\.\d+)?(-(rc|pre)\d+)?(-(\w+?)\d+)?',
+ ver).group(5)
+ except: return None
+
+def forknum(ver):
+ try: return int(
+ re.match(r'\d+.\d+.\d+(\.\d+)?(-(rc|pre)\d+)?(-(\w+?)(\d+))?',
+ ver).group(6))
+ except: return None
+
+def fork(ver):
+ try: return re.match(r'\d+.\d+.\d+(\.\d+)?(-(rc|pre)\d+)?(-(\w+))?', ver).group(4)
+ except: return None
+
+def get_ver(makefile):
+ """ Read the version information from the specified makefile """
+ part = {}
+ parts = "VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION".split(' ')
+ m = open(makefile)
+ for l in m.readlines():
+ for p in parts:
+ try: part[p] = re.match(r'%s\s*=\s*(\S+)' % p, l).group(1)
+ except: pass
+
+ version = "%s.%s.%s" % tuple([part[p] for p in parts[:3]])
+ version += part.get("EXTRAVERSION","")
+ return version
+
+def get_localversion():
+ v = ''
+
+ for name in glob.glob('localversion*'):
+ try: v += open(name).readline().strip()
+ except: pass
+
+ try:
+ c = open('.config').read()
+ v += re.search(r'^CONFIG_LOCALVERSION="(.+)"', c, re.M).group(1)
+ except: pass
+
+ return v
+
+def compare_ver(a, b):
+ """
+ Compare kernel versions a and b
+
+ Note that -pre and -rc versions sort before the version they modify,
+ -pre sorts before -rc, -bk, -git, and -mm, etc. sort alphabetically.
+ """
+ if a == b: return 0
+
+ c = cmp(float(tree(a)), float(tree(b)))
+ if c: return c
+ c = cmp(rev(a), rev(b))
+ if c: return c
+ c = cmp(int(post(a) or 0), int(post(b) or 0))
+ if c: return c
+ c = cmp(pretype(a), pretype(b)) # pre sorts before rc
+ if c: return c
+ c = cmp(prenum(a), prenum(b))
+ if c: return c
+ c = cmp(forkname(a), forkname(b))
+ if c: return c
+ return cmp(forknum(a), forknum(b))
+
+def last(url, pat="(.*/)"):
+ for l in urllib.urlopen(url).readlines():
+ m = re.search('(?i)<a href="%s">' % pat, l)
+ if m: n = m.group(1)
+ return n
+
+def latest_mm(url, pat):
+ url = kernel_url + '/people/akpm/patches/2.6/'
+ url += last(url)
+ part = last(url)
+ return part[:-1]
+
+def latest_ck(url, pat):
+ url = "http://ck.kolivas.org/patches/2.6/pre-releases/"
+ url += last(url)
+ part = last(url)
+ pre = part[:-1]
+
+ url = "http://ck.kolivas.org/patches/2.6/"
+ url += last(url,"(2.6.*/)")
+ part = last(url)
+ rel = part[:-1]
+
+ l = [pre, rel]
+ l.sort(compare_ver)
+ return l[-1]
+
+def latest_dir(url, pat):
+ """Find the latest link matching pat at url after sorting"""
+ p = []
+ for l in urllib.urlopen(url).readlines():
+ m = re.search('"%s"' % pat, l)
+ if m: p.append(m.group(1))
+
+ if not p: return None
+
+ p.sort(compare_ver)
+ return p[-1]
+
+# mbligh is lazy and has a bunch of empty directories
+def latest_mjb(url, pat):
+ url = kernel_url + '/people/mbligh/'
+
+ # find the last Linus release and search backwards
+ l = [find_ver('2.6'), find_ver("2.6-pre")]
+ l.sort(compare_ver)
+ linus = l[-1]
+
+ p = []
+ for l in urllib.urlopen(url).readlines():
+ m = re.search('"(2\.6\..*/)"', l)
+ if m:
+ v = m.group(1)
+ if compare_ver(v, linus) <= 0:
+ p.append(v)
+
+ p.sort(compare_ver)
+ p.reverse()
+
+ for ver in p:
+ mjb = latest_dir(url + ver, pat)
+ if mjb: return mjb
+
+ return None
+
+def latest_26_tip(url, pat):
+ l = [find_ver('2.6'), find_ver('2.6-git'), find_ver('2.6-pre')]
+ l.sort(compare_ver)
+ return l[-1]
+
+def find_info(ver):
+ b = "%.1f" % tree(ver)
+ f = forkname(ver)
+ p = pre(ver)
+
+ s = b
+ if f:
+ s = "%s-%s" % (b, f)
+ elif p:
+ s = "%s-pre" % b
+
+ return version_info[s]
+
+def version_urls(ver):
+ """ Return the URL for the patch associated with the specified version """
+ i = find_info(ver)[1]
+ if type(i) != type([]):
+ i = [i]
+
+ v = {
+ 'full': ver,
+ 'tree': tree(ver),
+ 'base': base(ver),
+ 'prebase': prebase(ver)
+ }
+
+ l = []
+ for e in i:
+ l.append(e % v)
+
+ return l
+
+def patch_path(ver):
+ return os.path.join(archive, os.path.basename(version_urls(ver)[0]))
+
+def download(url, f):
+ qprint("Downloading %s" % os.path.basename(url))
+ if options["dry-run"]:
+ return 1
+
+ if not options["wget"]:
+ p = urllib.urlopen(url).read()
+ if p.find("<title>404") != -1:
+ return None
+ open(f, 'w').write(p)
+ else:
+ e = os.system("%s -c -O %s %s" %
+ (options["wget"], f + ".partial", url))
+ if e:
+ return None
+ os.rename(f + ".partial", f)
+
+ return 1
+
+def verify(url, f, sign):
+ if options["no-gpg"] or options["dry-run"] or not options["gpg-path"]:
+ return 1
+
+ sf = f + sign
+ if not download(url + sign, sf):
+ error("signature download failed")
+ error("removing files...")
+ os.unlink(f)
+ return 0
+
+ qprint("Verifying signature...")
+ r = os.system("%s --verify %s %s" % (options["gpg-path"], sf, f))
+ if r:
+ error("gpg returned %d" % r)
+ error("removing files...")
+ os.unlink(f)
+ os.unlink(sf)
+ return 0
+
+ return 1
+
+def trydownload(urls, f, sign):
+ for url in urls:
+ if download(url, f):
+ if not sign or verify(url, f, sign):
+ return f
+ if url[-4:] == ".bz2":
+ f2 = f[:-4] + ".gz"
+ url2 = url[:-4] + ".gz"
+ if download(url2, f2):
+ if not sign or verify(url2, f2, sign):
+ return f2
+ return None
+
+def get_patch(ver):
+ """Return the path to patch for given ver, downloading if necessary"""
+ f = patch_path(ver)
+ if os.path.exists(f):
+ return f
+ if f[-4:] == ".bz2":
+ f2 = f[:-4] + ".gz"
+ if os.path.exists(f2):
+ return f2
+
+ urls = version_urls(ver)
+ sign = find_info(ver)[3]
+ if sign == 1: sign = ".sign"
+ f = trydownload(urls, f, sign)
+ if not f:
+ error("patch download failed")
+ sys.exit(-1)
+
+ return f
+
+def apply_patch(ver, reverse = 0):
+ """Find the patch to upgrade from the predecessor of ver to ver and
+ apply or reverse it."""
+ p = get_patch(ver)
+ r = ""
+ if reverse:
+ r = " -R"
+
+ qprint("Applying %s%s" % (os.path.basename(p), r))
+ if options["dry-run"]:
+ return ver
+
+ def cmd(patch, reverse, dry):
+ base = "patch -l -p1%s" % reverse
+ if dry:
+ base += " --dry-run"
+
+ if p[-4:] == ".bz2":
+ pipe = "bzcat %s | %s" % (patch, base)
+ elif p[-3:] == ".gz":
+ pipe = "zcat %s | %s" % (patch, base)
+ else:
+ pipe = "%s < %s" % (base, patch)
+
+ err = os.system(pipe + " > .patchdiag")
+ if err:
+ sys.stderr.write(open(".patchdiag").read())
+ os.unlink(".patchdiag")
+ return err
+
+ err = cmd(p, r, 1)
+ if err:
+ error("patch %s failed: %d" % (p, err))
+ sys.exit(-1)
+
+ err = cmd(p, r, 0)
+ if err:
+ error("patch %s failed while it was supposed to apply: %d" % (p, err))
+ sys.exit(-1)
+
+def untar(tarfile):
+ old = os.getcwd()
+ os.mkdir("ketchup-tmp")
+ os.chdir("ketchup-tmp")
+
+ err = os.system("bzcat %s | tar -xf -" % tarfile)
+ if err:
+ error("Unpacking failed: ", err)
+ sys.exit(-1)
+
+ err = os.system("mv linux*/* linux*/.[^.]* ..; rmdir linux*")
+ if err:
+ error("Unpacking failed: ", err)
+ sys.exit(-1)
+
+ os.chdir(old)
+ shutil.rmtree("ketchup-tmp")
+
+def install_nearest(ver):
+ t = tree(ver)
+ tarballs = glob.glob(archive + "/linux-%s.*.tar.bz2" % t)
+ list = []
+
+ for f in tarballs:
+ m = re.match(r'.*/linux-(.*).tar.bz2$', f)
+ v = m.group(1)
+ d = abs(rev(v) - rev(ver))
+ list.append((d, f, v))
+ list.sort()
+
+ if not list or (options["full-tarball"] and list[0][0]):
+ f = "linux-%s.tar.bz2" % ver
+ url = "%s/v%s/%s" % (kernel_url, t, f)
+ f = archive + "/" + f
+
+ sign = find_info(ver)[3]
+ if sign == 1: sign = ".sign"
+
+ f = trydownload([url], f, sign)
+ if not f:
+ error("Tarball download failed")
+ sys.exit(-1)
+
+ else:
+ f = list[0][1]
+ ver = list[0][2]
+
+ qprint("Unpacking %s" % os.path.basename(f))
+ if options["dry-run"]: return ver
+ untar(f)
+
+ return ver
+
+def find_ver(ver):
+ if ver in version_info.keys():
+ v = version_info[ver]
+ d = v[1]
+ if type(d) is type([]):
+ d = d[0]
+ for n in range(5):
+ return v[0](os.path.dirname(d), v[2])
+ error('retrying version lookup for %s' % ver)
+ else:
+ return ver
+
+def transform(a, b):
+ if a == b:
+ qprint("Nothing to do!")
+ return
+ if not a:
+ a = install_nearest(base(b))
+ t = tree(a)
+ if t != tree(b):
+ error("Can't patch %s to %s" % (tree(a), tree(b)))
+ sys.exit(-1)
+ if fork(a):
+ apply_patch(a, 1)
+ a = prebase(a)
+ if prebase(a) != prebase(b):
+ if pre(a):
+ apply_patch(a, 1)
+ a = base(a)
+
+ if post(a) and post(a) != post(b):
+ apply_patch(prebase(a), 1)
+
+ ra, rb = rev(a), rev(b)
+ if ra > rb:
+ for r in range(ra, rb, -1):
+ apply_patch("%s.%s" % (t, r), -1)
+ if ra < rb:
+ for r in range(ra + 1, rb + 1):
+ apply_patch("%s.%s" % (t, r))
+ a = revbase(b)
+
+ if post(b) and post(a) != post(b):
+ apply_patch(prebase(b), 0)
+ a = base(b)
+
+ if pre(b):
+ apply_patch(prebase(b))
+ a = prebase(b)
+
+ if fork(b):
+ a = apply_patch(b)
+
+def rename_dir(v):
+ """Rename the current directory to linux-v, where v is the function arg"""
+ if rename_with_localversion:
+ v += get_localversion()
+ cwd = os.getcwd()
+ basedir = os.path.dirname(cwd)
+ newdir = os.path.join(basedir, rename_prefix + v)
+ if newdir == cwd:
+ return
+ if os.access(newdir, os.F_OK):
+ error("Cannot rename directory, destination exists: %s", newdir);
+ return
+ os.rename(cwd, newdir)
+ qprint('Current directory renamed to %s' % newdir)
+
+
+# latest lookup function, canonical urls, pattern for lookup function,
+# signature flag, description
+version_info = {
+ '2.4': (latest_dir,
+ kernel_url + "/v2.4" + "/patch-%(base)s.bz2",
+ r'patch-(.*?).bz2',
+ 1, "old stable kernel series"),
+ '2.4-pre': (latest_dir,
+ kernel_url + "/v2.4" + "/testing/patch-%(prebase)s.bz2",
+ r'patch-(.*?).bz2',
+ 1, "old stable kernel series prereleases"),
+ '2.6': (latest_dir,
+ kernel_url + "/v2.6" + "/patch-%(prebase)s.bz2",
+ r'patch-(.*?).bz2',
+ 1, "current stable kernel series"),
+ '2.6-rc': (latest_dir,
+ kernel_url + "/v2.6" + "/testing/patch-%(prebase)s.bz2",
+ r'patch-(.*?).bz2',
+ 1, "current stable kernel series prereleases"),
+ '2.6-pre': (latest_dir,
+ kernel_url + "/v2.6" + "/testing/patch-%(prebase)s.bz2",
+ r'patch-(.*?).bz2',
+ 1, "current stable kernel series prereleases"),
+ '2.6-git': (latest_dir,
+ [.kernel_url + "/v2.6" + "/snapshots/patch-%(full)s.bz2",
+ kernel_url + "/v2.6" + "/snapshots/old/patch-%(full)s.bz2"],
+ r'patch-(.*?).bz2',
+ 1, "current stable kernel series snapshots"),
+ '2.6-bk': (latest_dir,
+ [.kernel_url + "/v2.6" + "/snapshots/patch-%(full)s.bz2",
+ kernel_url + "/v2.6" + "/snapshots/old/patch-%(full)s.bz2"],
+ r'patch-(.*?).bz2',
+ 1, "old stable kernel series snapshots"),
+ '2.6-tip': (latest_26_tip, "", "", 1,
+ "current stable kernel series tip"),
+ '2.6-mm': (latest_mm,
+ kernel_url + "/people/akpm/patches/" +
+ "%(tree)s/%(prebase)s/%(full)s/%(full)s.bz2", "",
+ 1, "Andrew Morton's -mm development tree"),
+ '2.6-tiny': (latest_dir,
+ "http://www.selenic.com/tiny/%(full)s.patch.bz2",
+ r'(2.6.*?).patch.bz2',
+ 1, "Matt Mackall's -tiny tree for small systems"),
+ '2.6-mjb': (latest_mjb,
+ kernel_url + "/people/mbligh/%(prebase)s/patch-%(full)s.bz2",
+ r'patch-(2.6.*?).bz2',
+ 1, "Martin Bligh's random collection 'o crap"),
+ '2.6-rt': (latest_dir,
+ ["http://people.redhat.com/mingo/" +
+ "realtime-preempt/patch-%(full)s",
+ "http://people.redhat.com/mingo/" +
+ "realtime-preempt/older/patch-%(full)s"],
+ r'patch-(2.6.*?)',
+ 0, "Ingo Molnar's realtime-preempt kernel"),
+ '2.6-ck': (latest_ck,
+ ["http://ck.kolivas.org/patches/2.6/" +
+ "%(prebase)s/%(full)s/patch-%(full)s.bz2",
+ "http://ck.kolivas.org/patches/2.6/pre-releases/" +
+ "%(prebase)s/%(full)s/patch-%(full)s.bz2"],
+ "", ".sig",
+ "Con Kolivas' patches for system responsiveness (desktop)"),
+ '2.6-cks': (latest_dir,
+ "http://ck.kolivas.org/patches/cks/patch-%(full)s.bz2",
+ r'patch-(2.6.*?).bz2', ".sig",
+ "Con Kolivas' patches for system responsiveness (server)")
+ }
+
+# Override defaults with ~/.ketchuprc which is just a Python script
+rcpath = os.path.expanduser('~/.ketchuprc')
+if os.path.isfile(rcpath):
+ try:
+ execfile(rcpath)
+ except Exception, e:
+ sys.exit('Failed parsing %s\nError was: %s' % (rcpath, e))
+
+# Add local trees
+for k,v in local_trees.items():
+ version_info[k] = v
+
+# Environment variables override defaults and ketchuprc
+kernel_url = os.environ.get("KETCHUP_URL", kernel_url)
+archive = os.environ.get("KETCHUP_ARCH", archive)
+
+# And finally command line overrides everything
+if not os.path.exists(wget): wget = ""
+if not os.path.exists(gpg): gpg = ""
+
+options = {}
+opts = [.
+ ('a', 'archive', archive, 'cache directory'),
+ ('d', 'directory', '.', 'directory to update'),
+ ('f', 'full-tarball', None, 'if unpacking a tarball, download the latest'),
+ ('g', 'gpg-path', gpg, 'path for GnuPG'),
+ ('G', 'no-gpg', None, 'disable GPG signature verification'),
+ ('k', 'kernel-url', kernel_url, 'base url for kernel.org mirror'),
+ ('l', 'list-trees', None, 'list supported trees'),
+ ('m', 'show-makefile', None, 'output version in makefile <arg>'),
+ ('n', 'dry-run', None, 'don\'t download or apply patches'),
+ ('p', 'show-previous', None, 'output version previous to <arg>'),
+ ('q', 'quiet', None, 'reduce output'),
+ ('r', 'rename-directory', None, 'rename updated directory to %s<v>'
+ % rename_prefix),
+ ('s', 'show-latest', None, 'output the latest version of <arg>'),
+ ('u', 'show-url', None, 'output URL for <arg>'),
+ ('w', 'wget', wget, 'command to use for wget'),
+ ]
+
+args = fancyopts(sys.argv[1:], opts, options,
+ 'ketchup [options] [ver]')
+
+archive = options["archive"]
+kernel_url = options["kernel-url"]
+if options["no-gpg"]: options["gpg-path"] = ''
+
+# Process args
+
+if not os.path.exists(options["directory"]):
+ qprint("Creating target directory", options["directory"])
+ os.mkdir(options["directory"])
+os.chdir(options["directory"])
+
+if os.path.isfile(".ketchuprc"):
+ try:
+ execfile(".ketchuprc")
+ except Exception, e:
+ sys.exit('Failed parsing .ketchuprc\nError was: %s' % (e))
+
+if options["list-trees"]:
+ l = version_info.keys()
+ l.sort()
+ for tree in l:
+ if version_info[tree][3] == 0:
+ lprint(tree, "(unsigned)")
+ else:
+ lprint(tree, "(signed)")
+ lprint(" " + version_info[tree][4])
+ sys.exit(0)
+
+if options["show-makefile"] and len(args) < 2:
+ if not args:
+ lprint(get_ver("Makefile"))
+ else:
+ lprint(get_ver(args[0]))
+ sys.exit(0)
+
+if len(args) == 0 and default_tree:
+ qprint("Using default tree \"%s\"" % (default_tree))
+ args.append(default_tree)
+
+if len(args) != 1:
+ error("No version given on command line and no default in configuration")
+ sys.exit(-1)
+
+if options["show-latest"]:
+ lprint(find_ver(args[0]))
+ sys.exit(0)
+
+if options["show-url"]:
+ lprint(version_urls(find_ver(args[0]))[0])
+ sys.exit(0)
+
+if options["show-previous"]:
+ v = find_ver(args[0])
+ p = prebase(v)
+ if p == v: p = base(v)
+ if p == v:
+ if rev(v) > 0: p = "%.1f.%s" % (tree(v), rev(v) -1)
+ else: p = "unknown"
+ lprint(p)
+ sys.exit(0)
+
+if not os.path.exists(options["archive"]):
+ qprint("Creating cache directory", options["archive"])
+ os.mkdir(options["archive"])
+
+if precommand and os.system(precommand):
+ sys.exit('Precommand "%s" failed!' % precommand)
+
+try:
+ a = get_ver('Makefile')
+except:
+ a = None
+
+if not a and os.listdir("."):
+ error("Can't find kernel version for non-empty directory")
+ sys.exit(-1)
+
+b = find_ver(args[0])
+qprint("%s -> %s" % (a, b))
+transform(a, b)
+if options["rename-directory"] and not options["dry-run"]:
+ rename_dir(b)
+
+if postcommand and os.system(postcommand):
+ sys.exit('Postcommand "%s" failed!' % postcommand)
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/mk.linux-2.6-mm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/buildconfigs/mk.linux-2.6-mm Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,14 @@
+XEN_LINUX_SOURCE ?= tarball
+LINUX_VER ?= 2.6-mm
+
+XEN_LINUX_TARBALL_KETCHUP := y
+
+IMAGE_TARGET ?= vmlinux bzImage
+
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+
+XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
+
+EXTRAVERSION ?=
+
+include buildconfigs/mk.linux-2.6-xen
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/mk.linux-2.6-paravirt
--- a/buildconfigs/mk.linux-2.6-paravirt Mon Jul 09 09:22:58 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-paravirt Tue Jul 10 08:39:26 2007 -0600
@@ -8,6 +8,8 @@ IMAGE_TARGET ?= vmlinux bzImage

XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y

-EXTRAVERSION ?=
+XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
+
+EXTRAVERSION ?= -paravirt

include buildconfigs/mk.linux-2.6-xen
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/mk.linux-2.6-rc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/buildconfigs/mk.linux-2.6-rc Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,14 @@
+XEN_LINUX_SOURCE ?= tarball
+LINUX_VER ?= 2.6-rc
+
+XEN_LINUX_TARBALL_KETCHUP := y
+
+IMAGE_TARGET ?= vmlinux bzImage
+
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+
+XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
+
+EXTRAVERSION ?=
+
+include buildconfigs/mk.linux-2.6-xen
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/mk.linux-2.6-tip
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/buildconfigs/mk.linux-2.6-tip Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,14 @@
+XEN_LINUX_SOURCE ?= tarball
+LINUX_VER ?= 2.6-tip
+
+XEN_LINUX_TARBALL_KETCHUP := y
+
+IMAGE_TARGET ?= vmlinux bzImage
+
+XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
+
+XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
+
+EXTRAVERSION ?=
+
+include buildconfigs/mk.linux-2.6-xen
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/mk.linux-2.6-xen
--- a/buildconfigs/mk.linux-2.6-xen Mon Jul 09 09:22:58 2007 -0600
+++ b/buildconfigs/mk.linux-2.6-xen Tue Jul 10 08:39:26 2007 -0600
@@ -74,6 +74,10 @@ endif
else \
echo "No configuration method found for this kernel" ; \
fi
+ifneq ($(XEN_LINUX_CONFIG_UPDATE),)
+ echo "Updating $(CONFIG_FILE) using $(XEN_LINUX_CONFIG_UPDATE)"
+ sh $(XEN_LINUX_CONFIG_UPDATE) $(CONFIG_FILE)
+endif
ifeq ($(XEN_TARGET_ARCH),x86_32)
ifeq ($(pae),y)
sed -e 's!^CONFIG_HIGHMEM4G=y$$!\# CONFIG_HIGHMEM4G is not set!;s!^\# CONFIG_HIGHMEM64G is not set$$!CONFIG_HIGHMEM64G=y!' $(CONFIG_FILE) > $(CONFIG_FILE)- && mv $(CONFIG_FILE)- $(CONFIG_FILE)
diff -r 87b0b6a08dbd -r 42586a0f4407 buildconfigs/src.tarball
--- a/buildconfigs/src.tarball Mon Jul 09 09:22:58 2007 -0600
+++ b/buildconfigs/src.tarball Tue Jul 10 08:39:26 2007 -0600
@@ -1,7 +1,12 @@ XEN_LINUX_MIRROR ?= http://www.kernel.or
XEN_LINUX_MIRROR ?= http://www.kernel.org/pub/linux/kernel/v2.6/
XEN_LINUX_TARBALL ?= linux-$(LINUX_VER)-xen.tar.bz2

+# Update using ketchup instead of manipulating tarball manually.
+XEN_LINUX_TARBALL_KETCHUP ?= n
+
LINUX_SRCDIR ?= linux-$(LINUX_VER)
+
+KETCHUP ?= python buildconfigs/ketchup

vpath linux-%.tar.bz2 $(LINUX_SRC_PATH)

@@ -12,6 +17,11 @@ linux-%.tar.bz2:

# XXX create a pristine tree for diff -Nurp convenience

+ifeq ($(XEN_LINUX_TARBALL_KETCHUP),y)
+%/.valid-src:
+ $(KETCHUP) -d $(@D) $(LINUX_VER)
+ touch $@ # update timestamp to avoid rebuild
+else
%/.valid-src: %.tar.bz2
rm -rf tmp-linux-$* $(@D)
mkdir -p tmp-linux-$*
@@ -19,5 +29,5 @@ linux-%.tar.bz2:
-@rm -f tmp-linux-$*/pax_global_header
mv tmp-linux-$*/* $(@D)
@rm -rf tmp-linux-$*
- touch $(@D)/.hgskip
touch $@ # update timestamp to avoid rebuild
+endif
diff -r 87b0b6a08dbd -r 42586a0f4407 docs/man/xm.pod.1
--- a/docs/man/xm.pod.1 Mon Jul 09 09:22:58 2007 -0600
+++ b/docs/man/xm.pod.1 Tue Jul 10 08:39:26 2007 -0600
@@ -4,7 +4,7 @@ xm - Xen management user interface

=head1 SYNOPSIS

-xm <subcommand> [args]
+B<xm> I<subcommand> [I<args>]

=head1 DESCRIPTION

@@ -13,46 +13,50 @@ domains. It can also be used to list cur
domains. It can also be used to list current domains, enable or pin
VCPUs, and attach or detach virtual block devices.

-The basic structure of every xm command is almost always:
-
- xm <subcommand> <domain-id> [OPTIONS]
-
-Where I<subcommand> is one of the sub commands listed below, I<domain-id>
+The basic structure of every B<xm> command is almost always:
+
+=over 2
+
+B<xm> I<subcommand> I<domain-id> [I<OPTIONS>]
+
+=back
+
+Where I<subcommand> is one of the subcommands listed below, I<domain-id>
is the numeric domain id, or the domain name (which will be internally
-translated to domain id), and I<OPTIONS> are sub command specific
+translated to domain id), and I<OPTIONS> are subcommand specific
options. There are a few exceptions to this rule in the cases where
-the sub command in question acts on all domains, the entire machine,
-or directly on the xen hypervisor. Those exceptions will be clear for
-each of those sub commands.
+the subcommand in question acts on all domains, the entire machine,
+or directly on the Xen hypervisor. Those exceptions will be clear for
+each of those subcommands.

=head1 NOTES

All B<xm> operations rely upon the Xen control daemon, aka B<xend>.
-For any xm commands to run xend must also be running. For this reason
-you should start xend as a service when your system first boots using
-xen.
+For any B<xm> commands to run, xend must also be running. For this
+reason you should start xend as a service when your system first boots
+using Xen.

Most B<xm> commands require root privileges to run due to the
communications channels used to talk to the hypervisor. Running as
non root will return an error.

Most B<xm> commands act asynchronously, so just because the B<xm>
-command returned, doesn't mean the action is complete. This is
+command returned doesn't mean the action is complete. This is
important, as many operations on domains, like create and shutdown,
can take considerable time (30 seconds or more) to bring the machine
into a fully compliant state. If you want to know when one of these
-actions has finished you must poll through xm list periodically.
+actions has finished you must poll through B<xm list> periodically.

=head1 DOMAIN SUBCOMMANDS

-The following sub commands manipulate domains directly, as stated
-previously most commands take domain-id as the first parameter.
+The following subcommands manipulate domains directly. As stated
+previously, most commands take I<domain-id> as the first parameter.

=over 4

=item B<console> I<domain-id>

-Attach to domain domain-id's console. If you've set up your Domains to
+Attach to domain I<domain-id>'s console. If you've set up your domains to
have a traditional log in console this will look much like a normal
text log in screen.

@@ -63,15 +67,15 @@ so running curses based interfaces over
so running curses based interfaces over the console B<is not
advised>. Vi tends to get very odd when using it over this interface.

-=item B<create> I<[-c]> I<configfile> I<[name=value]>..
-
-The create sub command requires a configfile and can optional take a
+=item B<create> [B<-c>] I<configfile> [I<name>=I<value>]..
+
+The create subcommand requires a config file and can optionally take a
series of name value pairs that add to or override variables defined
in the config file. See L<xmdomain.cfg> for full details of that file
format, and possible options used in either the configfile or
-Name=Value combinations.
-
-Configfile can either be an absolute path to a file, or a relative
+I<name>=I<value> combinations.
+
+I<configfile> can either be an absolute path to a file, or a relative
path to a file located in /etc/xen.

Create will return B<as soon> as the domain is started. This B<does
@@ -116,10 +120,10 @@ virtual networking. (This example comes

=item B<destroy> I<domain-id>

-Immediately terminate the domain domain-id. This doesn't give the domain
-OS any chance to react, and it the equivalent of ripping the power
-cord out on a physical machine. In most cases you will want to use
-the B<shutdown> command instead.
+Immediately terminate the domain I<domain-id>. This doesn't give the
+domain OS any chance to react, and is the equivalent of ripping the
+power cord out on a physical machine. In most cases you will want to
+use the B<shutdown> command instead.

=item B<domid> I<domain-name>

@@ -129,14 +133,14 @@ Converts a domain name to a domain id us

Converts a domain id to a domain name using xend's internal mapping.

-=item B<help> I<[--long]>
+=item B<help> [B<--long>]

Displays the short help message (i.e. common commands).

-The I<--long> option prints out the complete set of B<xm> subcommands,
+The B<--long> option prints out the complete set of B<xm> subcommands,
grouped by function.

-=item B<list> I<[--long | --label]> I<[domain-id, ...]>
+=item B<list> [B<--long> | B<--label>] [I<domain-id> ...]

Prints information about one or more domains. If no domains are
specified it prints out information about all domains.
@@ -151,21 +155,23 @@ An example format for the list is as fol
Mandrake10.2 167 128 1 ------ 2.5
Suse9.2 168 100 1 ------ 1.8

-Name is the name of the domain. ID the domain numeric id. Mem is the
-size of the memory allocated to the domain. VCPUS is the number of
-VCPUS allocated to domain. State is the run state (see below). Time
-is the total run time of the domain as accounted for by Xen.
+Name is the name of the domain. ID is the numeric domain id. Mem is the
+desired amount of memory to allocate to the domain (although it may
+not be the currently allocated amount). VCPUs is the number of
+virtual CPUs allocated to the domain. State is the run state (see
+below). Time is the total run time of the domain as accounted for by
+Xen.

B<STATES>

=over 4

-The State field lists 6 states for a Xen Domain, and which ones the
-current Domain is in.
+The State field lists 6 states for a Xen domain, and which ones the
+current domain is in.

=item B<r - running>

-The domain is currently running on a CPU
+The domain is currently running on a CPU.

=item B<b - blocked>

@@ -203,12 +209,12 @@ B<LONG OUTPUT>

=over 4

-If I<--long> is specified, the output for xm list is not the table
+If B<--long> is specified, the output for B<xm list> is not the table
view shown above, but instead is an S-Expression representing all
information known about all domains asked for. This is mostly only
useful for external programs to parse the data.

-B<Note:> there is no stable guarantees on the format of this data.
+B<Note:> There are no stable guarantees on the format of this data.
Use at your own risk.

=back
@@ -217,10 +223,10 @@ B<LABEL OUTPUT>

=over 4

-If I<--label> is specified, the security labels are added to the
-output of xm list and the lines are sorted by the labels (ignoring
-case). The I<--long> option prints the labels by default and cannot be
-combined with I<--label>. See the ACCESS CONTROL SUBCOMMAND section of
+If B<--label> is specified, the security labels are added to the
+output of B<xm list> and the lines are sorted by the labels (ignoring
+case). The B<--long> option prints the labels by default and cannot be
+combined with B<--label>. See the ACCESS CONTROL SUBCOMMAND section of
this man page for more information about labels.

==back
@@ -230,7 +236,7 @@ B<NOTES>
=over 4

The Time column is deceptive. Virtual IO (network and block devices)
-used by Domains requires coordination by Domain0, which means that
+used by domains requires coordination by Domain0, which means that
Domain0 is actually charged for much of the time that a DomainU is
doing IO. Use of this time value to determine relative utilizations
by domains is thus very suspect, as a high IO workload may show as
@@ -240,11 +246,11 @@ less utilized than a high CPU workload.

=item B<mem-max> I<domain-id> I<mem>

-Specify the maximum amount of memory the Domain is able to use. Mem
+Specify the maximum amount of memory the domain is able to use. I<mem>
is specified in megabytes.

The mem-max value may not correspond to the actual memory used in the
-Domain, as it may balloon down it's memory to give more back to the OS.
+domain, as it may balloon down its memory to give more back to the OS.

=item B<mem-set> I<domain-id> I<mem>

@@ -252,20 +258,20 @@ operation requires cooperation from the
operation requires cooperation from the domain operating system, there
is no guarantee that it will succeed.

-B<Warning:> there is no good way to know in advance how small of a
+B<Warning:> There is no good way to know in advance how small of a
mem-set will make a domain unstable and cause it to crash. Be very
careful when using this command on running domains.

-=item B<migrate> I<domain-id> I<host> I<[options]>
-
-Migrate a domain to another Host machine. B<Xend> must be running on
-other host machine, it must be running the same version of xen, it
+=item B<migrate> I<domain-id> I<host> [I<OPTIONS>]
+
+Migrate a domain to another host machine. Xend must be running on
+the other host machine, it must be running the same version of Xen, it
must have the migration TCP port open and accepting connections from
the source host, and there must be sufficient resources for the domain
to run (memory, disk, etc).

-Migration is pretty complicated, and has many security implications,
-please read the Xen Users Guide to ensure you understand the
+Migration is pretty complicated, and has many security implications.
+Please read the Xen User's Guide to ensure you understand the
ramifications and limitations on migration before attempting it in
production.

@@ -273,13 +279,13 @@ B<OPTIONS>

=over 4

-=item B<-l, --live>
+=item B<-l>, B<--live>

Use live migration. This will migrate the domain between hosts
-without shutting down the domain. See the Xen Users Guide for more
+without shutting down the domain. See the Xen User's Guide for more
information.

-=item B<-r, --resource> I<Mbs>
+=item B<-r>, B<--resource> I<Mbs>

Set maximum Mbs allowed for migrating the domain. This ensures that
the network link is not saturated with migration traffic while
@@ -293,7 +299,7 @@ allocated resources such as memory, but
allocated resources such as memory, but will not be eligible for
scheduling by the Xen hypervisor.

-=item B<reboot> I<[options]> I<domain-id>
+=item B<reboot> [I<OPTIONS>] I<domain-id>

Reboot a domain. This acts just as if the domain had the B<reboot>
command run from the console. The command returns as soon as it has
@@ -301,18 +307,18 @@ domain actually reboots.
domain actually reboots.

The behavior of what happens to a domain when it reboots is set by the
-I<on_reboot> parameter of the xmdomain.cfg file when the domain was
+B<on_reboot> parameter of the xmdomain.cfg file when the domain was
created.

B<OPTIONS>

=over 4

-=item B<-a, --all>
-
-Reboot all domains
-
-=item B<-w, --wait>
+=item B<-a>, B<--all>
+
+Reboot all domains.
+
+=item B<-w>, B<--wait>

Wait for reboot to complete before returning. This may take a while,
as all services in the domain will have to be shut down cleanly.
@@ -321,7 +327,7 @@ as all services in the domain will have

=item B<restore> I<state-file>

-Build a domain from an B<xm save> state file. See I<save> for more info.
+Build a domain from an B<xm save> state file. See B<save> for more info.

=item B<save> I<domain-id> I<state-file>

@@ -334,16 +340,16 @@ with all the same limitations. Open net
with all the same limitations. Open network connections may be
severed upon restore, as TCP timeouts may have expired.

-=item B<shutdown> I<[options]> I<domain-id>
+=item B<shutdown> [I<OPTIONS>] I<domain-id>

Gracefully shuts down a domain. This coordinates with the domain OS
to perform graceful shutdown, so there is no guarantee that it will
succeed, and may take a variable length of time depending on what
services must be shutdown in the domain. The command returns
-immediately after signally the domain unless that I<-w> flag is used.
+immediately after signaling the domain unless the B<-w> flag is used.

The behavior of what happens to a domain when it reboots is set by the
-I<on_shutdown> parameter of the xmdomain.cfg file when the domain was
+B<on_shutdown> parameter of the xmdomain.cfg file when the domain was
created.

B<OPTIONS>
@@ -386,7 +392,7 @@ configured VCPU count is an error. Tryi
configured VCPU count is an error. Trying to set VCPUs to < 1 will be
quietly ignored.

-=item B<vcpu-list> I<[domain-id]>
+=item B<vcpu-list> [I<domain-id>]

Lists VCPU information for a specific domain. If no domain is
specified, VCPU information for all domains will be provided.
@@ -394,7 +400,7 @@ specified, VCPU information for all doma
=item B<vcpu-pin> I<domain-id> I<vcpu> I<cpus>

Pins the the VCPU to only run on the specific CPUs. The keyword
-I<all> can be used to apply the I<cpus> list to all VCPUs in the
+B<all> can be used to apply the I<cpus> list to all VCPUs in the
domain.

Normally VCPUs can float between available CPUs whenever Xen deems a
@@ -408,7 +414,7 @@ CPUs.

=over 4

-=item B<dmesg> I<[-c]>
+=item B<dmesg> [B<-c>]

Reads the Xen message buffer, similar to dmesg on a Linux system. The
buffer contains informational, warning, and error messages created
@@ -419,7 +425,7 @@ B<OPTIONS>

=over 4

-=item B<-c, --clear>
+=item B<-c>, B<--clear>

Clears Xen's message buffer.

@@ -431,8 +437,8 @@ reporting a Xen bug, please provide this
reporting a Xen bug, please provide this information as part of the
bug report.

-Sample xen domain info looks as follows (lines wrapped manually to
-make the man page more readable):
+Sample output looks as follows (lines wrapped manually to make the man
+page more readable):

host : talon
release : 2.6.12.6-xen0
@@ -470,36 +476,36 @@ Not all fields will be explained here, b
Not all fields will be explained here, but some of the less obvious
ones deserve explanation:

-=item I<hw_caps>
+=item B<hw_caps>

A vector showing what hardware capabilities are supported by your
processor. This is equivalent to, though more cryptic, the flags
field in /proc/cpuinfo on a normal Linux machine.

-=item I<free_memory>
-
-Available memory (in MB) not allocated to Xen, or any other Domains.
-
-=item I<xen_caps>
-
-The xen version, architecture. Architecture values can be one of:
+=item B<free_memory>
+
+Available memory (in MB) not allocated to Xen, or any other domains.
+
+=item B<xen_caps>
+
+The Xen version and architecture. Architecture values can be one of:
x86_32, x86_32p (i.e. PAE enabled), x86_64, ia64.

-=item I<xen_changeset>
-
-The xen mercurial changeset id. Very useful for determining exactly
+=item B<xen_changeset>
+
+The Xen mercurial changeset id. Very useful for determining exactly
what version of code your Xen system was built from.

=back

=item B<log>

-Print out the B<xend> log. This log file can be found in
+Print out the xend log. This log file can be found in
/var/log/xend.log.

=item B<top>

-Executes the xentop command, which provides real time monitoring of
+Executes the B<xentop> command, which provides real time monitoring of
domains. Xentop is a curses interface, and reasonably self
explanatory.

@@ -508,12 +514,40 @@ explanatory.
=head1 SCHEDULER SUBCOMMANDS

Xen ships with a number of domain schedulers, which can be set at boot
-time with the I<sched=> parameter on the Xen command line. By
-default I<sedf> is used for scheduling.
+time with the B<sched=> parameter on the Xen command line. By
+default B<credit> is used for scheduling.

FIXME: we really need a scheduler expert to write up this section.

=over 4
+
+=item B<sched-credit> [ B<-d> I<domain-id> [ B<-w>[B<=>I<WEIGHT>] | B<-c>[B<=>I<CAP>] ] ]
+
+Set credit scheduler parameters. The credit scheduler is a
+proportional fair share CPU scheduler built from the ground up to be
+work conserving on SMP hosts.
+
+Each domain (including Domain0) is assigned a weight and a cap.
+
+B<PARAMETERS>
+
+=over 4
+
+=item I<WEIGHT>
+
+A domain with a weight of 512 will get twice as much CPU as a domain
+with a weight of 256 on a contended host. Legal weights range from 1
+to 65535 and the default is 256.
+
+=item I<CAP>
+
+The cap optionally fixes the maximum amount of CPU a domain will be
+able to consume, even if the host system has idle CPU cycles. The cap
+is expressed in percentage of one physical CPU: 100 is 1 physical CPU,
+50 is half a CPU, 400 is 4 CPUs, etc. The default, 0, means there is
+no upper cap.
+
+=back

=item B<sched-sedf> I<period> I<slice> I<latency-hint> I<extratime> I<weight>

@@ -546,7 +580,7 @@ Flag for allowing domain to run in extra

=item I<weight>

-Another way of setting cpu slice.
+Another way of setting CPU slice.

=back

@@ -591,7 +625,7 @@ event.

=over 4

-=item B<block-attach> I<domain-id> I<be-dev> I<fe-dev> I<mode> I<[bedomain-id]>
+=item B<block-attach> I<domain-id> I<be-dev> I<fe-dev> I<mode> [I<bedomain-id>]

Create a new virtual block device. This will trigger a hotplug event
for the guest.
@@ -619,7 +653,7 @@ devices, or by device id, such as 0x1400
=item I<mode>

The access mode for the device from the guest domain. Supported modes
-are I<w> (read/write) or I<r> (read-only).
+are B<w> (read/write) or B<r> (read-only).

=item I<bedomain-id>

@@ -635,62 +669,65 @@ B<EXAMPLES>

xm block-attach guestdomain file://path/to/dsl-2.0RC2.iso /dev/hdc ro

-This will mount the dsl iso as /dev/hdc in the guestdomain as a read
-only device. This will probably not be detected as a cdrom by the
+This will mount the dsl ISO as /dev/hdc in the guestdomain as a read
+only device. This will probably not be detected as a CD-ROM by the
guest, but mounting /dev/hdc manually will work.

=back

-=item B<block-detach> I<domain-id> I<devid>
-
-Destroy a domain's virtual block device. devid B<must> be the device
-id given to the device by domain 0. You will need to run I<xm
-block-list> to determine that number.
-
-FIXME: this is currently B<broken>. Even though a block device is
-removed from domU, it appears to still be allocated in the domain 0.
-
-=item B<block-list> I<[-l|--long]> I<domain-id>
+=item B<block-detach> I<domain-id> I<devid> [B<--force>]
+
+Detach a domain's virtual block device. I<devid> may be the symbolic
+name or the numeric device id given to the device by domain 0. You
+will need to run B<xm block-list> to determine that number.
+
+Detaching the device requires the cooperation of the domain. If the
+domain fails to release the device (perhaps because the domain is hung
+or is still using the device), the detach will fail. The B<--force>
+parameter will forcefully detach the device, but may cause IO errors
+in the domain.
+
+=item B<block-list> [B<-l>|B<--long>] I<domain-id>

List virtual block devices for a domain. The returned output is
-formatted as a list or as an S-Expression if the '--long' option was given.
+formatted as a list or as an S-Expression if the B<--long> option was given.

=head2 NETWORK DEVICES

-=item B<network-attach> I<domain-id> I<[script=scriptname]> I<[ip=ipaddr]>
-I<[mac=macaddr]> I<[bridge=bridge-name]> I<[backend=bedomain-id]>
-
-Creates a new network device in the domain specified by domain-id. It
+=item B<network-attach> I<domain-id> [B<script=>I<scriptname>] [B<ip=>I<ipaddr>]
+[B<mac=>I<macaddr>] [B<bridge=>I<bridge-name>] [B<backend=>I<bedomain-id>]
+
+Creates a new network device in the domain specified by I<domain-id>. It
takes the following optional options:

B<OPTIONS>

=over 4

-=item I<script=scriptname>
+=item B<script=>I<scriptname>

Use the specified script name to bring up the network. Defaults to
-the default setting in xend-config.sxp for I<vif-script>.
-
-=item I<ip=ipaddr>
+the default setting in xend-config.sxp for B<vif-script>.
+
+=item B<ip=>I<ipaddr>

Passes the specified IP Address to the adapter on creation.

FIXME: this currently appears to be B<broken>. I'm not sure under what
circumstances this should actually work.

-=item I<mac=macaddr>
+=item B<mac=>I<macaddr>

The MAC address that the domain will see on its Ethernet device. If
the device is not specified it will be randomly generated with the
00:16:3e vendor id prefix.

-=item I<bridge=bridge-name>
+=item B<bridge=>I<bridge-name>

The name of the bridge to attach the vif to, in case you have more
-than one. This defaults to
-
-=item I<backend=bedomain-id>
+than one. This defaults to xenbr0.
+
+=item B<backend=>I<bedomain-id>

The backend domain id. By default this is domain 0.

@@ -705,17 +742,17 @@ FIXME: this is currently B<broken>. Net
FIXME: this is currently B<broken>. Network devices aren't completely
removed from domain 0.

-=item B<network-list> I<[-l|--long]> I<domain-id>
+=item B<network-list> [B<-l>|B<--long>] I<domain-id>

List virtual network interfaces for a domain. The returned output is
-formatted as a list or as an S-Expression if the '--long' option was given.
+formatted as a list or as an S-Expression if the B<--long> option was given.

=head2 VIRTUAL TPM DEVICES

-=item B<vtpm-list> I<[-l|--long]> I<domain-id>
+=item B<vtpm-list> [B<-l>|B<--long>] I<domain-id>

Show the virtual TPM device for a domain. The returned output is
-formatted as a list or as an S-Expression if the '--long' option was given.
+formatted as a list or as an S-Expression if the B<--long> option was given.

=back

@@ -728,7 +765,7 @@ out entirely.

=over 4

-=item B<vnet-list> I<[-l|--long]>
+=item B<vnet-list> [B<-l>|B<--long>]

List vnets.

@@ -762,7 +799,7 @@ interpret labels:
interpret labels:

(1) Simple Type Enforcement: Labels are interpreted to decide access
-of domains to comunication means and virtual or physical
+of domains to communication means and virtual or physical
resources. Communication between domains as well as access to
resources are forbidden by default and can only take place if they are
explicitly allowed by the security policy. The proper assignment of
@@ -796,8 +833,8 @@ time with the B<cfgbootpolicy> subcomman
=over 4

I<policy> is a dot-separated list of names. The last part is the file
-name pre-fix for the policy xml file. The preceding name parts are
-translated into the local path pointing to the policy xml file
+name prefix for the policy XML file. The preceding name parts are
+translated into the local path pointing to the policy XML file
relative to the global policy root directory
(/etc/xen/acm-security/policies). For example,
example.chwall_ste.client_v1 denotes the policy file
@@ -823,16 +860,16 @@ I<boot title> parameter to specify a uni

Prints the current security policy state information of Xen.

-=item B<labels> [I<policy>] [I<type>=dom|res|any]
+=item B<labels> [I<policy>] [B<type=dom>|B<res>|B<any>]

Lists all labels of a I<type> (domain, resource, or both) that are
defined in the I<policy>. Unless specified, the default I<policy> is
the currently enforced access control policy. The default for I<type>
is 'dom'. The labels are arranged in alphabetical order.

-=item B<addlabel> I<label> dom I<configfile> [I<policy>]
-
-=item B<addlabel> I<label> res I<resource> [I<policy>]
+=item B<addlabel> I<label> B<dom> I<configfile> [I<policy>]
+
+=item B<addlabel> I<label> B<res> I<resource> [I<policy>]

Adds the security label with name I<label> to a domain
I<configfile> (dom) or to the global resource label file for the
@@ -841,17 +878,17 @@ verifies that the I<policy> definition s
verifies that the I<policy> definition supports the specified I<label>
name.

-=item B<rmlabel> dom I<configfile>
-
-=item B<rmlabel> res I<resource>
-
-Works the same as the I<addlabel> command (above), except that this
+=item B<rmlabel> B<dom> I<configfile>
+
+=item B<rmlabel> B<res> I<resource>
+
+Works the same as the B<addlabel> command (above), except that this
command will remove the label from the domain I<configfile> (dom) or
the global resource label file (res).

-=item B<getlabel> dom I<configfile>
-
-=item B<getlabel> res I<resource>
+=item B<getlabel> B<dom> I<configfile>
+
+=item B<getlabel> B<res> I<resource>

Shows the label for the given I<configfile> or I<resource>

@@ -881,7 +918,7 @@ Then recompile and install xen and the s

cd xen_source_dir/xen; make clean; make; cp xen.gz /boot;
cd xen_source_dir/tools/security; make install;
- reboot into xen
+ reboot into Xen

=back

@@ -944,10 +981,10 @@ B<ATTACHING A SECURITY LABEL TO A DOMAIN

=over 4

-The I<addlabel> subcommand can attach a security label to a domain
+The B<addlabel> subcommand can attach a security label to a domain
configuration file, here a HomeBanking label. The example policy
ensures that this domain does not share information with other
-non-hombanking user domains (i.e., domains labeled as dom_Fun or
+non-homebanking user domains (i.e., domains labeled as dom_Fun or
dom_Boinc) and that it will not run simultaneously with domains
labeled as dom_Fun.

@@ -958,7 +995,7 @@ probably just a browser environment for
xm addlabel dom_HomeBanking dom myconfig.xm

The very simple configuration file might now look as printed
-below. The I<addlabel> subcommand added the B<access_control> entry at
+below. The B<addlabel> subcommand added the B<access_control> entry at
the end of the file, consisting of a label name and the policy that
specifies this label name:

@@ -986,7 +1023,7 @@ B<ATTACHING A SECURITY LABEL TO A RESOUR

=over 4

-The I<addlabel> subcommand can also be used to attach a security
+The B<addlabel> subcommand can also be used to attach a security
label to a resource. Following the home banking example from above,
we can label a disk resource (e.g., a physical partition or a file)
to make it accessible to the home banking domain. The example policy
@@ -1002,7 +1039,7 @@ attaches this disk to the domain at boot
disk = [ 'phy:hda6,sda2,w' ]

Alternatively, the resource can be attached after booting the domain
-by using the I<block-attach> subcommand.
+by using the B<block-attach> subcommand.

xm block-attach homebanking phy:hda6 sda2 w

@@ -1010,7 +1047,7 @@ off. Any attempt to use labeled resourc
off. Any attempt to use labeled resources with security turned off
will result in a failure with a corresponding error message. The
solution is to enable security or, if security is no longer desired,
-to remove the resource label using the I<rmlabel> subcommand.
+to remove the resource label using the B<rmlabel> subcommand.

=back

@@ -1048,7 +1085,7 @@ B<POLICY REPRESENTATIONS>
=over 4

We distinguish three representations of the Xen access control policy:
-the I<source XML> version, its I<binary> counterpart, and a I<mapping>
+the source XML version, its binary counterpart, and a mapping
representation that enables the tools to deterministically translate
back and forth between label names of the XML policy and label
identifiers of the binary policy. All three versions must be kept
@@ -1075,8 +1112,6 @@ their binary identifiers (ssidrefs) used

=back

-=head1 EXAMPLES
-
=head1 SEE ALSO

B<xmdomain.cfg>(5), B<xentop>(1)
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/blktap/drivers/Makefile
--- a/tools/blktap/drivers/Makefile Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/blktap/drivers/Makefile Tue Jul 10 08:39:26 2007 -0600
@@ -10,7 +10,6 @@ LIBAIO_DIR = ../../libaio/src

CFLAGS += -Werror
CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing
CFLAGS += -I $(XEN_LIBXC) -I $(LIBAIO_DIR)
CFLAGS += $(INCLUDES) -I. -I../../xenstore
CFLAGS += -D_GNU_SOURCE
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/blktap/lib/Makefile
--- a/tools/blktap/lib/Makefile Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/blktap/lib/Makefile Tue Jul 10 08:39:26 2007 -0600
@@ -16,7 +16,7 @@ SRCS += xenbus.c blkif.c xs_api.c

CFLAGS += -Werror
CFLAGS += -Wno-unused
-CFLAGS += -fno-strict-aliasing -fPIC
+CFLAGS += -fPIC
# get asprintf():
CFLAGS += -D _GNU_SOURCE

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/examples/init.d/xendomains
--- a/tools/examples/init.d/xendomains Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/examples/init.d/xendomains Tue Jul 10 08:39:26 2007 -0600
@@ -221,22 +221,26 @@ start()
if [ "$XENDOMAINS_RESTORE" = "true" ] &&
contains_something "$XENDOMAINS_SAVE"
then
- XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'`
mkdir -p $(dirname "$LOCKFILE")
touch $LOCKFILE
echo -n "Restoring Xen domains:"
saved_domains=`ls $XENDOMAINS_SAVE`
- for dom in $XENDOMAINS_SAVED; do
- echo -n " ${dom##*/}"
- xm restore $dom
- if [ $? -ne 0 ]; then
- rc_failed $?
- echo -n '!'
- else
- # mv $dom ${dom%/*}/.${dom##*/}
- rm $dom
- fi
- done
+ for dom in $XENDOMAINS_SAVE/*; do
+ if [ -f $dom ] ; then
+ HEADER=`head -c 16 $dom | head -n 1 2> /dev/null`
+ if [ $HEADER = "LinuxGuestRecord" ]; then
+ echo -n " ${dom##*/}"
+ xm restore $dom
+ if [ $? -ne 0 ]; then
+ rc_failed $?
+ echo -n '!'
+ else
+ # mv $dom ${dom%/*}/.${dom##*/}
+ rm $dom
+ fi
+ fi
+ fi
+ done
echo .
fi

@@ -260,7 +264,6 @@ start()
if [ $? -eq 0 ] || is_running $dom; then
echo -n "(skip)"
else
- echo "(booting)"
xm create --quiet --defconfig $dom
if [ $? -ne 0 ]; then
rc_failed $?
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/examples/xend-config.sxp
--- a/tools/examples/xend-config.sxp Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/examples/xend-config.sxp Tue Jul 10 08:39:26 2007 -0600
@@ -191,3 +191,9 @@
# The default password for VNC console on HVM domain.
# Empty string is no authentication.
(vncpasswd '')
+
+# The default keymap to use for the VM's virtual keyboard
+# when not specified in VM's configuration
+#(keymap 'en-us')
+
+
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/firmware/etherboot/README
--- a/tools/firmware/etherboot/README Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/firmware/etherboot/README Tue Jul 10 08:39:26 2007 -0600
@@ -1,7 +1,8 @@

This is an Etherboot option ROM for the rtl8139 NIC. It has a few
-non-standard settings, just to do with timeouts and when to give up.
+non-standard settings, most to do with timeouts and when to give up,
+and for stricter DHCP spec compliance.

Rom-o-matic.net will provide this image at the following URL:

-http://rom-o-matic.net/5.4.2/build.php?version=5.4.2&F=ignore&nic=rtl8139%3Artl8139+--+%5B0x10ec%2C0x8139%5D&ofmt=Binary+ROM+Image%28.zrom%29&arch=i386&ASK_BOOT=-1&BOOT_FIRST=BOOT_NIC&BOOT_SECOND=BOOT_NOTHING&BOOT_THIRD=BOOT_NOTHING&BOOT_INDEX=0&STATIC_CLIENT_IP=&STATIC_SUBNET_MASK=&STATIC_SERVER_IP=&STATIC_GATEWAY_IP=&STATIC_BOOTFILE=&EXIT_ON_FILE_LOAD_ERROR=on&DHCP_CLIENT_ID=&DHCP_CLIENT_ID_LEN=&DHCP_CLIENT_ID_TYPE=&DHCP_USER_CLASS=&DHCP_USER_CLASS_LEN=&ALLOW_ONLY_ENCAPSULATED=on&DEFAULT_BOOTFILE=&CONGESTED=on&BACKOFF_LIMIT=7&TIMEOUT=180&TRY_FLOPPY_FIRST=0&EXIT_IF_NO_OFFER=on&TAGGED_IMAGE=on&ELF_IMAGE=on&PXE_IMAGE=on&DOWNLOAD_PROTO_TFTP=on&COMCONSOLE=0x3F8&CONSPEED=9600&COMPARM=0x03&PXE_EXPORT=on&CONFIG_PCI=on&CONFIG_ISA=on&BUILD_ID=&PCBIOS=on&A=Get+ROM
+http://rom-o-matic.net/5.4.3/build.php?version=5.4.3&F=ignore&nic=rtl8139%3Artl8139+--+%5B0x10ec%2C0x8139%5D&ofmt=Binary+ROM+Image%28.zrom%29&arch=i386&ASK_BOOT=-1&BOOT_FIRST=BOOT_NIC&BOOT_SECOND=BOOT_NOTHING&BOOT_THIRD=BOOT_NOTHING&BOOT_INDEX=0&STATIC_CLIENT_IP=&STATIC_SUBNET_MASK=&STATIC_SERVER_IP=&STATIC_GATEWAY_IP=&STATIC_BOOTFILE=&EXIT_ON_FILE_LOAD_ERROR=on&DHCP_CLIENT_ID=&DHCP_CLIENT_ID_LEN=&DHCP_CLIENT_ID_TYPE=&DHCP_USER_CLASS=&DHCP_USER_CLASS_LEN=&ALLOW_ONLY_ENCAPSULATED=on&DEFAULT_BOOTFILE=&CONGESTED=on&BACKOFF_LIMIT=7&TIMEOUT=180&TRY_FLOPPY_FIRST=0&EXIT_IF_NO_OFFER=on&TAGGED_IMAGE=on&ELF_IMAGE=on&PXE_IMAGE=on&DOWNLOAD_PROTO_TFTP=on&COMCONSOLE=0x3F8&CONSPEED=9600&COMPARM=0x03&PXE_EXPORT=on&CONFIG_PCI=on&CONFIG_ISA=on&BUILD_ID=&PCBIOS=on&PXE_DHCP_STRICT=on&A=Get+ROM
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/firmware/etherboot/eb-rtl8139.zrom
Binary file tools/firmware/etherboot/eb-rtl8139.zrom has changed
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/firmware/rombios/rombios.c
--- a/tools/firmware/rombios/rombios.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/firmware/rombios/rombios.c Tue Jul 10 08:39:26 2007 -0600
@@ -4675,6 +4675,10 @@ int09_function(DI, SI, BP, SP, BX, DX, C
write_byte(0x0040, 0x18, mf2_flags);
break;

+ case 0x53: /* Del */
+ if ((shift_flags & 0x0c) == 0x0c) /* Ctrl + Alt */
+ machine_reset();
+ /* Fall through */
default:
if (scancode & 0x80) return; /* toss key releases ... */
if (scancode > MAX_SCAN_CODE) {
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/firmware/vmxassist/vm86.c Tue Jul 10 08:39:26 2007 -0600
@@ -594,16 +594,24 @@ movr(struct regs *regs, unsigned prefix,
TRACE((regs, regs->eip - eip,
"movb %%e%s, *0x%x", rnames[r], addr));
write8(addr, val);
- break;
+ return 1;

case 0x8A: /* addr32 mov r/m8, r8 */
TRACE((regs, regs->eip - eip,
"movb *0x%x, %%%s", addr, rnames[r]));
setreg8(regs, r, read8(addr));
- break;
+ return 1;

case 0x89: /* addr32 mov r16, r/m16 */
val = getreg32(regs, r);
+ if ((modrm & 0xC0) == 0xC0) {
+ if (prefix & DATA32)
+ setreg32(regs, modrm & 7, val);
+ else
+ setreg16(regs, modrm & 7, MASK16(val));
+ return 1;
+ }
+
if (prefix & DATA32) {
TRACE((regs, regs->eip - eip,
"movl %%e%s, *0x%x", rnames[r], addr));
@@ -613,9 +621,17 @@ movr(struct regs *regs, unsigned prefix,
"movw %%%s, *0x%x", rnames[r], addr));
write16(addr, MASK16(val));
}
- break;
-
- case 0x8B: /* addr32 mov r/m16, r16 */
+ return 1;
+
+ case 0x8B: /* mov r/m16, r16 */
+ if ((modrm & 0xC0) == 0xC0) {
+ if (prefix & DATA32)
+ setreg32(regs, r, addr);
+ else
+ setreg16(regs, r, MASK16(addr));
+ return 1;
+ }
+
if (prefix & DATA32) {
TRACE((regs, regs->eip - eip,
"movl *0x%x, %%e%s", addr, rnames[r]));
@@ -625,7 +641,7 @@ movr(struct regs *regs, unsigned prefix,
"movw *0x%x, %%%s", addr, rnames[r]));
setreg16(regs, r, read16(addr));
}
- break;
+ return 1;

case 0xC6: /* addr32 movb $imm, r/m8 */
if ((modrm >> 3) & 7)
@@ -634,9 +650,9 @@ movr(struct regs *regs, unsigned prefix,
write8(addr, val);
TRACE((regs, regs->eip - eip, "movb $0x%x, *0x%x",
val, addr));
- break;
- }
- return 1;
+ return 1;
+ }
+ return 0;
}

/*
@@ -816,8 +832,8 @@ mov_to_seg(struct regs *regs, unsigned p
* 1) real->protected mode.
* 2) protected->real mode.
*/
- if ((mode != VM86_REAL_TO_PROTECTED) &&
- (mode != VM86_PROTECTED_TO_REAL))
+ if (mode != VM86_REAL_TO_PROTECTED &&
+ mode != VM86_PROTECTED_TO_REAL)
return 0;

/* Register source only. */
@@ -1037,8 +1053,8 @@ set_mode(struct regs *regs, enum vm86_mo
{
switch (newmode) {
case VM86_REAL:
- if ((mode == VM86_PROTECTED_TO_REAL) ||
- (mode == VM86_REAL_TO_PROTECTED)) {
+ if (mode == VM86_PROTECTED_TO_REAL ||
+ mode == VM86_REAL_TO_PROTECTED) {
regs->eflags &= ~EFLAGS_TF;
real_mode(regs);
} else if (mode != VM86_REAL)
@@ -1121,7 +1137,7 @@ jmpl_indirect(struct regs *regs, int pre

if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */
set_mode(regs, VM86_PROTECTED);
- else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
+ else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */
set_mode(regs, VM86_REAL);
else
panic("jmpl");
@@ -1147,7 +1163,7 @@ retl(struct regs *regs, int prefix)

if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */
set_mode(regs, VM86_PROTECTED);
- else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
+ else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */
set_mode(regs, VM86_REAL);
else
panic("retl");
@@ -1382,9 +1398,7 @@ opcode(struct regs *regs)

case 0x39: /* addr32 cmp r16, r/m16 */
case 0x3B: /* addr32 cmp r/m16, r16 */
- if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
- goto invalid;
- if ((prefix & ADDR32) == 0)
+ if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32))
goto invalid;
if (!cmp(regs, prefix, opc))
goto invalid;
@@ -1427,37 +1441,17 @@ opcode(struct regs *regs)
}
continue;

- case 0x88: /* mov r8, r/m8 */
- case 0x8A: /* mov r/m8, r8 */
- if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
- goto invalid;
- if ((prefix & ADDR32) == 0)
+ case 0x88: /* addr32 mov r8, r/m8 */
+ case 0x8A: /* addr32 mov r/m8, r8 */
+ if (mode == VM86_PROTECTED_TO_REAL || !(prefix & ADDR32))
goto invalid;
if (!movr(regs, prefix, opc))
goto invalid;
return OPC_EMULATED;

- case 0x89: /* addr32 mov r16, r/m16 */
- if (mode == VM86_PROTECTED_TO_REAL) {
- unsigned modrm = fetch8(regs);
- unsigned addr = operand(prefix, regs, modrm);
- unsigned val, r = (modrm >> 3) & 7;
-
- if (prefix & DATA32) {
- val = getreg16(regs, r);
- write32(addr, val);
- } else {
- val = getreg32(regs, r);
- write16(addr, MASK16(val));
- }
- TRACE((regs, regs->eip - eip,
- "mov %%%s, *0x%x", rnames[r], addr));
- return OPC_EMULATED;
- }
- case 0x8B: /* addr32 mov r/m16, r16 */
- if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
- goto invalid;
- if ((prefix & ADDR32) == 0)
+ case 0x89: /* mov r16, r/m16 */
+ case 0x8B: /* mov r/m16, r16 */
+ if (mode != VM86_PROTECTED_TO_REAL && !(prefix & ADDR32))
goto invalid;
if (!movr(regs, prefix, opc))
goto invalid;
@@ -1469,7 +1463,7 @@ opcode(struct regs *regs)
return OPC_EMULATED;

case 0x8F: /* addr32 pop r/m16 */
- if ((prefix & ADDR32) == 0)
+ if (!(prefix & ADDR32))
goto invalid;
if (!pop(regs, prefix, opc))
goto invalid;
@@ -1498,48 +1492,48 @@ opcode(struct regs *regs)
return OPC_EMULATED;

case 0xA1: /* mov ax, r/m16 */
- {
- int addr, data;
- int seg = segment(prefix, regs, regs->vds);
- int offset = prefix & ADDR32? fetch32(regs) : fetch16(regs);
-
- if (prefix & DATA32) {
- addr = address(regs, seg, offset);
- data = read32(addr);
- setreg32(regs, 0, data);
- } else {
- addr = address(regs, seg, offset);
- data = read16(addr);
- setreg16(regs, 0, data);
- }
- TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
+ {
+ int addr, data;
+ int seg = segment(prefix, regs, regs->vds);
+ int offset = prefix & ADDR32 ? fetch32(regs) : fetch16(regs);
+
+ if (prefix & DATA32) {
+ addr = address(regs, seg, offset);
+ data = read32(addr);
+ setreg32(regs, 0, data);
+ } else {
+ addr = address(regs, seg, offset);
+ data = read16(addr);
+ setreg16(regs, 0, data);
}
- return OPC_EMULATED;
+ TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
+ return OPC_EMULATED;
+ }

case 0xBB: /* mov bx, imm16 */
- {
- int data;
- if (prefix & DATA32) {
- data = fetch32(regs);
- setreg32(regs, 3, data);
- } else {
- data = fetch16(regs);
- setreg16(regs, 3, data);
- }
- TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
+ {
+ int data;
+ if (prefix & DATA32) {
+ data = fetch32(regs);
+ setreg32(regs, 3, data);
+ } else {
+ data = fetch16(regs);
+ setreg16(regs, 3, data);
}
- return OPC_EMULATED;
+ TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
+ return OPC_EMULATED;
+ }

case 0xC6: /* addr32 movb $imm, r/m8 */
- if ((prefix & ADDR32) == 0)
+ if (!(prefix & ADDR32))
goto invalid;
if (!movr(regs, prefix, opc))
goto invalid;
return OPC_EMULATED;

case 0xCB: /* retl */
- if ((mode == VM86_REAL_TO_PROTECTED) ||
- (mode == VM86_PROTECTED_TO_REAL)) {
+ if (mode == VM86_REAL_TO_PROTECTED ||
+ mode == VM86_PROTECTED_TO_REAL) {
retl(regs, prefix);
return OPC_INVALID;
}
@@ -1576,37 +1570,37 @@ opcode(struct regs *regs)
return OPC_EMULATED;

case 0xEA: /* jmpl */
- if ((mode == VM86_REAL_TO_PROTECTED) ||
- (mode == VM86_PROTECTED_TO_REAL)) {
+ if (mode == VM86_REAL_TO_PROTECTED ||
+ mode == VM86_PROTECTED_TO_REAL) {
jmpl(regs, prefix);
return OPC_INVALID;
}
goto invalid;

- case 0xFF: /* jmpl (indirect) */
- {
- unsigned modrm = fetch8(regs);
- switch((modrm >> 3) & 7) {
- case 5: /* jmpl (indirect) */
- if ((mode == VM86_REAL_TO_PROTECTED) ||
- (mode == VM86_PROTECTED_TO_REAL)) {
- jmpl_indirect(regs, prefix, modrm);
- return OPC_INVALID;
- }
- goto invalid;
-
- case 6: /* push r/m16 */
- pushrm(regs, prefix, modrm);
- return OPC_EMULATED;
-
- default:
- goto invalid;
+ case 0xFF:
+ {
+ unsigned modrm = fetch8(regs);
+ switch((modrm >> 3) & 7) {
+ case 5: /* jmpl (indirect) */
+ if (mode == VM86_REAL_TO_PROTECTED ||
+ mode == VM86_PROTECTED_TO_REAL) {
+ jmpl_indirect(regs, prefix, modrm);
+ return OPC_INVALID;
}
+ goto invalid;
+
+ case 6: /* push r/m16 */
+ pushrm(regs, prefix, modrm);
+ return OPC_EMULATED;
+
+ default:
+ goto invalid;
}
+ }

case 0xEB: /* short jump */
- if ((mode == VM86_REAL_TO_PROTECTED) ||
- (mode == VM86_PROTECTED_TO_REAL)) {
+ if (mode == VM86_REAL_TO_PROTECTED ||
+ mode == VM86_PROTECTED_TO_REAL) {
disp = (char) fetch8(regs);
TRACE((regs, 2, "jmp 0x%x", regs->eip + disp));
regs->eip += disp;
@@ -1629,7 +1623,7 @@ opcode(struct regs *regs)
continue;

case 0xF6: /* addr32 testb $imm, r/m8 */
- if ((prefix & ADDR32) == 0)
+ if (!(prefix & ADDR32))
goto invalid;
if (!test(regs, prefix, opc))
goto invalid;
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/ioemu/keymaps/ja
--- a/tools/ioemu/keymaps/ja Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/ioemu/keymaps/ja Tue Jul 10 08:39:26 2007 -0600
@@ -101,6 +101,7 @@ bar 0x7d shift
bar 0x7d shift
underscore 0x73 shift
Henkan_Mode 0x79
+Katakana_Real 0x70
Katakana 0x70
Muhenkan 0x7b
Henkan_Mode_Real 0x79
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/ioemu/keymaps/modifiers
--- a/tools/ioemu/keymaps/modifiers Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/ioemu/keymaps/modifiers Tue Jul 10 08:39:26 2007 -0600
@@ -11,8 +11,8 @@ Control_L 0x1d

# Translate Super to Windows keys.
# This is hardcoded. See documentation for details.
-Super_R 0xdb
-Super_L 0xdc
+Super_R 0xdc
+Super_L 0xdb

# Translate Menu to the Windows Application key.
Menu 0xdd
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/ioemu/vnc_keysym.h
--- a/tools/ioemu/vnc_keysym.h Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/ioemu/vnc_keysym.h Tue Jul 10 08:39:26 2007 -0600
@@ -290,12 +290,14 @@ static name2keysym_t name2keysym[]={
/* localized keys */
{"BackApostrophe", 0xff21},
{"Muhenkan", 0xff22},
-{"Katakana", 0xff25},
+{"Katakana", 0xff27},
{"Hankaku", 0xff29},
{"Zenkaku_Hankaku", 0xff2a},
{"Henkan_Mode_Real", 0xff23},
{"Henkan_Mode_Ultra", 0xff3e},
{"backslash_ja", 0xffa5},
+{"Katakana_Real", 0xff25},
+{"Eisu_toggle", 0xff30},

/* dead keys */
{"dead_grave", 0xfe50},
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/libxc/Makefile
--- a/tools/libxc/Makefile Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/libxc/Makefile Tue Jul 10 08:39:26 2007 -0600
@@ -57,7 +57,6 @@ GUEST_SRCS-$(CONFIG_POWERPC) += xc_dom_p
-include $(XEN_TARGET_ARCH)/Makefile

CFLAGS += -Werror -Wmissing-prototypes
-CFLAGS += -fno-strict-aliasing
CFLAGS += $(INCLUDES) -I. -I../xenstore

# Needed for posix_fadvise64() in xc_linux.c
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/libxc/xc_domain.c Tue Jul 10 08:39:26 2007 -0600
@@ -181,6 +181,7 @@ int xc_domain_getinfo(int xc_handle,
info->blocked = !!(domctl.u.getdomaininfo.flags&XEN_DOMINF_blocked);
info->running = !!(domctl.u.getdomaininfo.flags&XEN_DOMINF_running);
info->hvm = !!(domctl.u.getdomaininfo.flags&XEN_DOMINF_hvm_guest);
+ info->debugged = !!(domctl.u.getdomaininfo.flags&XEN_DOMINF_debugged);

info->shutdown_reason =
(domctl.u.getdomaininfo.flags>>XEN_DOMINF_shutdownshift) &
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/libxc/xc_misc.c Tue Jul 10 08:39:26 2007 -0600
@@ -59,6 +59,8 @@ int xc_physinfo(int xc_handle,
DECLARE_SYSCTL;

sysctl.cmd = XEN_SYSCTL_physinfo;
+
+ memcpy(&sysctl.u.physinfo, put_info, sizeof(*put_info));

if ( (ret = do_sysctl(xc_handle, &sysctl)) != 0 )
return ret;
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/libxc/xenctrl.h Tue Jul 10 08:39:26 2007 -0600
@@ -153,7 +153,7 @@ typedef struct xc_dominfo {
uint32_t ssidref;
unsigned int dying:1, crashed:1, shutdown:1,
paused:1, blocked:1, running:1,
- hvm:1;
+ hvm:1, debugged:1;
unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
unsigned long nr_pages;
unsigned long shared_info_frame;
@@ -485,6 +485,7 @@ int xc_send_debug_keys(int xc_handle, ch
int xc_send_debug_keys(int xc_handle, char *keys);

typedef xen_sysctl_physinfo_t xc_physinfo_t;
+typedef uint32_t xc_cpu_to_node_t;
int xc_physinfo(int xc_handle,
xc_physinfo_t *info);

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue Jul 10 08:39:26 2007 -0600
@@ -680,33 +680,62 @@ static PyObject *pyxc_pages_to_kib(XcObj

static PyObject *pyxc_physinfo(XcObject *self)
{
+#define MAX_CPU_ID 255
xc_physinfo_t info;
char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
- int i;
-
+ int i, j, max_cpu_id;
+ PyObject *ret_obj, *node_to_cpu_obj;
+ xc_cpu_to_node_t map[MAX_CPU_ID];
+
+ set_xen_guest_handle(info.cpu_to_node, map);
+ info.max_cpu_id = MAX_CPU_ID;
+
if ( xc_physinfo(self->xc_handle, &info) != 0 )
return pyxc_error_to_exception();

- *q=0;
- for(i=0;i<sizeof(info.hw_cap)/4;i++)
+ *q = 0;
+ for ( i = 0; i < sizeof(info.hw_cap)/4; i++ )
{
- p+=sprintf(p,"%08x:",info.hw_cap[i]);
- if(info.hw_cap[i])
- q=p;
+ p += sprintf(p, "%08x:", info.hw_cap[i]);
+ if ( info.hw_cap[i] )
+ q = p;
}
- if(q>cpu_cap)
- *(q-1)=0;
-
- return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
- "threads_per_core", info.threads_per_core,
- "cores_per_socket", info.cores_per_socket,
- "sockets_per_node", info.sockets_per_node,
- "nr_nodes", info.nr_nodes,
- "total_memory", pages_to_kib(info.total_pages),
- "free_memory", pages_to_kib(info.free_pages),
- "scrub_memory", pages_to_kib(info.scrub_pages),
- "cpu_khz", info.cpu_khz,
- "hw_caps", cpu_cap);
+ if ( q > cpu_cap )
+ *(q-1) = 0;
+
+ ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
+ "nr_nodes", info.nr_nodes,
+ "max_cpu_id", info.max_cpu_id,
+ "threads_per_core", info.threads_per_core,
+ "cores_per_socket", info.cores_per_socket,
+ "sockets_per_node", info.sockets_per_node,
+ "total_memory", pages_to_kib(info.total_pages),
+ "free_memory", pages_to_kib(info.free_pages),
+ "scrub_memory", pages_to_kib(info.scrub_pages),
+ "cpu_khz", info.cpu_khz,
+ "hw_caps", cpu_cap);
+
+ max_cpu_id = info.max_cpu_id;
+ if ( max_cpu_id > MAX_CPU_ID )
+ max_cpu_id = MAX_CPU_ID;
+
+ /* Construct node-to-cpu lists. */
+ node_to_cpu_obj = PyList_New(0);
+
+ /* Make a list for each node. */
+ for ( i = 0; i < info.nr_nodes; i++ )
+ {
+ PyObject *cpus = PyList_New(0);
+ for ( j = 0; j <= max_cpu_id; j++ )
+ if ( i == map[j])
+ PyList_Append(cpus, PyInt_FromLong(j));
+ PyList_Append(node_to_cpu_obj, cpus);
+ }
+
+ PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
+
+ return ret_obj;
+#undef MAX_CPU_ID
}

static PyObject *pyxc_xeninfo(XcObject *self)
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/util/acmpolicy.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/acmpolicy.py Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,1199 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2006,2007 International Business Machines Corp.
+# Author: Stefan Berger <stefanb@us.ibm.com>
+#============================================================================
+
+import os
+import commands
+import struct
+import stat
+import array
+from xml.dom import minidom, Node
+from xen.xend.XendLogging import log
+from xen.util import security, xsconstants, bootloader, mkdir
+from xen.util.xspolicy import XSPolicy
+from xen.util.security import ACMError
+from xen.xend.XendError import SecurityError
+
+ACM_POLICIES_DIR = security.policy_dir_prefix + "/"
+
+# Constants needed for generating a binary policy from its XML
+# representation
+ACM_POLICY_VERSION = 3 # Latest one
+ACM_CHWALL_VERSION = 1
+
+ACM_STE_VERSION = 1
+
+ACM_MAGIC = 0x001debc;
+
+ACM_NULL_POLICY = 0
+ACM_CHINESE_WALL_POLICY = 1
+ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY = 2
+ACM_POLICY_UNDEFINED = 15
+
+
+ACM_SCHEMA_FILE = "/etc/xen/acm-security/policies/security_policy.xsd"
+
+class ACMPolicy(XSPolicy):
+ """
+ ACMPolicy class. Implements methods for getting information from
+ the XML representation of the policy as well as compilation and
+ loading of a policy into the HV.
+ """
+
+ def __init__(self, name=None, dom=None, ref=None, xml=None):
+ if name:
+ self.name = name
+ self.dom = minidom.parse(self.path_from_policy_name(name))
+ elif dom:
+ self.dom = dom
+ self.name = self.get_name()
+ elif xml:
+ self.dom = minidom.parseString(xml)
+ self.name = self.get_name()
+ rc = self.validate()
+ if rc != xsconstants.XSERR_SUCCESS:
+ raise SecurityError(rc)
+ mkdir.parents(ACM_POLICIES_DIR, stat.S_IRWXU)
+ if ref:
+ from xen.xend.XendXSPolicy import XendACMPolicy
+ self.xendacmpolicy = XendACMPolicy(self, {}, ref)
+ else:
+ self.xendacmpolicy = None
+ XSPolicy.__init__(self, name=self.name, ref=ref)
+
+ def get_dom(self):
+ return self.dom
+
+ def get_name(self):
+ return self.policy_dom_get_hdr_item("PolicyName")
+
+ def get_type(self):
+ return xsconstants.XS_POLICY_ACM
+
+ def get_type_name(self):
+ return xsconstants.ACM_POLICY_ID
+
+ def __str__(self):
+ return self.get_name()
+
+
+ def validate(self):
+ """
+ Validate against the policy's schema. Does not fail if the
+ libxml2 python lib is not installed.
+ """
+ rc = xsconstants.XSERR_SUCCESS
+ try:
+ import libxml2
+ except Exception, e:
+ log.warn("Libxml2 python-wrapper is not installed on the system.")
+ return xsconstants.XSERR_SUCCESS
+ try:
+ parserctxt = libxml2.schemaNewParserCtxt(ACM_SCHEMA_FILE)
+ schemaparser = parserctxt.schemaParse()
+ valid = schemaparser.schemaNewValidCtxt()
+ doc = libxml2.parseDoc(self.toxml())
+ if doc.schemaValidateDoc(valid) != 0:
+ rc = -xsconstants.XSERR_BAD_XML
+ except Exception, e:
+ log.warn("Problem with the schema: %s" % str(e))
+ rc = -xsconstants.XSERR_GENERAL_FAILURE
+ if rc != xsconstants.XSERR_SUCCESS:
+ log.warn("XML did not validate against schema")
+ rc = self.__validate_name_and_labels()
+ return rc
+
+ def __validate_name_and_labels(self):
+ """ no ':' allowed in the policy name and the labels """
+ if ':' in self.get_name():
+ return -xsconstants.XSERR_BAD_POLICY_NAME
+ for s in self.policy_get_resourcelabel_names():
+ if ':' in s:
+ return -xsconstants.XSERR_BAD_LABEL
+ for s in self.policy_get_virtualmachinelabel_names():
+ if ':' in s:
+ return -xsconstants.XSERR_BAD_LABEL
+ return xsconstants.XSERR_SUCCESS
+
+
+ def update(self, xml_new):
+ """
+ Update the policy with the new XML. The hypervisor decides
+ whether the new policy can be applied.
+ """
+ rc = -xsconstants.XSERR_XML_PROCESSING
+ errors = ""
+ acmpol_old = self
+ try:
+ acmpol_new = ACMPolicy(xml=xml_new)
+ except Exception:
+ return -xsconstants.XSERR_XML_PROCESSING, errors
+
+ vmlabel_map = acmpol_new.policy_get_vmlabel_translation_map()
+ # An update requires version information in the current
+ # and new policy. The version number of the current policy
+ # must be the same as what is in the FromPolicy/Version node
+ # in the new one and the current policy's name must be the
+ # same as in FromPolicy/PolicyName
+
+ now_vers = acmpol_old.policy_dom_get_hdr_item("Version")
+ now_name = acmpol_old.policy_dom_get_hdr_item("PolicyName")
+ req_oldvers = acmpol_new.policy_dom_get_frompol_item("Version")
+ req_oldname = acmpol_new.policy_dom_get_frompol_item("PolicyName")
+
+ if now_vers == "" or \
+ now_vers != req_oldvers or \
+ now_name != req_oldname:
+ log.info("Policy rejected: %s != %s or %s != %s" % \
+ (now_vers,req_oldvers,now_name,req_oldname))
+ return -xsconstants.XSERR_VERSION_PREVENTS_UPDATE, errors
+
+ if not self.isVersionUpdate(acmpol_new):
+ log.info("Policy rejected since new version is not an update.")
+ return -xsconstants.XSERR_VERSION_PREVENTS_UPDATE, errors
+
+ if self.isloaded():
+ newvmnames = \
+ acmpol_new.policy_get_virtualmachinelabel_names_sorted()
+ oldvmnames = \
+ acmpol_old.policy_get_virtualmachinelabel_names_sorted()
+ del_array = ""
+ chg_array = ""
+ for o in oldvmnames:
+ if o not in newvmnames:
+ old_idx = oldvmnames.index(o) + 1 # for _NULL_LABEL_
+ if vmlabel_map.has_key(o):
+ #not a deletion, but a renaming
+ new = vmlabel_map[o]
+ new_idx = newvmnames.index(new) + 1 # for _NULL_LABEL_
+ chg_array += struct.pack("ii", old_idx, new_idx)
+ else:
+ del_array += struct.pack("i", old_idx)
+ for v in newvmnames:
+ if v in oldvmnames:
+ old_idx = oldvmnames.index(v) + 1 # for _NULL_LABEL_
+ new_idx = newvmnames.index(v) + 1 # for _NULL_LABEL_
+ if old_idx != new_idx:
+ chg_array += struct.pack("ii", old_idx, new_idx)
+
+ # VM labels indicated in the 'from' attribute of a VM or
+ # resource node but that did not exist in the old policy
+ # are considered bad labels.
+ bad_renamings = set(vmlabel_map.keys()) - set(oldvmnames)
+ if len(bad_renamings) > 0:
+ log.error("Bad VM label renamings: %s" %
+ list(bad_renamings))
+ return -xsconstants.XSERR_BAD_LABEL, errors
+
+ reslabel_map = acmpol_new.policy_get_reslabel_translation_map()
+ oldresnames = acmpol_old.policy_get_resourcelabel_names()
+ bad_renamings = set(reslabel_map.keys()) - set(oldresnames)
+ if len(bad_renamings) > 0:
+ log.error("Bad resource label renamings: %s" %
+ list(bad_renamings))
+ return -xsconstants.XSERR_BAD_LABEL, errors
+
+ #Get binary and map from the new policy
+ rc, map, bin_pol = acmpol_new.policy_create_map_and_bin()
+ if rc != xsconstants.XSERR_SUCCESS:
+ log.error("Could not build the map and binary policy.")
+ return rc, errors
+
+ #Need to do / check the following:
+ # - relabel all resources where there is a 'from' field in
+ # the policy and mark those as unlabeled where the label
+ # does not appear in the new policy anymore
+ # - relabel all VMs where there is a 'from' field in the
+ # policy and mark those as unlabeled where the label
+ # does not appear in the new policy anymore; no running
+ # or paused VM may be unlabeled through this
+ # - check that under the new labeling conditions the VMs
+ # still have access to their resources as before. Unlabeled
+ # resources are inaccessible. If this check fails, the
+ # update failed.
+ # - Attempt changes in the hypervisor; if this step fails,
+ # roll back the relabeling of resources and VMs
+ # - Commit the relabeling of resources
+
+
+ rc, errors = security.change_acm_policy(bin_pol,
+ del_array, chg_array,
+ vmlabel_map, reslabel_map,
+ self, acmpol_new)
+
+ if rc == 0:
+ # Replace the old DOM with the new one and save it
+ self.dom = acmpol_new.dom
+ self.compile()
+ log.info("ACM policy update was successful")
+ else:
+ #Not loaded in HV
+ self.dom = acmpol_new.dom
+ self.compile()
+ return rc, errors
+
+ def compareVersions(self, v1, v2):
+ """
+ Compare two policy versions given their tuples of major and
+ minor.
+ Return '0' if versions are equal, '>0' if v1 > v2 and
+ '<0' if v1 < v2.
+ """
+ rc = v1[0] - v2[0]
+ if rc == 0:
+ rc = v1[1] - v2[1]
+ return rc
+
+ def getVersionTuple(self, item="Version"):
+ v_str = self.policy_dom_get_hdr_item(item)
+ return self.__convVersionToTuple(v_str)
+
+ def get_version(self):
+ return self.policy_dom_get_hdr_item("Version")
+
+ def isVersionUpdate(self, polnew):
+ if self.compareVersions(polnew.getVersionTuple(),
+ self.getVersionTuple()) > 0:
+ return True
+ return False
+
+ def __convVersionToTuple(self, v_str):
+ """ Convert a version string, formatted according to the scheme
+ "%d.%d" into a tuple of (major, minor). Return (0,0) if the
+ string is empty.
+ """
+ major = 0
+ minor = 0
+ if v_str != "":
+ tmp = v_str.split(".")
+ major = int(tmp[0])
+ if len(tmp) > 1:
+ minor = int(tmp[1])
+ return (major, minor)
+
+
+ def policy_path(self, name, prefix = ACM_POLICIES_DIR ):
+ path = prefix + name.replace('.','/')
+ _path = path.split("/")
+ del _path[-1]
+ mkdir.parents("/".join(_path), stat.S_IRWXU)
+ return path
+
+ def path_from_policy_name(self, name):
+ return self.policy_path(name) + "-security_policy.xml"
+
+ #
+ # Functions interacting with the bootloader
+ #
+ def vmlabel_to_ssidref(self, vm_label):
+ """ Convert a VMlabel into an ssidref given the current
+ policy
+ Return xsconstants.INVALID_SSIDREF if conversion failed.
+ """
+ ssidref = xsconstants.INVALID_SSIDREF
+ names = self.policy_get_virtualmachinelabel_names_sorted()
+ try:
+ vmidx = names.index(vm_label) + 1 # for _NULL_LABEL_
+ ssidref = (vmidx << 16) | vmidx
+ except:
+ pass
+ return ssidref
+
+ def set_vm_bootlabel(self, vm_label):
+ parms="<>"
+ if vm_label != "":
+ ssidref = self.vmlabel_to_ssidref(vm_label)
+ if ssidref == xsconstants.INVALID_SSIDREF:
+ return -xsconstants.XSERR_BAD_LABEL
+ parms = "0x%08x:%s:%s:%s" % \
+ (ssidref, xsconstants.ACM_POLICY_ID, \
+ self.get_name(),vm_label)
+ else:
+ ssidref = 0 #Identifier for removal
+ try:
+ def_title = bootloader.get_default_title()
+ bootloader.set_kernel_attval(def_title, "ssidref", parms)
+ except:
+ return -xsconstants.XSERR_GENERAL_FAILURE
+ return ssidref
+
+ #
+ # Utility functions related to the policy's files
+ #
+ def get_filename(self, postfix, prefix = ACM_POLICIES_DIR, dotted=False):
+ """
+ Create the filename for the policy. The prefix is prepended
+ to the path. If dotted is True, then a policy name like
+ 'a.b.c' will remain as is, otherwise it will become 'a/b/c'
+ """
+ name = self.get_name()
+ if name:
+ p = name.split(".")
+ path = ""
+ if dotted == True:
+ sep = "."
+ else:
+ sep = "/"
+ if len(p) > 1:
+ path = sep.join(p[0:len(p)-1])
+ if prefix != "" or path != "":
+ allpath = prefix + path + sep + p[-1] + postfix
+ else:
+ allpath = p[-1] + postfix
+ return allpath
+ return None
+
+ def __readfile(self, name):
+ cont = ""
+ filename = self.get_filename(name)
+ f = open(filename, "r")
+ if f:
+ cont = f.read()
+ f.close()
+ return cont
+
+ def get_map(self):
+ return self.__readfile(".map")
+
+ def get_bin(self):
+ return self.__readfile(".bin")
+
+ #
+ # DOM-related functions
+ #
+
+ def policy_dom_get(self, parent, key, createit=False):
+ for node in parent.childNodes:
+ if node.nodeType == Node.ELEMENT_NODE:
+ if node.nodeName == key:
+ return node
+ if createit:
+ self.dom_create_node(parent, key)
+ return self.policy_dom_get(parent, key)
+
+ def dom_create_node(self, parent, newname, value=" "):
+ xml = "<a><"+newname+">"+ value +"</"+newname+"></a>"
+ frag = minidom.parseString(xml)
+ frag.childNodes[0].nodeType = Node.DOCUMENT_FRAGMENT_NODE
+ parent.appendChild(frag.childNodes[0])
+ return frag.childNodes[0]
+
+ def dom_get_node(self, path, createit=False):
+ node = None
+ parts = path.split("/")
+ doc = self.get_dom()
+ if len(parts) > 0:
+ node = self.policy_dom_get(doc.documentElement, parts[0])
+ if node:
+ i = 1
+ while i < len(parts):
+ _node = self.policy_dom_get(node, parts[i], createit)
+ if not _node:
+ if not createit:
+ break
+ else:
+ self.dom_create_node(node, parts[i])
+ _node = self.policy_dom_get(node, parts[i])
+ node = _node
+ i += 1
+ return node
+
+ #
+ # Header-related functions
+ #
+ def policy_dom_get_header_subnode(self, nodename):
+ node = self.dom_get_node("PolicyHeader/%s" % nodename)
+ return node
+
+ def policy_dom_get_hdr_item(self, name, default=""):
+ node = self.policy_dom_get_header_subnode(name)
+ if node and len(node.childNodes) > 0:
+ return node.childNodes[0].nodeValue
+ return default
+
+ def policy_dom_get_frompol_item(self, name, default="", createit=False):
+ node = self.dom_get_node("PolicyHeader/FromPolicy",createit)
+ if node:
+ node = self.policy_dom_get(node, name, createit)
+ if node and len(node.childNodes) > 0:
+ return node.childNodes[0].nodeValue
+ return default
+
+ def get_header_fields_map(self):
+ header = {
+ 'policyname' : self.policy_dom_get_hdr_item("PolicyName"),
+ 'policyurl' : self.policy_dom_get_hdr_item("PolicyUrl"),
+ 'reference' : self.policy_dom_get_hdr_item("Reference"),
+ 'date' : self.policy_dom_get_hdr_item("Date"),
+ 'namespaceurl' : self.policy_dom_get_hdr_item("NameSpaceUrl"),
+ 'version' : self.policy_dom_get_hdr_item("Version")
+ }
+ return header
+
+ def set_frompolicy_name(self, name):
+ """ For tools to adapt the header of the policy """
+ node = self.dom_get_node("PolicyHeader/FromPolicy/PolicyName",
+ createit=True)
+ node.childNodes[0].nodeValue = name
+
+ def set_frompolicy_version(self, version):
+ """ For tools to adapt the header of the policy """
+ node = self.dom_get_node("PolicyHeader/FromPolicy/Version",
+ createit=True)
+ node.childNodes[0].nodeValue = version
+
+ def set_policy_name(self, name):
+ """ For tools to adapt the header of the policy """
+ node = self.dom_get_node("PolicyHeader/PolicyName")
+ node.childNodes[0].nodeValue = name
+
+ def set_policy_version(self, version):
+ """ For tools to adapt the header of the policy """
+ node = self.dom_get_node("PolicyHeader/Version")
+ node.childNodes[0].nodeValue = version
+
+ def update_frompolicy(self, curpol):
+ self.set_frompolicy_name(curpol.policy_dom_get_hdr_item("PolicyName"))
+ version = curpol.policy_dom_get_hdr_item("Version")
+ self.set_frompolicy_version(version)
+ (maj, min) = self.__convVersionToTuple(version)
+ self.set_policy_version("%s.%s" % (maj, min+1))
+
+ #
+ # Get all types that are part of a node
+ #
+
+ def policy_get_types(self, node):
+ strings = []
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == "Type":
+ strings.append(node.childNodes[i].childNodes[0].nodeValue)
+ i += 1
+ return strings
+
+ #
+ # Simple Type Enforcement-related functions
+ #
+
+ def policy_get_stetypes_node(self):
+ node = self.dom_get_node("SimpleTypeEnforcement/SimpleTypeEnforcementTypes")
+ return node
+
+ def policy_get_stetypes_types(self):
+ strings = []
+ node = self.policy_get_stetypes_node()
+ if node:
+ strings = self.policy_get_types(node)
+ return strings
+
+ #
+ # Chinese Wall Type-related functions
+ #
+
+ def policy_get_chwall_types(self):
+ strings = []
+ node = self.dom_get_node("ChineseWall/ChineseWallTypes")
+ if node:
+ strings = self.policy_get_types(node)
+ return strings
+
+ def policy_get_chwall_cfses(self):
+ cfs = []
+ node = self.dom_get_node("ChineseWall/ConflictSets")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ _cfs = {}
+ if node.childNodes[i].nodeName == "Conflict":
+ _cfs['name'] = node.childNodes[i].getAttribute('name')
+ _cfs['chws'] = self.policy_get_types(node.childNodes[i])
+ cfs.append(_cfs)
+ i += 1
+ return cfs
+
+ def policy_get_chwall_cfses_names_sorted(self):
+ """
+ Return the list of all conflict set names in alphabetical
+ order.
+ """
+ cfs_names = []
+ node = self.dom_get_node("ChineseWall/ConflictSets")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == "Conflict":
+ n = node.childNodes[i].getAttribute('name')
+ #it better have a name!
+ if n:
+ cfs_names.append(n)
+ i += 1
+ cfs_names.sort()
+ return cfs_names
+
+ #
+ # Subject Label-related functions
+ #
+
+ def policy_get_bootstrap_vmlabel(self):
+ node = self.dom_get_node("SecurityLabelTemplate/SubjectLabels")
+ if node:
+ vmlabel = node.getAttribute("bootstrap")
+ return vmlabel
+
+ # Get the names of all virtual machine labels; returns an array
+ def policy_get_virtualmachinelabel_names(self):
+ strings = []
+ node = self.dom_get_node("SecurityLabelTemplate/SubjectLabels")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == "VirtualMachineLabel":
+ name = self.policy_dom_get(node.childNodes[i], "Name")
+ strings.append(name.childNodes[0].nodeValue)
+ i += 1
+ return strings
+
+ def policy_sort_virtualmachinelabel_names(self, vmnames):
+ bootstrap = self.policy_get_bootstrap_vmlabel()
+ if bootstrap not in vmnames:
+ raise SecurityError(-xsconstants.XSERR_POLICY_INCONSISTENT)
+ vmnames.remove(bootstrap)
+ vmnames.sort()
+ vmnames.insert(0, bootstrap)
+ return vmnames
+
+ def policy_get_virtualmachinelabel_names_sorted(self):
+ """ Get a sorted list of VMlabel names. The bootstrap VM's
+ label will be the first one in that list, followed
+ by an alphabetically sorted list of VM label names """
+ vmnames = self.policy_get_virtualmachinelabel_names()
+ return self.policy_sort_virtualmachinelabel_names(vmnames)
+
+ def policy_get_virtualmachinelabels(self):
+ """ Get a list of all virtual machine labels in this policy """
+ res = []
+ node = self.dom_get_node("SecurityLabelTemplate/SubjectLabels")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == "VirtualMachineLabel":
+ _res = {}
+ _res['type'] = xsconstants.ACM_LABEL_VM
+ name = self.policy_dom_get(node.childNodes[i], "Name")
+ _res['name'] = name.childNodes[0].nodeValue
+ stes = self.policy_dom_get(node.childNodes[i],
+ "SimpleTypeEnforcementTypes")
+ if stes:
+ _res['stes'] = self.policy_get_types(stes)
+ else:
+ _res['stes'] = []
+ chws = self.policy_dom_get(node.childNodes[i],
+ "ChineseWallTypes")
+ if chws:
+ _res['chws'] = self.policy_get_types(chws)
+ else:
+ _res['chws'] = []
+ res.append(_res)
+ i += 1
+ return res
+
+ def policy_get_stes_of_vmlabel(self, vmlabel):
+ """ Get a list of all STEs of a given VMlabel """
+ return self.__policy_get_stes_of_labeltype(vmlabel,
+ "VirtualMachineLabel")
+
+ def policy_get_stes_of_resource(self, reslabel):
+ """ Get a list of all STEs of a given resource label """
+ return self.__policy_get_stes_of_labeltype(reslabel, "ResourceLabel")
+
+ def __policy_get_stes_of_labeltype(self, label, labeltype):
+ node = self.dom_get_node("SecurityLabelTemplate/SubjectLabels")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == labeltype:
+ name = self.policy_dom_get(node.childNodes[i], "Name")
+ if name.childNodes[0].nodeValue == label:
+ stes = self.policy_dom_get(node.childNodes[i],
+ "SimpleTypeEnforcementTypes")
+ if not stes:
+ return []
+ return self.policy_get_types(stes)
+ i += 1
+ return []
+
+ def policy_check_vmlabel_against_reslabels(self, vmlabel, resources):
+ """
+ Check whether the given vmlabel is compatible with the given
+ resource labels: the VM label's STE types are meant to match an
+ STE type of every resource. A VM label without STEs is rejected.
+ NOTE(review): the test uses union(), not intersection() - confirm.
+ """
+ vm_stes = self.policy_get_stes_of_vmlabel(vmlabel)
+ if len(vm_stes) == 0:
+ return False
+ for res in resources:
+ res_stes = self.policy_get_stes_of_resource(res)
+ if len( set(res_stes).union( set(vm_stes) ) ) == 0:
+ return False
+ return True
+
+ def __policy_get_label_translation_map(self, path, labeltype):
+ res = {}
+ node = self.dom_get_node("SecurityLabelTemplate/" + path)
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == labeltype:
+ name = self.policy_dom_get(node.childNodes[i], "Name")
+ from_name = name.getAttribute("from")
+ if from_name:
+ res.update({from_name : name.childNodes[0].nodeValue})
+ i += 1
+ return res
+
+ def policy_get_vmlabel_translation_map(self):
+ """
+ Get a dictionary of virtual machine mappings from their
+ old VMlabel name to the new VMlabel name.
+ """
+ return self.__policy_get_label_translation_map("SubjectLabels",
+ "VirtualMachineLabel")
+
+ def policy_get_reslabel_translation_map(self):
+ """
+ Get a dictionary of resource mappings from their
+ old resource label name to the new resource label name.
+ """
+ return self.__policy_get_label_translation_map("ObjectLabels",
+ "ResourceLabel")
+
+ #
+ # Object Label-related functions
+ #
+ def policy_get_resourcelabel_names(self):
+ """
+ Get the names of all resource labels in an array but
+ only those that actually have types
+ """
+ strings = []
+ node = self.dom_get_node("SecurityLabelTemplate/ObjectLabels")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == "ResourceLabel":
+ name = self.policy_dom_get(node.childNodes[i], "Name")
+ stes = self.policy_dom_get(node.childNodes[i],
+ "SimpleTypeEnforcementTypes")
+ if stes:
+ strings.append(name.childNodes[0].nodeValue)
+ i += 1
+ return strings
+
+ def policy_get_resourcelabels(self):
+ """
+ Get all information about all resource labels of this policy.
+ """
+ res = []
+ node = self.dom_get_node("SecurityLabelTemplate/ObjectLabels")
+ if node:
+ i = 0
+ while i < len(node.childNodes):
+ if node.childNodes[i].nodeName == "ResourceLabel":
+ _res = {}
+ _res['type'] = xsconstants.ACM_LABEL_RES
+ name = self.policy_dom_get(node.childNodes[i], "Name")
+ _res['name'] = name.childNodes[0].nodeValue
+ stes = self.policy_dom_get(node.childNodes[i],
+ "SimpleTypeEnforcementTypes")
+ if stes:
+ _res['stes'] = self.policy_get_types(stes)
+ else:
+ _res['stes'] = []
+ _res['chws'] = []
+ res.append(_res)
+ i += 1
+ return res
+
+
+ def policy_find_reslabels_with_stetype(self, stetype):
+ """
+ Find those resource labels that hold a given STE type.
+ """
+ res = []
+ reslabels = self.policy_get_resourcelabels()
+ for resl in reslabels:
+ if stetype in resl['stes']:
+ res.append(resl['name'])
+ return res
+
+
+ def toxml(self):
+ dom = self.get_dom()
+ if dom:
+ return dom.toxml()
+ return None
+
+ def save(self):
+ ### Save the XML policy into a file ###
+ rc = -xsconstants.XSERR_FILE_ERROR
+ name = self.get_name()
+ if name:
+ path = self.path_from_policy_name(name)
+ if path:
+ f = open(path, 'w')
+ if f:
+ f.write(self.toxml())
+ f.close()
+ rc = 0
+ return rc
+
+ def __write_to_file(self, suffix, data):
+ #write the data into a file with the given suffix
+ f = open(self.get_filename(suffix),"w")
+ if f:
+ try:
+ try:
+ f.write(data)
+ except Exception, e:
+ log.error("Error writing file: %s" % str(e))
+ return -xsconstants.XSERR_FILE_ERROR
+ finally:
+ f.close()
+ else:
+ return -xsconstants.XSERR_FILE_ERROR
+ return xsconstants.XSERR_SUCCESS
+
+
+ def compile(self):
+ rc = self.save()
+ if rc == 0:
+ rc, mapfile, bin_pol = self.policy_create_map_and_bin()
+
+ if rc == 0:
+ rc = self.__write_to_file(".map", mapfile)
+ if rc != 0:
+ log.error("Error writing map file")
+
+ if rc == 0:
+ rc = self.__write_to_file(".bin", bin_pol)
+ if rc != 0:
+ log.error("Error writing binary policy file")
+ return rc
+
+ def loadintohv(self):
+ """
+ Load this policy into the hypervisor.
+ If successful, the policy's flags will indicate that the
+ policy is the one loaded into the hypervisor.
+ """
+ (ret, output) = commands.getstatusoutput(
+ security.xensec_tool +
+ " loadpolicy " +
+ self.get_filename(".bin"))
+ if ret != 0:
+ return -xsconstants.XSERR_POLICY_LOAD_FAILED
+ return xsconstants.XSERR_SUCCESS
+
+ def isloaded(self):
+ """
+ Determine whether this policy is the active one.
+ """
+ security.refresh_security_policy()
+ if self.get_name() == security.active_policy:
+ return True
+ return False
+
+ def destroy(self):
+ """
+ Destroy the policy including its binary, mapping and
+ XML files.
+ This only works if the policy is not the one that's loaded
+ """
+ if self.isloaded():
+ return -xsconstants.XSERR_POLICY_LOADED
+ files = [ self.get_filename(".map",""),
+ self.get_filename(".bin",""),
+ self.path_from_policy_name(self.get_name())]
+ for f in files:
+ try:
+ os.unlink(f)
+ except:
+ pass
+ if self.xendacmpolicy:
+ self.xendacmpolicy.destroy()
+ XSPolicy.destroy(self)
+ return xsconstants.XSERR_SUCCESS
+
+ def policy_get_domain_label(self, domid):
+ """
+ Given a domain's ID, retrieve the label it has using
+ its ssidref for reverse calculation.
+ """
+ try:
+ mgmt_dom = security.get_ssid(domid)
+ except:
+ return ""
+ return self.policy_get_domain_label_by_ssidref(int(mgmt_dom[3]))
+
+ def policy_get_domain_label_by_ssidref(self, ssidref):
+ """ Given an ssidref, find the corresponding VM label """
+ chwall_ref = ssidref & 0xffff
+ try:
+ allvmtypes = self.policy_get_virtualmachinelabel_names_sorted()
+ except:
+ return None
+ return allvmtypes[chwall_ref-1] # skip _NULL_LABEL_
+
+ def policy_get_domain_label_formatted(self, domid):
+ label = self.policy_get_domain_label(domid)
+ if label == "":
+ return ""
+ return "%s:%s:%s" % (xsconstants.ACM_POLICY_ID, self.get_name(), label)
+
+ def policy_get_domain_label_by_ssidref_formatted(self, ssidref):
+ label = self.policy_get_domain_label_by_ssidref(ssidref)
+ if label == "":
+ return ""
+ return "%s:%s:%s" % (xsconstants.ACM_POLICY_ID, self.get_name(), label)
+
+ def policy_create_map_and_bin(self):
+ """
+ Create the policy's map and binary files -- compile the policy.
+ """
+ def roundup8(len):
+ return ((len + 7) & ~7)
+
+ rc = xsconstants.XSERR_SUCCESS
+ mapfile = ""
+ primpolcode = ACM_POLICY_UNDEFINED
+ secpolcode = ACM_POLICY_UNDEFINED
+ unknown_ste = set()
+ unknown_chw = set()
+
+ rc = self.validate()
+ if rc:
+ return rc, "", ""
+
+ stes = self.policy_get_stetypes_types()
+ if stes:
+ stes.sort()
+
+ chws = self.policy_get_chwall_types()
+ if chws:
+ chws.sort()
+
+ vms = self.policy_get_virtualmachinelabels()
+ bootstrap = self.policy_get_bootstrap_vmlabel()
+
+ vmlabels = self.policy_get_virtualmachinelabel_names_sorted()
+ if bootstrap not in vmlabels:
+ log.error("Bootstrap label '%s' not found among VM labels '%s'." \
+ % (bootstrap, vmlabels))
+ return -xsconstants.XSERR_POLICY_INCONSISTENT, "", ""
+
+ vms_with_chws = []
+ chws_by_vm = {}
+ for v in vms:
+ if v.has_key("chws"):
+ vms_with_chws.append(v["name"])
+ chws_by_vm[v["name"]] = v["chws"]
+ if bootstrap in vms_with_chws:
+ vms_with_chws.remove(bootstrap)
+ vms_with_chws.sort()
+ vms_with_chws.insert(0, bootstrap)
+ else:
+ vms_with_chws.sort()
+
+ vms_with_stes = []
+ stes_by_vm = {}
+ for v in vms:
+ if v.has_key("stes"):
+ vms_with_stes.append(v["name"])
+ stes_by_vm[v["name"]] = v["stes"]
+ if bootstrap in vms_with_stes:
+ vms_with_stes.remove(bootstrap)
+ vms_with_stes.sort()
+ vms_with_stes.insert(0, bootstrap)
+ else:
+ vms_with_stes.sort()
+
+ resnames = self.policy_get_resourcelabel_names()
+ resnames.sort()
+ stes_by_res = {}
+ res = self.policy_get_resourcelabels()
+ for r in res:
+ if r.has_key("stes"):
+ stes_by_res[r["name"]] = r["stes"]
+
+ max_chw_ssids = 1 + len(vms_with_chws)
+ max_chw_types = 1 + len(vms_with_chws)
+ max_ste_ssids = 1 + len(vms_with_stes) + len(resnames)
+ max_ste_types = 1 + len(vms_with_stes) + len(resnames)
+
+ mapfile = "POLICYREFERENCENAME %s\n" % self.get_name()
+ mapfile += "MAGIC %08x\n" % ACM_MAGIC
+ mapfile += "POLICFILE %s\n" % \
+ self.path_from_policy_name(self.get_name())
+ mapfile += "BINARYFILE %s\n" % self.get_filename(".bin")
+ mapfile += "MAX-CHWALL-TYPES %08x\n" % len(chws)
+ mapfile += "MAX-CHWALL-SSIDS %08x\n" % max_chw_ssids
+ mapfile += "MAX-CHWALL-LABELS %08x\n" % max_chw_ssids
+ mapfile += "MAX-STE-TYPES %08x\n" % len(stes)
+ mapfile += "MAX-STE-SSIDS %08x\n" % max_ste_ssids
+ mapfile += "MAX-STE-LABELS %08x\n" % max_ste_ssids
+ mapfile += "\n"
+
+ if chws:
+ mapfile += \
+ "PRIMARY CHWALL\n"
+ primpolcode = ACM_CHINESE_WALL_POLICY
+ if stes:
+ mapfile += \
+ "SECONDARY STE\n"
+ else:
+ mapfile += \
+ "SECONDARY NULL\n"
+ secpolcode = ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
+ else:
+ if stes:
+ mapfile += \
+ "PRIMARY STE\n"
+ primpolcode = ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY
+ mapfile += \
+ "SECONDARY NULL\n"
+
+ mapfile += "\n"
+
+ if len(vms_with_chws) > 0:
+ mapfile += \
+ "LABEL->SSID ANY CHWALL __NULL_LABEL__ %x\n" % 0
+ i = 0
+ for v in vms_with_chws:
+ mapfile += \
+ "LABEL->SSID VM CHWALL %-20s %x\n" % \
+ (v, i+1)
+ i += 1
+ mapfile += "\n"
+
+ if len(vms_with_stes) > 0 or len(resnames) > 0:
+ mapfile += \
+ "LABEL->SSID ANY STE __NULL_LABEL__ %08x\n" % 0
+ i = 0
+ for v in vms_with_stes:
+ mapfile += \
+ "LABEL->SSID VM STE %-20s %x\n" % (v, i+1)
+ i += 1
+ j = 0
+ for r in resnames:
+ mapfile += \
+ "LABEL->SSID RES STE %-20s %x\n" % (r, j+i+1)
+ j += 1
+ mapfile += "\n"
+
+ if vms_with_chws:
+ mapfile += \
+ "SSID->TYPE CHWALL %08x\n" % 0
+ i = 1
+ for v in vms_with_chws:
+ mapfile += \
+ "SSID->TYPE CHWALL %08x" % i
+ for c in chws_by_vm[v]:
+ mapfile += " %s" % c
+ mapfile += "\n"
+ i += 1
+ mapfile += "\n"
+
+ if len(vms_with_stes) > 0 or len(resnames) > 0:
+ mapfile += \
+ "SSID->TYPE STE %08x\n" % 0
+ i = 1
+ for v in vms_with_stes:
+ mapfile += \
+ "SSID->TYPE STE %08x" % i
+ for s in stes_by_vm[v]:
+ mapfile += " %s" % s
+ mapfile += "\n"
+ i += 1
+
+ for r in resnames:
+ mapfile += \
+ "SSID->TYPE STE %08x" % i
+ for s in stes_by_res[r]:
+ mapfile += " %s" % s
+ mapfile += "\n"
+ i += 1
+ mapfile += "\n"
+
+ if chws:
+ i = 0
+ while i < len(chws):
+ mapfile += \
+ "TYPE CHWALL %-20s %d\n" % (chws[i], i)
+ i += 1
+ mapfile += "\n"
+ if stes:
+ i = 0
+ while i < len(stes):
+ mapfile += \
+ "TYPE STE %-20s %d\n" % (stes[i], i)
+ i += 1
+ mapfile += "\n"
+
+ mapfile += "\n"
+
+ # Build header with policy name
+ length = roundup8(4 + len(self.get_name()) + 1)
+ polname = self.get_name();
+ pr_bin = struct.pack("!i", len(polname)+1)
+ pr_bin += polname;
+ while len(pr_bin) < length:
+ pr_bin += "\x00"
+
+ # Build chinese wall part
+ cfses_names = self.policy_get_chwall_cfses_names_sorted()
+ cfses = self.policy_get_chwall_cfses()
+
+ chwformat = "!iiiiiiiii"
+ max_chw_cfs = len(cfses)
+ chw_ssid_offset = struct.calcsize(chwformat)
+ chw_confset_offset = chw_ssid_offset + \
+ 2 * len(chws) * max_chw_types
+ chw_running_types_offset = 0
+ chw_conf_agg_offset = 0
+
+ chw_bin = struct.pack(chwformat,
+ ACM_CHWALL_VERSION,
+ ACM_CHINESE_WALL_POLICY,
+ len(chws),
+ max_chw_ssids,
+ max_chw_cfs,
+ chw_ssid_offset,
+ chw_confset_offset,
+ chw_running_types_offset,
+ chw_conf_agg_offset)
+ chw_bin_body = ""
+ # simulate __NULL_LABEL__
+ for c in chws:
+ chw_bin_body += struct.pack("!h",0)
+ # VMs that are listed and their chinese walls
+ for v in vms_with_chws:
+ for c in chws:
+ unknown_chw |= (set(chws_by_vm[v]) - set(chws))
+ if c in chws_by_vm[v]:
+ chw_bin_body += struct.pack("!h",1)
+ else:
+ chw_bin_body += struct.pack("!h",0)
+
+ # Conflict sets -- they need to be processed in alphabetical order
+ for cn in cfses_names:
+ if cn == "" or cn is None:
+ return -xsconstants.XSERR_BAD_CONFLICTSET, "", ""
+ i = 0
+ while i < len(cfses):
+ if cfses[i]['name'] == cn:
+ conf = cfses[i]['chws']
+ break
+ i += 1
+ for c in chws:
+ if c in conf:
+ chw_bin_body += struct.pack("!h",1)
+ else:
+ chw_bin_body += struct.pack("!h",0)
+ del cfses[i]
+
+ if len(cfses) != 0:
+ return -xsconstants.XSERR_BAD_CONFLICTSET, "", ""
+
+ chw_bin += chw_bin_body
+
+ while len(chw_bin) < roundup8(len(chw_bin)):
+ chw_bin += "\x00"
+
+ # Build STE part
+ steformat="!iiiii"
+ ste_bin = struct.pack(steformat,
+ ACM_STE_VERSION,
+ ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
+ len(stes),
+ max_ste_types,
+ struct.calcsize(steformat))
+ ste_bin_body = ""
+ if stes:
+ # Simulate __NULL_LABEL__
+ for s in stes:
+ ste_bin_body += struct.pack("!h",0)
+ # VMs that are listed and their STE types
+ for v in vms_with_stes:
+ unknown_ste |= (set(stes_by_vm[v]) - set(stes))
+ for s in stes:
+ if s in stes_by_vm[v]:
+ ste_bin_body += struct.pack("!h",1)
+ else:
+ ste_bin_body += struct.pack("!h",0)
+ for r in resnames:
+ unknown_ste |= (set(stes_by_res[r]) - set(stes))
+ for s in stes:
+ if s in stes_by_res[r]:
+ ste_bin_body += struct.pack("!h",1)
+ else:
+ ste_bin_body += struct.pack("!h",0)
+
+ ste_bin += ste_bin_body;
+
+ while len(ste_bin) < roundup8(len(ste_bin)):
+ ste_bin += "\x00"
+
+ #Write binary header:
+ headerformat="!iiiiiiiiii"
+ totallen_bin = struct.calcsize(headerformat) + \
+ len(pr_bin) + len(chw_bin) + len(ste_bin)
+ polref_offset = struct.calcsize(headerformat)
+ primpoloffset = polref_offset + len(pr_bin)
+ if primpolcode == ACM_CHINESE_WALL_POLICY:
+ secpoloffset = primpoloffset + len(chw_bin)
+ elif primpolcode == ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY:
+ secpoloffset = primpoloffset + len(ste_bin)
+ else:
+ secpoloffset = primpoloffset
+
+ (major, minor) = self.getVersionTuple()
+ hdr_bin = struct.pack(headerformat,
+ ACM_POLICY_VERSION,
+ ACM_MAGIC,
+ totallen_bin,
+ polref_offset,
+ primpolcode,
+ primpoloffset,
+ secpolcode,
+ secpoloffset,
+ major, minor)
+
+ all_bin = array.array('B')
+ for s in [ hdr_bin, pr_bin, chw_bin, ste_bin ]:
+ for c in s:
+ all_bin.append(ord(c))
+
+ log.info("Compiled policy: rc = %s" % hex(rc))
+ if len(unknown_ste) > 0:
+ log.info("The following STEs in VM/res labels were unknown:" \
+ " %s" % list(unknown_ste))
+ if len(unknown_chw) > 0:
+ log.info("The following Ch. Wall types in labels were unknown:" \
+ " %s" % list(unknown_chw))
+ return rc, mapfile, all_bin.tostring()
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/util/bootloader.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/bootloader.py Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,521 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2006,2007 International Business Machines Corp.
+# Author: Stefan Berger <stefanb@us.ibm.com>
+#============================================================================
+
+import re
+import os, stat
+import tempfile
+import shutil
+import threading
+from xen.xend.XendLogging import log
+
+__bootloader = None
+
+#
+# Functions for modifying entries in the bootloader, i.e. adding
+# a module to boot the system with a policy.
+#
+
+def get_default_title():
+    """ Return the index of the default boot title as reported by the
+        module's active bootloader object (see the Bootloader class). """
+    active = __bootloader
+    return active.get_default_title()
+
+
+def get_boot_policies():
+    """ Return the dictionary of boot policies as reported by the
+        module's active bootloader object (see the Bootloader class). """
+    active = __bootloader
+    return active.get_boot_policies()
+
+
+def add_boot_policy(index, binpolname):
+    """ Ask the module's active bootloader object to add the binary policy
+        'binpolname' to the boot title at 'index' (see the Bootloader
+        class). """
+    active = __bootloader
+    return active.add_boot_policy(index, binpolname)
+
+
+def rm_policy_from_boottitle(index, unamelist):
+    """ Ask the module's active bootloader object to remove one of the
+        policies named in 'unamelist' from the boot title at 'index'
+        (see the Bootloader class). """
+    active = __bootloader
+    return active.rm_policy_from_boottitle(index, unamelist)
+
+
+def set_kernel_attval(index, att, val):
+    """ Ask the module's active bootloader object to set attribute 'att'
+        to 'val' on the kernel line of the boot title at 'index'
+        (see the Bootloader class). """
+    active = __bootloader
+    return active.set_kernel_attval(index, att, val)
+
+
+def get_kernel_val(index, att):
+    """ Ask the module's active bootloader object for the value of
+        attribute 'att' on the kernel line of the boot title at 'index'
+        (see the Bootloader class). """
+    active = __bootloader
+    return active.get_kernel_val(index, att)
+
+
+def set_boot_policy(title_idx, filename):
+    """ Make 'filename' the policy of the boot title with index 'title_idx'.
+        A policy that is currently listed for that title is removed first.
+        @return: the result of add_boot_policy() for the new policy
+    """
+    current = get_boot_policies()
+    if title_idx in current:
+        rm_policy_from_boottitle(title_idx, [ current[title_idx] ])
+    return add_boot_policy(title_idx, filename)
+
+
+def loads_default_policy(filename):
+    """ Tell whether the default boot title loads the policy 'filename'.
+        The name matches with or without a leading '/'. """
+    default_pol = get_default_policy()
+    if default_pol == None:
+        return False
+    return default_pol == filename or "/" + default_pol == filename
+
+
+def get_default_policy():
+    """ Look up the policy that the default boot title loads.
+        Returns None if the default title has no policy entry. """
+    default_idx = get_default_title()
+    return get_boot_policies().get(default_idx)
+
+
+def set_default_boot_policy(filename):
+    """ Install 'filename' as the boot policy of the default boot title. """
+    return set_boot_policy(get_default_title(), filename)
+
+
+def __is_bootdir_mounted():
+    """
+    Determine whether the boot partition /boot is mounted or not.
+
+    Scans /proc/mounts for an entry whose second field (the mount point)
+    is "/boot".
+    @rtype: bool
+    @return: True if such an entry exists, False otherwise
+    """
+    mounts = open("/proc/mounts")
+    try:
+        for line in mounts:
+            fields = line.split()
+            # Blank or truncated lines have no mount point field; skip
+            # them instead of failing with an IndexError.
+            if len(fields) > 1 and fields[1] == "/boot":
+                return True
+    finally:
+        # Release the file handle on every path, including read errors.
+        mounts.close()
+    return False
+
+def get_prefix():
+    """ Return the path prefix to put in front of a policy file name:
+        "/" when /boot is a mounted partition of its own, "/boot/"
+        otherwise. """
+    if not __is_bootdir_mounted():
+        return "/boot/"
+    return "/"
+
+
+
+class Bootloader:
+ """ Bootloader base class; real bootloader implementations must override
+ its methods. The methods here are stubs that return neutral values. """
+ def __init__(self):
+ pass
+
+ def probe(self):
+ """ Test whether this implementation of a bootloader is supported on the
+ local system """
+ return True
+
+ def get_default_title(self):
+ """ Get the index (starting with 0) of the default boot title
+ This number is read from the bootloader's configuration file.
+ In case of an error None is returned
+ @rtype: int
+ @return: the index of the default boot title
+ """
+ return None
+
+ def get_boot_policies(self):
+ """ Get a dictionary of policies that the system is booting with.
+ @rtype: dict
+ @return: dictionary of boot titles where the keys are the
+ indices of the boot titles
+ """
+ return {}
+
+ def add_boot_policy(self, index, binpolname):
+ """ Add the binary policy for automatic loading when
+ booting the system. Add it to the boot title at index
+ 'index'.
+ """
+ return False
+
+ def rm_policy_from_boottitle(self, index, unamelist):
+ """ Remove a policy from the given title. A list of possible policies
+ must be given to detect what module to remove
+ """
+ return False
+
+ def set_kernel_attval(self, index, att, val):
+ """
+ Append an attribute/value pair to the kernel line.
+ @param index : The index of the title to modify
+ @param att : The attribute to add
+ @param val : The value to add. If no value or the special value
+ '<>' is given, then the attribute will be removed.
+ If an empty value is given, then only the attribute
+ is added in the format "att", otherwise "att=val"
+ is added.
+ """
+ return False
+
+ def get_kernel_val(self, index, att):
+ """
+ Get an attribute's value from the kernel line.
+ @param index : The index of the title to get the attribute/value from
+ @param att : The attribute to read the value of
+ """
+ return None
+
+
+class Grub(Bootloader):
+    """ Implementation for manipulating bootloader entries in grub according
+        to the 'Bootloader' class interface.
+
+        Every method that reads or rewrites the grub configuration file
+        holds a reentrant lock while doing so. Methods that modify the file
+        keep the previous version as '<bootfile>_save'.
+    """
+
+    def __init__(self):
+        self.__bootfile_lock = threading.RLock()
+        self.title_re = re.compile("\s*title\s", re.IGNORECASE)
+        self.module_re = re.compile("\s+module\s", re.IGNORECASE)
+        self.policy_re = re.compile(".*\.bin", re.IGNORECASE)
+        self.kernel_re = re.compile("\s*kernel\s", re.IGNORECASE)
+        Bootloader.__init__(self)
+
+    def probe(self):
+        """ Grub is considered supported when its configuration file can
+            be located. """
+        try:
+            self.__get_bootfile()
+        except (IOError, OSError):
+            return False
+        return True
+
+
+    def __get_bootfile(self):
+        """ Get the name of the bootfile.
+            Raises IOError if neither grub.conf nor menu.lst exists. """
+        boot_file = "/boot/grub/grub.conf"
+        alt_boot_file = "/boot/grub/menu.lst"
+
+        if not os.path.isfile(boot_file):
+            #take alternate boot file instead
+            boot_file = alt_boot_file
+
+        if not os.path.exists(boot_file):
+            raise IOError("Boot file \'%s\' not found." % boot_file)
+
+        #follow symlink since menu.lst might be linked to grub.conf
+        if stat.S_ISLNK(os.lstat(boot_file)[stat.ST_MODE]):
+            new_name = os.readlink(boot_file)
+            if new_name[0] == "/":
+                boot_file = new_name
+            else:
+                # relative link target: resolve against the link's directory
+                boot_file = os.path.join(os.path.dirname(boot_file),
+                                         new_name)
+            if not os.path.exists(boot_file):
+                raise IOError("Boot file \'%s\' not found." % boot_file)
+        return boot_file
+
+
+    def __read_lines(self, boot_file):
+        """ Read all lines of the boot file and close the file again. """
+        grub_fd = open(boot_file)
+        try:
+            return grub_fd.readlines()
+        finally:
+            grub_fd.close()
+
+
+    def __install_lines(self, boot_file, lines):
+        """ Replace the content of the boot file with 'lines'. The old file
+            is kept as '<boot_file>_save'. The temporary file used for
+            staging is removed on every path. """
+        (tmp_fd, tmp_grub) = tempfile.mkstemp()
+        try:
+            try:
+                for line in lines:
+                    os.write(tmp_fd, line)
+            finally:
+                os.close(tmp_fd)
+            shutil.move(boot_file, boot_file+"_save")
+            shutil.copyfile(tmp_grub, boot_file)
+        finally:
+            try:
+                os.remove(tmp_grub)
+            except OSError:
+                pass
+
+
+    def __get_titles(self):
+        """ Get the names of all boot titles in the grub config file
+            @rtype: list
+            @return: list of names of available boot titles
+        """
+        titles = []
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return []
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                m = self.title_re.match(line)
+                if m:
+                    # Cut the keyword off by position. str.lstrip('title')
+                    # strips a set of characters and is case sensitive,
+                    # unlike the regular expression that matched the line.
+                    titles.append(line[m.end():].strip())
+        finally:
+            self.__bootfile_lock.release()
+        return titles
+
+
+    def get_default_title(self):
+        """ Get the index (starting with 0) of the default boot title
+            This number is read from the grub configuration file.
+            None is returned if the configuration file cannot be found or
+            contains no numeric 'default' entry.
+            @rtype: int
+            @return: the index of the default boot title
+        """
+        # Accepts 'default=N' and 'default N'; entries with a non-numeric
+        # value do not match and are skipped.
+        def_re = re.compile("\s*default(\s*=\s*|\s+)(\d+)\s*$", re.IGNORECASE)
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return None
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                m = def_re.match(line)
+                if m:
+                    return int(m.group(2))
+        finally:
+            self.__bootfile_lock.release()
+        return None
+
+
+    def get_boot_policies(self):
+        """ Get a dictionary of policies that the system is booting with.
+            @rtype: dict
+            @return: dictionary of boot titles where the keys are the
+                     indices of the boot titles
+        """
+        policies = {}
+        idx = -1
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return policies
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                if self.title_re.match(line):
+                    idx = idx + 1
+                    continue
+                if idx < 0:
+                    # module lines before the first title belong to no title
+                    continue
+                m = self.module_re.match(line)
+                if m and self.policy_re.match(line):
+                    pol = line[m.end():].strip()
+                    # store the name relative to the boot directory
+                    if pol[0:1] == '/':
+                        pol = pol[1:]
+                    if pol[0:5] == "boot/":
+                        pol = pol[5:]
+                    policies[idx] = pol
+        finally:
+            self.__bootfile_lock.release()
+        return policies
+
+
+    def add_boot_policy(self, index, binpolname):
+        """ Add the binary policy for automatic loading when
+            booting the system. Add it to the boot title at index
+            'index'.
+            The policy is inserted as a new module line right after the
+            first run of module lines of that title. A title without any
+            module line is left untouched.
+            @rtype: bool
+            @return: True if the module line was added, False otherwise
+        """
+        ctr = 0
+        module_line = ""
+        within_title = False
+        found = False
+        new_lines = []
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return False
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                if self.title_re.match(line):
+                    # next title starts: flush a pending module line first
+                    if module_line != "" and not found:
+                        new_lines.append(module_line)
+                        found = True
+                    within_title = (ctr == index)
+                    ctr = ctr + 1
+                elif within_title and self.module_re.match(line):
+                    # remember the line; it is written once the run of
+                    # module lines ends
+                    module_line = "\tmodule %s%s\n" % (get_prefix(),
+                                                       binpolname)
+                elif module_line != "" and not found:
+                    new_lines.append(module_line)
+                    found = True
+
+                new_lines.append(line)
+
+            if module_line != "" and not found:
+                # the module lines were the last lines of the file
+                if new_lines and not new_lines[-1].endswith("\n"):
+                    new_lines[-1] = new_lines[-1] + "\n"
+                new_lines.append(module_line)
+                found = True
+
+            if found:
+                self.__install_lines(boot_file, new_lines)
+        finally:
+            self.__bootfile_lock.release()
+        return found
+
+
+    def rm_policy_from_boottitle(self, index, unamelist):
+        """ Remove a policy from the given title. A list of possible policies
+            must be given to detect what module to remove
+            @rtype: bool
+            @return: True if a module line was removed, False otherwise
+        """
+        found = False
+        ctr = 0
+        within_title = False
+        new_lines = []
+
+        prefix = get_prefix()
+        namelist = [prefix+name for name in unamelist]
+
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return False
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                if self.title_re.match(line):
+                    within_title = (ctr == index)
+                    ctr = ctr + 1
+                if within_title:
+                    m = self.module_re.match(line)
+                    if m and self.policy_re.match(line) and \
+                       line[m.end():].strip() in namelist:
+                        # drop this line from the new configuration
+                        found = True
+                        continue
+                new_lines.append(line)
+            if found:
+                self.__install_lines(boot_file, new_lines)
+        finally:
+            self.__bootfile_lock.release()
+        return found
+
+
+    def set_kernel_attval(self, index, att, val):
+        """
+        Append an attribute/value pair to the kernel line.
+        @param index : The index of the title to modify
+        @param att   : The attribute to add
+        @param val   : The value to add. If no value or the special value
+                       '<>' is given, then the attribute will be removed.
+                       If an empty value is given, then only the attribute
+                       is added in the format "att", otherwise "att=val"
+                       is added.
+        @rtype: bool
+        @return: True if a kernel line of the title was rewritten,
+                 False otherwise
+        """
+        found = False
+        ctr = 0
+        within_title = False
+        new_lines = []
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return False
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                if self.title_re.match(line):
+                    within_title = (ctr == index)
+                    ctr = ctr + 1
+                if within_title and self.kernel_re.match(line):
+                    # Strip the newline before splitting so that a bare
+                    # attribute at the end of the line is recognized, too.
+                    items = line.rstrip("\n").split(" ")
+                    nitems = [item for item in items
+                              if item.split("=",1)[0] != att]
+                    if val == "":
+                        nitems.append("%s" % (att))
+                    elif val != None and val != "<>":
+                        nitems.append("%s=%s" % (att,val))
+                    line = " ".join(nitems) + "\n"
+                    found = True
+                new_lines.append(line)
+            if found:
+                self.__install_lines(boot_file, new_lines)
+        finally:
+            self.__bootfile_lock.release()
+        return found
+
+
+    def get_kernel_val(self, index, att):
+        """
+        Get an attribute's value from the kernel line.
+        @param index : The index of the title to get the attribute/value from
+        @param att   : The attribute to read the value of
+        @return: the value, "<>" if the attribute is present without a
+                 value, None if it was not found
+        """
+        ctr = 0
+        within_title = False
+        try:
+            boot_file = self.__get_bootfile()
+        except (IOError, OSError):
+            return None
+        self.__bootfile_lock.acquire()
+        try:
+            for line in self.__read_lines(boot_file):
+                if self.title_re.match(line):
+                    within_title = (ctr == index)
+                    ctr = ctr + 1
+                if within_title and self.kernel_re.match(line):
+                    for item in line.strip().split(" "):
+                        el = item.split("=",1)
+                        if el[0] == att:
+                            if len(el) == 1:
+                                return "<>"
+                            return el[1]
+        finally:
+            self.__bootfile_lock.release()
+        return None # Not found
+
+
+# Select the bootloader implementation used by the module-level functions.
+# The stub base class is the fallback when grub is not found on this system.
+__bootloader = Bootloader()
+
+grub = Grub()
+if grub.probe() == True:
+ __bootloader = grub
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/util/security.py
--- a/tools/python/xen/util/security.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/util/security.py Tue Jul 10 08:39:26 2007 -0600
@@ -15,17 +15,22 @@
# Copyright (C) 2006 International Business Machines Corp.
# Author: Reiner Sailer
# Author: Bryan D. Payne <bdpayne@us.ibm.com>
+# Author: Stefan Berger <stefanb@us.ibm.com>
#============================================================================

import commands
import logging
-import sys, os, string, re
-import traceback
-import shutil
+import os, string, re
+import threading
+import struct
+import stat
from xen.lowlevel import acm
from xen.xend import sxp
+from xen.xend import XendConstants
from xen.xend.XendLogging import log
-from xen.util import dictio
+from xen.xend.XendError import VmError
+from xen.util import dictio, xsconstants
+from xen.xend.XendConstants import *

#global directories and tools for security management
policy_dir_prefix = "/etc/xen/acm-security/policies"
@@ -60,6 +65,10 @@ policy_name_re = re.compile(".*[.chwall|s
#other global variables
NULL_SSIDREF = 0

+#general Rlock for map files; only one lock for all mapfiles
+__mapfile_lock = threading.RLock()
+__resfile_lock = threading.RLock()
+
log = logging.getLogger("xend.util.security")

# Our own exception definition. It is masked (pass) if raised and
@@ -75,12 +84,18 @@ def err(msg):
def err(msg):
"""Raise ACM exception.
"""
- sys.stderr.write("ACMError: " + msg + "\n")
raise ACMError(msg)



active_policy = None
+
+
+def mapfile_lock():
+ """ Acquire the module-wide reentrant lock that guards the map files. """
+ __mapfile_lock.acquire()
+
+def mapfile_unlock():
+ """ Release the module-wide reentrant lock that guards the map files. """
+ __mapfile_lock.release()


def refresh_security_policy():
@@ -106,6 +121,39 @@ def on():
return (active_policy not in ['INACTIVE', 'NULL'])


+def calc_dom_ssidref_from_info(info):
+    """
+       Calculate a domain's ssidref from the security_label in its
+       info.
+       This function is called before the domain is started and
+       makes sure that:
+        - the type of the policy is the same as indicated in the label
+        - the name of the policy is the same as indicated in the label
+        - calculates an up-to-date ssidref for the domain
+       The latter is necessary since the domain's ssidref could have
+       changed due to changes to the policy.
+
+       @param info: the domain's XendConfig; its 'security_label' has the
+                    format '<policy type>:<policy name>:<VM label>'
+       @return: the ssidref, or 0 if the info carries no security label
+       Raises VmError for a malformed label, an unsupported policy type,
+       a policy other than the active one or an unsupported 'info' type.
+    """
+    import xen.xend.XendConfig
+    if not isinstance(info, xen.xend.XendConfig.XendConfig):
+        raise VmError("security.calc_dom_ssidref_from_info: info of type "
+                      "'%s' not supported." % type(info))
+    if not info.has_key('security_label'):
+        return 0
+
+    seclab = info['security_label']
+    parts = seclab.split(":")
+    if len(parts) != 3:
+        raise VmError("VM label '%s' in wrong format." % seclab)
+    typ, policyname, vmlabel = parts
+    if typ != xsconstants.ACM_POLICY_ID:
+        raise VmError("Policy type '%s' not supported." % typ)
+    # make sure active_policy reflects the policy currently in use
+    refresh_security_policy()
+    if active_policy != policyname:
+        raise VmError("Active policy '%s' different than "
+                      "what in VM's label ('%s')." %
+                      (active_policy, policyname))
+    return label2ssidref(vmlabel, policyname, "dom")

# Assumes a 'security' info [security access_control ...] [ssidref ...]
def get_security_info(info, field):
@@ -144,7 +192,6 @@ def get_security_info(info, field):
return 0
else:
return None
-


def get_security_printlabel(info):
@@ -250,32 +297,37 @@ def ssidref2label(ssidref_var):
else:
err("Instance type of ssidref not supported (must be of type 'str' or 'int')")

- (primary, secondary, f, pol_exists) = getmapfile(None)
- if not f:
- if (pol_exists):
- err("Mapping file for policy \'" + policyname + "\' not found.\n" +
- "Please use makepolicy command to create mapping file!")
- else:
- err("Policy file for \'" + active_policy + "\' not found.")
-
- #2. get labelnames for both ssidref parts
- pri_ssid = ssidref & 0xffff
- sec_ssid = ssidref >> 16
- pri_null_ssid = NULL_SSIDREF & 0xffff
- sec_null_ssid = NULL_SSIDREF >> 16
- pri_labels = []
- sec_labels = []
- labels = []
-
- for line in f:
- l = line.split()
- if (len(l) < 5) or (l[0] != "LABEL->SSID"):
- continue
- if primary and (l[2] == primary) and (int(l[4], 16) == pri_ssid):
- pri_labels.append(l[3])
- if secondary and (l[2] == secondary) and (int(l[4], 16) == sec_ssid):
- sec_labels.append(l[3])
- f.close()
+ try:
+ mapfile_lock()
+
+ (primary, secondary, f, pol_exists) = getmapfile(None)
+ if not f:
+ if (pol_exists):
+ err("Mapping file for policy not found.\n" +
+ "Please use makepolicy command to create mapping file!")
+ else:
+ err("Policy file for \'" + active_policy + "\' not found.")
+
+ #2. get labelnames for both ssidref parts
+ pri_ssid = ssidref & 0xffff
+ sec_ssid = ssidref >> 16
+ pri_null_ssid = NULL_SSIDREF & 0xffff
+ sec_null_ssid = NULL_SSIDREF >> 16
+ pri_labels = []
+ sec_labels = []
+ labels = []
+
+ for line in f:
+ l = line.split()
+ if (len(l) < 5) or (l[0] != "LABEL->SSID"):
+ continue
+ if primary and (l[2] == primary) and (int(l[4], 16) == pri_ssid):
+ pri_labels.append(l[3])
+ if secondary and (l[2] == secondary) and (int(l[4], 16) == sec_ssid):
+ sec_labels.append(l[3])
+ f.close()
+ finally:
+ mapfile_unlock()

#3. get the label that is in both lists (combination must be a single label)
if (primary == "CHWALL") and (pri_ssid == pri_null_ssid) and (sec_ssid != sec_null_ssid):
@@ -297,7 +349,7 @@ def ssidref2label(ssidref_var):



-def label2ssidref(labelname, policyname, type):
+def label2ssidref(labelname, policyname, typ):
"""
returns ssidref corresponding to labelname;
maps current policy to default directory
@@ -307,42 +359,51 @@ def label2ssidref(labelname, policyname,
err("Cannot translate labels for \'" + policyname + "\' policy.")

allowed_types = ['ANY']
- if type == 'dom':
+ if typ == 'dom':
allowed_types.append('VM')
- elif type == 'res':
+ elif typ == 'res':
allowed_types.append('RES')
else:
err("Invalid type. Must specify 'dom' or 'res'.")

- (primary, secondary, f, pol_exists) = getmapfile(policyname)
-
- #2. get labelnames for ssidref parts and find a common label
- pri_ssid = []
- sec_ssid = []
- for line in f:
- l = line.split()
- if (len(l) < 5) or (l[0] != "LABEL->SSID"):
- continue
- if primary and (l[1] in allowed_types) and (l[2] == primary) and (l[3] == labelname):
- pri_ssid.append(int(l[4], 16))
- if secondary and (l[1] in allowed_types) and (l[2] == secondary) and (l[3] == labelname):
- sec_ssid.append(int(l[4], 16))
- f.close()
- if (type == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0):
- pri_ssid.append(NULL_SSIDREF)
- elif (type == 'res') and (secondary == "CHWALL") and (len(sec_ssid) == 0):
- sec_ssid.append(NULL_SSIDREF)
-
- #3. sanity check and composition of ssidref
- if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and (secondary != "NULL")):
- err("Label \'" + labelname + "\' not found.")
- elif (len(pri_ssid) > 1) or (len(sec_ssid) > 1):
- err("Label \'" + labelname + "\' not unique in policy (policy error)")
- if secondary == "NULL":
- return pri_ssid[0]
- else:
- return (sec_ssid[0] << 16) | pri_ssid[0]
-
+ try:
+ mapfile_lock()
+ (primary, secondary, f, pol_exists) = getmapfile(policyname)
+
+ #2. get labelnames for ssidref parts and find a common label
+ pri_ssid = []
+ sec_ssid = []
+ for line in f:
+ l = line.split()
+ if (len(l) < 5) or (l[0] != "LABEL->SSID"):
+ continue
+ if primary and (l[1] in allowed_types) and \
+ (l[2] == primary) and \
+ (l[3] == labelname):
+ pri_ssid.append(int(l[4], 16))
+ if secondary and (l[1] in allowed_types) and \
+ (l[2] == secondary) and \
+ (l[3] == labelname):
+ sec_ssid.append(int(l[4], 16))
+ f.close()
+ if (typ == 'res') and (primary == "CHWALL") and (len(pri_ssid) == 0):
+ pri_ssid.append(NULL_SSIDREF)
+ elif (typ == 'res') and (secondary == "CHWALL") and \
+ (len(sec_ssid) == 0):
+ sec_ssid.append(NULL_SSIDREF)
+
+ #3. sanity check and composition of ssidref
+ if (len(pri_ssid) == 0) or ((len(sec_ssid) == 0) and \
+ (secondary != "NULL")):
+ err("Label \'" + labelname + "\' not found.")
+ elif (len(pri_ssid) > 1) or (len(sec_ssid) > 1):
+ err("Label \'" + labelname + "\' not unique in policy (policy error)")
+ if secondary == "NULL":
+ return pri_ssid[0]
+ else:
+ return (sec_ssid[0] << 16) | pri_ssid[0]
+ finally:
+ mapfile_unlock()


def refresh_ssidref(config):
@@ -381,8 +442,9 @@ def refresh_ssidref(config):
err("Illegal field in access_control")
#verify policy is correct
if active_policy != policyname:
- err("Policy \'" + policyname + "\' in label does not match active policy \'"
- + active_policy +"\'!")
+ err("Policy \'" + str(policyname) +
+ "\' in label does not match active policy \'"
+ + str(active_policy) +"\'!")

new_ssidref = label2ssidref(labelname, policyname, 'dom')
if not new_ssidref:
@@ -470,6 +532,25 @@ def get_decision(arg1, arg2):
err("Cannot determine decision (Invalid parameter).")


+def hv_chg_policy(bin_pol, del_array, chg_array):
+    """
+        Change the binary policy in the hypervisor
+        The 'del_array' and 'chg_array' give hints about deleted ssidrefs
+        and changed ssidrefs which can be due to deleted VM labels
+        or reordered VM labels
+
+        @return: tuple (rc, errors) as delivered by acm.chgpolicy().
+                 rc is -XSERR_GENERAL_FAILURE if the call raised an
+                 exception and -XSERR_HV_OP_FAILED if 'errors' is not empty.
+        Raises ACMError (through err()) if no policy is active.
+    """
+    rc = -xsconstants.XSERR_GENERAL_FAILURE
+    errors = ""
+    if not on():
+        err("No policy active.")
+    try:
+        rc, errors = acm.chgpolicy(bin_pol, del_array, chg_array)
+    except Exception, e:
+        # The caller still gets the general failure code; record the
+        # cause so that the failure can be diagnosed.
+        log.error("Changing the policy in the hypervisor failed: %s" %
+                  str(e))
+    if (len(errors) > 0):
+        rc = -xsconstants.XSERR_HV_OP_FAILED
+    return rc, errors
+

def make_policy(policy_name):
policy_file = string.join(string.split(policy_name, "."), "/")
@@ -479,8 +560,6 @@ def make_policy(policy_name):
(ret, output) = commands.getstatusoutput(xensec_xml2bin + " -d " + policy_dir_prefix + " " + policy_file)
if ret:
err("Creating policy failed:\n" + output)
-
-

def load_policy(policy_name):
global active_policy
@@ -538,8 +617,8 @@ def list_labels(policy_name, condition):


def get_res_label(resource):
- """Returns resource label information (label, policy) if it exists.
- Otherwise returns null label and policy.
+ """Returns resource label information (policytype, label, policy) if
+ it exists. Otherwise returns null label and policy.
"""
def default_res_label():
ssidref = NULL_SSIDREF
@@ -547,23 +626,19 @@ def get_res_label(resource):
label = ssidref2label(ssidref)
else:
label = None
- return (label, 'NULL')
-
- (label, policy) = default_res_label()
-
- # load the resource label file
- res_label_cache = {}
- try:
- res_label_cache = dictio.dict_read("resources", res_label_filename)
- except:
- log.info("Resource label file not found.")
- return default_res_label()
-
- # find the resource information
- if res_label_cache.has_key(resource):
- (policy, label) = res_label_cache[resource]
-
- return (label, policy)
+ return (xsconstants.ACM_POLICY_ID, 'NULL', label)
+
+
+ tmp = get_resource_label(resource)
+ if len(tmp) == 2:
+ policytype = xsconstants.ACM_POLICY_ID
+ policy, label = tmp
+ elif len(tmp) == 3:
+ policytype, policy, label = tmp
+ else:
+ policytype, policy, label = default_res_label()
+
+ return (policytype, label, policy)


def get_res_security_details(resource):
@@ -582,7 +657,7 @@ def get_res_security_details(resource):
(label, ssidref, policy) = default_security_details()

# find the entry associated with this resource
- (label, policy) = get_res_label(resource)
+ (policytype, label, policy) = get_res_label(resource)
if policy == 'NULL':
log.info("Resource label for "+resource+" not in file, using DEFAULT.")
return default_security_details()
@@ -596,8 +671,29 @@ def get_res_security_details(resource):

return (label, ssidref, policy)

-
-def unify_resname(resource):
+def security_label_to_details(seclab):
+    """ Convert a Xen-API type of security label, formatted as
+        '<policy type>:<policy name>:<label>', into the tuple
+        (label, ssidref, policy). A label that is not for the active
+        policy yields the default details instead. """
+    (policytype, policy, label) = seclab.split(":")
+
+    # is this resource label for the running policy?
+    if policy != active_policy:
+        log.info("Resource label not for active policy, using DEFAULT.")
+        if on():
+            default_label = ssidref2label(NULL_SSIDREF)
+        else:
+            default_label = None
+        return (default_label, NULL_SSIDREF, active_policy)
+
+    return (label, label2ssidref(label, policy, 'res'), policy)
+
+def unify_resname(resource, mustexist=True):
"""Makes all resource locations absolute. In case of physical
resources, '/dev/' is added to local file names"""

@@ -606,28 +702,53 @@ def unify_resname(resource):

# sanity check on resource name
try:
- (type, resfile) = resource.split(":", 1)
+ (typ, resfile) = resource.split(":", 1)
except:
err("Resource spec '%s' contains no ':' delimiter" % resource)

- if type == "tap":
+ if typ == "tap":
try:
(subtype, resfile) = resfile.split(":")
except:
err("Resource spec '%s' contains no tap subtype" % resource)

- if type in ["phy", "tap"]:
+ import os
+ if typ in ["phy", "tap"]:
if not resfile.startswith("/"):
resfile = "/dev/" + resfile
+ if mustexist:
+ stats = os.lstat(resfile)
+ if stat.S_ISLNK(stats[stat.ST_MODE]):
+ resolved = os.readlink(resfile)
+ if resolved[0] != "/":
+ resfile = os.path.join(os.path.dirname(resfile), resolved)
+ resfile = os.path.abspath(resfile)
+ else:
+ resfile = resolved
+ stats = os.lstat(resfile)
+ if not (stat.S_ISBLK(stats[stat.ST_MODE])):
+ err("Invalid resource")
+
+ if typ in [ "file", "tap" ]:
+ if mustexist:
+ stats = os.lstat(resfile)
+ if stat.S_ISLNK(stats[stat.ST_MODE]):
+ resfile = os.readlink(resfile)
+ stats = os.lstat(resfile)
+ if not stat.S_ISREG(stats[stat.ST_MODE]):
+ err("Invalid resource")

#file: resources must specified with absolute path
- if (not resfile.startswith("/")) or (not os.path.exists(resfile)):
- err("Invalid resource.")
+ #vlan resources don't start with '/'
+ if typ != "vlan":
+ if (not resfile.startswith("/")) or \
+ (mustexist and not os.path.exists(resfile)):
+ err("Invalid resource.")

# from here on absolute file names with resources
- if type == "tap":
- type = type + ":" + subtype
- resource = type + ":" + resfile
+ if typ == "tap":
+ typ = typ + ":" + subtype
+ resource = typ + ":" + resfile
return resource


@@ -662,9 +783,481 @@ def res_security_check(resource, domain_
else:
# Note, we can't canonicalise the resource here, because people using
# xm without ACM are free to use relative paths.
- (label, policy) = get_res_label(resource)
+ (policytype, label, policy) = get_res_label(resource)
if policy != 'NULL':
raise ACMError("Security is off, but '"+resource+"' is labeled")
rtnval = 0

return rtnval
+
+def res_security_check_xapi(rlabel, rssidref, rpolicy, xapi_dom_label):
+ """Checks if the given resource can be used by the given domain
+ label. Returns 1 if the resource can be used, otherwise 0.
+ """
+ rtnval = 1
+ # if security is on, ask the hypervisor for a decision
+ if on():
+ typ, dpolicy, domain_label = xapi_dom_label.split(":")
+ if not dpolicy or not domain_label:
+ raise VmError("VM security label in wrong format.")
+ if active_policy != rpolicy:
+ raise VmError("Resource's policy '%s' != active policy '%s'" %
+ (rpolicy, active_policy))
+ domac = ['access_control']
+ domac.append(['policy', active_policy])
+ domac.append(['label', domain_label])
+ domac.append(['type', 'dom'])
+ decision = get_decision(domac, ['ssidref', str(rssidref)])
+
+ log.info("Access Control Decision : %s" % decision)
+ # provide descriptive error messages
+ if decision == 'DENIED':
+ if rlabel == ssidref2label(NULL_SSIDREF):
+ #raise ACMError("Resource is not labeled")
+ rtnval = 0
+ else:
+ #raise ACMError("Permission denied for resource because label '"+rlabel+"' is not allowed")
+ rtnval = 0
+
+ # security is off, make sure resource isn't labeled
+ else:
+ # Note, we can't canonicalise the resource here, because people using
+ # xm without ACM are free to use relative paths.
+ if rpolicy != 'NULL':
+ #raise ACMError("Security is off, but resource is labeled")
+ rtnval = 0
+
+ return rtnval
+
+
+def set_resource_label_xapi(resource, reslabel_xapi, oldlabel_xapi):
+ """Assign a resource label to a resource
+ @param resource: The name of a resource, i.e., "phy:/dev/hda", or
+ "tap:qcow:/path/to/file.qcow"
+
+ @param reslabel_xapi: A resource label foramtted as in all other parts of
+ the Xen-API, i.e., ACM:xm-test:blue"
+ @rtype: int
+ @return Success (0) or failure value (< 0)
+ """
+ olabel = ""
+ if reslabel_xapi == "":
+ return rm_resource_label(resource, oldlabel_xapi)
+ typ, policyref, label = reslabel_xapi.split(":")
+ if typ != xsconstants.ACM_POLICY_ID:
+ return -xsconstants.XSERR_WRONG_POLICY_TYPE
+ if not policyref or not label:
+ return -xsconstants.XSERR_BAD_LABEL_FORMAT
+ if oldlabel_xapi not in [ "" ]:
+ tmp = oldlabel_xapi.split(":")
+ if len(tmp) != 3:
+ return -xsconstants.XSERR_BAD_LABEL_FORMAT
+ otyp, opolicyref, olabel = tmp
+ # Only ACM is supported
+ if otyp != xsconstants.ACM_POLICY_ID:
+ return -xsconstants.XSERR_WRONG_POLICY_TYPE
+ return set_resource_label(resource, typ, policyref, label, olabel)
+
+def is_resource_in_use(resource):
+ """ Investigate all running domains whether they use this device """
+ from xen.xend import XendDomain
+ dominfos = XendDomain.instance().list('all')
+ lst = []
+ for dominfo in dominfos:
+ if is_resource_in_use_by_dom(dominfo, resource):
+ lst.append(dominfo)
+ return lst
+
+def devices_equal(res1, res2):
+ """ Determine whether two devices are equal """
+ return (unify_resname(res1) == unify_resname(res2))
+
+def is_resource_in_use_by_dom(dominfo, resource):
+ """ Determine whether a resources is in use by a given domain
+ @return True or False
+ """
+ if not dominfo.domid:
+ return False
+ if dominfo._stateGet() not in [ DOM_STATE_RUNNING ]:
+ return False
+ devs = dominfo.info['devices']
+ uuids = devs.keys()
+ for uuid in uuids:
+ dev = devs[uuid]
+ if len(dev) >= 2 and dev[1].has_key('uname'):
+ # dev[0] is type, i.e. 'vbd'
+ if devices_equal(dev[1]['uname'], resource):
+ log.info("RESOURCE IN USE: Domain %d uses %s." %
+ (dominfo.domid, resource))
+ return True
+ return False
+
+
+def get_domain_resources(dominfo):
+ """ Collect all resources of a domain in a map where each entry of
+ the map is a list.
+ Entries are strored in the following formats:
+ tap:qcow:/path/xyz.qcow
+ """
+ resources = { 'vbd' : [], 'tap' : []}
+ devs = dominfo.info['devices']
+ uuids = devs.keys()
+ for uuid in uuids:
+ dev = devs[uuid]
+ typ = dev[0]
+ if typ in [ 'vbd', 'tap' ]:
+ resources[typ].append(dev[1]['uname'])
+
+ return resources
+
+
+def resources_compatible_with_vmlabel(xspol, dominfo, vmlabel):
+ """
+ Check whether the resources' labels are compatible with the
+ given VM label. This is a function to be used when for example
+ a running domain is to get the new label 'vmlabel'
+ """
+ if not xspol:
+ return False
+
+ try:
+ __resfile_lock.acquire()
+ try:
+ access_control = dictio.dict_read("resources",
+ res_label_filename)
+ except:
+ return False
+ return __resources_compatible_with_vmlabel(xspol, dominfo, vmlabel,
+ access_control)
+ finally:
+ __resfile_lock.release()
+ return False
+
+
+def __resources_compatible_with_vmlabel(xspol, dominfo, vmlabel,
+ access_control):
+ """
+ Check whether the resources' labels are compatible with the
+ given VM label. The access_control parameter provides a
+ dictionary of the resource name to resource label mappings
+ under which the evaluation should be done.
+ """
+ resources = get_domain_resources(dominfo)
+ reslabels = [] # all resource labels
+ polname = xspol.get_name()
+ for key in resources.keys():
+ for res in resources[key]:
+ try:
+ tmp = access_control[res]
+ if len(tmp) != 3:
+ return False
+
+ if polname != tmp[1]:
+ return False
+ label = tmp[2]
+ if not label in reslabels:
+ reslabels.append(label)
+ except:
+ return False
+ # Check that all resource labes have a common STE type with the
+ # vmlabel
+ rc = xspol.policy_check_vmlabel_against_reslabels(vmlabel, reslabels)
+ return rc;
+
+def set_resource_label(resource, policytype, policyref, reslabel, \
+ oreslabel = None):
+ """Assign a label to a resource
+ If the old label (oreslabel) is given, then the resource must have
+ that old label.
+ A resource label may be changed if
+ - the resource is not in use
+ @param resource : The name of a resource, i.e., "phy:/dev/hda"
+ @param policyref : The name of the policy
+ @param reslabel : the resource label within the policy
+ @param oreslabel : optional current resource label
+
+ @rtype: int
+ @return Success (0) or failure value (< 0)
+ """
+ try:
+ resource = unify_resname(resource, mustexist=False)
+ except Exception:
+ return -xsconstants.XSERR_BAD_RESOURCE_FORMAT
+
+ domains = is_resource_in_use(resource)
+ if len(domains) > 0:
+ return -xsconstants.XSERR_RESOURCE_IN_USE
+
+ try:
+ __resfile_lock.acquire()
+ access_control = {}
+ try:
+ access_control = dictio.dict_read("resources", res_label_filename)
+ except:
+ pass
+ if oreslabel:
+ if not access_control.has_key(resource):
+ return -xsconstants.XSERR_BAD_LABEL
+ tmp = access_control[resource]
+ if len(tmp) != 3:
+ return -xsconstants.XSERR_BAD_LABEL
+ if tmp[2] != oreslabel:
+ return -xsconstants.XSERR_BAD_LABEL
+ if reslabel != "":
+ new_entry = { resource : tuple([policytype, policyref, reslabel])}
+ access_control.update(new_entry)
+ else:
+ if access_control.has_key(resource):
+ del access_control[resource]
+ dictio.dict_write(access_control, "resources", res_label_filename)
+ finally:
+ __resfile_lock.release()
+ return xsconstants.XSERR_SUCCESS
+
+def rm_resource_label(resource, oldlabel_xapi):
+ """Remove a resource label from a physical resource
+ @param resource: The name of a resource, i.e., "phy:/dev/hda"
+
+ @rtype: int
+ @return Success (0) or failure value (< 0)
+ """
+ tmp = oldlabel_xapi.split(":")
+ if len(tmp) != 3:
+ return -xsconstants.XSERR_BAD_LABEL_FORMAT
+ otyp, opolicyref, olabel = tmp
+ # Only ACM is supported
+ if otyp != xsconstants.ACM_POLICY_ID and \
+ otyp != xsconstants.INVALID_POLICY_PREFIX + xsconstants.ACM_POLICY_ID:
+ return -xsconstants.XSERR_WRONG_POLICY_TYPE
+ return set_resource_label(resource, "", "", "", olabel)
+
+def get_resource_label_xapi(resource):
+ """Get the assigned resource label of a physical resource
+ in the format used by then Xen-API, i.e., "ACM:xm-test:blue"
+
+ @rtype: string
+ @return the string representing policy type, policy name and label of
+ the resource
+ """
+ res = get_resource_label(resource)
+ return format_resource_label(res)
+
+def format_resource_label(res):
+ if res:
+ if len(res) == 2:
+ return xsconstants.ACM_POLICY_ID + ":" + res[0] + ":" + res[1]
+ if len(res) == 3:
+ return ":".join(res)
+ return ""
+
+def get_resource_label(resource):
+ """Get the assigned resource label of a given resource
+ @param resource: The name of a resource, i.e., "phy:/dev/hda"
+
+ @rtype: list
+ @return tuple of (policy name, resource label), i.e., (xm-test, blue)
+ """
+ try:
+ resource = unify_resname(resource, mustexist=False)
+ except Exception:
+ return []
+
+ reslabel_map = get_labeled_resources()
+
+ if reslabel_map.has_key(resource):
+ return list(reslabel_map[resource])
+ else:
+ #Try to resolve each label entry
+ for key, value in reslabel_map.items():
+ try:
+ if resource == unify_resname(key):
+ return list(value)
+ except:
+ pass
+
+ return []
+
+
+def get_labeled_resources_xapi():
+ """ Get a map of all labeled resource with the labels formatted in the
+ xen-api resource label format.
+ """
+ reslabel_map = get_labeled_resources()
+ for key, labeldata in reslabel_map.items():
+ reslabel_map[key] = format_resource_label(labeldata)
+ return reslabel_map
+
+
+def get_labeled_resources():
+ """Get a map of all labeled resources
+ @rtype: list
+ @return list of labeled resources
+ """
+ try:
+ __resfile_lock.acquire()
+ try:
+ access_control = dictio.dict_read("resources", res_label_filename)
+ except:
+ return {}
+ finally:
+ __resfile_lock.release()
+ return access_control
+
+
+def relabel_domains(relabel_list):
+ """
+ Relabel the given domains to have a new ssidref.
+ @param relabel_list: a list containing tuples of domid, ssidref
+ example: [ [0, 0x00020002] ]
+ """
+ rel_rules = ""
+ for r in relabel_list:
+ log.info("Relabeling domain with domid %d to new ssidref 0x%08x",
+ r[0], r[1])
+ rel_rules += struct.pack("ii", r[0], r[1])
+ try:
+ rc, errors = acm.relabel_domains(rel_rules)
+ except Exception, e:
+ log.info("Error after relabel_domains: %s" % str(e))
+ rc = -xsconstants.XSERR_GENERAL_FAILURE
+ errors = ""
+ if (len(errors) > 0):
+ rc = -xsconstants.XSERR_HV_OP_FAILED
+ return rc, errors
+
+
+def change_acm_policy(bin_pol, del_array, chg_array,
+ vmlabel_map, reslabel_map, cur_acmpol, new_acmpol):
+ """
+ Change the ACM policy of the system by relabeling
+ domains and resources first and doing some access checks.
+ Then update the policy in the hypervisor. If this is all successful,
+ relabel the domains permanently and commit the relabed resources.
+
+ Need to do / check the following:
+ - relabel all resources where there is a 'from' field in
+ the policy. [ NOT DOING THIS: and mark those as unlabeled where the label
+ does not appear in the new policy anymore (deletion) ]
+ - relabel all VMs where there is a 'from' field in the
+ policy and mark those as unlabeled where the label
+ does not appear in the new policy anymore; no running
+ or paused VM may be unlabeled through this
+ - check that under the new labeling conditions the VMs
+ still have access to their resources as before. Unlabeled
+ resources are inaccessible. If this check fails, the
+ update failed.
+ - Attempt changes in the hypervisor; if this step fails,
+ roll back the relabeling of resources and VMs
+ - Make the relabeling of resources and VMs permanent
+ """
+ rc = xsconstants.XSERR_SUCCESS
+
+ domain_label_map = {}
+ new_policyname = new_acmpol.get_name()
+ new_policytype = new_acmpol.get_type_name()
+ cur_policyname = cur_acmpol.get_name()
+ cur_policytype = cur_acmpol.get_type_name()
+ polnew_reslabels = new_acmpol.policy_get_resourcelabel_names()
+ errors=""
+
+ try:
+ __resfile_lock.acquire()
+ mapfile_lock()
+
+ # Get all domains' dominfo.
+ from xen.xend import XendDomain
+ dominfos = XendDomain.instance().list('all')
+
+ log.info("----------------------------------------------")
+ # relabel resources
+
+ access_control = {}
+ try:
+ access_control = dictio.dict_read("resources", res_label_filename)
+ finally:
+ pass
+ for key, labeldata in access_control.items():
+ if len(labeldata) == 2:
+ policy, label = labeldata
+ policytype = xsconstants.ACM_POLICY_ID
+ elif len(labeldata) == 3:
+ policytype, policy, label = labeldata
+ else:
+ return -xsconstants.XSERR_BAD_LABEL_FORMAT, ""
+
+ if policytype != cur_policytype or \
+ policy != cur_policyname:
+ continue
+
+ # label been renamed or deleted?
+ if reslabel_map.has_key(label) and cur_policyname == policy:
+ label = reslabel_map[label]
+ elif label not in polnew_reslabels:
+ policytype = xsconstants.INVALID_POLICY_PREFIX + policytype
+ # Update entry
+ access_control[key] = \
+ tuple([ policytype, new_policyname, label ])
+
+ # All resources have new labels in the access_control map
+ # There may still be labels in there that are invalid now.
+
+ # Do this in memory without writing to disk:
+ # - Relabel all domains independent of whether they are running
+ # or not
+ # - later write back to config files
+ polnew_vmlabels = new_acmpol.policy_get_virtualmachinelabel_names()
+
+ for dominfo in dominfos:
+ sec_lab = dominfo.get_security_label()
+ if not sec_lab:
+ continue
+ policytype, policy, vmlabel = sec_lab.split(":")
+ name = dominfo.getName()
+
+ if policytype != cur_policytype or \
+ policy != cur_policyname:
+ continue
+
+ new_vmlabel = vmlabel
+ if vmlabel_map.has_key(vmlabel):
+ new_vmlabel = vmlabel_map[vmlabel]
+ if new_vmlabel not in polnew_vmlabels:
+ policytype = xsconstants.INVALID_POLICY_PREFIX + policytype
+ new_seclab = "%s:%s:%s" % \
+ (policytype, new_policyname, new_vmlabel)
+
+ domain_label_map[dominfo] = [ sec_lab, new_seclab ]
+
+ if dominfo._stateGet() in (DOM_STATE_PAUSED, DOM_STATE_RUNNING):
+ compatible = __resources_compatible_with_vmlabel(new_acmpol,
+ dominfo,
+ new_vmlabel,
+ access_control)
+ log.info("Domain %s with new label '%s' can access its "
+ "resources? : %s" %
+ (name, new_vmlabel, str(compatible)))
+ log.info("VM labels in new domain: %s" %
+ new_acmpol.policy_get_virtualmachinelabel_names())
+ if not compatible:
+ return (-xsconstants.XSERR_RESOURCE_ACCESS, "")
+
+ rc, errors = hv_chg_policy(bin_pol, del_array, chg_array)
+ if rc == 0:
+ # Write the relabeled resources back into the file
+ dictio.dict_write(access_control, "resources", res_label_filename)
+ # Properly update all VMs to their new labels
+ for dominfo, labels in domain_label_map.items():
+ sec_lab, new_seclab = labels
+ if sec_lab != new_seclab:
+ log.info("Updating domain %s to new label '%s'." % \
+ (new_seclab, sec_lab))
+ # This better be working!
+ dominfo.set_security_label(new_seclab,
+ sec_lab,
+ new_acmpol)
+ finally:
+ log.info("----------------------------------------------")
+ mapfile_unlock()
+ __resfile_lock.release()
+
+ return rc, errors
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/util/xsconstants.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/xsconstants.py Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,104 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2007 International Business Machines Corp.
+# Author: Stefan Berger <stefanb@us.ibm.com>
+#============================================================================
+
+XS_INST_NONE = 0
+XS_INST_BOOT = (1 << 0)
+XS_INST_LOAD = (1 << 1)
+
+XS_POLICY_NONE = 0
+XS_POLICY_ACM = (1 << 0)
+
+# Some internal variables used by the Xen-API
+ACM_LABEL_VM = (1 << 0)
+ACM_LABEL_RES = (1 << 1)
+
+# Base for XS error codes for collision avoidance with other error codes
+XSERR_BASE = 0x1000
+
+# XS error codes as used by the Xen-API
+XSERR_SUCCESS = 0
+XSERR_GENERAL_FAILURE = 1 + XSERR_BASE
+XSERR_BAD_XML = 2 + XSERR_BASE # XML is wrong (not according to schema)
+XSERR_XML_PROCESSING = 3 + XSERR_BASE
+XSERR_POLICY_INCONSISTENT = 4 + XSERR_BASE # i.e., bootstrap name not a VM label
+XSERR_FILE_ERROR = 5 + XSERR_BASE
+XSERR_BAD_RESOURCE_FORMAT = 6 + XSERR_BASE # badly formatted resource
+XSERR_BAD_LABEL_FORMAT = 7 + XSERR_BASE
+XSERR_RESOURCE_NOT_LABELED = 8 + XSERR_BASE
+XSERR_RESOURCE_ALREADY_LABELED = 9 + XSERR_BASE
+XSERR_WRONG_POLICY_TYPE = 10 + XSERR_BASE
+XSERR_BOOTPOLICY_INSTALLED = 11 + XSERR_BASE
+XSERR_NO_DEFAULT_BOOT_TITLE = 12 + XSERR_BASE
+XSERR_POLICY_LOAD_FAILED = 13 + XSERR_BASE
+XSERR_POLICY_LOADED = 14 + XSERR_BASE
+XSERR_POLICY_TYPE_UNSUPPORTED = 15 + XSERR_BASE
+XSERR_BAD_CONFLICTSET = 16 + XSERR_BASE
+XSERR_RESOURCE_IN_USE = 17 + XSERR_BASE
+XSERR_BAD_POLICY_NAME = 18 + XSERR_BASE
+XSERR_VERSION_PREVENTS_UPDATE = 19 + XSERR_BASE
+XSERR_BAD_LABEL = 20 + XSERR_BASE
+XSERR_VM_WRONG_STATE = 21 + XSERR_BASE
+XSERR_POLICY_NOT_LOADED = 22 + XSERR_BASE
+XSERR_RESOURCE_ACCESS = 23 + XSERR_BASE
+XSERR_HV_OP_FAILED = 24 + XSERR_BASE
+XSERR_BOOTPOLICY_INSTALL_ERROR = 25 + XSERR_BASE
+XSERR_LAST = 25 + XSERR_BASE ## KEEP LAST
+
+XSERR_MESSAGES = [
+ '',
+ 'General Failure',
+ 'XML is malformed',
+ 'Error while processing XML',
+ 'Policy has inconsistencies',
+ 'A file access error occurred',
+ 'The resource format is not valid',
+ 'The label format is not valid',
+ 'The resource is not labeld',
+ 'The resource is already labeld',
+ 'The policy type is wrong',
+ 'The system boot policy is installed',
+ 'Could not find the default boot title',
+ 'Loading of the policy failed',
+ 'The policy is loaded',
+ 'The policy type is unsupported',
+ 'There is a bad conflict set',
+ 'The resource is in use',
+ 'The policy has an invalid name',
+ 'The version of the policy prevents an update',
+ 'The label is bad',
+ 'Operation not premittend - the VM is in the wrong state',
+ 'The policy is not loaded',
+ 'Error accessing resource',
+ 'Operation failed in hypervisor',
+ 'Boot policy installation error'
+]
+
+def xserr2string(err):
+ if err == XSERR_SUCCESS:
+ return "Success"
+ if err >= XSERR_GENERAL_FAILURE and \
+ err <= XSERR_LAST:
+ return XSERR_MESSAGES[err - XSERR_BASE]
+ return "Unknown XSERR code '%s'." % (hex(err))
+
+# Policy identifiers used in labels
+ACM_POLICY_ID = "ACM"
+
+INVALID_POLICY_PREFIX = "INV_"
+
+INVALID_SSIDREF = 0xFFFFFFFF
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/util/xspolicy.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/util/xspolicy.py Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,66 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2006,2007 International Business Machines Corp.
+# Author: Stefan Berger <stefanb@us.ibm.com>
+#============================================================================
+
+import threading
+import xsconstants
+
+class XSPolicy:
+ """
+ The base policy class for all policies administered through
+ XSPolicyAdmin.
+ """
+
+ def __init__(self, name=None, ref=None):
+ self.lock = threading.Lock()
+ self.ref = ref
+ self.name = name
+ if ref:
+ from xen.xend.XendXSPolicy import XendXSPolicy
+ self.xendxspolicy = XendXSPolicy(self, {}, ref)
+ else:
+ self.xendxspolicy = None
+
+ def grab_lock(self):
+ self.lock.acquire()
+
+ def unlock(self):
+ self.lock.release()
+
+ def get_ref(self):
+ return self.ref
+
+ def destroy(self):
+ if self.xendxspolicy:
+ self.xendxspolicy.destroy()
+
+ # All methods below should be overwritten by the inheriting class
+
+ def isloaded(self):
+ return False
+
+ def loadintohv(self):
+ return xsconstants.XSERR_POLICY_LOAD_FAILED
+
+ def get_type(self):
+ return xsconstants.XS_POLICY_NONE
+
+ def get_type_name(self):
+ return ""
+
+ def update(self, repr_new):
+ return -xsconstants.XSERR_GENERAL_FAILURE, ""
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendAPI.py
--- a/tools/python/xen/xend/XendAPI.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendAPI.py Tue Jul 10 08:39:26 2007 -0600
@@ -40,11 +40,13 @@ from XendVMMetrics import XendVMMetrics
from XendVMMetrics import XendVMMetrics
from XendPIF import XendPIF
from XendPBD import XendPBD
+from XendXSPolicy import XendXSPolicy, XendACMPolicy

from XendAPIConstants import *
from xen.util.xmlrpclib2 import stringify

from xen.util.blkif import blkdev_name_to_number
+from xen.util import xsconstants


AUTH_NONE = 'none'
@@ -467,6 +469,8 @@ classes = {
'console' : valid_console,
'SR' : valid_sr,
'task' : valid_task,
+ 'XSPolicy' : valid_object("XSPolicy"),
+ 'ACMPolicy' : valid_object("ACMPolicy"),
'debug' : valid_debug,
'network' : valid_object("network"),
'PIF' : valid_object("PIF"),
@@ -481,6 +485,8 @@ autoplug_classes = {
'VM_metrics' : XendVMMetrics,
'PBD' : XendPBD,
'PIF_metrics' : XendPIFMetrics,
+ 'XSPolicy' : XendXSPolicy,
+ 'ACMPolicy' : XendACMPolicy,
}

class XendAPI(object):
@@ -1170,7 +1176,8 @@ class XendAPI(object):
'HVM_boot_params',
'platform',
'PCI_bus',
- 'other_config']
+ 'other_config',
+ 'security_label']

VM_methods = [('clone', 'VM'),
('start', None),
@@ -1230,7 +1237,8 @@ class XendAPI(object):
'HVM_boot_params',
'platform',
'PCI_bus',
- 'other_config']
+ 'other_config',
+ 'security_label']

def VM_get(self, name, session, vm_ref):
return xen_api_success(
@@ -1601,7 +1609,22 @@ class XendAPI(object):
if dom:
return xen_api_success([dom.get_uuid()])
return xen_api_success([])
-
+
+ def VM_get_security_label(self, session, vm_ref):
+ dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
+ label = dom.get_security_label()
+ return xen_api_success(label)
+
+ def VM_set_security_label(self, session, vm_ref, sec_label, old_label):
+ dom = XendDomain.instance().get_vm_by_uuid(vm_ref)
+ (rc, errors, oldlabel, new_ssidref) = \
+ dom.set_security_label(sec_label, old_label)
+ if rc != xsconstants.XSERR_SUCCESS:
+ return xen_api_error(['SECURITY_ERROR', rc])
+ if rc == 0:
+ rc = new_ssidref
+ return xen_api_success(rc)
+
def VM_create(self, session, vm_struct):
xendom = XendDomain.instance()
domuuid = XendTask.log_progress(0, 100,
@@ -1655,6 +1678,7 @@ class XendAPI(object):
'domid': domid is None and -1 or domid,
'is_control_domain': xeninfo.info['is_control_domain'],
'metrics': xeninfo.get_metrics(),
+ 'security_label': xeninfo.get_security_label(),
'crash_dumps': []
}
return xen_api_success(record)
@@ -1952,7 +1976,8 @@ class XendAPI(object):
'runtime_properties']
VIF_attr_rw = ['device',
'MAC',
- 'MTU']
+ 'MTU',
+ 'security_label']

VIF_attr_inst = VIF_attr_rw

@@ -2054,7 +2079,10 @@ class XendAPI(object):
except Exception, exn:
log.exception(exn)
return xen_api_success({})
-
+
+ def VIF_get_security_label(self, session, vif_ref):
+ return self._VIF_get(vif_ref, 'security_label')
+
# Xen API: Class VIF_metrics
# ----------------------------------------------------------------

@@ -2098,7 +2126,8 @@ class XendAPI(object):
'virtual_size',
'sharable',
'read_only',
- 'other_config']
+ 'other_config',
+ 'security_label']
VDI_attr_inst = VDI_attr_ro + VDI_attr_rw

VDI_methods = [('destroy', None)]
@@ -2206,13 +2235,24 @@ class XendAPI(object):
xennode = XendNode.instance()
return xen_api_success(xennode.get_vdi_by_name_label(name))

+ def VDI_set_security_label(self, session, vdi_ref, sec_lab, old_lab):
+ vdi = XendNode.instance().get_vdi_by_uuid(vdi_ref)
+ rc = vdi.set_security_label(sec_lab, old_lab)
+ if rc < 0:
+ return xen_api_error(['SECURITY_ERROR', rc])
+ return xen_api_success(rc)
+
+ def VDI_get_security_label(self, session, vdi_ref):
+ vdi = XendNode.instance().get_vdi_by_uuid(vdi_ref)
+ return xen_api_success(vdi.get_security_label())

# Xen API: Class VTPM
# ----------------------------------------------------------------

VTPM_attr_rw = [ ]
VTPM_attr_ro = ['VM',
- 'backend']
+ 'backend',
+ 'runtime_properties' ]

VTPM_attr_inst = VTPM_attr_rw

@@ -2289,6 +2329,18 @@ class XendAPI(object):
vtpms = [d.get_vtpms() for d in XendDomain.instance().list('all')]
vtpms = reduce(lambda x, y: x + y, vtpms)
return xen_api_success(vtpms)
+
+ def VTPM_get_runtime_properties(self, _, vtpm_ref):
+ xendom = XendDomain.instance()
+ dominfo = xendom.get_vm_with_dev_uuid('vtpm', vtpm_ref)
+ device = dominfo.get_dev_config_by_uuid('vtpm', vtpm_ref)
+
+ try:
+ device_sxps = dominfo.getDeviceSxprs('vtpm')
+ device_dict = dict(device_sxps[0][1])
+ return xen_api_success(device_dict)
+ except:
+ return xen_api_success({})

# Xen API: Class console
# ----------------------------------------------------------------
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendConfig.py
--- a/tools/python/xen/xend/XendConfig.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendConfig.py Tue Jul 10 08:39:26 2007 -0600
@@ -22,6 +22,7 @@ import types

from xen.xend import sxp
from xen.xend import uuid
+from xen.xend import XendOptions
from xen.xend import XendAPIStore
from xen.xend.XendError import VmError
from xen.xend.XendDevices import XendDevices
@@ -29,6 +30,8 @@ from xen.xend.XendConstants import DOM_S
from xen.xend.XendConstants import DOM_STATE_HALTED
from xen.xend.server.netif import randomMAC
from xen.util.blkif import blkdev_name_to_number
+from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
+from xen.util import xsconstants

log = logging.getLogger("xend.XendConfig")
log.setLevel(logging.WARN)
@@ -159,6 +162,7 @@ XENAPI_CFG_TYPES = {
'platform': dict,
'tools_version': dict,
'other_config': dict,
+ 'security_label': str,
}

# List of legacy configuration keys that have no equivalent in the
@@ -167,7 +171,6 @@ LEGACY_UNSUPPORTED_BY_XENAPI_CFG = [
LEGACY_UNSUPPORTED_BY_XENAPI_CFG = [
# roundtripped (dynamic, unmodified)
'shadow_memory',
- 'security',
'vcpu_avail',
'cpu_weight',
'cpu_cap',
@@ -318,7 +321,6 @@ class XendConfig(dict):
'memory_static_max': 0,
'memory_dynamic_max': 0,
'devices': {},
- 'security': None,
'on_xend_start': 'ignore',
'on_xend_stop': 'ignore',
'cpus': [],
@@ -392,6 +394,9 @@ class XendConfig(dict):

def _platform_sanity_check(self):
if self.is_hvm():
+ if 'keymap' not in self['platform'] and XendOptions.instance().get_keymap():
+ self['platform']['keymap'] = XendOptions.instance().get_keymap()
+
if 'device_model' not in self['platform']:
self['platform']['device_model'] = DEFAULT_DM

@@ -421,9 +426,10 @@ class XendConfig(dict):
self._memory_sanity_check()

self['cpu_time'] = dominfo['cpu_time']/1e9
- # TODO: i don't know what the security stuff expects here
if dominfo.get('ssidref'):
- self['security'] = [['ssidref', dominfo['ssidref']]]
+ ssidref = int(dominfo.get('ssidref'))
+ self['security_label'] = XSPolicyAdminInstance().ssidref_to_vmlabel(ssidref)
+
self['shutdown_reason'] = dominfo['shutdown_reason']

# parse state into Xen API states
@@ -630,8 +636,26 @@ class XendConfig(dict):
except ValueError, e:
raise XendConfigError('cpus = %s: %s' % (cfg['cpus'], e))

- if 'security' in cfg and isinstance(cfg['security'], str):
- cfg['security'] = sxp.from_string(cfg['security'])
+ if 'security' in cfg and not cfg.get('security_label'):
+ secinfo = cfg['security']
+ if isinstance(secinfo, list):
+ # The xm command sends a list formatted like this:
+ # [['access_control', ['policy', 'xm-test'],['label', 'red']],
+ # ['ssidref', 196611]]
+ policy = ""
+ label = ""
+ policytype = xsconstants.ACM_POLICY_ID
+ for idx in range(0, len(secinfo)):
+ if secinfo[idx][0] == "access_control":
+ for aidx in range(1, len(secinfo[idx])):
+ if secinfo[idx][aidx][0] == "policy":
+ policy = secinfo[idx][aidx][1]
+ if secinfo[idx][aidx][0] == "label":
+ label = secinfo[idx][aidx][1]
+ if label != "" and policy != "":
+ cfg['security_label'] = "%s:%s:%s" % \
+ (policytype, policy, label)
+ del cfg['security']

old_state = sxp.child_value(sxp_cfg, 'state')
if old_state:
@@ -774,7 +798,6 @@ class XendConfig(dict):
self[sxp_arg] = val

_set_cfg_if_exists('shadow_memory')
- _set_cfg_if_exists('security')
_set_cfg_if_exists('features')
_set_cfg_if_exists('on_xend_stop')
_set_cfg_if_exists('on_xend_start')
@@ -886,6 +909,9 @@ class XendConfig(dict):
continue
if self.has_key(legacy) and self[legacy] not in (None, []):
sxpr.append([legacy, self[legacy]])
+
+ if self.has_key('security_label'):
+ sxpr.append(['security_label', self['security_label']])

sxpr.append(['image', self.image_sxpr()])
sxpr.append(['status', domain._stateGet()])
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendDomain.py Tue Jul 10 08:39:26 2007 -0600
@@ -49,7 +49,7 @@ from xen.xend.XendAPIConstants import *

from xen.xend.xenstore.xstransact import xstransact
from xen.xend.xenstore.xswatch import xswatch
-from xen.util import mkdir, security
+from xen.util import mkdir
from xen.xend import uuid

xc = xen.lowlevel.xc.xc()
@@ -486,7 +486,6 @@ class XendDomain:
"""
self.domains_lock.acquire()
try:
- security.refresh_ssidref(config)
dominfo = XendDomainInfo.restore(config)
return dominfo
finally:
@@ -1113,6 +1112,10 @@ class XendDomain:
raise XendInvalidDomain(str(domid))
if dominfo.getDomid() == DOM0_ID:
raise XendError("Cannot unpause privileged domain %s" % domid)
+ if dominfo._stateGet() not in (DOM_STATE_PAUSED, DOM_STATE_RUNNING):
+ raise VMBadState("Domain '%s' is not started" % domid,
+ POWER_STATE_NAMES[DOM_STATE_PAUSED],
+ POWER_STATE_NAMES[dominfo._stateGet()])
log.info("Domain %s (%d) unpaused.", dominfo.getName(),
int(dominfo.getDomid()))
dominfo.unpause()
@@ -1138,6 +1141,10 @@ class XendDomain:
raise XendInvalidDomain(str(domid))
if dominfo.getDomid() == DOM0_ID:
raise XendError("Cannot pause privileged domain %s" % domid)
+ if dominfo._stateGet() not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+ raise VMBadState("Domain '%s' is not started" % domid,
+ POWER_STATE_NAMES[DOM_STATE_RUNNING],
+ POWER_STATE_NAMES[dominfo._stateGet()])
log.info("Domain %s (%d) paused.", dominfo.getName(),
int(dominfo.getDomid()))
dominfo.pause()
@@ -1253,7 +1260,11 @@ class XendDomain:
raise XendInvalidDomain(str(domid))

if dominfo.getDomid() == DOM0_ID:
- raise XendError("Cannot save privileged domain %i" % domid)
+ raise XendError("Cannot save privileged domain %s" % str(domid))
+ if dominfo._stateGet() != DOM_STATE_RUNNING:
+ raise VMBadState("Domain is not running",
+ POWER_STATE_NAMES[DOM_STATE_RUNNING],
+ POWER_STATE_NAMES[dominfo._stateGet()])

oflags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
if hasattr(os, "O_LARGEFILE"):
@@ -1399,10 +1410,15 @@ class XendDomain:
dominfo = self.domain_lookup_nr(domid)
if not dominfo:
raise XendInvalidDomain(str(domid))
- try:
- return xc.sched_credit_domain_get(dominfo.getDomid())
- except Exception, ex:
- raise XendError(str(ex))
+
+ if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+ try:
+ return xc.sched_credit_domain_get(dominfo.getDomid())
+ except Exception, ex:
+ raise XendError(str(ex))
+ else:
+ return {'weight' : dominfo.getWeight(),
+ 'cap' : dominfo.getCap()}

def domain_sched_credit_set(self, domid, weight = None, cap = None):
"""Set credit scheduler parameters for a domain.
@@ -1436,12 +1452,15 @@ class XendDomain:
assert type(weight) == int
assert type(cap) == int

- rc = xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
+ rc = 0
+ if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+ rc = xc.sched_credit_domain_set(dominfo.getDomid(), weight, cap)
if rc == 0:
if set_weight:
dominfo.setWeight(weight)
if set_cap:
dominfo.setCap(cap)
+ self.managed_config_save(dominfo)
return rc
except Exception, ex:
log.exception(ex)
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendDomainInfo.py Tue Jul 10 08:39:26 2007 -0600
@@ -489,6 +489,9 @@ class XendDomainInfo:

def send_sysrq(self, key):
""" Send a Sysrq equivalent key via xenstored."""
+ if self._stateGet() not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
+ raise XendError("Domain '%s' is not started" % self.info['name_label'])
+
asserts.isCharConvertible(key)
self.storeDom("control/sysrq", '%c' % key)

@@ -503,9 +506,18 @@ class XendDomainInfo:
dev_uuid = self.info.device_add(dev_type, cfg_sxp = dev_config)
dev_config_dict = self.info['devices'][dev_uuid][1]
log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config_dict))
- dev_config_dict['devid'] = devid = \
- self._createDevice(dev_type, dev_config_dict)
- self._waitForDevice(dev_type, devid)
+
+ if self.domid is not None:
+ try:
+ dev_config_dict['devid'] = devid = \
+ self._createDevice(dev_type, dev_config_dict)
+ self._waitForDevice(dev_type, devid)
+ except VmError, ex:
+ raise ex
+ else:
+ devid = None
+
+ xen.xend.XendDomain.instance().managed_config_save(self)
return self.getDeviceController(dev_type).sxpr(devid)

def device_configure(self, dev_sxp, devid = None):
@@ -818,6 +830,9 @@ class XendDomainInfo:
f('image/%s/%s' % (n, v), True)
else:
f('image/%s' % n, v)
+
+ if self.info.has_key('security_label'):
+ f('security_label', self.info['security_label'])

to_store.update(self._vcpuDomDetails())

@@ -988,9 +1003,6 @@ class XendDomainInfo:
xen.xend.XendDomain.instance().managed_config_save(self)
log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
vcpus)
-
- def getLabel(self):
- return security.get_security_info(self.info, 'label')

def getMemoryTarget(self):
"""Get this domain's target memory size, in KB."""
@@ -1435,11 +1447,20 @@ class XendDomainInfo:
# allocation of 1MB. We free up 2MB here to be on the safe side.
balloon.free(2*1024) # 2MB should be plenty

- self.domid = xc.domain_create(
- domid = 0,
- ssidref = security.get_security_info(self.info, 'ssidref'),
- handle = uuid.fromString(self.info['uuid']),
- hvm = int(hvm))
+ ssidref = security.calc_dom_ssidref_from_info(self.info)
+ if ssidref == 0 and security.on():
+ raise VmError('VM is not properly labeled.')
+
+ try:
+ self.domid = xc.domain_create(
+ domid = 0,
+ ssidref = ssidref,
+ handle = uuid.fromString(self.info['uuid']),
+ hvm = int(hvm))
+ except Exception, e:
+ # may get here if due to ACM the operation is not permitted
+ if security.on():
+ raise VmError('Domain in conflict set with running domain?')

if self.domid < 0:
raise VmError('Creating domain failed: name=%s' %
@@ -1954,24 +1975,6 @@ class XendDomainInfo:
image_sxpr = self.info.image_sxpr()
if image_sxpr:
to_store['image'] = sxp.to_string(image_sxpr)
-
- if self._infoIsSet('security'):
- secinfo = self.info['security']
- to_store['security'] = sxp.to_string(secinfo)
- for idx in range(0, len(secinfo)):
- if secinfo[idx][0] == 'access_control':
- to_store['security/access_control'] = sxp.to_string(
- [secinfo[idx][1], secinfo[idx][2]])
- for aidx in range(1, len(secinfo[idx])):
- if secinfo[idx][aidx][0] == 'label':
- to_store['security/access_control/label'] = \
- secinfo[idx][aidx][1]
- if secinfo[idx][aidx][0] == 'policy':
- to_store['security/access_control/policy'] = \
- secinfo[idx][aidx][1]
- if secinfo[idx][0] == 'ssidref':
- to_store['security/ssidref'] = str(secinfo[idx][1])
-

if not self._readVm('xend/restart_count'):
to_store['xend/restart_count'] = str(0)
@@ -2090,15 +2093,6 @@ class XendDomainInfo:
info["maxmem_kb"] = XendNode.instance() \
.physinfo_dict()['total_memory'] * 1024

- #manually update ssidref / security fields
- if security.on() and info.has_key('ssidref'):
- if (info['ssidref'] != 0) and self.info.has_key('security'):
- security_field = self.info['security']
- if not security_field:
- #create new security element
- self.info.update({'security':
- [['ssidref', str(info['ssidref'])]]})
-
#ssidref field not used any longer
if 'ssidref' in info:
info.pop('ssidref')
@@ -2182,7 +2176,133 @@ class XendDomainInfo:
return self.info.get('tools_version', {})
def get_metrics(self):
return self.metrics.get_uuid();
-
+
+
+ def get_security_label(self):
+ domid = self.getDomid()
+
+ from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
+ xspol = XSPolicyAdminInstance().get_loaded_policy()
+
+ if domid == 0:
+ if xspol:
+ label = xspol.policy_get_domain_label_formatted(domid)
+ else:
+ label = ""
+ else:
+ label = self.info.get('security_label', '')
+ return label
+
+ def set_security_label(self, seclab, old_seclab, xspol=None):
+ """
+ Set the security label of a domain from its old to
+ a new value.
+ @param seclab New security label formatted in the form
+ <policy type>:<policy name>:<vm label>
+ @param old_seclab The current security label that the
+ VM must have.
+ @param xspol An optional policy under which this
+ update should be done. If not given,
+ then the current active policy is used.
+ @return Returns return code, a string with errors from
+ the hypervisor's operation, old label of the
+ domain
+ """
+ rc = 0
+ errors = ""
+ old_label = ""
+ new_ssidref = 0
+ domid = self.getDomid()
+ res_labels = None
+
+ from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
+ from xen.util import xsconstants
+
+ state = self._stateGet()
+ # Relabel only HALTED or RUNNING or PAUSED domains
+ if domid != 0 and \
+ state not in \
+ [ DOM_STATE_HALTED, DOM_STATE_RUNNING, DOM_STATE_PAUSED, \
+ DOM_STATE_SUSPENDED ]:
+ log.warn("Relabeling domain not possible in state '%s'" %
+ DOM_STATES[state])
+ return (-xsconstants.XSERR_VM_WRONG_STATE, "", "", 0)
+
+ # Remove security label. Works only for halted domains
+ if not seclab or seclab == "":
+ if state not in [ DOM_STATE_HALTED ]:
+ return (-xsconstants.XSERR_VM_WRONG_STATE, "", "", 0)
+
+ if self.info.has_key('security_label'):
+ old_label = self.info['security_label']
+ # Check label against expected one.
+ if old_label != old_seclab:
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+ del self.info['security_label']
+ xen.xend.XendDomain.instance().managed_config_save(self)
+ return (xsconstants.XSERR_SUCCESS, "", "", 0)
+
+ tmp = seclab.split(":")
+ if len(tmp) != 3:
+ return (-xsconstants.XSERR_BAD_LABEL_FORMAT, "", "", 0)
+ typ, policy, label = tmp
+
+ poladmin = XSPolicyAdminInstance()
+ if not xspol:
+ xspol = poladmin.get_policy_by_name(policy)
+
+ if state in [ DOM_STATE_RUNNING, DOM_STATE_PAUSED ]:
+ #if domain is running or paused try to relabel in hypervisor
+ if not xspol:
+ return (-xsconstants.XSERR_POLICY_NOT_LOADED, "", "", 0)
+
+ if typ != xspol.get_type_name() or \
+ policy != xspol.get_name():
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+
+ if typ == xsconstants.ACM_POLICY_ID:
+ new_ssidref = xspol.vmlabel_to_ssidref(label)
+ if new_ssidref == xsconstants.INVALID_SSIDREF:
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+
+ # Check that all used resources are accessible under the
+ # new label
+ if not security.resources_compatible_with_vmlabel(xspol,
+ self, label):
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+
+ #Check label against expected one.
+ old_label = self.get_security_label()
+ if old_label != old_seclab:
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+
+ # relabel domain in the hypervisor
+ rc, errors = security.relabel_domains([[domid, new_ssidref]])
+ log.info("rc from relabeling in HV: %d" % rc)
+ else:
+ return (-xsconstants.XSERR_POLICY_TYPE_UNSUPPORTED, "", "", 0)
+
+ if rc == 0:
+ # HALTED, RUNNING or PAUSED
+ if domid == 0:
+ if xspol:
+ ssidref = poladmin.set_domain0_bootlabel(xspol, label)
+ else:
+ return (-xsconstants.XSERR_POLICY_NOT_LOADED, "", "", 0)
+ else:
+ if self.info.has_key('security_label'):
+ old_label = self.info['security_label']
+ # Check label against expected one, unless wildcard
+ if old_label != old_seclab:
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+
+ self.info['security_label'] = seclab
+ try:
+ xen.xend.XendDomain.instance().managed_config_save(self)
+ except:
+ pass
+ return (rc, errors, old_label, new_ssidref)
+
def get_on_shutdown(self):
after_shutdown = self.info.get('actions_after_shutdown')
if not after_shutdown or after_shutdown not in XEN_API_ON_NORMAL_EXIT:
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendError.py
--- a/tools/python/xen/xend/XendError.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendError.py Tue Jul 10 08:39:26 2007 -0600
@@ -174,6 +174,23 @@ class NetworkError(XendAPIError):

def __str__(self):
return 'NETWORK_ERROR: %s %s' % (self.error, self.network)
+
+from xen.util.xsconstants import xserr2string
+
+class SecurityError(XendAPIError):
+ def __init__(self, error, message=None):
+ XendAPIError.__init__(self)
+ self.error = error
+ if not message:
+ self.message = xserr2string(-error)
+ else:
+ self.message = message
+
+ def get_api_error(self):
+ return ['SECURITY_ERROR', self.error, self.message]
+
+ def __str__(self):
+ return 'SECURITY_ERROR: %s:%s' % (self.error, self.message)

XEND_ERROR_AUTHENTICATION_FAILED = ('ELUSER', 'Authentication Failed')
XEND_ERROR_SESSION_INVALID = ('EPERMDENIED', 'Session Invalid')
@@ -188,4 +205,5 @@ XEND_ERROR_VTPM_INVALID = ('EVT
XEND_ERROR_VTPM_INVALID = ('EVTPMINVALID', 'VTPM Invalid')
XEND_ERROR_VDI_INVALID = ('EVDIINVALID', 'VDI Invalid')
XEND_ERROR_SR_INVALID = ('ESRINVALID', 'SR Invalid')
+XEND_ERROR_XSPOLICY_INVALID = ('EXSPOLICYINVALID', 'XS Invalid')
XEND_ERROR_TODO = ('ETODO', 'Lazy Programmer Error')
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendNode.py Tue Jul 10 08:39:26 2007 -0600
@@ -533,18 +533,70 @@ class XendNode:
['version', ver],
['machine', mch]]

+ def list_to_rangepairs(self,cmap):
+ cmap.sort()
+ pairs = []
+ x = y = 0
+ for i in range(0,len(cmap)):
+ try:
+ if ((cmap[y+1] - cmap[i]) > 1):
+ pairs.append((cmap[x],cmap[y]))
+ x = y = i+1
+ else:
+ y = y + 1
+ # if we go off the end, then just add x to y
+ except IndexError:
+ pairs.append((cmap[x],cmap[y]))
+
+ return pairs
+
+ def format_pairs(self,pairs):
+ if not pairs:
+ return "no cpus"
+ out = ""
+ for f,s in pairs:
+ if (f==s):
+ out += '%d'%f
+ else:
+ out += '%d-%d'%(f,s)
+ out += ','
+ # trim trailing ','
+ return out[:-1]
+
+ def list_to_strrange(self,list):
+ return self.format_pairs(self.list_to_rangepairs(list))
+
+ def format_node_to_cpu(self, pinfo):
+ str=''
+ whitespace=''
+ try:
+ node_to_cpu=pinfo['node_to_cpu']
+ for i in range(0, pinfo['nr_nodes']):
+ str+='%snode%d:%s\n' % (whitespace,
+ i,
+ self.list_to_strrange(node_to_cpu[i]))
+ whitespace='%25s' % ''
+ except:
+ str='none\n'
+ return str[:-1];
+
+ def count_cpus(self, pinfo):
+ count=0
+ node_to_cpu=pinfo['node_to_cpu']
+ for i in range(0, pinfo['nr_nodes']):
+ count+=len(node_to_cpu[i])
+ return count;
+
def physinfo(self):
info = self.xc.physinfo()

- info['nr_cpus'] = (info['nr_nodes'] *
- info['sockets_per_node'] *
- info['cores_per_socket'] *
- info['threads_per_core'])
+ info['nr_cpus'] = self.count_cpus(info)
info['cpu_mhz'] = info['cpu_khz'] / 1000

# physinfo is in KiB, need it in MiB
info['total_memory'] = info['total_memory'] / 1024
info['free_memory'] = info['free_memory'] / 1024
+ info['node_to_cpu'] = self.format_node_to_cpu(info)

ITEM_ORDER = ['nr_cpus',
'nr_nodes',
@@ -555,6 +607,7 @@ class XendNode:
'hw_caps',
'total_memory',
'free_memory',
+ 'node_to_cpu'
]

return [[k, info[k]] for k in ITEM_ORDER]
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendOptions.py
--- a/tools/python/xen/xend/XendOptions.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendOptions.py Tue Jul 10 08:39:26 2007 -0600
@@ -277,6 +277,9 @@ class XendOptions:
def get_vncpasswd_default(self):
return self.get_config_string('vncpasswd',
self.vncpasswd_default)
+
+ def get_keymap(self):
+ return self.get_config_value('keymap', None)

class XendOptionsFile(XendOptions):

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendVDI.py
--- a/tools/python/xen/xend/XendVDI.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/XendVDI.py Tue Jul 10 08:39:26 2007 -0600
@@ -23,6 +23,7 @@ import os

from xen.util.xmlrpclib2 import stringify
from xmlrpclib import dumps, loads
+from xen.util import security, xsconstants

KB = 1024
MB = 1024 * 1024
@@ -160,6 +161,17 @@ class XendVDI(AutoSaveObject):

def get_location(self):
raise NotImplementedError()
+
+ def set_security_label(self, sec_lab, old_lab):
+ image = self.get_location()
+ rc = security.set_resource_label_xapi(image, sec_lab, old_lab)
+ if rc != xsconstants.XSERR_SUCCESS:
+ raise SecurityError(rc)
+ return rc
+
+ def get_security_label(self):
+ image = self.get_location()
+ return security.get_resource_label_xapi(image)


class XendQCoWVDI(XendVDI):
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendXSPolicy.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendXSPolicy.py Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,222 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (c) 2007 IBM Corporation
+# Copyright (c) 2006 Xensource
+#============================================================================
+
+import logging
+from xen.xend.XendBase import XendBase
+from xen.xend.XendError import *
+from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
+from xen.util import xsconstants, security
+import base64
+
+log = logging.getLogger("xend.XendXSPolicy")
+log.setLevel(logging.TRACE)
+
+
+class XendXSPolicy(XendBase):
+ """ Administration class for an XSPolicy. """
+
+ def getClass(self):
+ return "XSPolicy"
+
+ def getMethods(self):
+ methods = ['activate_xspolicy']
+ return XendBase.getMethods() + methods
+
+ def getFuncs(self):
+ funcs = [ 'get_xstype',
+ 'set_xspolicy',
+ 'get_xspolicy',
+ 'rm_xsbootpolicy',
+ 'get_resource_label',
+ 'set_resource_label',
+ 'get_labeled_resources' ]
+ return XendBase.getFuncs() + funcs
+
+ getClass = classmethod(getClass)
+ getMethods = classmethod(getMethods)
+ getFuncs = classmethod(getFuncs)
+
+ def __init__(self, xspol, record, uuid):
+ """ xspol = actual XSPolicy object """
+ self.xspol = xspol
+ XendBase.__init__(self, uuid, record)
+
+ def get_record(self):
+ xspol_record = {
+ 'uuid' : self.get_uuid(),
+ 'flags' : XSPolicyAdminInstance().get_policy_flags(self.xspol),
+ 'repr' : self.xspol.toxml(),
+ 'type' : self.xspol.get_type(),
+ }
+ return xspol_record
+
+ def get_xstype(self):
+ return XSPolicyAdminInstance().isXSEnabled()
+
+ def set_xspolicy(self, xstype, xml, flags, overwrite):
+ ref = ""
+ xstype = int(xstype)
+ flags = int(flags)
+
+ polstate = { 'xs_ref': "", 'repr' : "", 'type' : 0,
+ 'flags' : 0 , 'version': 0 , 'errors' : "", 'xserr' : 0 }
+ if xstype == xsconstants.XS_POLICY_ACM:
+ poladmin = XSPolicyAdminInstance()
+ try:
+ (xspol, rc, errors) = poladmin.add_acmpolicy_to_system(
+ xml, flags,
+ overwrite)
+ if rc != 0:
+ polstate.update( { 'xserr' : rc,
+ 'errors': base64.b64encode(errors) } )
+ else:
+ ref = xspol.get_ref()
+ polstate = {
+ 'xs_ref' : ref,
+ 'flags' : poladmin.get_policy_flags(xspol),
+ 'type' : xstype,
+ 'repr' : "",
+ 'version': xspol.get_version(),
+ 'errors' : base64.b64encode(errors),
+ 'xserr' : rc,
+ }
+ except Exception, e:
+ raise
+ else:
+ raise SecurityError(-xsconstants.XSERR_POLICY_TYPE_UNSUPPORTED)
+ return polstate
+
+ def activate_xspolicy(self, flags):
+ flags = int(flags)
+ rc = -xsconstants.XSERR_GENERAL_FAILURE
+ poladmin = XSPolicyAdminInstance()
+ try:
+ rc = poladmin.activate_xspolicy(self.xspol, flags)
+ except Exception, e:
+ log.info("Activate_policy: %s" % str(e))
+ if rc != flags:
+ raise SecurityError(rc)
+ return flags
+
+ def get_xspolicy(self):
+ polstate = { 'xs_ref' : "",
+ 'repr' : "",
+ 'type' : 0,
+ 'flags' : 0,
+ 'version': "",
+ 'errors' : "",
+ 'xserr' : 0 }
+ poladmin = XSPolicyAdminInstance()
+ refs = poladmin.get_policies_refs()
+ # Will return one or no policy
+ if refs and len(refs) > 0:
+ ref = refs[0]
+ xspol = XSPolicyAdminInstance().policy_from_ref(ref)
+ try:
+ xspol.grab_lock()
+
+ polstate = {
+ 'xs_ref' : ref,
+ 'repr' : xspol.toxml(),
+ 'type' : xspol.get_type(),
+ 'flags' : poladmin.get_policy_flags(xspol),
+ 'version': xspol.get_version(),
+ 'errors' : "",
+ 'xserr' : 0,
+ }
+ finally:
+ if xspol:
+ xspol.unlock()
+ return polstate
+
+ def rm_xsbootpolicy(self):
+ rc = XSPolicyAdminInstance().rm_bootpolicy()
+ if rc != xsconstants.XSERR_SUCCESS:
+ raise SecurityError(rc)
+
+ def get_labeled_resources(self):
+ return security.get_labeled_resources_xapi()
+
+ def set_resource_label(self, resource, sec_lab, old_lab):
+ rc = security.set_resource_label_xapi(resource, sec_lab, old_lab)
+ if rc != xsconstants.XSERR_SUCCESS:
+ raise SecurityError(rc)
+
+ def get_resource_label(self, resource):
+ res = security.get_resource_label_xapi(resource)
+ return res
+
+ get_xstype = classmethod(get_xstype)
+ get_xspolicy = classmethod(get_xspolicy)
+ set_xspolicy = classmethod(set_xspolicy)
+ rm_xsbootpolicy = classmethod(rm_xsbootpolicy)
+ set_resource_label = classmethod(set_resource_label)
+ get_resource_label = classmethod(get_resource_label)
+ get_labeled_resources = classmethod(get_labeled_resources)
+
+
+class XendACMPolicy(XendXSPolicy):
+ """ Administration class of an ACMPolicy """
+
+ def getClass(self):
+ return "ACMPolicy"
+
+ def getAttrRO(self):
+ attrRO = [ 'xml',
+ 'map',
+ 'binary',
+ 'header' ]
+ return XendXSPolicy.getAttrRO() + attrRO
+
+ getClass = classmethod(getClass)
+ getAttrRO = classmethod(getAttrRO)
+
+ def __init__(self, acmpol, record, uuid):
+ """ acmpol = actual ACMPolicy object """
+ self.acmpol = acmpol
+ XendXSPolicy.__init__(self, acmpol, record, uuid)
+
+ def get_record(self):
+ polstate = {
+ 'uuid' : self.get_uuid(),
+ 'flags' : XSPolicyAdminInstance().get_policy_flags(self.acmpol),
+ 'repr' : self.acmpol.toxml(),
+ 'type' : self.acmpol.get_type(),
+ }
+ return polstate
+
+ def get_header(self):
+ header = {
+ 'policyname' : "", 'policyurl' : "", 'reference' : "",
+ 'date' : "", 'namespaceurl' : "", 'version' : "",
+ }
+ try:
+ header = self.acmpol.get_header_fields_map()
+ except:
+ pass
+ return header
+
+ def get_xml(self):
+ return self.acmpol.toxml()
+
+ def get_map(self):
+ return self.acmpol.get_map()
+
+ def get_binary(self):
+ polbin = self.acmpol.get_bin()
+ return base64.b64encode(polbin)
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/XendXSPolicyAdmin.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/python/xen/xend/XendXSPolicyAdmin.py Tue Jul 10 08:39:26 2007 -0600
@@ -0,0 +1,314 @@
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2006,2007 International Business Machines Corp.
+# Author: Stefan Berger <stefanb@us.ibm.com>
+#============================================================================
+import os
+import shutil
+
+from xml.dom import minidom, Node
+
+from xen.xend.XendLogging import log
+from xen.xend import uuid
+from xen.util import security, xsconstants, dictio, bootloader
+from xen.util.xspolicy import XSPolicy
+from xen.util.acmpolicy import ACMPolicy
+from xen.xend.XendError import SecurityError
+
+XS_MANAGED_POLICIES_FILE = "/etc/xen/acm-security/policies/managed_policies"
+
+class XSPolicyAdmin:
+ """ The class that handles the managed policies in the system.
+ Handles adding and removing managed policies. All managed
+ policies are handled using a reference (UUID) which is
+ assigned to the policy by this class.
+ """
+
+ def __init__(self, maxpolicies):
+ """ Create a management class for managing the system's
+ policies.
+
+ @param maxpolicies: The max. number of policies allowed
+ on the system (currently '1')
+ """
+ self.maxpolicies = maxpolicies
+ try:
+ self.policies = dictio.dict_read("managed_policies",
+ XS_MANAGED_POLICIES_FILE)
+ except Exception, e:
+ self.policies = {}
+
+ self.xsobjs = {}
+ for ref, data in self.policies.items():
+ name = data[0]
+ typ = data[1]
+ try:
+ if typ == xsconstants.ACM_POLICY_ID:
+ self.xsobjs[ref] = ACMPolicy(name=name, ref=ref)
+ else:
+ del self.policies[ref]
+ except Exception, e:
+ log.error("XSPolicyAdmin: Could not find policy '%s': %s" %
+ (name, str(e)))
+ del self.policies[ref]
+ log.debug("XSPolicyAdmin: Known policies: %s" % self.policies)
+
+ def isXSEnabled(self):
+ """ Check whether 'security' is enabled on this system.
+ This currently only checks for ACM-enablement.
+ """
+ rc = 0
+ if security.on():
+ rc |= xsconstants.XS_POLICY_ACM
+ return rc
+
+ def add_acmpolicy_to_system(self, xmltext, flags, overwrite):
+ """ Add an ACM policy's xml representation to the system. The
+ policy will automatically be compiled
+ flags:
+ XS_INST_BOOT : make policy the one to boot the system with
+ by default; if there's a policy already installed,
+ refuse to install this policy unless its one with
+ the same name
+ XS_INST_LOAD : load the policy immediately; if this does not work
+ refuse to install this policy
+ overwrite:
+ If any policy is installed and this is False, refuse to install
+ this policy
+ If flags is True, then any existing policy will be removed from
+ the system and the new one will be installed
+ """
+ errors = ""
+ loadedpol = self.get_loaded_policy()
+ if loadedpol:
+ # This is meant as an update to a currently loaded policy
+ if flags & xsconstants.XS_INST_LOAD == 0:
+ raise SecurityError(-xsconstants.XSERR_POLICY_LOADED)
+ rc, errors = loadedpol.update(xmltext)
+ if rc == 0:
+ self.rm_bootpolicy()
+ irc = self.activate_xspolicy(loadedpol, flags)
+ return (loadedpol, rc, errors)
+
+ try:
+ dom = minidom.parseString(xmltext.encode("utf-8"))
+ except:
+ raise SecurityError(-xsconstants.XSERR_BAD_XML)
+
+ ref = uuid.createString()
+
+ acmpol = ACMPolicy(dom=dom, ref=ref)
+
+ #First some basic tests that do not modify anything:
+
+ if flags & xsconstants.XS_INST_BOOT and not overwrite:
+ filename = acmpol.get_filename(".bin","",dotted=True)
+ if bootloader.get_default_policy != None and \
+ not bootloader.loads_default_policy(filename):
+ raise SecurityError(-xsconstants.XSERR_BOOTPOLICY_INSTALLED)
+
+ if not overwrite and len(self.policies) >= self.maxpolicies:
+ raise SecurityError(-xsconstants.XSERR_BOOTPOLICY_INSTALLED)
+
+ if overwrite:
+ #This should only give one key since only one policy is
+ #allowed.
+ keys = self.policies.keys()
+ for k in keys:
+ self.rm_bootpolicy()
+ rc = self.rm_policy_from_system(k, force=overwrite)
+ if rc != xsconstants.XSERR_SUCCESS:
+ raise SecurityError(rc)
+
+ rc = acmpol.compile()
+ if rc != 0:
+ raise SecurityError(rc)
+
+ if flags & xsconstants.XS_INST_LOAD:
+ rc = acmpol.loadintohv()
+ if rc != 0:
+ raise SecurityError(rc)
+
+ if flags & xsconstants.XS_INST_BOOT:
+ rc = self.make_boot_policy(acmpol)
+ if rc != 0:
+ # If it cannot be installed due to unsupported
+ # bootloader, let it be ok.
+ pass
+
+ if dom:
+ new_entry = { ref : tuple([acmpol.get_name(),
+ xsconstants.ACM_POLICY_ID]) }
+ self.policies.update(new_entry)
+ self.xsobjs[ref] = acmpol
+ dictio.dict_write(self.policies,
+ "managed_policies",
+ XS_MANAGED_POLICIES_FILE)
+ return (acmpol, xsconstants.XSERR_SUCCESS, errors)
+
+ def make_boot_policy(self, acmpol):
+ spolfile = acmpol.get_filename(".bin")
+ dpolfile = "/boot/" + acmpol.get_filename(".bin","",dotted=True)
+ if not os.path.isfile(spolfile):
+ log.error("binary policy file does not exist.")
+ return -xsconstants.XSERR_FILE_ERROR
+ try:
+ shutil.copyfile(spolfile, dpolfile)
+ except:
+ return -xsconstants.XSERR_FILE_ERROR
+
+ try:
+ filename = acmpol.get_filename(".bin","",dotted=True)
+ if bootloader.set_default_boot_policy(filename) != True:
+ return xsconstants.XSERR_BOOTPOLICY_INSTALL_ERROR
+ except:
+ return xsconstants.XSERR_FILE_ERROR
+ return xsconstants.XSERR_SUCCESS
+
+ def activate_xspolicy(self, xspol, flags):
+ rc = xsconstants.XSERR_SUCCESS
+ if flags & xsconstants.XS_INST_LOAD:
+ rc = xspol.loadintohv()
+ if rc == xsconstants.XSERR_SUCCESS and \
+ flags & xsconstants.XS_INST_BOOT:
+ rc = self.make_boot_policy(xspol)
+ if rc == xsconstants.XSERR_SUCCESS:
+ rc = flags
+ return rc
+
+ def rm_policy_from_system(self, ref, force=False):
+ if self.policies.has_key(ref):
+ acmpol = self.xsobjs[ref]
+ rc = acmpol.destroy()
+ if rc == xsconstants.XSERR_SUCCESS or force:
+ del self.policies[ref]
+ del self.xsobjs[ref]
+ dictio.dict_write(self.policies,
+ "managed_policies",
+ XS_MANAGED_POLICIES_FILE)
+ rc = xsconstants.XSERR_SUCCESS
+ return rc
+
+ def rm_bootpolicy(self):
+ """ Remove any (ACM) boot policy from the grub configuration file
+ """
+ rc = 0
+ title = bootloader.get_default_title()
+ if title != None:
+ polnames = []
+ for (k, v) in self.xsobjs.items():
+ polnames.append(v.get_filename(".bin","",dotted=True))
+ bootloader.rm_policy_from_boottitle(title, polnames)
+ else:
+ rc = -xsconstants.XSERR_NO_DEFAULT_BOOT_TITLE
+ return rc
+
+ def get_policy_flags(self, acmpol):
+ """ Get the currently active flags of a policy, i.e., whether the
+ system is using this policy as its boot policy for the default
+ boot title.
+ """
+ flags = 0
+
+ filename = acmpol.get_filename(".bin","", dotted=True)
+ if bootloader.loads_default_policy(filename):
+ flags |= xsconstants.XS_INST_BOOT
+
+ if acmpol.isloaded():
+ flags |= xsconstants.XS_INST_LOAD
+ return flags
+
+ def get_policies(self):
+ """ Get all managed policies. """
+ return self.xsobjs.values()
+
+ def get_policies_refs(self):
+ """ Get all managed policies' references. """
+ return self.xsobjs.keys()
+
+ def has_ref(self, ref):
+ """ Check whether there is a policy with the given reference """
+ return self.xsobjs.has_key(ref)
+
+ def policy_from_ref(self, ref):
+ """ Get the policy's object given its reference """
+ if ref in self.xsobjs.keys():
+ return self.xsobjs[ref]
+ return None
+
+ def ref_from_polname(self, polname):
+ """ Get the reference of the policy given its name """
+ ref = None
+ for (k, v) in self.xsobjs.items():
+ if v.get_name() == polname:
+ ref = k
+ break
+ return ref
+
+ def lock_policy(self, ref):
+ """ get exclusive access to a policy """
+ self.xsobjs[ref].grab_lock()
+
+ def unlock_policy(self, ref):
+ """ release exclusive access to a policy """
+ self.xsobjs[ref].unlock()
+
+ def get_loaded_policy(self):
+ for pol in self.xsobjs.values():
+ if pol.isloaded():
+ return pol
+ return None
+
+ def get_policy_by_name(self, name):
+ for pol in self.xsobjs.values():
+ if pol.get_name() == name:
+ return pol
+ return None
+
+ def get_domain0_bootlabel(self):
+ """ Get the domain0 bootlabel from the default boot title """
+ title = ""
+ def_title = bootloader.get_default_title()
+ line = bootloader.get_kernel_val(def_title, "ssidref")
+ if line:
+ parms = line.split(":",1)
+ if len(parms) > 1:
+ title = parms[1]
+ return title
+
+ def set_domain0_bootlabel(self, xspol, label):
+ """ Set the domain-0 bootlabel under the given policy """
+ return xspol.set_vm_bootlabel(label)
+
+ def rm_domain0_bootlabel(self):
+ """ Remove the domain-0 bootlabel from the default boot title """
+ def_title = bootloader.get_default_title()
+ return bootloader.set_kernel_attval(def_title, "ssidref", None)
+
+ def ssidref_to_vmlabel(self, ssidref):
+ """ Given an ssidref, return the vmlabel under the current policy """
+ vmlabel = ""
+ pol = self.get_loaded_policy()
+ if pol:
+ vmlabel = pol.policy_get_domain_label_by_ssidref_formatted(ssidref)
+ return vmlabel
+
+poladmin = None
+
+def XSPolicyAdminInstance(maxpolicies=1):
+ global poladmin
+ if poladmin == None:
+ poladmin = XSPolicyAdmin(maxpolicies)
+ return poladmin
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/server/SrvDomain.py
--- a/tools/python/xen/xend/server/SrvDomain.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/server/SrvDomain.py Tue Jul 10 08:39:26 2007 -0600
@@ -155,7 +155,8 @@ class SrvDomain(SrvDir):
def op_domain_sched_credit_set(self, _, req):
fn = FormFn(self.xd.domain_sched_credit_set,
[['dom', 'int'],
- ['weight', 'int']])
+ ['weight', 'int'],
+ ['cap', 'int']])
val = fn(req.args, {'dom': self.dom.domid})
return val

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/server/blkif.py
--- a/tools/python/xen/xend/server/blkif.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/server/blkif.py Tue Jul 10 08:39:26 2007 -0600
@@ -73,10 +73,17 @@ class BlkifController(DevController):
back['uuid'] = uuid

if security.on():
- (label, ssidref, policy) = security.get_res_security_details(uname)
- back.update({'acm_label' : label,
- 'acm_ssidref': str(ssidref),
- 'acm_policy' : policy})
+ (label, ssidref, policy) = \
+ security.get_res_security_details(uname)
+ domain_label = self.vm.get_security_label()
+ if domain_label:
+ rc = security.res_security_check_xapi(label, ssidref, policy,
+ domain_label)
+ if rc == 0:
+ raise VmError("VM's access to block device '%s' denied." %
+ uname)
+ else:
+ raise VmError("VM must have a security label.")

devid = blkif.blkdev_name_to_number(dev)
if devid is None:
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/server/netif.py
--- a/tools/python/xen/xend/server/netif.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/server/netif.py Tue Jul 10 08:39:26 2007 -0600
@@ -107,6 +107,7 @@ class NetifController(DevController):
uuid = config.get('uuid')
ipaddr = config.get('ip')
model = config.get('model')
+ accel = config.get('accel')

if not typ:
typ = xoptions.netback_type
@@ -131,6 +132,8 @@ class NetifController(DevController):
back['uuid'] = uuid
if model:
back['model'] = model
+ if accel:
+ back['accel'] = accel

config_path = "device/%s/%d/" % (self.deviceClass, devid)
for x in back:
@@ -157,10 +160,10 @@ class NetifController(DevController):
config_path = "device/%s/%d/" % (self.deviceClass, devid)
devinfo = ()
for x in ( 'script', 'ip', 'bridge', 'mac',
- 'type', 'vifname', 'rate', 'uuid', 'model' ):
+ 'type', 'vifname', 'rate', 'uuid', 'model', 'accel'):
y = self.vm._readVm(config_path + x)
devinfo += (y,)
- (script, ip, bridge, mac, typ, vifname, rate, uuid, model) = devinfo
+ (script, ip, bridge, mac, typ, vifname, rate, uuid, model, accel) = devinfo

if script:
result['script'] = script
@@ -180,5 +183,7 @@ class NetifController(DevController):
result['uuid'] = uuid
if model:
result['model'] = model
+ if accel:
+ result['accel'] = accel

return result
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xend/server/vfbif.py
--- a/tools/python/xen/xend/server/vfbif.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xend/server/vfbif.py Tue Jul 10 08:39:26 2007 -0600
@@ -76,6 +76,11 @@ class VfbifController(DevController):
args += [ "--listen", vnclisten ]
if config.has_key("keymap"):
args += ["-k", "%s" % config["keymap"]]
+ else:
+ xoptions = xen.xend.XendOptions.instance()
+ if xoptions.get_keymap():
+ args += ["-k", "%s" % xoptions.get_keymap()]
+
spawn_detached(args[0], args + std_args, os.environ)
elif t == "sdl":
args = [xen.util.auxbin.pathTo("xen-sdlfb")]
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xm/create.py Tue Jul 10 08:39:26 2007 -0600
@@ -318,7 +318,8 @@ gopts.var('vfb', val="type={vnc,sdl},vnc
given DISPLAY and XAUTHORITY, which default to the current user's
ones.""")

-gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT,backend=DOM,vifname=NAME",
+gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT," + \
+ "backend=DOM,vifname=NAME,rate=RATE,model=MODEL,accel=ACCEL",
fn=append_value, default=[],
use="""Add a network interface with the given MAC address and bridge.
The vif is configured by calling the given configuration script.
@@ -330,6 +331,9 @@ gopts.var('vif', val="type=TYPE,mac=MAC,
If backend is not specified the default backend driver domain is used.
If vifname is not specified the backend virtual interface will have name vifD.N
where D is the domain id and N is the interface id.
+ If rate is not specified the default rate is used.
+ If model is not specified the default model is used.
+ If accel is not specified an accelerator plugin module is not used.
This option may be repeated to add more than one vif.
Specifying vifs will increase the number of interfaces as needed.""")

@@ -710,7 +714,7 @@ def configure_vifs(config_devs, vals):

def f(k):
if k not in ['backend', 'bridge', 'ip', 'mac', 'script', 'type',
- 'vifname', 'rate', 'model']:
+ 'vifname', 'rate', 'model', 'accel']:
err('Invalid vif option: ' + k)

config_vif.append([k, d[k]])
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/python/xen/xm/main.py
--- a/tools/python/xen/xm/main.py Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/python/xen/xm/main.py Tue Jul 10 08:39:26 2007 -0600
@@ -700,13 +700,7 @@ def xm_save(args):
if serverType == SERVER_XEN_API:
server.xenapi.VM.save(get_single_vm(dom), savefile, checkpoint)
else:
- try:
- dominfo = parse_doms_info(server.xend.domain(dom))
- except xmlrpclib.Fault, ex:
- raise ex
-
- domid = dominfo['domid']
- server.xend.domain.save(domid, savefile, checkpoint)
+ server.xend.domain.save(dom, savefile, checkpoint)

def xm_restore(args):
arg_check(args, "restore", 1, 2)
@@ -1529,7 +1523,7 @@ def xm_sched_credit(args):

doms = filter(lambda x : domid_match(domid, x),
[parse_doms_info(dom)
- for dom in getDomains(None, 'running')])
+ for dom in getDomains(None, 'all')])

if weight is None and cap is None:
if domid is not None and doms == []:
@@ -1545,7 +1539,7 @@ def xm_sched_credit(args):
server.xenapi.VM.get_metrics(
get_single_vm(d['name'])))
else:
- info = server.xend.domain.sched_credit_get(d['domid'])
+ info = server.xend.domain.sched_credit_get(d['name'])
except xmlrpclib.Fault:
pass

@@ -1557,8 +1551,8 @@ def xm_sched_credit(args):
info['cap'] = int(info['cap'])

info['name'] = d['name']
- info['domid'] = int(d['domid'])
- print( ("%(name)-32s %(domid)5d %(weight)6d %(cap)4d") % info)
+ info['domid'] = str(d['domid'])
+ print( ("%(name)-32s %(domid)5s %(weight)6d %(cap)4d") % info)
else:
if domid is None:
# place holder for system-wide scheduler parameters
@@ -1566,14 +1560,24 @@ def xm_sched_credit(args):
usage('sched-credit')

if serverType == SERVER_XEN_API:
- server.xenapi.VM.add_to_VCPUs_params_live(
- get_single_vm(domid),
- "weight",
- weight)
- server.xenapi.VM.add_to_VCPUs_params_live(
- get_single_vm(domid),
- "cap",
- cap)
+ if doms[0]['domid']:
+ server.xenapi.VM.add_to_VCPUs_params_live(
+ get_single_vm(domid),
+ "weight",
+ weight)
+ server.xenapi.VM.add_to_VCPUs_params_live(
+ get_single_vm(domid),
+ "cap",
+ cap)
+ else:
+ server.xenapi.VM.add_to_VCPUs_params(
+ get_single_vm(domid),
+ "weight",
+ weight)
+ server.xenapi.VM.add_to_VCPUs_params(
+ get_single_vm(domid),
+ "cap",
+ cap)
else:
result = server.xend.domain.sched_credit_set(domid, weight, cap)
if result != 0:
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/security/policies/security_policy.xsd
--- a/tools/security/policies/security_policy.xsd Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/security/policies/security_policy.xsd Tue Jul 10 08:39:26 2007 -0600
@@ -22,7 +22,7 @@
<xsd:element name="Reference" type="xsd:string" minOccurs="0" maxOccurs="1" />
<xsd:element name="Date" minOccurs="0" maxOccurs="1" type="xsd:string"></xsd:element>
<xsd:element name="NameSpaceUrl" minOccurs="0" maxOccurs="1" type="xsd:string"></xsd:element>
- <xsd:element name="Version" minOccurs="0" maxOccurs="1" type="VersionFormat"/>
+ <xsd:element name="Version" minOccurs="1" maxOccurs="1" type="VersionFormat"/>
<xsd:element ref="FromPolicy" minOccurs="0" maxOccurs="1"/>
</xsd:sequence>
</xsd:complexType>
@@ -91,23 +91,23 @@
<xsd:sequence>
<xsd:element maxOccurs="unbounded" minOccurs="1" ref="Type" />
</xsd:sequence>
- <xsd:attribute name="name" type="xsd:string" use="optional"></xsd:attribute>
+ <xsd:attribute name="name" type="xsd:string" use="required"></xsd:attribute>
</xsd:complexType>
</xsd:element>
<xsd:element name="VirtualMachineLabel">
<xsd:complexType>
<xsd:sequence>
- <xsd:element ref="Name"></xsd:element>
+ <xsd:element name="Name" type="NameWithFrom"></xsd:element>
<xsd:element ref="SimpleTypeEnforcementTypes" minOccurs="0" maxOccurs="unbounded" />
- <xsd:element ref="ChineseWallTypes" minOccurs="0" maxOccurs="unbounded" />
+ <xsd:element name="ChineseWallTypes" type="SingleChineseWallType" />
</xsd:sequence>
</xsd:complexType>
</xsd:element>
<xsd:element name="ResourceLabel">
<xsd:complexType>
<xsd:sequence>
- <xsd:element ref="Name"></xsd:element>
- <xsd:element ref="SimpleTypeEnforcementTypes" minOccurs="0" maxOccurs="unbounded" />
+ <xsd:element name="Name" type="NameWithFrom"></xsd:element>
+ <xsd:element name="SimpleTypeEnforcementTypes" type="SingleSimpleTypeEnforcementType" />
</xsd:sequence>
</xsd:complexType>
</xsd:element>
@@ -131,4 +131,21 @@
<xsd:pattern value="[0-9]{1,8}.[0-9]{1,8}"></xsd:pattern>
</xsd:restriction>
</xsd:simpleType>
+ <xsd:complexType name="NameWithFrom">
+ <xsd:simpleContent>
+ <xsd:extension base="xsd:string">
+ <xsd:attribute name="from" type="xsd:string" use="optional"></xsd:attribute>
+ </xsd:extension>
+ </xsd:simpleContent>
+ </xsd:complexType>
+ <xsd:complexType name="SingleSimpleTypeEnforcementType">
+ <xsd:sequence>
+ <xsd:element maxOccurs="1" minOccurs="1" ref="Type" />
+ </xsd:sequence>
+ </xsd:complexType>
+ <xsd:complexType name="SingleChineseWallType">
+ <xsd:sequence>
+ <xsd:element maxOccurs="1" minOccurs="1" ref="Type" />
+ </xsd:sequence>
+ </xsd:complexType>
</xsd:schema>
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/security/xensec_ezpolicy
--- a/tools/security/xensec_ezpolicy Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/security/xensec_ezpolicy Tue Jul 10 08:39:26 2007 -0600
@@ -1102,8 +1102,10 @@ def org2dict():
for i in iterchildren(app.win.orgs.GetRootItem()):
d = []
for j in iterchildren(i):
- d.append(str(app.win.orgspanel.orgs.GetItemText(j)))
- o.append([str(app.win.orgspanel.orgs.GetItemText(i)) , d])
+ d.append(
+ str(app.win.orgspanel.orgs.GetItemText(j).encode("utf-8")))
+ o.append([str(app.win.orgspanel.orgs.GetItemText(i).encode("utf-8")),
+ d])
dic['orgs'] = o
c=[]
for i in app.win.conspanel.conflictsets:
@@ -1175,12 +1177,14 @@ def printPolicy(fd, types, cons):
continue
#name is optional but must be set
if i[0]:
- rer_name = str(i[0])
+ rer_name = i[0]
else:
- rer_name = str("RER")
- fd.write(""" <Conflict name=\"%s\">\n""" % rer_name)
+ rer_name = "RER"
+ fd.write(""" <Conflict name=\"""" +
+ rer_name.encode("utf-8") + """\">\n""")
for j in i[1]:
- fd.write(""" <Type>%s</Type>\n""" % str(j))
+ typ = j.encode("utf-8")
+ fd.write(""" <Type>%s</Type>\n""" % typ)
fd.write(""" </Conflict>\n""")
fd.write(""" </ConflictSets>\n""")

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xcutils/Makefile
--- a/tools/xcutils/Makefile Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xcutils/Makefile Tue Jul 10 08:39:26 2007 -0600
@@ -15,7 +15,7 @@ PROGRAMS_INSTALL_DIR = /usr/$(LIBDIR)/xe

INCLUDES += -I $(XEN_LIBXC) -I $(XEN_XENSTORE)

-CFLAGS += -Werror -fno-strict-aliasing
+CFLAGS += -Werror
CFLAGS += $(INCLUDES)

# Make gcc generate dependencies.
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenmon/xenbaked.c Tue Jul 10 08:39:26 2007 -0600
@@ -444,7 +444,7 @@ struct t_rec **init_rec_ptrs(struct t_bu
*/
unsigned int get_num_cpus(void)
{
- xc_physinfo_t physinfo;
+ xc_physinfo_t physinfo = { 0 };
int xc_handle = xc_interface_open();
int ret;

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenstat/libxenstat/src/xenstat.c Tue Jul 10 08:39:26 2007 -0600
@@ -135,7 +135,7 @@ xenstat_node *xenstat_get_node(xenstat_h
{
#define DOMAIN_CHUNK_SIZE 256
xenstat_node *node;
- xc_physinfo_t physinfo;
+ xc_physinfo_t physinfo = { 0 };
xc_domaininfo_t domaininfo[DOMAIN_CHUNK_SIZE];
unsigned int new_domains;
unsigned int i;
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/Makefile
--- a/tools/xenstore/Makefile Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenstore/Makefile Tue Jul 10 08:39:26 2007 -0600
@@ -11,15 +11,11 @@ BASECFLAGS += -Wp,-MD,.$(@F).d
BASECFLAGS += -Wp,-MD,.$(@F).d
PROG_DEP = .*.d
BASECFLAGS+= $(PROFILE)
-#BASECFLAGS+= -I$(XEN_ROOT)/tools
BASECFLAGS+= -I$(XEN_ROOT)/tools/libxc
BASECFLAGS+= -I.

CFLAGS += $(BASECFLAGS)
LDFLAGS += $(PROFILE) -L$(XEN_LIBXC)
-TESTDIR = testsuite/tmp
-TESTFLAGS= -DTESTING
-TESTENV = XENSTORED_ROOTDIR=$(TESTDIR) XENSTORED_RUNDIR=$(TESTDIR)

CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
CLIENTS += xenstore-write
@@ -34,12 +30,6 @@ XENSTORED_OBJS += $(XENSTORED_OBJS_y)

.PHONY: all
all: libxenstore.so libxenstore.a xenstored $(CLIENTS) xs_tdb_dump xenstore-control xenstore-ls
-
-test_interleaved_transactions: test_interleaved_transactions.o
- $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -L. -lxenstore -o $@
-
-.PHONY: testcode
-testcode: xs_test xenstored_test xs_random

xenstored: $(XENSTORED_OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -lxenctrl $(SOCKET_LIBS) -o $@
@@ -56,34 +46,8 @@ xenstore-ls: xsls.o libxenstore.so
xenstore-ls: xsls.o libxenstore.so
$(CC) $(CFLAGS) $(LDFLAGS) $< $(LOADLIBES) $(LDLIBS) -L. -lxenstore $(SOCKET_LIBS) -o $@

-xenstored_test: xenstored_core_test.o xenstored_watch_test.o xenstored_domain_test.o xenstored_transaction_test.o xs_lib.o talloc_test.o fake_libxc.o utils.o tdb.o
- $(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
-
xs_tdb_dump: xs_tdb_dump.o utils.o tdb.o talloc.o
$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@
-
-xs_test xs_random xs_stress xs_crashme: LDFLAGS+=-lpthread
-xs_test: xs_test.o xs_lib.o utils.o
-xs_random: xs_random.o xs_test_lib.o xs_lib.o talloc.o utils.o
-xs_stress: xs_stress.o xs_test_lib.o xs_lib.o talloc.o utils.o
-xs_crashme: xs_crashme.o xs_lib.o talloc.o utils.o
-
-speedtest: speedtest.o xs.o xs_lib.o utils.o talloc.o
-
-.PHONY: check-speed
-check-speed: speedtest xenstored_test $(TESTDIR)
- $(TESTENV) time ./speedtest 100
-
-xs_test.o xs_stress.o xenstored_core_test.o xenstored_watch_test.o xenstored_transaction_test.o xenstored_domain_test.o xs_random.o xs_test_lib.o talloc_test.o fake_libxc.o xs_crashme.o: CFLAGS=$(BASECFLAGS) $(TESTFLAGS)
-
-xenstored_%_test.o: xenstored_%.c
- $(COMPILE.c) -o $@ $<
-
-xs_test_lib.o: xs.c
- $(COMPILE.c) -o $@ $<
-
-talloc_test.o: talloc.c
- $(COMPILE.c) -o $@ $<

libxenstore.so: libxenstore.so.$(MAJOR)
ln -sf $< $@
@@ -97,66 +61,12 @@ libxenstore.a: xs.o xs_lib.o
$(AR) rcs libxenstore.a $^

.PHONY: clean
-clean: testsuite-clean
+clean:
rm -f *.a *.o *.opic *.so*
rm -f xenstored xs_random xs_stress xs_crashme
- rm -f xs_test xenstored_test xs_tdb_dump xenstore-control xenstore-ls
+ rm -f xs_tdb_dump xenstore-control xenstore-ls
rm -f $(CLIENTS)
$(RM) $(PROG_DEP)
-
-.PHONY: print-dir
-print-dir:
- @echo -n tools/xenstore:
-
-.PHONY: print-end
-print-end:
- @echo
-
-.PHONY: check
-check: print-dir testsuite-fast randomcheck-fast print-end
-
-.PHONY: fullcheck
-fullcheck: testsuite-run randomcheck stresstest
-
-$(TESTDIR):
- mkdir $@
-
-.PHONY: testsuite-run
-testsuite-run: xenstored_test xs_test $(TESTDIR)
- $(TESTENV) testsuite/test.sh && echo
-
-.PHONY: testsuite-fast
-testsuite-fast: xenstored_test xs_test $(TESTDIR)
- @$(TESTENV) testsuite/test.sh --fast
-
-.PHONY: testsuite-clean
-testsuite-clean:
- rm -rf $(TESTDIR)
-
-# Make this visible so they can see repeat tests without --fast if they
-# fail.
-RANDSEED=$(shell date +%s)
-.PHONY: randomcheck
-randomcheck: xs_random xenstored_test $(TESTDIR)
- $(TESTENV) ./xs_random --simple --fast /tmp/xs_random 200000 $(RANDSEED) && echo
- $(TESTENV) ./xs_random --fast /tmp/xs_random 100000 $(RANDSEED) && echo
-# $(TESTENV) ./xs_random --fail /tmp/xs_random 10000 $(RANDSEED)
-
-.PHONY: crashme
-crashme: xs_crashme xenstored_test $(TESTDIR)
- rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog* /tmp/trace
- export $(TESTENV); ./xs_crashme 5000 $(RANDSEED) 2>/dev/null
- if [ -n "`cat /tmp/xs_crashme.vglog*`" ]; then echo Valgrind complained; cat /tmp/xs_crashme.vglog*; exit 1; fi
- rm -rf $(TESTDIR)/store $(TESTDIR)/transactions /tmp/xs_crashme.vglog* /tmp/trace
-
-.PHONY: randomcheck-fast
-randomcheck-fast: xs_random xenstored_test $(TESTDIR)
- @$(TESTENV) ./xs_random --fast /tmp/xs_random 2000 $(RANDSEED)
-
-.PHONY: stresstest
-stresstest: xs_stress xenstored_test $(TESTDIR)
- rm -rf $(TESTDIR)/store $(TESTDIR)/transactions
- export $(TESTENV); PID=`./xenstored_test --output-pid --trace-file=/tmp/trace`; ./xs_stress 5000; ret=$$?; kill $$PID; exit $$ret

.PHONY: TAGS
TAGS:
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/fake_libxc.c
--- a/tools/xenstore/fake_libxc.c Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-/*
- Fake libxc which doesn't require hypervisor but talks to xs_test.
- Copyright (C) 2005 Rusty Russell IBM Corporation
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include <assert.h>
-#include <signal.h>
-#include "utils.h"
-#include "xenstored_core.h"
-#include "xenstored_domain.h"
-#include "xenstored_test.h"
-#include <xenctrl.h>
-
-static int sigfd;
-static int xs_test_pid;
-static evtchn_port_t port;
-
-/* The event channel maps to a signal, shared page to an mmapped file. */
-void xc_evtchn_notify(int xce_handle, int local_port)
-{
- assert(local_port == port);
- if (kill(xs_test_pid, SIGUSR2) != 0)
- barf_perror("fake event channel failed");
-}
-
-void *xc_map_foreign_range(int xc_handle, uint32_t dom __attribute__((unused)),
- int size, int prot,
- unsigned long mfn __attribute__((unused)))
-{
- void *ret;
-
- ret = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0);
- if (ret == MAP_FAILED)
- return NULL;
-
- /* xs_test tells us pid and port by putting it in buffer, we reply. */
- xs_test_pid = *(int *)(ret + 32);
- port = *(int *)(ret + 36);
- *(int *)(ret + 32) = getpid();
- return ret;
-}
-
-int xc_interface_open(void)
-{
- int fd;
- char page[getpagesize()];
-
- fd = open("/tmp/xcmap", O_RDWR|O_CREAT|O_TRUNC, 0600);
- if (fd < 0)
- return fd;
-
- memset(page, 0, sizeof(page));
- if (!xs_write_all(fd, page, sizeof(page)))
- barf_perror("Failed to write /tmp/xcmap page");
-
- return fd;
-}
-
-int xc_interface_close(int xc_handle)
-{
- close(xc_handle);
- return 0;
-}
-
-int xc_domain_getinfo(int xc_handle __attribute__((unused)),
- uint32_t first_domid, unsigned int max_doms,
- xc_dominfo_t *info)
-{
- assert(max_doms == 1);
- info->domid = first_domid;
-
- info->dying = 0;
- info->shutdown = 0;
- info->paused = 0;
- info->blocked = 0;
- info->running = 1;
-
- info->shutdown_reason = 0;
-
- if ( info->shutdown && (info->shutdown_reason == SHUTDOWN_crash) )
- {
- info->shutdown = 0;
- info->crashed = 1;
- }
-
- return 1;
-}
-
-static void send_to_fd(int signo __attribute__((unused)))
-{
- int saved_errno = errno;
- write(sigfd, &port, sizeof(port));
- errno = saved_errno;
-}
-
-void fake_block_events(void)
-{
- signal(SIGUSR2, SIG_IGN);
-}
-
-void fake_ack_event(void)
-{
- signal(SIGUSR2, send_to_fd);
-}
-
-int xc_evtchn_open(void)
-{
- int fds[2];
-
- if (pipe(fds) != 0)
- return -1;
-
- if (signal(SIGUSR2, send_to_fd) == SIG_ERR) {
- int saved_errno = errno;
- close(fds[0]);
- close(fds[1]);
- errno = saved_errno;
- return -1;
- }
- sigfd = fds[1];
- return fds[0];
-}
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/speedtest.c
--- a/tools/xenstore/speedtest.c Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,130 +0,0 @@
-/*
- Xen Store Daemon Speed test
- Copyright (C) 2005 Rusty Russell IBM Corporation
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include "utils.h"
-#include "xs.h"
-#include "list.h"
-#include "talloc.h"
-
-static void do_command(const char *cmd)
-{
- int ret;
-
- ret = system(cmd);
- if (ret == -1 || !WIFEXITED(ret) || WEXITSTATUS(ret) != 0)
- barf_perror("Failed '%s': %i", cmd, ret);
-}
-
-static int start_daemon(void)
-{
- int fds[2], pid;
-
- do_command(talloc_asprintf(NULL, "rm -rf testsuite/tmp/*"));
-
- /* Start daemon. */
- pipe(fds);
- if ((pid = fork())) {
- /* Child writes PID when its ready: we wait for that. */
- char buffer[20];
- close(fds[1]);
- if (read(fds[0], buffer, sizeof(buffer)) < 0)
- barf("Failed to summon daemon");
- close(fds[0]);
- } else {
- dup2(fds[1], STDOUT_FILENO);
- close(fds[0]);
-#if 0
- execlp("valgrind", "valgrind", "-q", "--suppressions=testsuite/vg-suppressions", "xenstored_test", "--output-pid",
- "--no-fork", "--trace-file=/tmp/trace", NULL);
-#else
- execlp("./xenstored_test", "xenstored_test", "--output-pid", "--no-fork", NULL);
-// execlp("strace", "strace", "-o", "/tmp/out", "./xenstored_test", "--output-pid", "--no-fork", NULL);
-#endif
- exit(1);
- }
- return pid;
-}
-
-static void kill_daemon(int pid)
-{
- int saved_errno = errno;
- kill(pid, SIGTERM);
- errno = saved_errno;
-}
-
-#define NUM_ENTRIES 50
-
-/* We create the given number of trees, each with NUM_ENTRIES, using
- * transactions. */
-int main(int argc, char *argv[])
-{
- int i, j, pid, print;
- struct xs_handle *h;
-
- if (argc != 2)
- barf("Usage: speedtest <numdomains>");
-
- pid = start_daemon();
- h = xs_daemon_open();
- print = atoi(argv[1]) / 76;
- if (!print)
- print = 1;
- for (i = 0; i < atoi(argv[1]); i ++) {
- char name[64];
-
- if (i % print == 0)
- write(1, ".", 1);
- if (!xs_transaction_start(h)) {
- kill_daemon(pid);
- barf_perror("Starting transaction");
- }
- sprintf(name, "/%i", i);
- if (!xs_mkdir(h, name)) {
- kill_daemon(pid);
- barf_perror("Making directory %s", name);
- }
-
- for (j = 0; j < NUM_ENTRIES; j++) {
- sprintf(name, "/%i/%i", i, j);
- if (!xs_write(h, name, name, strlen(name))) {
- kill_daemon(pid);
- barf_perror("Making directory %s", name);
- }
- }
- if (!xs_transaction_end(h, false)) {
- kill_daemon(pid);
- barf_perror("Ending transaction");
- }
- }
- write(1, "\n", 1);
-
- kill_daemon(pid);
- wait(NULL);
- return 0;
-}
-
-
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/01simple.test
--- a/tools/xenstore/testsuite/01simple.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-# Create an entry, read it.
-write /test contents
-expect contents
-read /test
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/02directory.test
--- a/tools/xenstore/testsuite/02directory.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-# Root directory has only tool dir in it.
-expect tool
-dir /
-
-# Create a file.
-write /test contents
-
-# Directory shows it.
-expect test
-expect tool
-dir /
-
-# Make a new directory, check it's there
-mkdir /dir
-expect dir
-expect test
-expect tool
-dir /
-
-# Check it's empty.
-dir /dir
-
-# Create a file, check it exists.
-write /dir/test2 contents2
-expect test2
-dir /dir
-expect contents2
-read /dir/test2
-
-# Creating dir over the top should succeed.
-mkdir /dir
-mkdir /dir/test2
-
-# Mkdir implicitly creates directories.
-mkdir /dir/1/2/3/4
-expect test2
-expect 1
-dir /dir
-expect 2
-dir /dir/1
-expect 3
-dir /dir/1/2
-expect 4
-dir /dir/1/2/3
-dir /dir/1/2/3/4
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/03write.test
--- a/tools/xenstore/testsuite/03write.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-# Write succeeds
-write /test contents
-expect contents
-read /test
-
-# Overwrite succeeds.
-write /test contents2
-expect contents2
-read /test
-
-# Write should implicitly create directories
-write /dir/test contents
-expect test
-dir /dir
-expect contents
-read /dir/test
-write /dir/1/2/3/4 contents4
-expect test
-expect 1
-dir /dir
-expect 2
-dir /dir/1
-expect 3
-dir /dir/1/2
-expect 4
-dir /dir/1/2/3
-expect contents4
-read /dir/1/2/3/4
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/04rm.test
--- a/tools/xenstore/testsuite/04rm.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-# Remove non-existant is OK, as long as parent exists
-rm /test
-expect rm failed: No such file or directory
-rm /dir/test
-
-# Create file and remove it
-write /test contents
-rm /test
-expect tool
-dir /
-
-# Create directory and remove it.
-mkdir /dir
-rm /dir
-
-# Create directory, create file, remove all.
-mkdir /dir
-write /dir/test contents
-rm /dir
-
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/05filepermissions.test
--- a/tools/xenstore/testsuite/05filepermissions.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-# Fail to get perms on non-existent file.
-expect getperm failed: No such file or directory
-getperm /test
-expect getperm failed: No such file or directory
-getperm /dir/test
-
-# Create file: inherits from root (0 READ)
-write /test contents
-expect 0 READ
-getperm /test
-setid 1
-expect 0 READ
-getperm /test
-expect contents
-read /test
-expect write failed: Permission denied
-write /test contents
-
-# Take away read access to file.
-setid 0
-setperm /test 0 NONE
-setid 1
-expect getperm failed: Permission denied
-getperm /test
-expect read failed: Permission denied
-read /test
-expect write failed: Permission denied
-write /test contents
-
-# Grant everyone write access to file.
-setid 0
-setperm /test 0 WRITE
-setid 1
-expect getperm failed: Permission denied
-getperm /test
-expect read failed: Permission denied
-read /test
-write /test contents2
-setid 0
-expect contents2
-read /test
-
-# Grant everyone both read and write access.
-setperm /test 0 READ/WRITE
-setid 1
-expect 0 READ/WRITE
-getperm /test
-expect contents2
-read /test
-write /test contents3
-expect contents3
-read /test
-
-# Change so that user 1 owns it, noone else can do anything.
-setid 0
-setperm /test 1 NONE
-setid 1
-expect 1 NONE
-getperm /test
-expect contents3
-read /test
-write /test contents4
-
-# User 2 can do nothing.
-setid 2
-expect setperm failed: Permission denied
-setperm /test 2 NONE
-expect getperm failed: Permission denied
-getperm /test
-expect read failed: Permission denied
-read /test
-expect write failed: Permission denied
-write /test contents4
-
-# Tools can always access things.
-setid 0
-expect 1 NONE
-getperm /test
-expect contents4
-read /test
-write /test contents5
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/06dirpermissions.test
--- a/tools/xenstore/testsuite/06dirpermissions.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,119 +0,0 @@
-# Root directory: owned by tool, everyone has read access.
-expect 0 READ
-getperm /
-
-# Create directory: inherits from root.
-mkdir /dir
-expect 0 READ
-getperm /dir
-setid 1
-expect 0 READ
-getperm /dir
-dir /dir
-expect write failed: Permission denied
-write /dir/test contents2
-
-# Remove everyone's read access to directoy.
-setid 0
-setperm /dir 0 NONE
-setid 1
-expect dir failed: Permission denied
-dir /dir
-expect read failed: Permission denied
-read /dir/test create contents2
-expect write failed: Permission denied
-write /dir/test contents2
-
-# Grant everyone write access to directory.
-setid 0
-setperm /dir 0 WRITE
-setid 1
-expect getperm failed: Permission denied
-getperm /dir
-expect dir failed: Permission denied
-dir /dir
-write /dir/test contents
-setid 0
-expect 1 WRITE
-getperm /dir/test
-setperm /dir/test 0 NONE
-expect contents
-read /dir/test
-
-# Grant everyone both read and write access.
-setperm /dir 0 READ/WRITE
-setid 1
-expect 0 READ/WRITE
-getperm /dir
-expect test
-dir /dir
-write /dir/test2 contents
-expect contents
-read /dir/test2
-setperm /dir/test2 1 NONE
-
-# Change so that user 1 owns it, noone else can do anything.
-setid 0
-setperm /dir 1 NONE
-expect 1 NONE
-getperm /dir
-expect test
-expect test2
-dir /dir
-write /dir/test3 contents
-
-# User 2 can do nothing. Can't even tell if file exists.
-setid 2
-expect setperm failed: Permission denied
-setperm /dir 2 NONE
-expect getperm failed: Permission denied
-getperm /dir
-expect dir failed: Permission denied
-dir /dir
-expect read failed: Permission denied
-read /dir/test
-expect read failed: Permission denied
-read /dir/test2
-expect read failed: Permission denied
-read /dir/test3
-expect read failed: Permission denied
-read /dir/test4
-expect write failed: Permission denied
-write /dir/test contents
-expect write failed: Permission denied
-write /dir/test4 contents
-
-# Tools can always access things.
-setid 0
-expect 1 NONE
-getperm /dir
-expect test
-expect test2
-expect test3
-dir /dir
-write /dir/test4 contents
-
-# Inherited by child.
-mkdir /dir/subdir
-expect 1 NONE
-getperm /dir/subdir
-write /dir/subfile contents
-expect 1 NONE
-getperm /dir/subfile
-
-# But for domains, they own it.
-setperm /dir/subdir 2 READ/WRITE
-expect 2 READ/WRITE
-getperm /dir/subdir
-setid 3
-write /dir/subdir/subfile contents
-expect 3 READ/WRITE
-getperm /dir/subdir/subfile
-
-# Inheritence works through multiple directories, too.
-write /dir/subdir/1/2/3/4 contents
-expect 3 READ/WRITE
-getperm /dir/subdir/1/2/3/4
-mkdir /dir/subdir/a/b/c/d
-expect 3 READ/WRITE
-getperm /dir/subdir/a/b/c/d
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/07watch.test
--- a/tools/xenstore/testsuite/07watch.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,176 +0,0 @@
-# Watch something, write to it, check watch has fired.
-write /test contents
-
-1 watch /test token
-2 write /test contents2
-expect 1:/test:token
-1 waitwatch
-1 close
-
-# Check that reads don't set it off.
-1 watch /test token
-expect 2:contents2
-2 read /test
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
-1 close
-
-# mkdir, setperm and rm should (also tests watching dirs)
-mkdir /dir
-1 watch /dir token
-2 mkdir /dir/newdir
-expect 1:/dir/newdir:token
-1 waitwatch
-2 setperm /dir/newdir 0 READ
-expect 1:/dir/newdir:token
-1 waitwatch
-2 rm /dir/newdir
-expect 1:/dir/newdir:token
-1 waitwatch
-1 close
-2 close
-
-# Changed in b594bb976a743d509f1ffabb5bc698874ab90d8f
-## We don't get a watch from our own commands.
-#watch /dir token
-#mkdir /dir/newdir
-#expect waitwatch failed: Connection timed out
-#waitwatch
-#close
-
-# ignore watches while doing commands, should work.
-watch /dir token
-1 write /dir/test contents
-expect contents
-read /dir/test
-expect /dir/test:token
-waitwatch
-close
-
-# watch priority test: all simultaneous
-1 watch /dir token1
-3 watch /dir token3
-2 watch /dir token2
-write /dir/test contents
-expect 3:/dir/test:token3
-3 waitwatch
-expect 2:/dir/test:token2
-2 waitwatch
-expect 1:/dir/test:token1
-1 waitwatch
-1 close
-2 close
-3 close
-
-# If one dies (without acking), the other should still get ack.
-1 watch /dir token1
-2 watch /dir token2
-write /dir/test contents
-expect 2:/dir/test:token2
-2 waitwatch
-2 close
-expect 1:/dir/test:token1
-1 waitwatch
-1 close
-
-# If one dies (without reading at all), the other should still get ack.
-1 watch /dir token1
-2 watch /dir token2
-write /dir/test contents
-2 close
-expect 1:/dir/test:token1
-1 waitwatch
-1 close
-2 close
-
-# unwatch
-1 watch /dir token1
-1 unwatch /dir token1
-1 watch /dir token2
-2 write /dir/test2 contents
-expect 1:/dir/test2:token2
-1 waitwatch
-1 unwatch /dir token2
-1 close
-2 close
-
-# unwatch while watch pending. Other watcher still gets the event.
-1 watch /dir token1
-2 watch /dir token2
-write /dir/test contents
-2 unwatch /dir token2
-expect 1:/dir/test:token1
-1 waitwatch
-1 close
-2 close
-
-# unwatch while watch pending. Should clear this so we get next event.
-1 watch /dir token1
-write /dir/test contents
-1 unwatch /dir token1
-1 watch /dir/test token2
-write /dir/test contents2
-expect 1:/dir/test:token2
-1 waitwatch
-
-# check we only get notified once.
-1 watch /test token
-2 write /test contents2
-expect 1:/test:token
-1 waitwatch
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
-1 close
-
-# watches are queued in order.
-1 watch / token
-2 write /test1 contents
-2 write /test2 contents
-2 write /test3 contents
-expect 1:/test1:token
-1 waitwatch
-expect 1:/test2:token
-1 waitwatch
-expect 1:/test3:token
-1 waitwatch
-1 close
-
-# Creation of subpaths should be covered correctly.
-1 watch / token
-2 write /test/subnode contents2
-2 write /test/subnode/subnode contents2
-expect 1:/test/subnode:token
-1 waitwatch
-expect 1:/test/subnode/subnode:token
-1 waitwatch
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
-1 close
-
-# Watch event must have happened before we registered interest.
-1 watch / token
-2 write /test/subnode contents2
-1 watchnoack / token2 0
-expect 1:/test/subnode:token
-1 waitwatch
-expect 1:/:token2
-1 waitwatch
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
-1 close
-
-# Rm fires notification on child.
-1 watch /test/subnode token
-2 rm /test
-expect 1:/test/subnode:token
-1 waitwatch
-
-# Watch should not double-send after we ack, even if we did something in between.
-1 watch /test2 token
-2 write /test2/foo contents2
-expect 1:/test2/foo:token
-1 waitwatch
-expect 1:contents2
-1 read /test2/foo
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/08transaction.slowtest
--- a/tools/xenstore/testsuite/08transaction.slowtest Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-# Test transaction clashes.
-
-mkdir /test
-write /test/entry1 contents
-
-# Start transaction, do read-only op, transaction succeeds
-1 start
-1 write /test/entry1 contents2
-expect contents
-read /test/entry1
-1 commit
-expect contents2
-read /test/entry1
-
-# Start transaction, abort other transaction, transaction succeeds.
-1 start
-1 write /test/entry1 contents3
-start
-write /test/entry1 contents
-abort
-1 commit
-expect contents3
-read /test/entry1
-
-# Start transaction, do write op, transaction fails
-1 start
-1 write /test/entry1 contents4
-write /test/entry1 contents
-expect 1: commit failed: Resource temporarily unavailable
-1 commit
-expect contents
-read /test/entry1
-
-# Start transaction, do other transaction, transaction fails
-1 start
-1 write /test/entry1 contents4
-start
-write /test/entry1 contents5
-commit
-expect 1: commit failed: Resource temporarily unavailable
-1 commit
-expect contents5
-read /test/entry1
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/08transaction.test
--- a/tools/xenstore/testsuite/08transaction.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-# Test transactions.
-
-mkdir /test
-
-# Simple transaction: create a file inside transaction.
-1 start
-1 write /test/entry1 contents
-2 dir /test
-expect 1:entry1
-1 dir /test
-1 commit
-expect 2:contents
-2 read /test/entry1
-
-rm /test/entry1
-
-# Create a file and abort transaction.
-1 start
-1 write /test/entry1 contents
-2 dir /test
-expect 1:entry1
-1 dir /test
-1 abort
-2 dir /test
-
-write /test/entry1 contents
-# Delete in transaction, commit
-1 start
-1 rm /test/entry1
-expect 2:entry1
-2 dir /test
-1 dir /test
-1 commit
-2 dir /test
-
-# Delete in transaction, abort.
-write /test/entry1 contents
-1 start
-1 rm /test/entry1
-expect 2:entry1
-2 dir /test
-1 dir /test
-1 abort
-expect 2:entry1
-2 dir /test
-
-# Events inside transactions don't trigger watches until (successful) commit.
-mkdir /test/dir
-1 watch /test token
-2 start
-2 mkdir /test/dir/sub
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
-2 close
-1 close
-
-1 watch /test token
-2 start
-2 mkdir /test/dir/sub
-2 abort
-expect 1: waitwatch failed: Connection timed out
-1 waitwatch
-1 close
-
-1 watch /test token
-2 start
-2 mkdir /test/dir/sub
-2 commit
-expect 1:/test/dir/sub:token
-1 waitwatch
-1 close
-
-# Rm inside transaction works like rm outside: children get notified.
-1 watch /test/dir/sub token
-2 start
-2 rm /test/dir
-2 commit
-expect 1:/test/dir/sub:token
-1 waitwatch
-1 close
-
-# Multiple events from single transaction don't trigger assert
-1 watch /test token
-2 start
-2 write /test/1 contents
-2 write /test/2 contents
-2 commit
-expect 1:/test/1:token
-1 waitwatch
-expect 1:/test/2:token
-1 waitwatch
-1 close
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/09domain.test
--- a/tools/xenstore/testsuite/09domain.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-# Test domain communication.
-
-# Create a domain, write an entry.
-expect handle is 1
-introduce 1 100 7 /my/home
-1 write /entry1 contents
-expect entry1
-expect tool
-dir /
-close
-
-# Release that domain.
-release 1
-close
-
-# Introduce and release by same connection.
-expect handle is 2
-introduce 1 100 7 /my/home
-release 1
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/10domain-homedir.test
--- a/tools/xenstore/testsuite/10domain-homedir.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-# Test domain "implicit" paths.
-
-# Create a domain, write an entry using implicit path, read using implicit
-mkdir /home
-expect handle is 1
-introduce 1 100 7 /home
-1 write entry1 contents
-expect contents
-read /home/entry1
-expect entry1
-dir /home
-
-# Place a watch using a relative path: expect relative answer.
-1 mkdir foo
-1 watch foo token
-write /home/foo/bar contents
-expect 1:foo/bar:token
-1 waitwatch
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/11domain-watch.test
--- a/tools/xenstore/testsuite/11domain-watch.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-# Test watching from a domain.
-
-# Watch something, write to it, check watch has fired.
-write /test contents
-mkdir /dir
-
-expect handle is 1
-introduce 1 100 7 /my/home
-1 watch /test token
-write /test contents2
-expect 1:/test:token
-1 waitwatch
-1 unwatch /test token
-release 1
-1 close
-
-# ignore watches while doing commands, should work.
-expect handle is 1
-introduce 1 100 7 /my/home
-1 watch /dir token
-write /dir/test contents
-1 write /dir/test2 contents2
-1 write /dir/test3 contents3
-1 write /dir/test4 contents4
-expect 1:/dir/test:token
-1 waitwatch
-release 1
-1 close
-
-# unwatch
-expect handle is 1
-introduce 1 100 7 /my/home
-1 watch /dir token1
-1 unwatch /dir token1
-1 watch /dir token2
-write /dir/test2 contents
-expect 1:/dir/test2:token2
-1 waitwatch
-1 unwatch /dir token2
-release 1
-1 close
-
-# unwatch while watch pending.
-expect handle is 1
-introduce 1 100 7 /my/home
-1 watch /dir token1
-write /dir/test2 contents
-1 unwatch /dir token1
-release 1
-1 close
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/12readonly.test
--- a/tools/xenstore/testsuite/12readonly.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-# Test that read only connection can't alter store.
-
-write /test contents
-
-readonly
-expect test
-expect tool
-dir /
-
-expect contents
-read /test
-expect 0 READ
-getperm /test
-watch /test token
-unwatch /test token
-start
-commit
-start
-abort
-
-# These don't work
-expect write failed: Permission denied
-write /test2 contents
-expect write failed: Permission denied
-write /test contents
-expect setperm failed: Permission denied
-setperm /test 100 NONE
-expect setperm failed: Permission denied
-setperm /test 100 NONE
-expect introduce failed: Permission denied
-introduce 1 100 7 /home
-
-# Check that watches work like normal.
-watch / token
-1 readwrite
-1 write /test contents
-expect /test:token
-waitwatch
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/13watch-ack.test
--- a/tools/xenstore/testsuite/13watch-ack.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-# This demonstrates a bug where an xs_acknowledge_watch returns
-# EINVAL, because the daemon doesn't track what watch event it sent
-# and relies on it being the "first" watch which has an event.
-# Watches firing after the first event is sent out will change this.
-
-# Create three things to watch.
-mkdir /test
-mkdir /test/1
-mkdir /test/2
-mkdir /test/3
-
-# Watch all three, fire event on 2, read watch, fire event on 1 and 3, ack 2.
-1 watch /test/1 token1
-1 watch /test/2 token2
-1 watch /test/3 token3
-2 write /test/2 contents2
-expect 1:/test/2:token2
-1 waitwatch
-3 write /test/1 contents1
-4 write /test/3 contents3
-1 close
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/14complexperms.test
--- a/tools/xenstore/testsuite/14complexperms.test Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-# We should not be able to tell the difference between a node which
-# doesn't exist, and a node we don't have permission on, if we don't
-# have permission on it directory.
-
-mkdir /dir
-setperm /dir 0 NONE
-
-# First when it doesn't exist
-setid 1
-expect *Permission denied
-dir /dir/file
-expect *Permission denied
-read /dir/file
-expect *Permission denied
-write /dir/file value
-expect *Permission denied
-mkdir /dir/file
-expect *Permission denied
-rm /dir/file
-expect *Permission denied
-rm /dir
-expect *Permission denied
-getperm /dir/file
-expect *Permission denied
-setperm /dir/file 0 NONE
-# We get no watch event when there's no permission. It's a corner case.
-watchnoack /dir/file token
-1 write /dir/file contents
-1 rm /dir/file
-expect waitwatch failed: Connection timed out
-waitwatch
-unwatch /dir/file token
-expect *No such file or directory
-unwatch /dir/file token
-expect *Permission denied
-introduce 2 100 7 /dir/file
-
-# Now it exists
-setid 0
-write /dir/file contents
-
-setid 1
-expect *Permission denied
-dir /dir/file
-expect *Permission denied
-read /dir/file
-expect *Permission denied
-write /dir/file value
-expect *Permission denied
-mkdir /dir/file
-expect *Permission denied
-rm /dir/file
-expect *Permission denied
-rm /dir
-expect *Permission denied
-getperm /dir/file
-expect *Permission denied
-setperm /dir/file 0 NONE
-watchnoack /dir/file token
-1 write /dir/file contents
-1 rm /dir/file
-expect waitwatch failed: Connection timed out
-waitwatch
-unwatch /dir/file token
-expect *No such file or directory
-unwatch /dir/file token
-expect *Permission denied
-introduce 2 100 7 /dir/file
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/test.sh
--- a/tools/xenstore/testsuite/test.sh Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-#! /bin/sh
-
-set -e
-set -m
-
-run_test()
-{
- rm -rf $XENSTORED_ROOTDIR
- mkdir $XENSTORED_ROOTDIR
- if [ $VALGRIND -eq 1 ]; then
- valgrind --suppressions=testsuite/vg-suppressions -q ./xenstored_test --output-pid --trace-file=testsuite/tmp/trace --no-fork > /tmp/pid 2> testsuite/tmp/xenstored_errors &
- while [ ! -s /tmp/pid ]; do sleep 0; done
- PID=`cat /tmp/pid`
- rm /tmp/pid
- else
- # We don't get error messages from this, though.
- PID=`./xenstored_test --output-pid --trace-file=testsuite/tmp/trace`
- fi
- if ./xs_test $2 $1; then
- if [ -s testsuite/tmp/xenstored_errors ]; then
- kill $PID
- echo Errors:
- cat testsuite/tmp/xenstored_errors
- return 1
- fi
- kill $PID
- sleep 1
- return 0
- else
- # In case daemon is wedged.
- kill $PID
- sleep 1
- return 1
- fi
-}
-
-if [ x$1 = x--fast ]; then
- VALGRIND=0
- SLOWTESTS=""
- shift
-else
- if type valgrind >/dev/null 2>&1; then
- VALGRIND=1
- else
- echo "WARNING: valgrind not available" >&2
- VALGRIND=0
- fi
- SLOWTESTS=testsuite/[0-9]*.slowtest
-fi
-
-MATCH=${1:-"*"}
-for f in testsuite/[0-9]*.test $SLOWTESTS; do
- case `basename $f` in $MATCH) RUN=1;; esac
- [ -n "$RUN" ] || continue
-
- if run_test $f -x >/tmp/out; then
- echo -n .
- else
- cat /tmp/out
- # That will have filled the screen, repeat message.
- echo Test $f failed
- exit 1
- fi
-done
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/testsuite/vg-suppressions
--- a/tools/xenstore/testsuite/vg-suppressions Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-{
- Glibc goes boom from _start (Debian glibc 2.3.5-3)
- Memcheck:Cond
- obj:/lib/ld-2.3.5.so
- obj:/lib/ld-2.3.5.so
- obj:/lib/ld-2.3.5.so
- obj:/lib/ld-2.3.5.so
- obj:/lib/ld-2.3.5.so
-}
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenstore/xenstored_core.c Tue Jul 10 08:39:26 2007 -0600
@@ -39,7 +39,6 @@
#include <assert.h>
#include <setjmp.h>

-//#define DEBUG
#include "utils.h"
#include "list.h"
#include "talloc.h"
@@ -53,7 +52,6 @@

#include "hashtable.h"

-
extern int xce_handle; /* in xenstored_domain.c */

static bool verbose = false;
@@ -81,50 +79,6 @@ int quota_nb_watch_per_domain = 128;
int quota_nb_watch_per_domain = 128;
int quota_max_entry_size = 2048; /* 2K */
int quota_max_transaction = 10;
-
-#ifdef TESTING
-static bool failtest = false;
-
-/* We override talloc's malloc. */
-void *test_malloc(size_t size)
-{
- /* 1 in 20 means only about 50% of connections establish. */
- if (failtest && (random() % 32) == 0)
- return NULL;
- return malloc(size);
-}
-
-static void stop_failtest(int signum __attribute__((unused)))
-{
- failtest = false;
-}
-
-/* Need these before we #define away write_all/mkdir in testing.h */
-bool test_write_all(int fd, void *contents, unsigned int len);
-bool test_write_all(int fd, void *contents, unsigned int len)
-{
- if (failtest && (random() % 8) == 0) {
- if (len)
- len = random() % len;
- write(fd, contents, len);
- errno = ENOSPC;
- return false;
- }
- return xs_write_all(fd, contents, len);
-}
-
-int test_mkdir(const char *dir, int perms);
-int test_mkdir(const char *dir, int perms)
-{
- if (failtest && (random() % 8) == 0) {
- errno = ENOSPC;
- return -1;
- }
- return mkdir(dir, perms);
-}
-#endif /* TESTING */
-
-#include "xenstored_test.h"

TDB_CONTEXT *tdb_context(struct connection *conn)
{
@@ -1163,12 +1117,10 @@ static void do_debug(struct connection *
{
int num;

-#ifndef TESTING
if (conn->id != 0) {
send_error(conn, EACCES);
return;
}
-#endif

num = xs_count_strings(in->buffer, in->used);

@@ -1179,18 +1131,10 @@ static void do_debug(struct connection *
}
xprintf("debug: %s", in->buffer + get_string(in, 0));
}
+
if (streq(in->buffer, "check"))
check_store();
-#ifdef TESTING
- /* For testing, we allow them to set id. */
- if (streq(in->buffer, "setid")) {
- conn->id = atoi(in->buffer + get_string(in, 0));
- } else if (streq(in->buffer, "failtest")) {
- if (get_string(in, 0) < in->used)
- srandom(atoi(in->buffer + get_string(in, 0)));
- failtest = true;
- }
-#endif /* TESTING */
+
send_ack(conn, XS_DEBUG);
}

@@ -1319,10 +1263,8 @@ static void handle_input(struct connecti
return;

if (in->hdr.msg.len > PATH_MAX) {
-#ifndef TESTING
syslog(LOG_ERR, "Client tried to feed us %i",
in->hdr.msg.len);
-#endif
goto bad_client;
}

@@ -1414,39 +1356,7 @@ static void accept_connection(int sock,
close(fd);
}

-#ifdef TESTING
-/* Valgrind can check our writes better if we don't use mmap */
-#define TDB_FLAGS TDB_NOMMAP
-/* Useful for running under debugger. */
-void dump_connection(void)
-{
- struct connection *i;
-
- list_for_each_entry(i, &connections, list) {
- printf("Connection %p:\n", i);
- printf(" state = %s\n",
- list_empty(&i->out_list) ? "OK" : "BUSY");
- if (i->id)
- printf(" id = %i\n", i->id);
- if (!i->in->inhdr || i->in->used)
- printf(" got %i bytes of %s\n",
- i->in->used, i->in->inhdr ? "header" : "data");
-#if 0
- if (i->out)
- printf(" sending message %s (%s) out\n",
- sockmsg_string(i->out->hdr.msg.type),
- i->out->buffer);
- if (i->transaction)
- dump_transaction(i);
- if (i->domain)
- dump_domain(i);
-#endif
- dump_watches(i);
- }
-}
-#else
#define TDB_FLAGS 0
-#endif

/* We create initial nodes manually. */
static void manual_node(const char *name, const char *child)
@@ -1693,10 +1603,6 @@ static void corrupt(struct connection *c
log("corruption detected by connection %i: err %s: %s",
conn ? (int)conn->id : -1, strerror(saved_errno), str);

-#ifdef TESTING
- /* Allow them to attach debugger. */
- sleep(30);
-#endif
check_store();
}

@@ -1740,11 +1646,10 @@ static void daemonize(void)
if (pid != 0)
exit(0);

-#ifndef TESTING /* Relative paths for socket names */
/* Move off any mount points we might be in. */
if (chdir("/") == -1)
barf_perror("Failed to chdir");
-#endif
+
/* Discard our parent's old-fashioned umask prejudices. */
umask(0);
}
@@ -1941,10 +1846,6 @@ int main(int argc, char *argv[])

signal(SIGHUP, trigger_reopen_log);

-#ifdef TESTING
- signal(SIGUSR1, stop_failtest);
-#endif
-
if (xce_handle != -1)
evtchn_fd = xc_evtchn_fd(xce_handle);

diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenstore/xenstored_domain.c Tue Jul 10 08:39:26 2007 -0600
@@ -23,14 +23,12 @@
#include <stdlib.h>
#include <stdarg.h>

-//#define DEBUG
#include "utils.h"
#include "talloc.h"
#include "xenstored_core.h"
#include "xenstored_domain.h"
#include "xenstored_transaction.h"
#include "xenstored_watch.h"
-#include "xenstored_test.h"

#include <xenctrl.h>

@@ -217,10 +215,8 @@ void handle_event(void)
if (port == virq_port)
domain_cleanup();

-#ifndef TESTING
if (xc_evtchn_unmask(xce_handle, port) == -1)
barf_perror("Failed to write to event fd");
-#endif
}

bool domain_can_read(struct connection *conn)
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xenstored_test.h
--- a/tools/xenstore/xenstored_test.h Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-/*
- Testing replcements for Xen Store Daemon.
- Copyright (C) 2005 Rusty Russell IBM Corporation
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-#ifndef _XENSTORED_TEST_H
-#define _XENSTORED_TEST_H
-
-#ifdef TESTING
-bool test_write_all(int fd, void *contents, unsigned int len);
-#define xs_write_all test_write_all
-
-int test_mkdir(const char *dir, int perms);
-#define mkdir test_mkdir
-
-int fake_open_eventchn(void);
-void fake_block_events(void);
-void fake_ack_event(void);
-
-#define ioctl(a,b,c) 0
-
-#endif
-
-#endif /* _XENSTORED_INTERNAL_H */
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xenstored_transaction.c
--- a/tools/xenstore/xenstored_transaction.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenstore/xenstored_transaction.c Tue Jul 10 08:39:26 2007 -0600
@@ -35,7 +35,6 @@
#include "xenstored_domain.h"
#include "xs_lib.h"
#include "utils.h"
-#include "xenstored_test.h"

struct changed_node
{
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xenstored_watch.c
--- a/tools/xenstore/xenstored_watch.c Mon Jul 09 09:22:58 2007 -0600
+++ b/tools/xenstore/xenstored_watch.c Tue Jul 10 08:39:26 2007 -0600
@@ -29,7 +29,6 @@
#include "xenstored_watch.h"
#include "xs_lib.h"
#include "utils.h"
-#include "xenstored_test.h"
#include "xenstored_domain.h"

extern int quota_nb_watch_per_domain;
@@ -195,17 +194,6 @@ void conn_delete_all_watches(struct conn
domain_watch_dec(conn);
}
}
-
-#ifdef TESTING
-void dump_watches(struct connection *conn)
-{
- struct watch *watch;
-
- list_for_each_entry(watch, &conn->watches, list)
- printf(" watch on %s token %s\n",
- watch->node, watch->token);
-}
-#endif

/*
* Local variables:
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xs_crashme.c
--- a/tools/xenstore/xs_crashme.c Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,393 +0,0 @@
-/* Code which randomly corrupts bits going to the daemon.
- Copyright (C) 2005 Rusty Russell IBM Corporation
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-#include <stdbool.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <stdarg.h>
-#include <string.h>
-#include <sys/time.h>
-#include "xs.h"
-#include "talloc.h"
-#include <errno.h>
-
-#define XSTEST
-#define RAND_FREQ 128 /* One char in 32 is corrupted. */
-
-/* jhash.h: Jenkins hash support.
- *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
- *
- * http://burtleburtle.net/bob/hash/
- *
- * These are the credits from Bob's sources:
- *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose. It has no warranty.
- *
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- *
- * I've modified Bob's hash to be useful in the Linux kernel, and
- * any bugs present are surely my fault. -DaveM
- */
-
-/* NOTE: Arguments are modified. */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
-}
-
-/* The golden ration: an arbitrary value */
-#define JHASH_GOLDEN_RATIO 0x9e3779b9
-
-/* The most generic version, hashes an arbitrary sequence
- * of bytes. No alignment or length assumptions are made about
- * the input key.
- */
-static inline uint32_t jhash(const void *key, uint32_t length, uint32_t initval)
-{
- uint32_t a, b, c, len;
- const uint8_t *k = key;
-
- len = length;
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
-
- while (len >= 12) {
- a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
- b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
- c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
-
- __jhash_mix(a,b,c);
-
- k += 12;
- len -= 12;
- }
-
- c += length;
- switch (len) {
- case 11: c += ((uint32_t)k[10]<<24);
- case 10: c += ((uint32_t)k[9]<<16);
- case 9 : c += ((uint32_t)k[8]<<8);
- case 8 : b += ((uint32_t)k[7]<<24);
- case 7 : b += ((uint32_t)k[6]<<16);
- case 6 : b += ((uint32_t)k[5]<<8);
- case 5 : b += k[4];
- case 4 : a += ((uint32_t)k[3]<<24);
- case 3 : a += ((uint32_t)k[2]<<16);
- case 2 : a += ((uint32_t)k[1]<<8);
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-/* A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- */
-static inline uint32_t jhash2(uint32_t *k, uint32_t length, uint32_t initval)
-{
- uint32_t a, b, c, len;
-
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
- len = length;
-
- while (len >= 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- __jhash_mix(a, b, c);
- k += 3; len -= 3;
- }
-
- c += length * 4;
-
- switch (len) {
- case 2 : b += k[1];
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-
-/* A special ultra-optimized versions that knows they are hashing exactly
- * 3, 2 or 1 word(s).
- *
- * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally
- * done at the end is not done here.
- */
-static inline uint32_t jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
-{
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += initval;
-
- __jhash_mix(a, b, c);
-
- return c;
-}
-
-static inline uint32_t jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
-{
- return jhash_3words(a, b, 0, initval);
-}
-
-static inline uint32_t jhash_1word(uint32_t a, uint32_t initval)
-{
- return jhash_3words(a, 0, 0, initval);
-}
-
-static unsigned int get_randomness(int *state)
-{
- return jhash_1word((*state)++, *state * 1103515243);
-}
-
-static int state;
-
-/* Lengthening headers is pointless: other end will just wait for more
- * data and timeout. We merely shorten the length. */
-static void corrupt_header(char *output, const struct xsd_sockmsg *msg,
- unsigned int *next_bit)
-{
- struct xsd_sockmsg newmsg = *msg;
-
- while (*next_bit < sizeof(*msg)) {
- if (newmsg.len)
- newmsg.len = get_randomness(&state) % newmsg.len;
- *next_bit += get_randomness(&state) % RAND_FREQ;
- }
- memcpy(output, &newmsg, sizeof(newmsg));
-}
-
-#define read_all_choice read_all
-static bool write_all_choice(int fd, const void *data, unsigned int len)
-{
- char corrupt_data[len];
- bool ret;
- static unsigned int next_bit;
-
- if (len == sizeof(struct xsd_sockmsg)
- && ((unsigned long)data % __alignof__(struct xsd_sockmsg)) == 0)
- corrupt_header(corrupt_data, data, &next_bit);
- else {
- memcpy(corrupt_data, data, len);
- while (next_bit < len * CHAR_BIT) {
- corrupt_data[next_bit/CHAR_BIT]
- ^= (1 << (next_bit%CHAR_BIT));
- next_bit += get_randomness(&state) % RAND_FREQ;
- }
- }
-
- ret = xs_write_all(fd, corrupt_data, len);
- next_bit -= len * CHAR_BIT;
- return ret;
-}
-
-#include "xs.c"
-
-static char *random_path(void)
-{
- unsigned int i;
- char *ret = NULL;
-
- if (get_randomness(&state) % 20 == 0)
- return talloc_strdup(NULL, "/");
-
- for (i = 0; i < 1 || (get_randomness(&state) % 2); i++) {
- ret = talloc_asprintf_append(ret, "/%i",
- get_randomness(&state) % 15);
- }
- return ret;
-}
-
-/* Do the next operation, return the results. */
-static void do_next_op(struct xs_handle *h, bool verbose)
-{
- char *name;
- unsigned int num;
-
- if (verbose)
- printf("State %i: ", state);
-
- name = random_path();
- switch (get_randomness(&state) % 9) {
- case 0:
- if (verbose)
- printf("DIR %s\n", name);
- free(xs_directory(h, name, &num));
- break;
- case 1:
- if (verbose)
- printf("READ %s\n", name);
- free(xs_read(h, name, &num));
- break;
- case 2: {
- char *contents = talloc_asprintf(NULL, "%i",
- get_randomness(&state));
- unsigned int len = get_randomness(&state)%(strlen(contents)+1);
- if (verbose)
- printf("WRITE %s %.*s\n", name, len, contents);
- xs_write(h, name, contents, len);
- break;
- }
- case 3:
- if (verbose)
- printf("MKDIR %s\n", name);
- xs_mkdir(h, name);
- break;
- case 4:
- if (verbose)
- printf("RM %s\n", name);
- xs_rm(h, name);
- break;
- case 5:
- if (verbose)
- printf("GETPERMS %s\n", name);
- free(xs_get_permissions(h, name, &num));
- break;
- case 6: {
- unsigned int i, num = get_randomness(&state)%8;
- struct xs_permissions perms[num];
-
- if (verbose)
- printf("SETPERMS %s: ", name);
- for (i = 0; i < num; i++) {
- perms[i].id = get_randomness(&state)%8;
- perms[i].perms = get_randomness(&state)%4;
- if (verbose)
- printf("%i%c ", perms[i].id,
- perms[i].perms == XS_PERM_WRITE ? 'W'
- : perms[i].perms == XS_PERM_READ ? 'R'
- : perms[i].perms ==
- (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
- : 'N');
- }
- if (verbose)
- printf("\n");
- xs_set_permissions(h, name, perms, num);
- break;
- }
- case 7: {
- if (verbose)
- printf("START %s\n", name);
- xs_transaction_start(h);
- break;
- }
- case 8: {
- bool abort = (get_randomness(&state) % 2);
-
- if (verbose)
- printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
- xs_transaction_end(h, abort);
- break;
- }
- default:
- barf("Impossible randomness");
- }
-}
-
-static struct xs_handle *h;
-static void alarmed(int sig __attribute__((unused)))
-{
- /* We force close on timeout. */
- close(h->fd);
-}
-
-static int start_daemon(void)
-{
- int fds[2];
- int daemon_pid;
-
- /* Start daemon. */
- pipe(fds);
- if ((daemon_pid = fork())) {
- /* Child writes PID when its ready: we wait for that. */
- char buffer[20];
- close(fds[1]);
- if (read(fds[0], buffer, sizeof(buffer)) < 0)
- barf("Failed to summon daemon");
- close(fds[0]);
- return daemon_pid;
- } else {
- dup2(fds[1], STDOUT_FILENO);
- close(fds[0]);
-#if 1
- execlp("valgrind", "valgrind", "--log-file=/tmp/xs_crashme.vglog", "-q", "./xenstored_test", "--output-pid",
- "--no-fork", "--trace-file=/tmp/trace", NULL);
-#else
- execlp("./xenstored_test", "xenstored_test", "--output-pid",
- "--no-fork", NULL);
-#endif
- exit(1);
- }
-}
-
-
-int main(int argc, char **argv)
-{
- unsigned int i;
- int pid;
-
- if (argc != 3 && argc != 4)
- barf("Usage: xs_crashme <iterations> <seed> [pid]");
-
- if (argc == 3)
- pid = start_daemon();
- else
- pid = atoi(argv[3]);
-
- state = atoi(argv[2]);
- h = xs_daemon_open();
- if (!h)
- barf_perror("Opening connection to daemon");
- signal(SIGALRM, alarmed);
- for (i = 0; i < (unsigned)atoi(argv[1]); i++) {
- alarm(1);
- do_next_op(h, false);
- if (i % (atoi(argv[1]) / 72 ?: 1) == 0) {
- printf(".");
- fflush(stdout);
- }
- if (kill(pid, 0) != 0)
- barf_perror("Pinging daemon on iteration %i", i);
- if (h->fd < 0) {
- xs_daemon_close(h);
- h = xs_daemon_open();
- if (!h)
- barf_perror("Connecting on iteration %i", i);
- }
- }
- kill(pid, SIGTERM);
- return 0;
-}
-
diff -r 87b0b6a08dbd -r 42586a0f4407 tools/xenstore/xs_random.c
--- a/tools/xenstore/xs_random.c Mon Jul 09 09:22:58 2007 -0600
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,1590 +0,0 @@
-/* Random tests.
-
- We check that the results from a real filesystem are the same.
-*/
-#include <sys/types.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <dirent.h>
-#include <errno.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/wait.h>
-#include "xs.h"
-#include "talloc.h"
-#include "utils.h"
-
-struct ops
-{
- char *name;
-
- char **(*dir)(void *h, const char *path, unsigned int *num);
-
- void *(*read)(void *h, const char *path, unsigned int *len);
-
- bool (*write)(void *h, const char *path, const void *data,
- unsigned int len);
-
- bool (*mkdir)(void *h, const char *path);
-
- bool (*rm)(void *h, const char *path);
-
- struct xs_permissions *(*get_perms)(void *h,
- const char *path,
- unsigned int *num);
-
- bool (*set_perms)(void *h,
- const char *path,
- struct xs_permissions *perms,
- unsigned int num);
-
- bool (*transaction_start)(void *h);
- bool (*transaction_end)(void *h, bool abort);
-
- /* Create and destroy a new handle. */
- void *(*handle)(const char *path);
- void (*close)(void *);
-};
-
-struct file_ops_info
-{
- const char *base;
- char *transact_base;
-};
-
-static void convert_to_dir(const char *dirname)
-{
- char *tmpname = talloc_asprintf(dirname, "%s.tmp", dirname);
- if (rename(dirname, tmpname) != 0)
- barf_perror("Failed to rename %s to %s", dirname, tmpname);
- if (mkdir(dirname, 0700) != 0)
- barf_perror("Failed to mkdir %s", dirname);
- if (rename(tmpname,talloc_asprintf(dirname, "%s/.DATA", dirname)) != 0)
- barf_perror("Failed to rename into %s", dirname);
- /* If perms exists, move it in. */
- rename(talloc_asprintf(dirname, "%s.perms", dirname),
- talloc_asprintf(dirname, "%s/.perms", dirname));
-}
-
-/* Files can be used as dirs, too. Convert them when they are. */
-static void maybe_convert_to_directory(const char *filename)
-{
- struct stat st;
- char *dirname = talloc_asprintf(
- filename, "%.*s",
- (int)(strrchr(filename, '/') - filename), filename);
- if (lstat(dirname, &st) == 0 && S_ISREG(st.st_mode))
- convert_to_dir(dirname);
-}
-
-static char *get_name(struct file_ops_info *info, const char *path)
-{
- if (info->transact_base)
- return talloc_asprintf(path, "%s%s", info->transact_base,
- path);
- return talloc_asprintf(path, "%s%s", info->base, path);
-}
-
-static char *path_to_name(struct file_ops_info *info, const char *path)
-{
- char *filename = get_name(info, path);
- maybe_convert_to_directory(filename);
- return filename;
-}
-
-static char **file_directory(struct file_ops_info *info,
- const char *path, unsigned int *num)
-{
- char **ret;
- DIR *dir;
- struct dirent *dirent;
- char *p, *dirname = path_to_name(info, path);
- unsigned int i, len = 0;
- struct stat st;
-
- /* If it exists, but isn't a directory, we convert it. */
- if (lstat(dirname, &st) == 0 && !S_ISDIR(st.st_mode))
- convert_to_dir(dirname);
-
- *num = 0;
- dir = opendir(dirname);
- if (!dir)
- return NULL;;
-
- /* Once to count them. */
- while ((dirent = readdir(dir)) != NULL) {
- if (strchr(dirent->d_name, '.'))
- continue;
- len += strlen(dirent->d_name) + 1;
- (*num)++;
- }
- rewinddir(dir);
-
- /* Now allocate and fill in. */
- ret = malloc(sizeof(char *) * *num + len);
- p = (char *)&ret[*num];
- i = 0;
- while ((dirent = readdir(dir)) != NULL) {
- if (strchr(dirent->d_name, '.'))
- continue;
- ret[i] = p;
- strcpy(p, dirent->d_name);
- p += strlen(p) + 1;
- i++;
- }
- closedir(dir);
-
- return ret;
-}
-
-static char *filename_to_data(const char *filename)
-{
- struct stat st;
-
- if (lstat(filename, &st) == 0 && S_ISDIR(st.st_mode))
- return talloc_asprintf(filename, "%s/.DATA", filename);
- return (char *)filename;
-}
-
-static void *file_read(struct file_ops_info *info,
- const char *path, unsigned int *len)
-{
- void *ret;
- char *filename = filename_to_data(path_to_name(info, path));
- unsigned long size;
-
- ret = grab_file(filename, &size);
- /* Directory exists, .DATA doesn't. */
- if (!ret && errno == ENOENT && strends(filename, ".DATA")) {
- ret = strdup("");
- size = 0;
- }
- *len = size;
- return ret;
-}
-
-static struct xs_permissions *file_get_perms(struct file_ops_info *info,
- const char *path,
- unsigned int *num)
-{
- void *perms;
- struct xs_permissions *ret;
- char *filename = path_to_name(info, path);
- char *permfile;
- unsigned long size;
- struct stat st;
-
- if (lstat(filename, &st) != 0)
- return NULL;
-
- if (S_ISDIR(st.st_mode))
- permfile = talloc_asprintf(path, "%s/.perms", filename);
- else
- permfile = talloc_asprintf(path, "%s.perms", filename);
-
- perms = grab_file(permfile, &size);
- if (!perms)
- barf("Grabbing permissions for %s", permfile);
- *num = xs_count_strings(perms, size);
-
- ret = new_array(struct xs_permissions, *num);
- if (!xs_strings_to_perms(ret, *num, perms))
- barf("Reading permissions from %s", permfile);
- release_file(perms, size);
- return ret;
-}
-
-static void do_command(const char *cmd)
-{
- int ret;
-
- ret = system(cmd);
- if (ret == -1 || !WIFEXITED(ret) || WEXITSTATUS(ret) != 0)
- barf_perror("Failed '%s': %i", cmd, ret);
-}
-
-static void init_perms(const char *filename)
-{
- struct stat st;
- char *permfile, *command;
-
- if (lstat(filename, &st) != 0)
- barf_perror("Failed to stat %s", filename);
-
- if (S_ISDIR(st.st_mode))
- permfile = talloc_asprintf(filename, "%s/.perms", filename);
- else
- permfile = talloc_asprintf(filename, "%s.perms", filename);
-
- /* Leave permfile if it already exists. */
- if (lstat(permfile, &st) == 0)
- return;
-
- /* Copy permissions from parent */
- command = talloc_asprintf(filename, "cp %.*s/.perms %s",
- (int)(strrchr(filename, '/') - filename),
- filename, permfile);
- do_command(command);
-}
-
-static bool file_set_perms(struct file_ops_info *info,
- const char *path,
- struct xs_permissions *perms,
- unsigned int num)
-{
- unsigned int i;
- char *filename = path_to_name(info, path);
- char *permfile;
- int fd;
- struct stat st;
-
- if (num < 1) {
- errno = EINVAL;
- return false;
- }
-
- /* Check non-perm file exists/ */
- if (lstat(filename, &st) != 0)
- return false;
-
- if (S_ISDIR(st.st_mode))
- permfile = talloc_asprintf(path, "%s/.perms", filename);
- else
- permfile = talloc_asprintf(path, "%s.perms", filename);
-
- fd = open(permfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
- if (fd < 0)
- return false;
-
- for (i = 0; i < num; i++) {
- char buffer[100];
-
- if (!xs_perm_to_string(&perms[i], buffer)) {
- int saved_errno = errno;
- close(fd);
- errno = saved_errno;
- return false;
- }
- if (write(fd, buffer, strlen(buffer) + 1)
- != (int)strlen(buffer) + 1)
- barf_perror("Failed to write perm");
- }
- close(fd);
- return true;
-}
-
-static char *parent_filename(const char *name)
-{
- char *slash = strrchr(name + 1, '/');
- if (!slash)
- return talloc_strdup(name, "/");
- return talloc_asprintf(name, "%.*s", (int)(slash-name), name);
-}
-
-static void make_dirs(const char *filename)
-{
- struct stat st;
-
- if (lstat(filename, &st) == 0 && S_ISREG(st.st_mode))
- convert_to_dir(filename);
-
- if (mkdir(filename, 0700) == 0) {
- init_perms(filename);
- return;
- }
- if (errno == EEXIST)
- return;
-
- make_dirs(parent_filename(filename));
- if (mkdir(filename, 0700) != 0)
- barf_perror("Failed to mkdir %s", filename);
- init_perms(filename);
-}
-
-static bool file_write(struct file_ops_info *info,
- const char *path, const void *data,
- unsigned int len)
-{
- char *filename = filename_to_data(path_to_name(info, path));
- int fd;
-
- make_dirs(parent_filename(filename));
- fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0600);
- if (fd < 0)
- return false;
-
- if (write(fd, data, len) != (int)len)
- barf_perror("Bad write to %s", filename);
-
- init_perms(filename);
- close(fd);
- return true;
-}
-
-static bool file_mkdir(struct file_ops_info *info, const char *path)
-{
- char *dirname = path_to_name(info, path);
-
- make_dirs(parent_filename(dirname));
- if (mkdir(dirname, 0700) != 0)
- return (errno == EEXIST);
-
- init_perms(dirname);
- return true;
-}
-
-static bool file_rm(struct file_ops_info *info, const char *path)
-{
- char *filename = path_to_name(info, path);
- struct stat st;
-
- if (lstat(filename, &st) != 0) {
- if (lstat(parent_filename(filename), &st) != 0)
- return false;
- return true;
- }
-
- if (streq(path, "/")) {
- errno = EINVAL;
- return false;
- }
-
- do_command(talloc_asprintf(path, "rm -f %s.perms; rm -r %s",
- filename, filename));
- return true;
-}
-
-static bool file_transaction_start(struct file_ops_info *info)
-{
- char *cmd;
-
- if (info->transact_base) {
- errno = EBUSY;
- return false;
- }
-
- info->transact_base = talloc_asprintf(NULL, "%s.transact", info->base);
- cmd = talloc_asprintf(NULL, "cp -r %s %s",
- info->base, info->transact_base);
- do_command(cmd);
- talloc_free(cmd);
- return true;
-}
-
-static bool file_transaction_end(struct file_ops_info *info, bool abort)
-{
- char *old, *cmd;
-
- if (!info->transact_base) {
- errno = ENOENT;
- return false;
- }
-
- if (abort) {
- cmd = talloc_asprintf(NULL, "rm -rf %s", info->transact_base);
- do_command(cmd);
- goto success;
- }
-
- old = talloc_asprintf(NULL, "rm -rf %s", info->base);
- do_command(old);
- talloc_free(old);
-
- cmd = talloc_asprintf(NULL, "mv %s %s",
- info->transact_base, info->base);
- do_command(cmd);
-
-success:
- talloc_free(cmd);
- talloc_free(info->transact_base);
- info->transact_base = NULL;
- return true;
-}
-
-static struct file_ops_info *file_handle(const char *dir)
-{
- struct file_ops_info *info = talloc(NULL, struct file_ops_info);
-
- info->base = dir;
- info->transact_base = NULL;
- return info;
-}
-
-static void file_close(struct file_ops_info *handle)
-{
- talloc_free(handle);
-}
-
-static struct xs_handle *xs_handle(const char *dir __attribute__((unused)))
-{
- struct xs_handle *h;
-
- h = xs_daemon_open();
- if (!h)
- barf_perror("Connecting to xs daemon");
- return h;
-}
-
-static void xs_close(struct xs_handle *handle)
-{
- xs_daemon_close(handle);
-}
-
-struct ops file_ops = {
- .name = "FILE",
- .dir = (void *)file_directory,
- .read = (void *)file_read,
- .write = (void *)file_write,
- .mkdir = (void *)file_mkdir,
- .rm = (void *)file_rm,
- .get_perms = (void *)file_get_perms,
- .set_perms = (void *)file_set_perms,
- .transaction_start = (void *)file_transaction_start,
- .transaction_end = (void *)file_transaction_end,
- .handle = (void *)file_handle,
- .close = (void *)file_close,
-};
-
-struct ops xs_ops = {
- .name = "XS",
- .dir = (void *)xs_directory,
- .read = (void *)xs_read,
- .write = (void *)xs_write,
- .mkdir = (void *)xs_mkdir,
- .rm = (void *)xs_rm,
- .get_perms = (void *)xs_get_permissions,
- .set_perms = (void *)xs_set_permissions,
- .transaction_start = (void *)xs_transaction_start,
- .transaction_end = (void *)xs_transaction_end,
- .handle = (void *)xs_handle,
- .close = (void *)xs_close,
-};
-
-static int strptrcmp(const void *a, const void *b)
-{
- return strcmp(*(char **)a, *(char **)b);
-}
-
-static void sort_dir(char **dir, unsigned int num)
-{
- qsort(dir, num, sizeof(char *), strptrcmp);
-}
-
-static char *dump_dir(struct ops *ops,
- void *h,
- const char *node,
- char **dir,
- unsigned int numdirs,
- unsigned int depth)
-{
- char *ret = talloc_strdup(node, "");
- unsigned int i;
- char spacing[depth+1];
-
- memset(spacing, ' ', depth);
- spacing[depth] = '\0';
-
- sort_dir(dir, numdirs);
-
- for (i = 0; i < numdirs; i++) {
- struct xs_permissions *perms;
- unsigned int j, numperms;
- unsigned int len;
- char *contents;
- unsigned int subnum;
- char **subdirs;
- char *subret;
- char *subnode = talloc_asprintf(node, "%s/%s", node, dir[i]);
-
- perms = ops->get_perms(h, subnode, &numperms);
- if (!perms)
- return NULL;
- ret = talloc_asprintf_append(ret, "%s%s: ", spacing, dir[i]);
- for (j = 0; j < numperms; j++) {
- char buffer[100];
- if (!xs_perm_to_string(&perms[j], buffer))
- barf("perm to string");
- ret = talloc_asprintf_append(ret, "%s ", buffer);
- }
- free(perms);
- ret = talloc_asprintf_append(ret, "\n");
-
- /* Even directories can have contents. */
- contents = ops->read(h, subnode, &len);
- if (!contents) {
- if (errno != EISDIR)
- return NULL;
- } else {
- ret = talloc_asprintf_append(ret, " %s(%.*s)\n",
- spacing, len, contents);
- free(contents);
- }
-
- /* Every node is a directory. */
- subdirs = ops->dir(h, subnode, &subnum);
- if (!subdirs)
- return NULL;
- subret = dump_dir(ops, h, subnode, subdirs, subnum, depth+1);
- if (!subret)
- return NULL;
- ret = talloc_asprintf_append(ret, "%s", subret);
- free(subdirs);
- }
- return ret;
-}
-
-static char *dump(struct ops *ops, void *h)
-{
- char **subdirs;
- unsigned int subnum;
- char *ret = NULL, *root = talloc_strdup(NULL, "/");
-
- subdirs = ops->dir(h, root, &subnum);
- if (subdirs) {
- ret = dump_dir(ops, h, talloc_strdup(root, ""), subdirs,
- subnum, 0);
- free(subdirs);
- if (ret)
- talloc_steal(NULL, ret);
- }
- talloc_free(root);
- return ret;
-}
-
-/* jhash.h: Jenkins hash support.
- *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
- *
- * http://burtleburtle.net/bob/hash/
- *
- * These are the credits from Bob's sources:
- *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose. It has no warranty.
- *
- * Copyright (C) 2003 David S. Miller (davem@redhat.com)
- *
- * I've modified Bob's hash to be useful in the Linux kernel, and
- * any bugs present are surely my fault. -DaveM
- */
-
-/* NOTE: Arguments are modified. */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
-}
-
-/* The golden ratio: an arbitrary value */
-#define JHASH_GOLDEN_RATIO 0x9e3779b9
-
-/* The most generic version, hashes an arbitrary sequence
- * of bytes. No alignment or length assumptions are made about
- * the input key.
- */
-static inline uint32_t jhash(const void *key, uint32_t length, uint32_t initval)
-{
- uint32_t a, b, c, len;
- const uint8_t *k = key;
-
- len = length;
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
-
- while (len >= 12) {
- a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
- b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
- c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
-
- __jhash_mix(a,b,c);
-
- k += 12;
- len -= 12;
- }
-
- c += length;
- switch (len) {
- case 11: c += ((uint32_t)k[10]<<24);
- case 10: c += ((uint32_t)k[9]<<16);
- case 9 : c += ((uint32_t)k[8]<<8);
- case 8 : b += ((uint32_t)k[7]<<24);
- case 7 : b += ((uint32_t)k[6]<<16);
- case 6 : b += ((uint32_t)k[5]<<8);
- case 5 : b += k[4];
- case 4 : a += ((uint32_t)k[3]<<24);
- case 3 : a += ((uint32_t)k[2]<<16);
- case 2 : a += ((uint32_t)k[1]<<8);
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-/* A special optimized version that handles 1 or more of uint32_ts.
- * The length parameter here is the number of uint32_ts in the key.
- */
-static inline uint32_t jhash2(uint32_t *k, uint32_t length, uint32_t initval)
-{
- uint32_t a, b, c, len;
-
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
- len = length;
-
- while (len >= 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- __jhash_mix(a, b, c);
- k += 3; len -= 3;
- }
-
- c += length * 4;
-
- switch (len) {
- case 2 : b += k[1];
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-
-/* Special ultra-optimized versions that know they are hashing exactly
- * 3, 2 or 1 word(s).
- *
- * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally
- * done at the end is not done here.
- */
-static inline uint32_t jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval)
-{
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += initval;
-
- __jhash_mix(a, b, c);
-
- return c;
-}
-
-static inline uint32_t jhash_2words(uint32_t a, uint32_t b, uint32_t initval)
-{
- return jhash_3words(a, b, 0, initval);
-}
-
-static inline uint32_t jhash_1word(uint32_t a, uint32_t initval)
-{
- return jhash_3words(a, 0, 0, initval);
-}
-
-static unsigned int get_randomness(int *state)
-{
- return jhash_1word((*state)++, *state * 1103515243);
-}
-
-static char *random_path(int *state)
-{
- unsigned int i;
- char *ret = NULL;
-
- if (get_randomness(state) % 20 == 0)
- return talloc_strdup(NULL, "/");
-
- for (i = 0; i < 1 || (get_randomness(state) % 2); i++) {
- ret = talloc_asprintf_append(ret, "/%i",
- get_randomness(state) % 15);
- }
- return ret;
-}
-
-static char *bool_to_errstring(bool result)
-{
- if (result)
- return talloc_strdup(NULL, "OK");
-
- /* Real daemon can never return this. */
- if (errno == ENOTDIR)
- errno = ENOENT;
- return talloc_asprintf(NULL, "FAILED:%s", strerror(errno));
-}
-
-static char *linearize_dir(char **dir, unsigned int *num)
-{
- char *result = NULL;
- unsigned int i;
-
- if (!dir)
- return bool_to_errstring(false);
-
- if (!*num) {
- free(dir);
- return talloc_strdup(NULL, "");
- }
-
- sort_dir(dir, *num);
- for (i = 0; i < *num; i++)
- result = talloc_asprintf_append(result, "%s\n", dir[i]);
- free(dir);
- return result;
-}
-
-static char *linearize_read(char *read, unsigned int *size)
-{
- char *ret;
-
- if (!read)
- return bool_to_errstring(false);
-
- ret = talloc_asprintf(NULL, "%i:%.*s", *size, *size, read);
- free(read);
- return ret;
-}
-
-static char *linearize_perms(struct xs_permissions *perms, unsigned int *size)
-{
- char *ret = NULL;
- unsigned int i;
-
- if (!perms)
- return bool_to_errstring(false);
-
- for (i = 0; i < *size; i++)
- ret = talloc_asprintf_append(ret, "(%u %u)",
- perms[i].id, perms[i].perms);
-
- free(perms);
- return ret;
-}
-
-/* Do the next operation, return the results. */
-static char *do_next_op(struct ops *ops, void *h, int state, bool verbose)
-{
- char *name;
- unsigned int num;
- char *ret;
-
- if (verbose)
- printf("State %i: ", state);
-
- name = random_path(&state);
- switch (get_randomness(&state) % 9) {
- case 0:
- if (verbose)
- printf("DIR %s\n", name);
- ret = linearize_dir(ops->dir(h, name, &num), &num);
- break;
- case 1:
- if (verbose)
- printf("READ %s\n", name);
- ret = linearize_read(ops->read(h, name, &num), &num);
- break;
- case 2: {
- char *contents = talloc_asprintf(NULL, "%i",
- get_randomness(&state));
- unsigned int len = get_randomness(&state)%(strlen(contents)+1);
- if (verbose)
- printf("WRITE %s %.*s\n", name, len, contents);
- ret = bool_to_errstring(ops->write(h, name, contents, len));
- talloc_steal(ret, contents);
- break;
- }
- case 3:
- if (verbose)
- printf("MKDIR %s\n", name);
- ret = bool_to_errstring(ops->mkdir(h, name));
- break;
- case 4:
- if (verbose)
- printf("RM %s\n", name);
- ret = bool_to_errstring(ops->rm(h, name));
- break;
- case 5:
- if (verbose)
- printf("GETPERMS %s\n", name);
- ret = linearize_perms(ops->get_perms(h, name, &num),
- &num);
- break;
- case 6: {
- unsigned int i, num = get_randomness(&state)%8;
- struct xs_permissions perms[num];
-
- if (verbose)
- printf("SETPERMS %s: ", name);
- for (i = 0; i < num; i++) {
- perms[i].id = get_randomness(&state)%8;
- perms[i].perms = get_randomness(&state)%4;
- if (verbose)
- printf("%i%c ", perms[i].id,
- perms[i].perms == XS_PERM_WRITE ? 'W'
- : perms[i].perms == XS_PERM_READ ? 'R'
- : perms[i].perms ==
- (XS_PERM_READ|XS_PERM_WRITE) ? 'B'
- : 'N');
- }
- if (verbose)
- printf("\n");
- ret = bool_to_errstring(ops->set_perms(h, name, perms,
- num));
- break;
- }
- case 7: {
- if (verbose)
- printf("START %s\n", name);
- ret = bool_to_errstring(ops->transaction_start(h));
- if (streq(ret, "OK")) {
- talloc_free(ret);
- ret = talloc_asprintf(NULL, "OK:START-TRANSACT");
- }
-
- break;
- }
- case 8: {
- bool abort = (get_randomness(&state) % 2);
-
- if (verbose)
- printf("STOP %s\n", abort ? "ABORT" : "COMMIT");
- ret = bool_to_errstring(ops->transaction_end(h, abort));
- if (streq(ret, "OK")) {
- talloc_free(ret);
- ret = talloc_strdup(NULL, "OK:STOP-TRANSACT");
- }
- break;
- }
- default:
- barf("Impossible randomness");
- }
-
- talloc_steal(ret, name);
- return ret;
-}
-
-static int daemon_pid;
-
-static void cleanup_xs_ops(void)
-{
- char *cmd;
-
- if (daemon_pid) {

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog
[xen-unstable] merge with xen-unstable.hg [ In reply to ]
# HG changeset patch
# User Hollis Blanchard <hollisb@us.ibm.com>
# Date 1186066458 18000
# Node ID 04fb85a46dc555bc8f306dc98119858b7c5ad083
# Parent 976db28bcc43bfbb38728aa08e079e6c4d20b3bb
# Parent 88bb0d305308a2cab31fd8559a6a2719db1ea55a
merge with xen-unstable.hg
---
tools/blktap/drivers/blktapctrl.c | 44 +-
tools/blktap/lib/blktaplib.h | 4
tools/firmware/hvmloader/smbios.c | 2
tools/libxc/ia64/Makefile | 2
tools/libxc/ia64/dom_fw_acpi.c | 13
tools/python/xen/util/acmpolicy.py | 7
tools/xenstore/utils.c | 80 ----
tools/xenstore/utils.h | 27 -
tools/xenstore/xenstored_core.c | 9
tools/xenstore/xenstored_domain.c | 9
tools/xenstore/xs_tdb_dump.c | 2
tools/xm-test/lib/XmTestLib/acm.py | 4
tools/xm-test/tests/security-acm/07_security-acm_pol_update.py | 9
tools/xm-test/tests/security-acm/09_security-acm_pol_update.py | 9
xen/arch/ia64/xen/dom_fw_common.c | 11
xen/arch/ia64/xen/dom_fw_dom0.c | 13
xen/arch/x86/acpi/boot.c | 15
xen/arch/x86/domain_build.c | 7
xen/arch/x86/hvm/instrlen.c | 113 +++---
xen/arch/x86/hvm/platform.c | 14
xen/arch/x86/hvm/svm/intr.c | 83 ++--
xen/arch/x86/hvm/svm/svm.c | 87 ++---
xen/arch/x86/hvm/vmx/intr.c | 78 +---
xen/arch/x86/hvm/vmx/vmcs.c | 17 -
xen/arch/x86/hvm/vmx/vmx.c | 167 +++-------
xen/arch/x86/mm/shadow/multi.c | 2
xen/common/libelf/libelf-dominfo.c | 101 +++++-
xen/common/libelf/libelf-loader.c | 44 ++
xen/common/libelf/libelf-tools.c | 30 +
xen/drivers/acpi/tables.c | 154 +++++++++
xen/include/asm-ia64/dom_fw_common.h | 1
xen/include/asm-x86/hvm/hvm.h | 70 +++-
xen/include/asm-x86/hvm/svm/vmcb.h | 8
xen/include/asm-x86/hvm/vmx/vmcs.h | 7
xen/include/asm-x86/hvm/vmx/vmx.h | 36 --
xen/include/public/libelf.h | 76 ++--
xen/include/xen/acpi.h | 3
37 files changed, 797 insertions(+), 561 deletions(-)

diff -r 976db28bcc43 -r 04fb85a46dc5 tools/blktap/drivers/blktapctrl.c
--- a/tools/blktap/drivers/blktapctrl.c Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/blktap/drivers/blktapctrl.c Thu Aug 02 09:54:18 2007 -0500
@@ -42,6 +42,7 @@
#include <errno.h>
#include <sys/types.h>
#include <linux/types.h>
+#include <sys/wait.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/poll.h>
@@ -472,11 +473,38 @@ static int read_msg(int fd, int msgtype,

}

+int launch_tapdisk(char *wrctldev, char *rdctldev)
+{
+ char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
+ pid_t child;
+
+ if ((child = fork()) < 0)
+ return -1;
+
+ if (!child) {
+ int i;
+ for (i = 0 ; i < sysconf(_SC_OPEN_MAX) ; i++)
+ if (i != STDIN_FILENO &&
+ i != STDOUT_FILENO &&
+ i != STDERR_FILENO)
+ close(i);
+
+ execvp("tapdisk", argv);
+ _exit(1);
+ } else {
+ pid_t got;
+ do {
+ got = waitpid(child, NULL, 0);
+ } while (got != child);
+ }
+ return 0;
+}
+
int blktapctrl_new_blkif(blkif_t *blkif)
{
blkif_info_t *blk;
int major, minor, fd_read, fd_write, type, new;
- char *rdctldev, *wrctldev, *cmd, *ptr;
+ char *rdctldev, *wrctldev, *ptr;
image_t *image;
blkif_t *exist = NULL;
static uint16_t next_cookie = 0;
@@ -504,12 +532,6 @@ int blktapctrl_new_blkif(blkif_t *blkif)
free(rdctldev);
return -1;
}
- if (asprintf(&cmd, "tapdisk %s %s", wrctldev, rdctldev) == -1) {
- free(rdctldev);
- free(wrctldev);
- return -1;
- }
-
blkif->fds[READ] = open_ctrl_socket(rdctldev);
blkif->fds[WRITE] = open_ctrl_socket(wrctldev);

@@ -517,15 +539,14 @@ int blktapctrl_new_blkif(blkif_t *blkif)
goto fail;

/*launch the new process*/
- DPRINTF("Launching process, CMDLINE [%s]\n",cmd);
- if (system(cmd) == -1) {
- DPRINTF("Unable to fork, cmdline: [%s]\n",cmd);
+ DPRINTF("Launching process, CMDLINE [tapdisk %s %s]\n",wrctldev, rdctldev);
+ if (launch_tapdisk(wrctldev, rdctldev) == -1) {
+ DPRINTF("Unable to fork, cmdline: [tapdisk %s %s]\n",wrctldev, rdctldev);
return -1;
}

free(rdctldev);
free(wrctldev);
- free(cmd);
} else {
DPRINTF("Process exists!\n");
blkif->fds[READ] = exist->fds[READ];
@@ -605,7 +626,6 @@ int open_ctrl_socket(char *devname)
{
int ret;
int ipc_fd;
- char *cmd;
fd_set socks;
struct timeval timeout;

diff -r 976db28bcc43 -r 04fb85a46dc5 tools/blktap/lib/blktaplib.h
--- a/tools/blktap/lib/blktaplib.h Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/blktap/lib/blktaplib.h Thu Aug 02 09:54:18 2007 -0500
@@ -169,12 +169,14 @@ typedef struct image {
unsigned int info;
} image_t;

+/* 16-byte message header, immediately followed by message payload. */
typedef struct msg_hdr {
- uint16_t type;
+ uint16_t type;
uint16_t len;
uint16_t drivertype;
uint16_t cookie;
uint8_t readonly;
+ uint8_t pad[7];
} msg_hdr_t;

typedef struct msg_newdev {
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/firmware/hvmloader/smbios.c
--- a/tools/firmware/hvmloader/smbios.c Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/firmware/hvmloader/smbios.c Thu Aug 02 09:54:18 2007 -0500
@@ -169,7 +169,7 @@ hvm_write_smbios_tables(void)
/* temporary variables used to build up Xen version string */
char *p = NULL; /* points to next point of insertion */
unsigned len = 0; /* length of string already composed */
- char *tmp = NULL; /* holds result of itoa() */
+ char tmp[16]; /* holds result of itoa() */
unsigned tmp_len; /* length of next string to add */

hypercall_xen_version(XENVER_guest_handle, uuid);
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/libxc/ia64/Makefile
--- a/tools/libxc/ia64/Makefile Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/libxc/ia64/Makefile Thu Aug 02 09:54:18 2007 -0500
@@ -5,6 +5,8 @@ GUEST_SRCS-y += ia64/xc_ia64_linux_resto
GUEST_SRCS-y += ia64/xc_ia64_linux_restore.c

GUEST_SRCS-y += ia64/xc_dom_ia64_util.c
+GUEST_SRCS-y += ia64/dom_fw_acpi.c
+
DOMFW_SRCS_BASE := dom_fw_common.c dom_fw_domu.c dom_fw_asm.S
DOMFW_SRCS := $(addprefix ia64/, $(DOMFW_SRCS_BASE))
$(DOMFW_SRCS):
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/libxc/ia64/dom_fw_acpi.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/ia64/dom_fw_acpi.c Thu Aug 02 09:54:18 2007 -0500
@@ -0,0 +1,13 @@
+#include <inttypes.h>
+#include <xen/acpi.h>
+
+uint8_t
+generate_acpi_checksum(void *tbl, unsigned long len)
+{
+ uint8_t *ptr, sum = 0;
+
+ for ( ptr = tbl; len > 0 ; len--, ptr++ )
+ sum += *ptr;
+
+ return 0 - sum;
+}
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/python/xen/util/acmpolicy.py
--- a/tools/python/xen/util/acmpolicy.py Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/python/xen/util/acmpolicy.py Thu Aug 02 09:54:18 2007 -0500
@@ -818,12 +818,13 @@ class ACMPolicy(XSPolicy):
if successful,the policy's flags will indicate that the
policy is the one loaded into the hypervisor
"""
- (ret, output) = commands.getstatusoutput(
+ if not self.isloaded():
+ (ret, output) = commands.getstatusoutput(
security.xensec_tool +
" loadpolicy " +
self.get_filename(".bin"))
- if ret != 0:
- return -xsconstants.XSERR_POLICY_LOAD_FAILED
+ if ret != 0:
+ return -xsconstants.XSERR_POLICY_LOAD_FAILED
return xsconstants.XSERR_SUCCESS

def isloaded(self):
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/utils.c
--- a/tools/xenstore/utils.c Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xenstore/utils.c Thu Aug 02 09:54:18 2007 -0500
@@ -8,20 +8,19 @@
#include <fcntl.h>
#include <sys/types.h>
#include <signal.h>
-
#include "utils.h"

void xprintf(const char *fmt, ...)
{
- static FILE *out = NULL;
va_list args;
- if (!out)
- out = stderr;
+
+ if (!stderr)
+ return; /* could trace()? */

va_start(args, fmt);
- vfprintf(out, fmt, args);
+ vfprintf(stderr, fmt, args);
va_end(args);
- fflush(out);
+ fflush(stderr);
}

void barf(const char *fmt, ...)
@@ -61,72 +60,3 @@ void barf_perror(const char *fmt, ...)
}
exit(1);
}
-
-void *_realloc_array(void *ptr, size_t size, size_t num)
-{
- if (num >= SIZE_MAX/size)
- return NULL;
- return realloc_nofail(ptr, size * num);
-}
-
-void *realloc_nofail(void *ptr, size_t size)
-{
- ptr = realloc(ptr, size);
- if (ptr)
- return ptr;
- barf("realloc of %zu failed", size);
-}
-
-void *malloc_nofail(size_t size)
-{
- void *ptr = malloc(size);
- if (ptr)
- return ptr;
- barf("malloc of %zu failed", size);
-}
-
-/* This version adds one byte (for nul term) */
-void *grab_file(const char *filename, unsigned long *size)
-{
- unsigned int max = 16384;
- int ret, fd;
- void *buffer;
-
- if (streq(filename, "-"))
- fd = dup(STDIN_FILENO);
- else
- fd = open(filename, O_RDONLY, 0);
-
- if (fd == -1)
- return NULL;
-
- buffer = malloc(max+1);
- if (!buffer)
- goto error;
- *size = 0;
- while ((ret = read(fd, buffer + *size, max - *size)) > 0) {
- *size += ret;
- if (*size == max) {
- void *nbuffer;
- max *= 2;
- nbuffer = realloc(buffer, max + 1);
- if (!nbuffer)
- goto error;
- buffer = nbuffer;
- }
- }
- if (ret < 0)
- goto error;
- ((char *)buffer)[*size] = '\0';
- close(fd);
- return buffer;
-error:
- free(buffer);
- close(fd);
- return NULL;
-}
-
-void release_file(void *data, unsigned long size __attribute__((unused)))
-{
- free(data);
-}
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/utils.h
--- a/tools/xenstore/utils.h Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xenstore/utils.h Thu Aug 02 09:54:18 2007 -0500
@@ -21,39 +21,12 @@ static inline bool strends(const char *a

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

-#define ___stringify(x) #x
-#define __stringify(x) ___stringify(x)
-
-/* Convenient wrappers for malloc and realloc. Use them. */
-#define new(type) ((type *)malloc_nofail(sizeof(type)))
-#define new_array(type, num) realloc_array((type *)0, (num))
-#define realloc_array(ptr, num) ((__typeof__(ptr))_realloc_array((ptr), sizeof((*ptr)), (num)))
-
-void *malloc_nofail(size_t size);
-void *realloc_nofail(void *ptr, size_t size);
-void *_realloc_array(void *ptr, size_t size, size_t num);
-
void barf(const char *fmt, ...) __attribute__((noreturn));
void barf_perror(const char *fmt, ...) __attribute__((noreturn));
-
-/* This version adds one byte (for nul term) */
-void *grab_file(const char *filename, unsigned long *size);
-void release_file(void *data, unsigned long size);
-
-/* Signal handling: returns fd to listen on. */
-int signal_to_fd(int signal);
-void close_signal(int fd);

void xprintf(const char *fmt, ...);

#define eprintf(_fmt, _args...) xprintf("[ERR] %s" _fmt, __FUNCTION__, ##_args)
-#define iprintf(_fmt, _args...) xprintf("[INF] %s" _fmt, __FUNCTION__, ##_args)
-
-#ifdef DEBUG
-#define dprintf(_fmt, _args...) xprintf("[DBG] %s" _fmt, __FUNCTION__, ##_args)
-#else
-#define dprintf(_fmt, _args...) ((void)0)
-#endif

/*
* Mux errno values onto returned pointers.
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/xenstored_core.c
--- a/tools/xenstore/xenstored_core.c Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xenstore/xenstored_core.c Thu Aug 02 09:54:18 2007 -0500
@@ -1820,7 +1820,9 @@ int main(int argc, char *argv[])
if (pidfile)
write_pidfile(pidfile);

- talloc_enable_leak_report_full();
+ /* Talloc leak reports go to stderr, which is closed if we fork. */
+ if (!dofork)
+ talloc_enable_leak_report_full();

/* Create sockets for them to listen to. */
sock = talloc(talloc_autofree_context(), int);
@@ -1881,6 +1883,11 @@ int main(int argc, char *argv[])
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
+
+ /* Get ourselves a nice xenstored crash if these are used. */
+ stdin = NULL;
+ stdout = NULL;
+ stderr = NULL;
}

signal(SIGHUP, trigger_reopen_log);
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/xenstored_domain.c
--- a/tools/xenstore/xenstored_domain.c Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xenstore/xenstored_domain.c Thu Aug 02 09:54:18 2007 -0500
@@ -621,13 +621,8 @@ void domain_entry_fix(unsigned int domid
struct domain *d;

d = find_domain_by_domid(domid);
- if (d) {
- if ((d->nbentry += num) < 0) {
- eprintf("invalid domain entry number %d",
- d->nbentry);
- d->nbentry = 0;
- }
- }
+ if (d && ((d->nbentry += num) < 0))
+ d->nbentry = 0;
}

int domain_entry(struct connection *conn)
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xenstore/xs_tdb_dump.c
--- a/tools/xenstore/xs_tdb_dump.c Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xenstore/xs_tdb_dump.c Thu Aug 02 09:54:18 2007 -0500
@@ -4,7 +4,7 @@
#include <fcntl.h>
#include <stdio.h>
#include <stdarg.h>
-
+#include <string.h>
#include "xs_lib.h"
#include "tdb.h"
#include "talloc.h"
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xm-test/lib/XmTestLib/acm.py
--- a/tools/xm-test/lib/XmTestLib/acm.py Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xm-test/lib/XmTestLib/acm.py Thu Aug 02 09:54:18 2007 -0500
@@ -67,6 +67,10 @@ def ACMLoadPolicy(policy='xm-test'):
if main.serverType == main.SERVER_XEN_API:
ACMLoadPolicy_XenAPI()
else:
+ cmd='xm dumppolicy | grep -E "^POLICY REFERENCE = ' + policy + '.$"'
+ s, o = traceCommand(cmd)
+ if o != "":
+ return
s, o = traceCommand("xm makepolicy %s" % (policy))
if s != 0:
FAIL("Need to be able to do 'xm makepolicy %s' but could not" %
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xm-test/tests/security-acm/07_security-acm_pol_update.py
--- a/tools/xm-test/tests/security-acm/07_security-acm_pol_update.py Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xm-test/tests/security-acm/07_security-acm_pol_update.py Thu Aug 02 09:54:18 2007 -0500
@@ -12,10 +12,19 @@ from xen.util import acmpolicy, security
from xen.util import acmpolicy, security, xsconstants
from xen.util.acmpolicy import ACMPolicy
from xen.xend.XendDomain import DOM0_UUID
+from XmTestLib.acm import *

import commands
import os
import base64
+
+if not isACMEnabled():
+ SKIP("Not running this test since ACM not enabled.")
+
+try:
+ session = xapi.connect()
+except:
+ SKIP("Skipping this test since xm is not using the Xen-API.")

xm_test = {}
xm_test['policyname'] = "xm-test"
diff -r 976db28bcc43 -r 04fb85a46dc5 tools/xm-test/tests/security-acm/09_security-acm_pol_update.py
--- a/tools/xm-test/tests/security-acm/09_security-acm_pol_update.py Thu Aug 02 09:50:55 2007 -0500
+++ b/tools/xm-test/tests/security-acm/09_security-acm_pol_update.py Thu Aug 02 09:54:18 2007 -0500
@@ -7,6 +7,7 @@

from XmTestLib import xapi
from XmTestLib.XenAPIDomain import XmTestAPIDomain
+from XmTestLib.acm import *
from XmTestLib import *
from xen.xend import XendAPIConstants
from xen.util import security, xsconstants
@@ -15,6 +16,14 @@ import base64
import base64
import struct
import time
+
+if not isACMEnabled():
+ SKIP("Not running this test since ACM not enabled.")
+
+try:
+ session = xapi.connect()
+except:
+ SKIP("Skipping this test since xm is not using the Xen-API.")

def typestoxml(types):
res = ""
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/ia64/xen/dom_fw_common.c
--- a/xen/arch/ia64/xen/dom_fw_common.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/ia64/xen/dom_fw_common.c Thu Aug 02 09:54:18 2007 -0500
@@ -207,17 +207,6 @@ print_md(efi_memory_desc_t *md)
printk("(%luKB)\n", size >> 10);
}

-uint8_t
-generate_acpi_checksum(void *tbl, unsigned long len)
-{
- uint8_t *ptr, sum = 0;
-
- for (ptr = tbl; len > 0 ; len--, ptr++)
- sum += *ptr;
-
- return 0 - sum;
-}
-
struct fake_acpi_tables {
struct acpi20_table_rsdp rsdp;
struct xsdt_descriptor_rev2 xsdt;
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/ia64/xen/dom_fw_dom0.c
--- a/xen/arch/ia64/xen/dom_fw_dom0.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/ia64/xen/dom_fw_dom0.c Thu Aug 02 09:54:18 2007 -0500
@@ -103,6 +103,7 @@ acpi_update_madt_checksum(unsigned long
/* base is physical address of acpi table */
static void __init touch_acpi_table(void)
{
+ int result;
lsapic_nbr = 0;

if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, 0) < 0)
@@ -110,6 +111,18 @@ static void __init touch_acpi_table(void
if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC,
acpi_patch_plat_int_src, 0) < 0)
printk("Error parsing MADT - no PLAT_INT_SRC entries\n");
+
+ result = acpi_table_disable(ACPI_SRAT);
+ if ( result == 0 )
+ printk("Success Disabling SRAT\n");
+ else if ( result != -ENOENT )
+ printk("ERROR: Failed Disabling SRAT\n");
+
+ result = acpi_table_disable(ACPI_SLIT);
+ if ( result == 0 )
+ printk("Success Disabling SLIT\n");
+ else if ( result != -ENOENT )
+ printk("ERROR: Failed Disabling SLIT\n");

acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);

diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/acpi/boot.c Thu Aug 02 09:54:18 2007 -0500
@@ -371,11 +371,18 @@ extern u32 pmtmr_ioport;

#ifdef CONFIG_ACPI_SLEEP
/* Get pm1x_cnt and pm1x_evt information for ACPI sleep */
-static int __init
+static void __init
acpi_fadt_parse_sleep_info(struct fadt_descriptor_rev2 *fadt)
{
+ struct acpi_table_rsdp *rsdp;
+ unsigned long rsdp_phys;
struct facs_descriptor_rev2 *facs = NULL;
uint64_t facs_pa;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys || acpi_disabled)
+ goto bad;
+ rsdp = __va(rsdp_phys);

if (fadt->revision >= FADT2_REVISION_ID) {
/* Sanity check on FADT Rev. 2 */
@@ -432,8 +439,7 @@ acpi_fadt_parse_sleep_info(struct fadt_d
"FACS is shorter than ACPI spec allow: 0x%x",
facs->length);

- if ((acpi_rsdp_rev < 2) ||
- (facs->length < 32)) {
+ if ((rsdp->revision < 2) || (facs->length < 32)) {
acpi_sinfo.wakeup_vector = facs_pa +
offsetof(struct facs_descriptor_rev2,
firmware_waking_vector);
@@ -451,10 +457,9 @@ acpi_fadt_parse_sleep_info(struct fadt_d
acpi_sinfo.pm1a_cnt, acpi_sinfo.pm1b_cnt,
acpi_sinfo.pm1a_evt, acpi_sinfo.pm1b_cnt,
acpi_sinfo.wakeup_vector, acpi_sinfo.vector_width);
- return 0;
+ return;
bad:
memset(&acpi_sinfo, 0, sizeof(acpi_sinfo));
- return 0;
}
#endif

diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/domain_build.c Thu Aug 02 09:54:18 2007 -0500
@@ -316,6 +316,9 @@ int __init construct_dom0(
parms.pae ? ", PAE" : "",
elf_msb(&elf) ? "msb" : "lsb",
elf.pstart, elf.pend);
+ if ( parms.bsd_symtab )
+ printk(" Dom0 symbol map 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
+ elf.sstart, elf.send);

if ( !compatible )
{
@@ -385,7 +388,7 @@ int __init construct_dom0(
v_start = parms.virt_base;
vkern_start = parms.virt_kstart;
vkern_end = parms.virt_kend;
- vinitrd_start = round_pgup(vkern_end);
+ vinitrd_start = round_pgup(parms.virt_end);
vinitrd_end = vinitrd_start + initrd_len;
vphysmap_start = round_pgup(vinitrd_end);
vphysmap_end = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
@@ -795,7 +798,7 @@ int __init construct_dom0(

/* Copy the OS image and free temporary buffer. */
elf.dest = (void*)vkern_start;
- elf_load_binary(&elf);
+ elf_xen_dom_load_binary(&elf, &parms);

if ( UNSET_ADDR != parms.virt_hypercall )
{
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/instrlen.c
--- a/xen/arch/x86/hvm/instrlen.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/instrlen.c Thu Aug 02 09:54:18 2007 -0500
@@ -7,14 +7,6 @@
*
* Essentially a very, very stripped version of Keir Fraser's work in
* x86_emulate.c. Used for MMIO.
- */
-
-/*
- * TODO: The way in which we use hvm_instruction_length is very inefficient as
- * it now stands. It will be worthwhile to return the actual instruction buffer
- * along with the instruction length since one of the reasons we are getting
- * the instruction length is to know how many instruction bytes we need to
- * fetch.
*/

#include <xen/config.h>
@@ -194,31 +186,51 @@ static uint8_t twobyte_table[256] = {
/*
* insn_fetch - fetch the next byte from instruction stream
*/
-#define insn_fetch() \
-({ uint8_t _x; \
- if ( length >= 15 ) \
- return -1; \
- if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \
- gdprintk(XENLOG_WARNING, \
- "Cannot read from address %lx (eip %lx, mode %d)\n", \
- pc, org_pc, address_bytes); \
- return -1; \
- } \
- pc += 1; \
- length += 1; \
- _x; \
+#define insn_fetch() \
+({ uint8_t _x; \
+ if ( length >= 15 ) \
+ return -1; \
+ if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) { \
+ unsigned long err; \
+ struct segment_register cs; \
+ gdprintk(XENLOG_WARNING, \
+ "Cannot read from address %lx (eip %lx, mode %d)\n", \
+ pc, org_pc, address_bytes); \
+ err = 0; /* Must be not-present: we don't enforce reserved bits */ \
+ if ( hvm_nx_enabled(current) ) \
+ err |= PFEC_insn_fetch; \
+ hvm_get_segment_register(current, x86_seg_cs, &cs); \
+ if ( cs.attr.fields.dpl != 0 ) \
+ err |= PFEC_user_mode; \
+ hvm_inject_exception(TRAP_page_fault, err, pc); \
+ return -1; \
+ } \
+ if ( buf ) \
+ buf[length] = _x; \
+ length += 1; \
+ pc += 1; \
+ _x; \
})

+#define insn_skip(_n) do { \
+ int _i; \
+ for ( _i = 0; _i < (_n); _i++) { \
+ (void) insn_fetch(); \
+ } \
+} while (0)
+
/**
- * hvm_instruction_length - returns the current instructions length
+ * hvm_instruction_fetch - read the current instruction and return its length
*
* @org_pc: guest instruction pointer
- * @mode: guest operating mode
+ * @address_bytes: guest address width
+ * @buf: (optional) buffer to load actual instruction bytes into
*
- * EXTERNAL this routine calculates the length of the current instruction
- * pointed to by org_pc. The guest state is _not_ changed by this routine.
+ * Doesn't increment the guest's instruction pointer, but may
+ * issue faults to the guest. Returns -1 on failure.
*/
-int hvm_instruction_length(unsigned long org_pc, int address_bytes)
+int hvm_instruction_fetch(unsigned long org_pc, int address_bytes,
+ unsigned char *buf)
{
uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
@@ -317,18 +329,13 @@ done_prefixes:
{
case 0:
if ( modrm_rm == 6 )
- {
- length += 2;
- pc += 2; /* skip disp16 */
- }
+ insn_skip(2); /* skip disp16 */
break;
case 1:
- length += 1;
- pc += 1; /* skip disp8 */
+ insn_skip(1); /* skip disp8 */
break;
case 2:
- length += 2;
- pc += 2; /* skip disp16 */
+ insn_skip(2); /* skip disp16 */
break;
}
}
@@ -340,33 +347,19 @@ done_prefixes:
case 0:
if ( (modrm_rm == 4) &&
((insn_fetch() & 7) == 5) )
- {
- length += 4;
- pc += 4; /* skip disp32 specified by SIB.base */
- }
+ insn_skip(4); /* skip disp32 specified by SIB.base */
else if ( modrm_rm == 5 )
- {
- length += 4;
- pc += 4; /* skip disp32 */
- }
+ insn_skip(4); /* skip disp32 */
break;
case 1:
if ( modrm_rm == 4 )
- {
- length += 1;
- pc += 1;
- }
- length += 1;
- pc += 1; /* skip disp8 */
+ insn_skip(1);
+ insn_skip(1); /* skip disp8 */
break;
case 2:
if ( modrm_rm == 4 )
- {
- length += 1;
- pc += 1;
- }
- length += 4;
- pc += 4; /* skip disp32 */
+ insn_skip(1);
+ insn_skip(4); /* skip disp32 */
break;
}
}
@@ -387,12 +380,10 @@ done_prefixes:
tmp = (d & ByteOp) ? 1 : op_bytes;
if ( tmp == 8 ) tmp = 4;
/* NB. Immediates are sign-extended as necessary. */
- length += tmp;
- pc += tmp;
+ insn_skip(tmp);
break;
case SrcImmByte:
- length += 1;
- pc += 1;
+ insn_skip(1);
break;
}

@@ -402,8 +393,7 @@ done_prefixes:
switch ( b )
{
case 0xa0 ... 0xa3: /* mov */
- length += ad_bytes;
- pc += ad_bytes; /* skip src/dst displacement */
+ insn_skip(ad_bytes); /* skip src/dst displacement */
break;
case 0xf6 ... 0xf7: /* Grp3 */
switch ( modrm_reg )
@@ -412,8 +402,7 @@ done_prefixes:
/* Special case in Grp3: test has an immediate source operand. */
tmp = (d & ByteOp) ? 1 : op_bytes;
if ( tmp == 8 ) tmp = 4;
- length += tmp;
- pc += tmp;
+ insn_skip(tmp);
break;
}
break;
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/platform.c
--- a/xen/arch/x86/hvm/platform.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/platform.c Thu Aug 02 09:54:18 2007 -0500
@@ -1041,17 +1041,13 @@ void handle_mmio(unsigned long gpa)
/* real or vm86 modes */
address_bytes = 2;
inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip;
- inst_len = hvm_instruction_length(inst_addr, address_bytes);
+ memset(inst, 0, MAX_INST_LEN);
+ inst_len = hvm_instruction_fetch(inst_addr, address_bytes, inst);
if ( inst_len <= 0 )
{
- printk("handle_mmio: failed to get instruction length\n");
- domain_crash_synchronous();
- }
-
- memset(inst, 0, MAX_INST_LEN);
- if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) {
- printk("handle_mmio: failed to copy instruction\n");
- domain_crash_synchronous();
+ gdprintk(XENLOG_DEBUG, "handle_mmio: failed to get instruction\n");
+ /* hvm_instruction_fetch() will have injected a #PF; get out now */
+ return;
}

if ( mmio_decode(address_bytes, inst, mmio_op, &ad_size,
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/svm/intr.c
--- a/xen/arch/x86/hvm/svm/intr.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/svm/intr.c Thu Aug 02 09:54:18 2007 -0500
@@ -58,7 +58,7 @@ static void svm_inject_nmi(struct vcpu *

event.bytes = 0;
event.fields.v = 1;
- event.fields.type = EVENTTYPE_NMI;
+ event.fields.type = X86_EVENTTYPE_NMI;
event.fields.vector = 2;

ASSERT(vmcb->eventinj.fields.v == 0);
@@ -72,34 +72,39 @@ static void svm_inject_extint(struct vcp

event.bytes = 0;
event.fields.v = 1;
- event.fields.type = EVENTTYPE_INTR;
+ event.fields.type = X86_EVENTTYPE_EXT_INTR;
event.fields.vector = vector;

ASSERT(vmcb->eventinj.fields.v == 0);
vmcb->eventinj = event;
}

+static void enable_intr_window(struct vcpu *v, enum hvm_intack intr_source)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+
+ ASSERT(intr_source != hvm_intack_none);
+
+ /*
+ * Create a dummy virtual interrupt to intercept as soon as the
+ * guest can accept the real interrupt.
+ *
+ * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
+ * shadow. This is hard to do without hardware support. We should also
+ * track 'NMI blocking' from NMI injection until IRET. This can be done
+ * quite easily in software by intercepting the unblocking IRET.
+ */
+ vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
+ HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
+ svm_inject_dummy_vintr(v);
+}
+
asmlinkage void svm_intr_assist(void)
{
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
enum hvm_intack intr_source;
int intr_vector;
-
- /*
- * Previous event delivery caused this intercept?
- * This will happen if the injection is latched by the processor (hence
- * clearing vintr.fields.irq or eventinj.v) but then subsequently a fault
- * occurs (e.g., due to lack of shadow mapping of guest IDT or guest-kernel
- * stack).
- */
- if ( vmcb->exitintinfo.fields.v )
- {
- vmcb->eventinj = vmcb->exitintinfo;
- vmcb->exitintinfo.bytes = 0;
- HVMTRACE_1D(REINJ_VIRQ, v, intr_vector);
- return;
- }

/* Crank the handle on interrupt state. */
pt_update_irq(v);
@@ -111,32 +116,23 @@ asmlinkage void svm_intr_assist(void)
return;

/*
- * If the guest can't take an interrupt right now, create a 'fake'
- * virtual interrupt on to intercept as soon as the guest _can_ take
- * interrupts. Do not obtain the next interrupt from the vlapic/pic
- * if unable to inject.
- *
- * Also do this if there is an injection already pending. This is
- * because the event delivery can arbitrarily delay the injection
- * of the vintr (for example, if the exception is handled via an
- * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
- * - the vTPR could be modified upwards, so we need to wait until the
- * exception is delivered before we can safely decide that an
- * interrupt is deliverable; and
- * - the guest might look at the APIC/PIC state, so we ought not to
- * have cleared the interrupt out of the IRR.
- *
- * TODO: Better NMI handling. We need a way to skip a MOV SS interrupt
- * shadow. This is hard to do without hardware support. We should also
- * track 'NMI blocking' from NMI injection until IRET. This can be done
- * quite easily in software by intercepting the unblocking IRET.
+ * Pending IRQs must be delayed if:
+ * 1. An event is already pending. This is despite the fact that SVM
+ * provides a VINTR delivery method quite separate from the EVENTINJ
+ * mechanism. The event delivery can arbitrarily delay the injection
+ * of the vintr (for example, if the exception is handled via an
+ * interrupt gate, hence zeroing RFLAGS.IF). In the meantime:
+ * - the vTPR could be modified upwards, so we need to wait until
+ * the exception is delivered before we can safely decide that an
+ * interrupt is deliverable; and
+ * - the guest might look at the APIC/PIC state, so we ought not to
+ * have cleared the interrupt out of the IRR.
+ * 2. The IRQ is masked.
*/
- if ( !hvm_interrupts_enabled(v, intr_source) ||
- vmcb->eventinj.fields.v )
+ if ( unlikely(vmcb->eventinj.fields.v) ||
+ !hvm_interrupts_enabled(v, intr_source) )
{
- vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
- HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
- svm_inject_dummy_vintr(v);
+ enable_intr_window(v, intr_source);
return;
}
} while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
@@ -151,6 +147,11 @@ asmlinkage void svm_intr_assist(void)
svm_inject_extint(v, intr_vector);
pt_intr_post(v, intr_vector, intr_source);
}
+
+ /* Is there another IRQ to queue up behind this one? */
+ intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_intr_window(v, intr_source);
}

/*
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/svm/svm.c Thu Aug 02 09:54:18 2007 -0500
@@ -71,8 +71,8 @@ static void *root_vmcb[NR_CPUS] __read_m
/* hardware assisted paging bits */
extern int opt_hap_enabled;

-static void svm_inject_exception(struct vcpu *v, int trap,
- int ev, int error_code)
+static void svm_inject_exception(
+ struct vcpu *v, int trap, int ev, int error_code)
{
eventinj_t event;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
@@ -84,13 +84,11 @@ static void svm_inject_exception(struct

event.bytes = 0;
event.fields.v = 1;
- event.fields.type = EVENTTYPE_EXCEPTION;
+ event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
event.fields.vector = trap;
event.fields.ev = ev;
event.fields.errorcode = error_code;

- ASSERT(vmcb->eventinj.fields.v == 0);
-
vmcb->eventinj = event;
}

@@ -362,21 +360,14 @@ int svm_vmcb_save(struct vcpu *v, struct
c->sysenter_esp = vmcb->sysenter_esp;
c->sysenter_eip = vmcb->sysenter_eip;

- /* Save any event/interrupt that was being injected when we last exited. */
- if ( vmcb->exitintinfo.fields.v )
- {
- c->pending_event = vmcb->exitintinfo.bytes & 0xffffffff;
- c->error_code = vmcb->exitintinfo.fields.errorcode;
- }
- else if ( vmcb->eventinj.fields.v )
- {
- c->pending_event = vmcb->eventinj.bytes & 0xffffffff;
+ c->pending_event = 0;
+ c->error_code = 0;
+ if ( vmcb->eventinj.fields.v &&
+ hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
+ vmcb->eventinj.fields.vector) )
+ {
+ c->pending_event = (uint32_t)vmcb->eventinj.bytes;
c->error_code = vmcb->eventinj.fields.errorcode;
- }
- else
- {
- c->pending_event = 0;
- c->error_code = 0;
}

return 1;
@@ -495,11 +486,11 @@ int svm_vmcb_restore(struct vcpu *v, str
vmcb->sysenter_esp = c->sysenter_esp;
vmcb->sysenter_eip = c->sysenter_eip;

- /* update VMCB for nested paging restore */
- if ( paging_mode_hap(v->domain) ) {
+ if ( paging_mode_hap(v->domain) )
+ {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
- (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+ vmcb->cr4 = (v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE));
vmcb->cr3 = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
@@ -514,26 +505,23 @@ int svm_vmcb_restore(struct vcpu *v, str
gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
c->pending_event, c->error_code);

- /* VMX uses a different type for #OF and #BP; fold into "Exception" */
- if ( c->pending_type == 6 )
- c->pending_type = 3;
- /* Sanity check */
- if ( c->pending_type == 1 || c->pending_type > 4
- || c->pending_reserved != 0 )
+ if ( (c->pending_type == 1) || (c->pending_type > 6) ||
+ (c->pending_reserved != 0) )
{
gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32"\n",
c->pending_event);
return -EINVAL;
}
- /* Put this pending event in exitintinfo and svm_intr_assist()
- * will reinject it when we return to the guest. */
- vmcb->exitintinfo.bytes = c->pending_event;
- vmcb->exitintinfo.fields.errorcode = c->error_code;
+
+ if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
+ {
+ vmcb->eventinj.bytes = c->pending_event;
+ vmcb->eventinj.fields.errorcode = c->error_code;
+ }
}

paging_update_paging_modes(v);
- /* signal paging update to ASID handler */
- svm_asid_g_update_paging (v);
+ svm_asid_g_update_paging(v);

return 0;

@@ -965,10 +953,10 @@ static void svm_hvm_inject_exception(
svm_inject_exception(v, trapnr, (errcode != -1), errcode);
}

-static int svm_event_injection_faulted(struct vcpu *v)
-{
- struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- return vmcb->exitintinfo.fields.v;
+static int svm_event_pending(struct vcpu *v)
+{
+ struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ return vmcb->eventinj.fields.v;
}

static struct hvm_function_table svm_function_table = {
@@ -1000,7 +988,7 @@ static struct hvm_function_table svm_fun
.inject_exception = svm_hvm_inject_exception,
.init_ap_context = svm_init_ap_context,
.init_hypercall_page = svm_init_hypercall_page,
- .event_injection_faulted = svm_event_injection_faulted
+ .event_pending = svm_event_pending
};

static void svm_npt_detect(void)
@@ -1667,6 +1655,17 @@ static int svm_set_cr0(unsigned long val
unsigned long old_base_mfn;

HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
+
+ if ( (u32)value != value )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set upper 32 bits in CR0: %lx",
+ value);
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ return 0;
+ }
+
+ value &= ~HVM_CR0_GUEST_RESERVED_BITS;

/* ET is reserved and should be always be 1. */
value |= X86_CR0_ET;
@@ -2420,6 +2419,7 @@ asmlinkage void svm_vmexit_handler(struc
unsigned long eip;
struct vcpu *v = current;
struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+ eventinj_t eventinj;
int inst_len, rc;

exit_reason = vmcb->exitcode;
@@ -2434,6 +2434,13 @@ asmlinkage void svm_vmexit_handler(struc

perfc_incra(svmexits, exit_reason);
eip = vmcb->rip;
+
+ /* Event delivery caused this intercept? Queue for redelivery. */
+ eventinj = vmcb->exitintinfo;
+ if ( unlikely(eventinj.fields.v) &&
+ hvm_event_needs_reinjection(eventinj.fields.type,
+ eventinj.fields.vector) )
+ vmcb->eventinj = eventinj;

switch ( exit_reason )
{
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/intr.c Thu Aug 02 09:54:18 2007 -0500
@@ -76,10 +76,9 @@ static void enable_intr_window(struct vc
u32 *cpu_exec_control = &v->arch.hvm_vmx.exec_control;
u32 ctl = CPU_BASED_VIRTUAL_INTR_PENDING;

- if ( unlikely(intr_source == hvm_intack_none) )
- return;
+ ASSERT(intr_source != hvm_intack_none);

- if ( unlikely(intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi )
+ if ( (intr_source == hvm_intack_nmi) && cpu_has_vmx_vnmi )
{
/*
* We set MOV-SS blocking in lieu of STI blocking when delivering an
@@ -131,68 +130,27 @@ asmlinkage void vmx_intr_assist(void)
int intr_vector;
enum hvm_intack intr_source;
struct vcpu *v = current;
- unsigned int idtv_info_field;
- unsigned long inst_len;
+ unsigned int intr_info;

+ /* Crank the handle on interrupt state. */
pt_update_irq(v);
-
hvm_set_callback_irq_level();
-
- update_tpr_threshold(vcpu_vlapic(v));

do {
intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( likely(intr_source == hvm_intack_none) )
+ goto out;

- if ( unlikely(v->arch.hvm_vmx.vector_injected) )
- {
- v->arch.hvm_vmx.vector_injected = 0;
- enable_intr_window(v, intr_source);
- return;
- }
-
- /* This could be moved earlier in the VMX resume sequence. */
- idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
- if ( unlikely(idtv_info_field & INTR_INFO_VALID_MASK) )
- {
- /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD,
- idtv_info_field & ~INTR_INFO_RESVD_BITS_MASK);
-
- /*
- * Safe: the length will only be interpreted for software
- * exceptions and interrupts. If we get here then delivery of some
- * event caused a fault, and this always results in defined
- * VM_EXIT_INSTRUCTION_LEN.
- */
- inst_len = __vmread(VM_EXIT_INSTRUCTION_LEN); /* Safe */
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
-
- if ( unlikely(idtv_info_field & 0x800) ) /* valid error code */
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
- __vmread(IDT_VECTORING_ERROR_CODE));
-
- /*
- * Clear NMI-blocking interruptibility info if an NMI delivery
- * faulted. Re-delivery will re-set it (see SDM 3B 25.7.1.2).
- */
- if ( (idtv_info_field&INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
- __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
- __vmread(GUEST_INTERRUPTIBILITY_INFO) &
- ~VMX_INTR_SHADOW_NMI);
-
- enable_intr_window(v, intr_source);
-
- HVM_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
- return;
- }
-
- if ( likely(intr_source == hvm_intack_none) )
- return;
-
- if ( !hvm_interrupts_enabled(v, intr_source) )
+ /*
+ * An event is already pending or the pending interrupt is masked?
+ * Then the pending interrupt must be delayed.
+ */
+ intr_info = __vmread(VM_ENTRY_INTR_INFO);
+ if ( unlikely(intr_info & INTR_INFO_VALID_MASK) ||
+ !hvm_interrupts_enabled(v, intr_source) )
{
enable_intr_window(v, intr_source);
- return;
+ goto out;
}
} while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );

@@ -206,6 +164,14 @@ asmlinkage void vmx_intr_assist(void)
vmx_inject_extint(v, intr_vector);
pt_intr_post(v, intr_vector, intr_source);
}
+
+ /* Is there another IRQ to queue up behind this one? */
+ intr_source = hvm_vcpu_has_pending_irq(v);
+ if ( unlikely(intr_source != hvm_intack_none) )
+ enable_intr_window(v, intr_source);
+
+ out:
+ update_tpr_threshold(vcpu_vlapic(v));
}

/*
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Aug 02 09:54:18 2007 -0500
@@ -240,8 +240,23 @@ int vmx_cpu_up(void)
{
u32 eax, edx;
int cpu = smp_processor_id();
+ u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;

BUG_ON(!(read_cr4() & X86_CR4_VMXE));
+
+ /*
+ * Ensure the current processor operating mode meets
+ * the required CR0 fixed bits in VMX operation.
+ */
+ cr0 = read_cr0();
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
+ if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
+ {
+ printk("CPU%d: some settings of host CR0 are "
+ "not allowed in VMX operation.\n", cpu);
+ return 0;
+ }

rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx);

@@ -418,7 +433,7 @@ static void construct_vmcs(struct vcpu *
__vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
__vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);

- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ __vmwrite(VM_ENTRY_INTR_INFO, 0);

__vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
__vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Aug 02 09:54:18 2007 -0500
@@ -613,28 +613,13 @@ void vmx_vmcs_save(struct vcpu *v, struc
c->sysenter_esp = __vmread(GUEST_SYSENTER_ESP);
c->sysenter_eip = __vmread(GUEST_SYSENTER_EIP);

- /*
- * Save any event/interrupt that was being injected when we last
- * exited. IDT_VECTORING_INFO_FIELD has priority, as anything in
- * VM_ENTRY_INTR_INFO_FIELD is either a fault caused by the first
- * event, which will happen the next time, or an interrupt, which we
- * never inject when IDT_VECTORING_INFO_FIELD is valid.
- */
- if ( (ev = __vmread(IDT_VECTORING_INFO_FIELD)) & INTR_INFO_VALID_MASK )
- {
- c->pending_event = ev;
- c->error_code = __vmread(IDT_VECTORING_ERROR_CODE);
- }
- else if ( (ev = __vmread(VM_ENTRY_INTR_INFO_FIELD)) &
- INTR_INFO_VALID_MASK )
+ c->pending_event = 0;
+ c->error_code = 0;
+ if ( ((ev = __vmread(VM_ENTRY_INTR_INFO)) & INTR_INFO_VALID_MASK) &&
+ hvm_event_needs_reinjection((ev >> 8) & 7, ev & 0xff) )
{
c->pending_event = ev;
c->error_code = __vmread(VM_ENTRY_EXCEPTION_ERROR_CODE);
- }
- else
- {
- c->pending_event = 0;
- c->error_code = 0;
}

vmx_vmcs_exit(v);
@@ -754,34 +739,9 @@ int vmx_vmcs_restore(struct vcpu *v, str

if ( c->pending_valid )
{
- vmx_vmcs_enter(v);
-
gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
c->pending_event, c->error_code);

- /* SVM uses type 3 ("Exception") for #OF and #BP; VMX uses type 6 */
- if ( (c->pending_type == 3) &&
- ((c->pending_vector == 3) || (c->pending_vector == 4)) )
- c->pending_type = 6;
-
- /* For software exceptions, we need to tell the hardware the
- * instruction length as well (hmmm). */
- if ( c->pending_type > 4 )
- {
- int addrbytes, ilen;
- if ( (c->cs_arbytes & X86_SEG_AR_CS_LM_ACTIVE) &&
- (c->msr_efer & EFER_LMA) )
- addrbytes = 8;
- else if ( c->cs_arbytes & X86_SEG_AR_DEF_OP_SIZE )
- addrbytes = 4;
- else
- addrbytes = 2;
-
- ilen = hvm_instruction_length(c->rip, addrbytes);
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen);
- }
-
- /* Sanity check */
if ( (c->pending_type == 1) || (c->pending_type > 6) ||
(c->pending_reserved != 0) )
{
@@ -790,12 +750,13 @@ int vmx_vmcs_restore(struct vcpu *v, str
return -EINVAL;
}

- /* Re-inject the exception */
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, c->pending_event);
- __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
- v->arch.hvm_vmx.vector_injected = 1;
-
- vmx_vmcs_exit(v);
+ if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
+ {
+ vmx_vmcs_enter(v);
+ __vmwrite(VM_ENTRY_INTR_INFO, c->pending_event);
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, c->error_code);
+ vmx_vmcs_exit(v);
+ }
}

return 0;
@@ -1203,14 +1164,10 @@ static void vmx_update_vtpr(struct vcpu
/* VMX doesn't have a V_TPR field */
}

-static int vmx_event_injection_faulted(struct vcpu *v)
-{
- unsigned int idtv_info_field;
-
+static int vmx_event_pending(struct vcpu *v)
+{
ASSERT(v == current);
-
- idtv_info_field = __vmread(IDT_VECTORING_INFO_FIELD);
- return (idtv_info_field & INTR_INFO_VALID_MASK);
+ return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
}

static void disable_intercept_for_msr(u32 msr)
@@ -1261,7 +1218,7 @@ static struct hvm_function_table vmx_fun
.inject_exception = vmx_inject_exception,
.init_ap_context = vmx_init_ap_context,
.init_hypercall_page = vmx_init_hypercall_page,
- .event_injection_faulted = vmx_event_injection_faulted,
+ .event_pending = vmx_event_pending,
.cpu_up = vmx_cpu_up,
.cpu_down = vmx_cpu_down,
};
@@ -2199,6 +2156,17 @@ static int vmx_set_cr0(unsigned long val
unsigned long old_base_mfn;

HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
+
+ if ( (u32)value != value )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1,
+ "Guest attempts to set upper 32 bits in CR0: %lx",
+ value);
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ return 0;
+ }
+
+ value &= ~HVM_CR0_GUEST_RESERVED_BITS;

/* ET is reserved and should be always be 1. */
value |= X86_CR0_ET;
@@ -2842,47 +2810,6 @@ static void vmx_do_extint(struct cpu_use
}
}

-static void vmx_reflect_exception(struct vcpu *v)
-{
- int error_code, intr_info, vector;
-
- intr_info = __vmread(VM_EXIT_INTR_INFO);
- vector = intr_info & 0xff;
- if ( intr_info & INTR_INFO_DELIVER_CODE_MASK )
- error_code = __vmread(VM_EXIT_INTR_ERROR_CODE);
- else
- error_code = VMX_DELIVER_NO_ERROR_CODE;
-
-#ifndef NDEBUG
- {
- unsigned long rip;
-
- rip = __vmread(GUEST_RIP);
- HVM_DBG_LOG(DBG_LEVEL_1, "rip = %lx, error_code = %x",
- rip, error_code);
- }
-#endif /* NDEBUG */
-
- /*
- * According to Intel Virtualization Technology Specification for
- * the IA-32 Intel Architecture (C97063-002 April 2005), section
- * 2.8.3, SW_EXCEPTION should be used for #BP and #OV, and
- * HW_EXCEPTION used for everything else. The main difference
- * appears to be that for SW_EXCEPTION, the EIP/RIP is incremented
- * by VM_ENTER_INSTRUCTION_LEN bytes, whereas for HW_EXCEPTION,
- * it is not.
- */
- if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_SW_EXCEPTION )
- {
- int ilen = __get_instruction_length(); /* Safe: software exception */
- vmx_inject_sw_exception(v, vector, ilen);
- }
- else
- {
- vmx_inject_hw_exception(v, vector, error_code);
- }
-}
-
static void vmx_failed_vmentry(unsigned int exit_reason,
struct cpu_user_regs *regs)
{
@@ -2919,7 +2846,7 @@ static void vmx_failed_vmentry(unsigned

asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
- unsigned int exit_reason;
+ unsigned int exit_reason, idtv_info;
unsigned long exit_qualification, inst_len = 0;
struct vcpu *v = current;

@@ -2934,6 +2861,30 @@ asmlinkage void vmx_vmexit_handler(struc

if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
return vmx_failed_vmentry(exit_reason, regs);
+
+ /* Event delivery caused this intercept? Queue for redelivery. */
+ idtv_info = __vmread(IDT_VECTORING_INFO);
+ if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) )
+ {
+ if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
+ {
+ /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
+ __vmwrite(VM_ENTRY_INTR_INFO,
+ idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+ if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+ __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ __vmread(IDT_VECTORING_ERROR_CODE));
+ }
+
+ /*
+ * Clear NMI-blocking interruptibility info if an NMI delivery faulted.
+ * Re-delivery will re-set it (see SDM 3B 25.7.1.2).
+ */
+ if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) )
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
+ __vmread(GUEST_INTERRUPTIBILITY_INFO) &
+ ~VMX_INTR_SHADOW_NMI);
+ }

switch ( exit_reason )
{
@@ -2957,7 +2908,7 @@ asmlinkage void vmx_vmexit_handler(struc
* (NB. If we emulate this IRET for any reason, we should re-clear!)
*/
if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) &&
- !(__vmread(IDT_VECTORING_INFO_FIELD) & INTR_INFO_VALID_MASK) &&
+ !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
(vector != TRAP_double_fault) )
__vmwrite(GUEST_INTERRUPTIBILITY_INFO,
__vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
@@ -2995,14 +2946,12 @@ asmlinkage void vmx_vmexit_handler(struc
vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
- if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
- {
- HVMTRACE_0D(NMI, v);
- vmx_store_cpu_guest_regs(v, regs, NULL);
- do_nmi(regs); /* Real NMI, vector 2: normal processing. */
- }
- else
- vmx_reflect_exception(v);
+ if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
+ (X86_EVENTTYPE_NMI << 8) )
+ goto exit_and_crash;
+ HVMTRACE_0D(NMI, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
+ do_nmi(regs); /* Real NMI, vector 2: normal processing. */
break;
case TRAP_machine_check:
HVMTRACE_0D(MCE, v);
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/arch/x86/mm/shadow/multi.c Thu Aug 02 09:54:18 2007 -0500
@@ -2905,7 +2905,7 @@ static int sh_page_fault(struct vcpu *v,
* stack is currently considered to be a page table, so we should
* unshadow the faulting page before exiting.
*/
- if ( unlikely(hvm_event_injection_faulted(v)) )
+ if ( unlikely(hvm_event_pending(v)) )
{
gdprintk(XENLOG_DEBUG, "write to pagetable during event "
"injection: cr2=%#lx, mfn=%#lx\n",
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/common/libelf/libelf-dominfo.c
--- a/xen/common/libelf/libelf-dominfo.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/common/libelf/libelf-dominfo.c Thu Aug 02 09:54:18 2007 -0500
@@ -333,6 +333,99 @@ static int elf_xen_note_check(struct elf
return 0;
}

+
+static void elf_xen_loadsymtab(struct elf_binary *elf,
+ struct elf_dom_parms *parms)
+{
+ unsigned long maxva, len;
+
+ if ( !parms->bsd_symtab )
+ return;
+
+ /* Calculate the required additional kernel space for the elf image */
+
+ /* The absolute base address of the elf image */
+ maxva = elf_round_up(elf, parms->virt_kend);
+ maxva += sizeof(long); /* Space to store the size of the elf image */
+ /* Space for the elf and elf section headers */
+ maxva += (elf_uval(elf, elf->ehdr, e_ehsize) +
+ elf_shdr_count(elf) * elf_uval(elf, elf->ehdr, e_shentsize));
+ maxva = elf_round_up(elf, maxva);
+
+ /* Space for the symbol and string tabs */
+ len = (unsigned long)elf->send - (unsigned long)elf->sstart;
+ maxva = elf_round_up(elf, maxva + len);
+
+ /* The address the kernel must be expanded to */
+ parms->virt_end = maxva;
+}
+
+int elf_xen_dom_load_binary(struct elf_binary *elf,
+ struct elf_dom_parms *parms)
+{
+ elf_ehdr *sym_ehdr;
+ unsigned long shdr, symtab_addr;
+ unsigned long maxva, symbase;
+ uint8_t i;
+ char *p;
+
+ elf_load_binary(elf);
+
+ if ( !parms->bsd_symtab )
+ return 0;
+
+#define elf_hdr_elm(_elf, _hdr, _elm, _val) \
+do { \
+ if ( elf_64bit(_elf) ) \
+ (_hdr)->e64._elm = _val; \
+ else \
+ (_hdr)->e32._elm = _val; \
+} while ( 0 )
+
+ /* ehdr right after the kernel image (4 byte aligned) */
+ symbase = elf_round_up(elf, parms->virt_kend);
+ symtab_addr = maxva = symbase + sizeof(long);
+
+ /* Set up Elf header. */
+ sym_ehdr = (elf_ehdr *)symtab_addr;
+ maxva = elf_copy_ehdr(elf, sym_ehdr);
+
+ elf_hdr_elm(elf, sym_ehdr, e_phoff, 0);
+ elf_hdr_elm(elf, sym_ehdr, e_shoff, elf_uval(elf, elf->ehdr, e_ehsize));
+ elf_hdr_elm(elf, sym_ehdr, e_phentsize, 0);
+ elf_hdr_elm(elf, sym_ehdr, e_phnum, 0);
+
+ /* Copy Elf section headers. */
+ shdr = maxva;
+ maxva = elf_copy_shdr(elf, (elf_shdr *)shdr);
+
+ for ( i = 0; i < elf_shdr_count(elf); i++ )
+ {
+ uint8_t type;
+ unsigned long tmp;
+ type = elf_uval(elf, (elf_shdr *)shdr, sh_type);
+ if ( (type == SHT_STRTAB) || (type == SHT_SYMTAB) )
+ {
+ elf_msg(elf, "%s: shdr %i at 0x%p -> 0x%p\n", __func__, i,
+ elf_section_start(elf, (elf_shdr *)shdr), (void *)maxva);
+ tmp = elf_copy_section(elf, (elf_shdr *)shdr, (void *)maxva);
+ /* Mangled to be based on ELF header location. */
+ elf_hdr_elm(elf, (elf_shdr *)shdr, sh_offset,
+ maxva - symtab_addr);
+ maxva = tmp;
+ }
+ shdr += elf_uval(elf, elf->ehdr, e_shentsize);
+ }
+
+ /* Write down the actual sym size. */
+ p = (char *)symbase;
+ *(long *)p = maxva - symtab_addr; /* sym size */
+
+#undef elf_ehdr_elm
+
+ return 0;
+}
+
static int elf_xen_addr_calc_check(struct elf_binary *elf,
struct elf_dom_parms *parms)
{
@@ -374,9 +467,13 @@ static int elf_xen_addr_calc_check(struc
parms->virt_offset = parms->virt_base - parms->elf_paddr_offset;
parms->virt_kstart = elf->pstart + parms->virt_offset;
parms->virt_kend = elf->pend + parms->virt_offset;
+ parms->virt_end = parms->virt_kend;

if ( parms->virt_entry == UNSET_ADDR )
parms->virt_entry = elf_uval(elf, elf->ehdr, e_entry);
+
+ if ( parms->bsd_symtab )
+ elf_xen_loadsymtab(elf, parms);

elf_msg(elf, "%s: addresses:\n", __FUNCTION__);
elf_msg(elf, " virt_base = 0x%" PRIx64 "\n", parms->virt_base);
@@ -384,12 +481,14 @@ static int elf_xen_addr_calc_check(struc
elf_msg(elf, " virt_offset = 0x%" PRIx64 "\n", parms->virt_offset);
elf_msg(elf, " virt_kstart = 0x%" PRIx64 "\n", parms->virt_kstart);
elf_msg(elf, " virt_kend = 0x%" PRIx64 "\n", parms->virt_kend);
+ elf_msg(elf, " virt_end = 0x%" PRIx64 "\n", parms->virt_end);
elf_msg(elf, " virt_entry = 0x%" PRIx64 "\n", parms->virt_entry);

if ( (parms->virt_kstart > parms->virt_kend) ||
(parms->virt_entry < parms->virt_kstart) ||
(parms->virt_entry > parms->virt_kend) ||
- (parms->virt_base > parms->virt_kstart) )
+ (parms->virt_base > parms->virt_kstart) ||
+ (parms->virt_kend > parms->virt_end) )
{
elf_err(elf, "%s: ERROR: ELF start or entries are out of bounds.\n",
__FUNCTION__);
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/common/libelf/libelf-loader.c
--- a/xen/common/libelf/libelf-loader.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/common/libelf/libelf-loader.c Thu Aug 02 09:54:18 2007 -0500
@@ -10,6 +10,8 @@ int elf_init(struct elf_binary *elf, con
{
const elf_shdr *shdr;
uint64_t i, count, section, offset;
+ uint64_t low = -1;
+ uint64_t high = 0;

if ( !elf_is_elfbinary(image) )
{
@@ -24,7 +26,11 @@ int elf_init(struct elf_binary *elf, con
elf->class = elf->ehdr->e32.e_ident[EI_CLASS];
elf->data = elf->ehdr->e32.e_ident[EI_DATA];

- /* sanity check phdr */
+#ifdef VERBOSE
+ elf_set_verbose(elf);
+#endif
+
+ /* Sanity check phdr. */
offset = elf_uval(elf, elf->ehdr, e_phoff) +
elf_uval(elf, elf->ehdr, e_phentsize) * elf_phdr_count(elf);
if ( offset > elf->size )
@@ -34,7 +40,7 @@ int elf_init(struct elf_binary *elf, con
return -1;
}

- /* sanity check shdr */
+ /* Sanity check shdr. */
offset = elf_uval(elf, elf->ehdr, e_shoff) +
elf_uval(elf, elf->ehdr, e_shentsize) * elf_shdr_count(elf);
if ( offset > elf->size )
@@ -44,29 +50,55 @@ int elf_init(struct elf_binary *elf, con
return -1;
}

- /* find section string table */
+ /* Find section string table. */
section = elf_uval(elf, elf->ehdr, e_shstrndx);
shdr = elf_shdr_by_index(elf, section);
if ( shdr != NULL )
elf->sec_strtab = elf_section_start(elf, shdr);

- /* find symbol table, symbol string table */
+ /* Find symbol table and symbol string table. */
count = elf_shdr_count(elf);
for ( i = 0; i < count; i++ )
{
+ const char *sh_symend, *sh_strend;
+
shdr = elf_shdr_by_index(elf, i);
if ( elf_uval(elf, shdr, sh_type) != SHT_SYMTAB )
continue;
elf->sym_tab = shdr;
+ sh_symend = (const char *)elf_section_end(elf, shdr);
shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link));
if ( shdr == NULL )
{
elf->sym_tab = NULL;
+ sh_symend = 0;
continue;
}
elf->sym_strtab = elf_section_start(elf, shdr);
- break;
- }
+ sh_strend = (const char *)elf_section_end(elf, shdr);
+
+ if ( low > (unsigned long)elf->sym_tab )
+ low = (unsigned long)elf->sym_tab;
+ if ( low > (unsigned long)shdr )
+ low = (unsigned long)shdr;
+
+ if ( high < ((unsigned long)sh_symend) )
+ high = (unsigned long)sh_symend;
+ if ( high < ((unsigned long)sh_strend) )
+ high = (unsigned long)sh_strend;
+
+ elf_msg(elf, "%s: shdr: sym_tab=%p size=0x%" PRIx64 "\n",
+ __FUNCTION__, elf->sym_tab,
+ elf_uval(elf, elf->sym_tab, sh_size));
+ elf_msg(elf, "%s: shdr: str_tab=%p size=0x%" PRIx64 "\n",
+ __FUNCTION__, elf->sym_strtab, elf_uval(elf, shdr, sh_size));
+
+ elf->sstart = low;
+ elf->send = high;
+ elf_msg(elf, "%s: symbol map: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
+ __FUNCTION__, elf->sstart, elf->send);
+ }
+
return 0;
}

diff -r 976db28bcc43 -r 04fb85a46dc5 xen/common/libelf/libelf-tools.c
--- a/xen/common/libelf/libelf-tools.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/common/libelf/libelf-tools.c Thu Aug 02 09:54:18 2007 -0500
@@ -236,6 +236,36 @@ int elf_phdr_is_loadable(struct elf_bina
uint64_t p_flags = elf_uval(elf, phdr, p_flags);

return ((p_type == PT_LOAD) && (p_flags & (PF_W | PF_X)) != 0);
+}
+
+unsigned long
+elf_copy_ehdr(struct elf_binary *elf, void *dest)
+{
+ uint64_t size;
+
+ size = elf_uval(elf, elf->ehdr, e_ehsize);
+ memcpy(dest, elf->ehdr, size);
+ return elf_round_up(elf, (unsigned long)(dest) + size);
+}
+
+unsigned long
+elf_copy_shdr(struct elf_binary *elf, void *dest)
+{
+ uint64_t size;
+
+ size = elf_shdr_count(elf) * elf_uval(elf, elf->ehdr, e_shentsize);
+ memcpy(dest, elf->image + elf_uval(elf, elf->ehdr, e_shoff), size);
+ return elf_round_up(elf, (unsigned long)(dest) + size);
+}
+
+unsigned long
+elf_copy_section(struct elf_binary *elf, const elf_shdr *shdr, void *dest)
+{
+ uint64_t size;
+
+ size = elf_uval(elf, shdr, sh_size);
+ memcpy(dest, elf_section_start(elf, shdr), size);
+ return elf_round_up(elf, (unsigned long)(dest) + size);
}

/*
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/drivers/acpi/tables.c
--- a/xen/drivers/acpi/tables.c Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/drivers/acpi/tables.c Thu Aug 02 09:54:18 2007 -0500
@@ -73,7 +73,6 @@ struct acpi_table_sdt {

static unsigned long sdt_pa; /* Physical Address */
static unsigned long sdt_count; /* Table count */
-unsigned char acpi_rsdp_rev;

static struct acpi_table_sdt sdt_entry[ACPI_MAX_TABLES] __initdata;

@@ -227,6 +226,17 @@ void acpi_table_print_madt_entry(acpi_ta
}
}

+uint8_t
+generate_acpi_checksum(void *tbl, unsigned long len)
+{
+ uint8_t *ptr, sum = 0;
+
+ for (ptr = tbl; len > 0 ; len--, ptr++)
+ sum += *ptr;
+
+ return 0 - sum;
+}
+
static int
acpi_table_compute_checksum(void *table_pointer, unsigned long length)
{
@@ -599,8 +609,6 @@ int __init acpi_table_init(void)
"RSDP (v%3.3d %6.6s ) @ 0x%p\n",
rsdp->revision, rsdp->oem_id, (void *)rsdp_phys);

- acpi_rsdp_rev = rsdp->revision;
-
if (rsdp->revision < 2)
result =
acpi_table_compute_checksum(rsdp,
@@ -623,3 +631,143 @@ int __init acpi_table_init(void)

return 0;
}
+
+int __init
+acpi_table_disable(enum acpi_table_id table_id)
+{
+ struct acpi_table_header *header = NULL;
+ struct acpi_table_rsdp *rsdp;
+ unsigned long rsdp_phys;
+ char *table_name;
+ int id;
+
+ rsdp_phys = acpi_find_rsdp();
+ if (!rsdp_phys)
+ return -ENODEV;
+
+ rsdp = (struct acpi_table_rsdp *)__acpi_map_table(rsdp_phys,
+ sizeof(struct acpi_table_rsdp));
+ if (!rsdp)
+ return -ENODEV;
+
+ for (id = 0; id < sdt_count; id++)
+ if (sdt_entry[id].id == table_id)
+ break;
+
+ if (id == sdt_count)
+ return -ENOENT;
+
+ table_name = acpi_table_signatures[table_id];
+
+ /* First check XSDT (but only on ACPI 2.0-compatible systems) */
+
+ if ((rsdp->revision >= 2) &&
+ (((struct acpi20_table_rsdp *)rsdp)->xsdt_address)) {
+
+ struct acpi_table_xsdt *mapped_xsdt = NULL;
+
+ sdt_pa = ((struct acpi20_table_rsdp *)rsdp)->xsdt_address;
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
+
+ if (!header) {
+ printk(KERN_WARNING PREFIX
+ "Unable to map XSDT header\n");
+ return -ENODEV;
+ }
+
+ /* remap in the entire table before processing */
+ mapped_xsdt = (struct acpi_table_xsdt *)
+ __acpi_map_table(sdt_pa, header->length);
+ if (!mapped_xsdt) {
+ printk(KERN_WARNING PREFIX "Unable to map XSDT\n");
+ return -ENODEV;
+ }
+ header = &mapped_xsdt->header;
+
+ if (strncmp(header->signature, "XSDT", 4)) {
+ printk(KERN_WARNING PREFIX
+ "XSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING PREFIX "Invalid XSDT checksum\n");
+ return -ENODEV;
+ }
+
+ if (id < sdt_count) {
+ header = (struct acpi_table_header *)
+ __acpi_map_table(mapped_xsdt->entry[id], sizeof(struct acpi_table_header));
+ } else {
+ printk(KERN_WARNING PREFIX
+ "Unable to disable entry %d\n",
+ id);
+ return -ENODEV;
+ }
+ }
+
+ /* Then check RSDT */
+
+ else if (rsdp->rsdt_address) {
+
+ struct acpi_table_rsdt *mapped_rsdt = NULL;
+
+ sdt_pa = rsdp->rsdt_address;
+
+ /* map in just the header */
+ header = (struct acpi_table_header *)
+ __acpi_map_table(sdt_pa, sizeof(struct acpi_table_header));
+ if (!header) {
+ printk(KERN_WARNING PREFIX
+ "Unable to map RSDT header\n");
+ return -ENODEV;
+ }
+
+ /* remap in the entire table before processing */
+ mapped_rsdt = (struct acpi_table_rsdt *)
+ __acpi_map_table(sdt_pa, header->length);
+ if (!mapped_rsdt) {
+ printk(KERN_WARNING PREFIX "Unable to map RSDT\n");
+ return -ENODEV;
+ }
+ header = &mapped_rsdt->header;
+
+ if (strncmp(header->signature, "RSDT", 4)) {
+ printk(KERN_WARNING PREFIX
+ "RSDT signature incorrect\n");
+ return -ENODEV;
+ }
+
+ if (acpi_table_compute_checksum(header, header->length)) {
+ printk(KERN_WARNING PREFIX "Invalid RSDT checksum\n");
+ return -ENODEV;
+ }
+ if (id < sdt_count) {
+ header = (struct acpi_table_header *)
+ __acpi_map_table(mapped_rsdt->entry[id], sizeof(struct acpi_table_header));
+ } else {
+ printk(KERN_WARNING PREFIX
+ "Unable to disable entry %d\n",
+ id);
+ return -ENODEV;
+ }
+ }
+
+ else {
+ printk(KERN_WARNING PREFIX
+ "No System Description Table (RSDT/XSDT) specified in RSDP\n");
+ return -ENODEV;
+ }
+
+ memcpy(header->signature, "OEMx", 4);
+ memcpy(header->oem_id, "xxxxxx", 6);
+ memcpy(header->oem_id+1, table_name, 4);
+ memcpy(header->oem_table_id, "Xen ", 8);
+ header->checksum = 0;
+ header->checksum = generate_acpi_checksum(header, header->length);
+
+ return 0;
+}
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-ia64/dom_fw_common.h
--- a/xen/include/asm-ia64/dom_fw_common.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/asm-ia64/dom_fw_common.h Thu Aug 02 09:54:18 2007 -0500
@@ -85,7 +85,6 @@ xen_ia64_efi_make_md(efi_memory_desc_t *
xen_ia64_efi_make_md(efi_memory_desc_t *md,
uint32_t type, uint64_t attr,
uint64_t start, uint64_t end);
-uint8_t generate_acpi_checksum(void *tbl, unsigned long len);
struct fake_acpi_tables;
void dom_fw_fake_acpi(domain_t *d, struct fake_acpi_tables *tables);
int efi_mdt_cmp(const void *a, const void *b);
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/asm-x86/hvm/hvm.h Thu Aug 02 09:54:18 2007 -0500
@@ -154,7 +154,7 @@ struct hvm_function_table {

void (*init_hypercall_page)(struct domain *d, void *hypercall_page);

- int (*event_injection_faulted)(struct vcpu *v);
+ int (*event_pending)(struct vcpu *v);

int (*cpu_up)(void);
void (*cpu_down)(void);
@@ -229,7 +229,8 @@ hvm_guest_x86_mode(struct vcpu *v)
return hvm_funcs.guest_x86_mode(v);
}

-int hvm_instruction_length(unsigned long pc, int address_bytes);
+int hvm_instruction_fetch(unsigned long pc, int address_bytes,
+ unsigned char *buf);

static inline void
hvm_update_host_cr3(struct vcpu *v)
@@ -295,24 +296,71 @@ hvm_inject_exception(unsigned int trapnr

int hvm_bringup_ap(int vcpuid, int trampoline_vector);

-static inline int hvm_event_injection_faulted(struct vcpu *v)
-{
- return hvm_funcs.event_injection_faulted(v);
-}
+static inline int hvm_event_pending(struct vcpu *v)
+{
+ return hvm_funcs.event_pending(v);
+}
+
+/* These reserved bits in lower 32 remain 0 after any load of CR0 */
+#define HVM_CR0_GUEST_RESERVED_BITS \
+ (~((unsigned long) \
+ (X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | \
+ X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | \
+ X86_CR0_WP | X86_CR0_AM | X86_CR0_NW | \
+ X86_CR0_CD | X86_CR0_PG)))

/* These bits in CR4 are owned by the host. */
#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
(X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))

/* These bits in CR4 cannot be set by the guest. */
-#define HVM_CR4_GUEST_RESERVED_BITS \
- ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
- X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
- X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
- X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+#define HVM_CR4_GUEST_RESERVED_BITS \
+ (~((unsigned long) \
+ (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
+ X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | \
+ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
+ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)))

/* These exceptions must always be intercepted. */
#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
+/*
+ * x86 event types. This enumeration is valid for:
+ * Intel VMX: {VM_ENTRY,VM_EXIT,IDT_VECTORING}_INTR_INFO[10:8]
+ * AMD SVM: eventinj[10:8] and exitintinfo[10:8] (types 0-4 only)
+ */
+#define X86_EVENTTYPE_EXT_INTR 0 /* external interrupt */
+#define X86_EVENTTYPE_NMI 2 /* NMI */
+#define X86_EVENTTYPE_HW_EXCEPTION 3 /* hardware exception */
+#define X86_EVENTTYPE_SW_INTERRUPT 4 /* software interrupt */
+#define X86_EVENTTYPE_SW_EXCEPTION 6 /* software exception */
+
+/*
+ * Need to re-inject a given event? We avoid re-injecting software exceptions
+ * and interrupts because the faulting/trapping instruction can simply be
+ * re-executed (neither VMX nor SVM update RIP when they VMEXIT during
+ * INT3/INTO/INTn).
+ */
+static inline int hvm_event_needs_reinjection(uint8_t type, uint8_t vector)
+{
+ switch ( type )
+ {
+ case X86_EVENTTYPE_EXT_INTR:
+ case X86_EVENTTYPE_NMI:
+ return 1;
+ case X86_EVENTTYPE_HW_EXCEPTION:
+ /*
+ * SVM uses type 3 ("HW Exception") for #OF and #BP. We explicitly
+ * check for these vectors, as they are really SW Exceptions. SVM has
+ * not updated RIP to point after the trapping instruction (INT3/INTO).
+ */
+ return (vector != 3) && (vector != 4);
+ default:
+ /* Software exceptions/interrupts can be re-executed (e.g., INT n). */
+ break;
+ }
+ return 0;
+}

static inline int hvm_cpu_up(void)
{
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/svm/vmcb.h
--- a/xen/include/asm-x86/hvm/svm/vmcb.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h Thu Aug 02 09:54:18 2007 -0500
@@ -319,14 +319,6 @@ typedef union
u64 errorcode:32;
} fields;
} __attribute__ ((packed)) eventinj_t;
-
-enum EVENTTYPES
-{
- EVENTTYPE_INTR = 0,
- EVENTTYPE_NMI = 2,
- EVENTTYPE_EXCEPTION = 3,
- EVENTTYPE_SWINT = 4,
-};

typedef union
{
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Aug 02 09:54:18 2007 -0500
@@ -66,9 +66,6 @@ struct arch_vmx_struct {

/* Cache of cpu execution control. */
u32 exec_control;
-
- /* If there is vector installed in the INTR_INFO_FIELD. */
- u32 vector_injected;

unsigned long cpu_cr0; /* copy of guest CR0 */
unsigned long cpu_shadow_cr0; /* copy of guest read shadow CR0 */
@@ -198,7 +195,7 @@ enum vmcs_field {
VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
VM_ENTRY_CONTROLS = 0x00004012,
VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
- VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_INTR_INFO = 0x00004016,
VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
@@ -207,7 +204,7 @@ enum vmcs_field {
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
VM_EXIT_INTR_ERROR_CODE = 0x00004406,
- IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_INFO = 0x00004408,
IDT_VECTORING_ERROR_CODE = 0x0000440a,
VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
VMX_INSTRUCTION_INFO = 0x0000440e,
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/asm-x86/hvm/vmx/vmx.h
--- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h Thu Aug 02 09:54:18 2007 -0500
@@ -94,11 +94,6 @@ void vmx_vlapic_msr_changed(struct vcpu
#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
#define INTR_INFO_RESVD_BITS_MASK 0x7ffff000

-#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
-#define INTR_TYPE_NMI (2 << 8) /* NMI */
-#define INTR_TYPE_HW_EXCEPTION (3 << 8) /* hardware exception */
-#define INTR_TYPE_SW_EXCEPTION (6 << 8) /* software exception */
-
/*
* Exit Qualifications for MOV for Control Register Access
*/
@@ -263,8 +258,8 @@ static inline int __vmxon (u64 addr)
return rc;
}

-static inline void __vmx_inject_exception(struct vcpu *v, int trap, int type,
- int error_code, int ilen)
+static inline void __vmx_inject_exception(
+ struct vcpu *v, int trap, int type, int error_code)
{
unsigned long intr_fields;

@@ -276,16 +271,13 @@ static inline void __vmx_inject_exceptio
* VM entry]", PRM Vol. 3, 22.6.1 (Interruptibility State).
*/

- intr_fields = (INTR_INFO_VALID_MASK | type | trap);
+ intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap);
if ( error_code != VMX_DELIVER_NO_ERROR_CODE ) {
__vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
intr_fields |= INTR_INFO_DELIVER_CODE_MASK;
}

- if ( ilen )
- __vmwrite(VM_ENTRY_INSTRUCTION_LEN, ilen);
-
- __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
+ __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);

if (trap == TRAP_page_fault)
HVMTRACE_2D(PF_INJECT, v, v->arch.hvm_vmx.cpu_cr2, error_code);
@@ -296,29 +288,19 @@ static inline void vmx_inject_hw_excepti
static inline void vmx_inject_hw_exception(
struct vcpu *v, int trap, int error_code)
{
- v->arch.hvm_vmx.vector_injected = 1;
- __vmx_inject_exception(v, trap, INTR_TYPE_HW_EXCEPTION, error_code, 0);
-}
-
-static inline void vmx_inject_sw_exception(
- struct vcpu *v, int trap, int instruction_len)
-{
- v->arch.hvm_vmx.vector_injected = 1;
- __vmx_inject_exception(v, trap, INTR_TYPE_SW_EXCEPTION,
- VMX_DELIVER_NO_ERROR_CODE,
- instruction_len);
+ __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
}

static inline void vmx_inject_extint(struct vcpu *v, int trap)
{
- __vmx_inject_exception(v, trap, INTR_TYPE_EXT_INTR,
- VMX_DELIVER_NO_ERROR_CODE, 0);
+ __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR,
+ VMX_DELIVER_NO_ERROR_CODE);
}

static inline void vmx_inject_nmi(struct vcpu *v)
{
- __vmx_inject_exception(v, 2, INTR_TYPE_NMI,
- VMX_DELIVER_NO_ERROR_CODE, 0);
+ __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI,
+ VMX_DELIVER_NO_ERROR_CODE);
}

#endif /* __ASM_X86_HVM_VMX_VMX_H__ */
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/public/libelf.h
--- a/xen/include/public/libelf.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/public/libelf.h Thu Aug 02 09:54:18 2007 -0500
@@ -65,6 +65,8 @@ struct elf_binary {

/* loaded to */
char *dest;
+ uint64_t sstart;
+ uint64_t send;
uint64_t pstart;
uint64_t pend;
uint64_t reloc_offset;
@@ -91,33 +93,32 @@ struct elf_binary {
#define elf_lsb(elf) (ELFDATA2LSB == (elf)->data)
#define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data)

-#define elf_uval(elf, str, elem) \
- ((ELFCLASS64 == (elf)->class) \
- ? elf_access_unsigned((elf), (str), \
- offsetof(typeof(*(str)),e64.elem), \
- sizeof((str)->e64.elem)) \
- : elf_access_unsigned((elf), (str), \
- offsetof(typeof(*(str)),e32.elem), \
- sizeof((str)->e32.elem)))
-
-#define elf_sval(elf, str, elem) \
- ((ELFCLASS64 == (elf)->class) \
- ? elf_access_signed((elf), (str), \
- offsetof(typeof(*(str)),e64.elem), \
- sizeof((str)->e64.elem)) \
- : elf_access_signed((elf), (str), \
- offsetof(typeof(*(str)),e32.elem), \
- sizeof((str)->e32.elem)))
-
-#define elf_size(elf, str) \
- ((ELFCLASS64 == (elf)->class) \
- ? sizeof((str)->e64) \
- : sizeof((str)->e32))
+#define elf_uval(elf, str, elem) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? elf_access_unsigned((elf), (str), \
+ offsetof(typeof(*(str)),e64.elem), \
+ sizeof((str)->e64.elem)) \
+ : elf_access_unsigned((elf), (str), \
+ offsetof(typeof(*(str)),e32.elem), \
+ sizeof((str)->e32.elem)))
+
+#define elf_sval(elf, str, elem) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? elf_access_signed((elf), (str), \
+ offsetof(typeof(*(str)),e64.elem), \
+ sizeof((str)->e64.elem)) \
+ : elf_access_signed((elf), (str), \
+ offsetof(typeof(*(str)),e32.elem), \
+ sizeof((str)->e32.elem)))
+
+#define elf_size(elf, str) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? sizeof((str)->e64) : sizeof((str)->e32))

uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr,
- uint64_t offset, size_t size);
+ uint64_t offset, size_t size);
int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
- uint64_t offset, size_t size);
+ uint64_t offset, size_t size);

uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr);

@@ -149,6 +150,11 @@ int elf_is_elfbinary(const void *image);
int elf_is_elfbinary(const void *image);
int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr);

+unsigned long elf_copy_ehdr(struct elf_binary *elf, void *dest);
+unsigned long elf_copy_shdr(struct elf_binary *elf, void *dest);
+unsigned long elf_copy_section(struct elf_binary *elf,
+ const elf_shdr *shdr, void *dest);
+
/* ------------------------------------------------------------------------ */
/* xc_libelf_loader.c */

@@ -185,8 +191,8 @@ struct xen_elfnote {
enum xen_elfnote_type type;
const char *name;
union {
- const char *str;
- uint64_t num;
+ const char *str;
+ uint64_t num;
} data;
};

@@ -215,7 +221,8 @@ struct elf_dom_parms {
/* calculated */
uint64_t virt_offset;
uint64_t virt_kstart;
- uint64_t virt_kend;
+ uint64_t virt_kend; /* end of kernel image */
+ uint64_t virt_end; /* end of kernel symtab (== virt_kend if none) */
};

static inline void elf_xen_feature_set(int nr, uint32_t * addr)
@@ -228,14 +235,17 @@ static inline int elf_xen_feature_get(in
}

int elf_xen_parse_features(const char *features,
- uint32_t *supported,
- uint32_t *required);
+ uint32_t *supported,
+ uint32_t *required);
int elf_xen_parse_note(struct elf_binary *elf,
- struct elf_dom_parms *parms,
- const elf_note *note);
+ struct elf_dom_parms *parms,
+ const elf_note *note);
int elf_xen_parse_guest_info(struct elf_binary *elf,
- struct elf_dom_parms *parms);
+ struct elf_dom_parms *parms);
int elf_xen_parse(struct elf_binary *elf,
- struct elf_dom_parms *parms);
+ struct elf_dom_parms *parms);
+
+int elf_xen_dom_load_binary(struct elf_binary *elf,
+ struct elf_dom_parms *parms);

#endif /* __XC_LIBELF__ */
diff -r 976db28bcc43 -r 04fb85a46dc5 xen/include/xen/acpi.h
--- a/xen/include/xen/acpi.h Thu Aug 02 09:50:55 2007 -0500
+++ b/xen/include/xen/acpi.h Thu Aug 02 09:54:18 2007 -0500
@@ -383,6 +383,7 @@ int acpi_numa_init (void);
int acpi_numa_init (void);

int acpi_table_init (void);
+int acpi_table_disable(enum acpi_table_id table_id);
int acpi_table_parse (enum acpi_table_id id, acpi_table_handler handler);
int acpi_get_table_header_early (enum acpi_table_id id, struct acpi_table_header **header);
int acpi_table_parse_madt (enum acpi_madt_entry_id id, acpi_madt_entry_handler handler, unsigned int max_entries);
@@ -390,6 +391,7 @@ void acpi_table_print (struct acpi_table
void acpi_table_print (struct acpi_table_header *header, unsigned long phys_addr);
void acpi_table_print_madt_entry (acpi_table_entry_header *madt);
void acpi_table_print_srat_entry (acpi_table_entry_header *srat);
+uint8_t generate_acpi_checksum(void *tbl, unsigned long len);

/* the following four functions are architecture-dependent */
void acpi_numa_slit_init (struct acpi_table_slit *slit);
@@ -534,6 +536,5 @@ static inline int acpi_get_pxm(acpi_hand
#endif

extern int pnpacpi_disabled;
-extern unsigned char acpi_rsdp_rev;

#endif /*_LINUX_ACPI_H*/

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog