Mailing List Archive

[xen-unstable] libxl: fix migrate for HVM guests
# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1297702975 0
# Node ID 137ad33475048abce15d82a7499cf211f0c28c85
# Parent f8097fe3cf0541e5e251c83051899aa261cf1e66
libxl: fix migrate for HVM guests

Prior to 22909:6868f7f3ab3f libxl would loop waiting simultaneously
for the domain to acknowledge a PV suspend request (by clearing the
XenStore node) and for the domain to actually suspend. For HVM guests
without PV drivers this same loop was simply waiting for the domain to
suspend.

In 22909:6868f7f3ab3f the original loop was split into two loops
(first waiting for the acknowledgement and then for the actual
suspend). This caused libxl to incorrectly wait for an HVM guest
without PV drivers to acknowledge the XenStore request, which is not
something it would ever do.

Fix this by only waiting for an acknowledgement from a guest which
contains PV drivers.

Previously we were also making the request regardless of whether the
guest had PV drivers; change that to only make the request if the
guest has PV drivers.

Lastly, there is no need to sample HVM_PARAM_ACPI_S_STATE twice, and
not doing so simplifies the test for PVHVM vs. normal HVM guests.

Tested with:
Windows with GPL PV drivers (event channel suspend mode)
Windows without PV drivers (xc_domain_shutdown mode)
Linux PV (PV with XenBus control node mode)
Linux HVM (PVHVM with XenBus control node mode (*))
Linux HVM (xc_domain_shutdown mode)

(*) In this case the kernel didn't actually suspend, due to:
PM: Device input1 failed to suspend: error -22
xen suspend: dpm_suspend_start -22
which may be a misconfiguration in my setup or may be a kernel
bug, but the libxl side dealt with this as gracefully as it could.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
tools/libxl/libxl_dom.c | 131 +++++++++++++++++++++++++-----------------------
1 files changed, 70 insertions(+), 61 deletions(-)

diff -r f8097fe3cf05 -r 137ad3347504 tools/libxl/libxl_dom.c
--- a/tools/libxl/libxl_dom.c Mon Feb 14 16:56:20 2011 +0000
+++ b/tools/libxl/libxl_dom.c Mon Feb 14 17:02:55 2011 +0000
@@ -345,16 +345,21 @@ static int libxl__domain_suspend_common_
static int libxl__domain_suspend_common_callback(void *data)
{
struct suspendinfo *si = data;
- unsigned long s_state = 0;
+ unsigned long hvm_s_state = 0, hvm_pvdrv = 0;
int ret;
char *path, *state = "suspend";
int watchdog;
libxl_ctx *ctx = libxl__gc_owner(si->gc);
xs_transaction_t t;

- if (si->hvm)
- xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &s_state);
- if ((s_state == 0) && (si->suspend_eventchn >= 0)) {
+ if (si->hvm) {
+ xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ, &hvm_pvdrv);
+ xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &hvm_s_state);
+ }
+
+ if ((hvm_s_state == 0) && (si->suspend_eventchn >= 0)) {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "issuing %s suspend request via event channel",
+ si->hvm ? "PVHVM" : "PV");
ret = xc_evtchn_notify(si->xce, si->suspend_eventchn);
if (ret < 0) {
LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "xc_evtchn_notify failed ret=%d", ret);
@@ -368,63 +373,67 @@ static int libxl__domain_suspend_common_
si->guest_responded = 1;
return 1;
}
- path = libxl__sprintf(si->gc, "%s/control/shutdown", libxl__xs_get_dompath(si->gc, si->domid));
- libxl__xs_write(si->gc, XBT_NULL, path, "suspend");
- if (si->hvm) {
- unsigned long hvm_pvdrv, hvm_s_state;
- xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_CALLBACK_IRQ, &hvm_pvdrv);
- xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE, &hvm_s_state);
- if (!hvm_pvdrv || hvm_s_state) {
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Calling xc_domain_shutdown on the domain");
- xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend);
- }
- }
-
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to acknowledge suspend request");
- watchdog = 60;
- while (!strcmp(state, "suspend") && watchdog > 0) {
- usleep(100000);
-
- state = libxl__xs_read(si->gc, XBT_NULL, path);
-
- watchdog--;
- }
-
- /*
- * Guest appears to not be responding. Cancel the suspend request.
- *
- * We re-read the suspend node and clear it within a transaction
- * in order to handle the case where we race against the guest
- * catching up and acknowledging the request at the last minute.
- */
- if (!strcmp(state, "suspend")) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't acknowledge suspend, cancelling request");
- retry_transaction:
- t = xs_transaction_start(ctx->xsh);
-
- state = libxl__xs_read(si->gc, t, path);
-
- if (!strcmp(state, "suspend"))
- libxl__xs_write(si->gc, t, path, "");
-
- if (!xs_transaction_end(ctx->xsh, t, 0))
- if (errno == EAGAIN)
- goto retry_transaction;
-
- }
-
- /*
- * Final check for guest acknowledgement. The guest may have
- * acknowledged while we were cancelling the request in which case
- * we lost the race while cancelling and should continue.
- */
- if (!strcmp(state, "suspend")) {
- LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't acknowledge suspend, request cancelled");
- return 0;
- }
-
- LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend request");
- si->guest_responded = 1;
+
+ if (si->hvm && (!hvm_pvdrv || hvm_s_state)) {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Calling xc_domain_shutdown on HVM domain");
+ xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend);
+ /* The guest does not (need to) respond to this sort of request. */
+ si->guest_responded = 1;
+ } else {
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "issuing %s suspend request via XenBus control node",
+ si->hvm ? "PVHVM" : "PV");
+
+ path = libxl__sprintf(si->gc, "%s/control/shutdown", libxl__xs_get_dompath(si->gc, si->domid));
+ libxl__xs_write(si->gc, XBT_NULL, path, "suspend");
+
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to acknowledge suspend request");
+ watchdog = 60;
+ while (!strcmp(state, "suspend") && watchdog > 0) {
+ usleep(100000);
+
+ state = libxl__xs_read(si->gc, XBT_NULL, path);
+
+ watchdog--;
+ }
+
+ /*
+ * Guest appears to not be responding. Cancel the suspend
+ * request.
+ *
+ * We re-read the suspend node and clear it within a
+ * transaction in order to handle the case where we race
+ * against the guest catching up and acknowledging the request
+ * at the last minute.
+ */
+ if (!strcmp(state, "suspend")) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't acknowledge suspend, cancelling request");
+ retry_transaction:
+ t = xs_transaction_start(ctx->xsh);
+
+ state = libxl__xs_read(si->gc, t, path);
+
+ if (!strcmp(state, "suspend"))
+ libxl__xs_write(si->gc, t, path, "");
+
+ if (!xs_transaction_end(ctx->xsh, t, 0))
+ if (errno == EAGAIN)
+ goto retry_transaction;
+
+ }
+
+ /*
+ * Final check for guest acknowledgement. The guest may have
+ * acknowledged while we were cancelling the request in which
+ * case we lost the race while cancelling and should continue.
+ */
+ if (!strcmp(state, "suspend")) {
+ LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't acknowledge suspend, request cancelled");
+ return 0;
+ }
+
+ LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend request");
+ si->guest_responded = 1;
+ }

LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to suspend");
watchdog = 60;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog