Mailing List Archive

[PATCH 4/5] Xen/MCE: Abort live migration when vMCE occurs
Xen/MCE: Abort live migration when vMCE occurs

This patch monitors the critical area of live migration (from the vMCE point of view,
the copypages stage of migration is the critical area, while other stages are not).

If a vMCE occurs in the critical area of live migration, there is a risk that erroneous
data may be copied to the target. Currently we don't have a convenient way to handle
this case, so for the sake of safety we abort the migration and retry it later (at that
time the broken page will not be mapped and copied to the target).

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r e27a6d53ac15 tools/libxc/xc_domain.c
--- a/tools/libxc/xc_domain.c Thu Oct 11 01:52:33 2012 +0800
+++ b/tools/libxc/xc_domain.c Thu Oct 11 05:12:48 2012 +0800
@@ -283,6 +283,30 @@
return ret;
}

+/* Start vMCE monitoring of domain domid: issues XEN_DOMCTL_vmce_monitor_start and returns do_domctl()'s result */
+int xc_domain_vmce_monitor_start(xc_interface *xch,
+ uint32_t domid)
+{
+ DECLARE_DOMCTL;
+
+ domctl.cmd = XEN_DOMCTL_vmce_monitor_start;
+ domctl.domain = (domid_t)domid;
+
+ return do_domctl(xch, &domctl);
+}
+
+/* End vMCE monitoring of domain domid: issues XEN_DOMCTL_vmce_monitor_end and returns do_domctl()'s result (callers treat > 0 as "vMCE occurred") */
+int xc_domain_vmce_monitor_end(xc_interface *xch,
+ uint32_t domid)
+{
+ DECLARE_DOMCTL;
+
+ domctl.cmd = XEN_DOMCTL_vmce_monitor_end;
+ domctl.domain = (domid_t)domid;
+
+ return do_domctl(xch, &domctl);
+}
+
/* get info from hvm guest for save */
int xc_domain_hvm_getcontext(xc_interface *xch,
uint32_t domid,
diff -r e27a6d53ac15 tools/libxc/xc_domain_save.c
--- a/tools/libxc/xc_domain_save.c Thu Oct 11 01:52:33 2012 +0800
+++ b/tools/libxc/xc_domain_save.c Thu Oct 11 05:12:48 2012 +0800
@@ -895,6 +895,8 @@
*/
int compressing = 0;

+ int vmce_while_monitor = 0;
+
int completed = 0;

if ( hvm && !callbacks->switch_qemu_logdirty )
@@ -1109,6 +1111,12 @@
goto out;
}

+ if ( xc_domain_vmce_monitor_start(xch, dom) )
+ {
+ PERROR("Error when start vmce monitor\n");
+ goto out;
+ }
+
copypages:
#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), (len))
#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, (fd), (buf), (len))
@@ -1571,6 +1579,18 @@

DPRINTF("All memory is saved\n");

+ vmce_while_monitor = xc_domain_vmce_monitor_end(xch, dom);
+ if ( vmce_while_monitor < 0 )
+ {
+ PERROR("Error when end vmce monitor\n");
+ goto out;
+ }
+ else if ( vmce_while_monitor > 0 )
+ {
+ fprintf(stderr, "vMCE occurred, abort this time and try later.\n");
+ goto out;
+ }
+
/* After last_iter, buffer the rest of pagebuf & tailbuf data into a
* separate output buffer and flush it after the compressed page chunks.
*/
diff -r e27a6d53ac15 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h Thu Oct 11 01:52:33 2012 +0800
+++ b/tools/libxc/xenctrl.h Thu Oct 11 05:12:48 2012 +0800
@@ -575,6 +575,26 @@
xc_domaininfo_t *info);

/**
+ * This function starts monitoring vMCE events.
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id monitored
+ * @return <0 on failure, 0 on success
+ */
+int xc_domain_vmce_monitor_start(xc_interface *xch,
+ uint32_t domid);
+
+/**
+ * This function ends monitoring of vMCE events.
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the domain id monitored
+ * @return < 0 on failure, >= 0 on success while
+ * = 0 on no vmce occurred
+ * > 0 on vmce occurred
+ */
+int xc_domain_vmce_monitor_end(xc_interface *xch,
+ uint32_t domid);
+
+/**
* This function returns information about the context of a hvm domain
* @parm xch a handle to an open hypervisor interface
* @parm domid the domain to get information from
diff -r e27a6d53ac15 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Oct 11 01:52:33 2012 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Oct 11 05:12:48 2012 +0800
@@ -359,6 +359,12 @@
goto vmce_failed;
}

+ if ( unlikely(d->arch.vmce_monitor) )
+ {
+ /* A vMCE occurred while guest migration was being monitored */
+ d->arch.vmce_monitor = 1;
+ }
+
/* We will inject vMCE to DOMU*/
if ( inject_vmce(d, VMCE_INJECT_BROADCAST) < 0 )
{
diff -r e27a6d53ac15 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c Thu Oct 11 01:52:33 2012 +0800
+++ b/xen/arch/x86/domctl.c Thu Oct 11 05:12:48 2012 +0800
@@ -1568,6 +1568,47 @@
}
break;

+ case XEN_DOMCTL_vmce_monitor_start:
+ {
+ struct domain *d;
+
+ d = rcu_lock_domain_by_id(domctl->domain);
+ if ( d != NULL )
+ {
+ if ( d->arch.vmce_monitor )
+ ret = -EBUSY;
+ else
+ d->arch.vmce_monitor = -1;
+
+ rcu_unlock_domain(d);
+ }
+ else
+ ret = -ESRCH;
+ }
+ break;
+
+ case XEN_DOMCTL_vmce_monitor_end:
+ {
+ struct domain *d;
+
+ d = rcu_lock_domain_by_id(domctl->domain);
+ if ( d != NULL)
+ {
+ if ( !d->arch.vmce_monitor )
+ ret = -EINVAL;
+ else
+ {
+ ret = d->arch.vmce_monitor > 0 ? 1 : 0;
+ d->arch.vmce_monitor = 0;
+ }
+
+ rcu_unlock_domain(d);
+ }
+ else
+ ret = -ESRCH;
+ }
+ break;
+
default:
ret = iommu_do_domctl(domctl, u_domctl);
break;
diff -r e27a6d53ac15 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Thu Oct 11 01:52:33 2012 +0800
+++ b/xen/include/asm-x86/domain.h Thu Oct 11 05:12:48 2012 +0800
@@ -279,6 +279,11 @@
bool_t has_32bit_shinfo;
/* Domain cannot handle spurious page faults? */
bool_t suppress_spurious_page_faults;
+ /* Monitoring guest memory copy of migration
+ * = 0 - not monitoring
+ * < 0 - monitoring
+ * > 0 - vMCE occurred while monitoring */
+ s8 vmce_monitor;

/* Continuable domain_relinquish_resources(). */
enum {
diff -r e27a6d53ac15 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Thu Oct 11 01:52:33 2012 +0800
+++ b/xen/include/public/domctl.h Thu Oct 11 05:12:48 2012 +0800
@@ -900,6 +900,8 @@
#define XEN_DOMCTL_set_access_required 64
#define XEN_DOMCTL_audit_p2m 65
#define XEN_DOMCTL_set_virq_handler 66
+#define XEN_DOMCTL_vmce_monitor_start 67
+#define XEN_DOMCTL_vmce_monitor_end 68
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002