Mailing List Archive

[xen-unstable] x86 mce: Dump the MCE information in mc_panic and softirq
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1276154398 -3600
# Node ID 875a7ba3247e57193c313b31ad85799b5cf5f2f4
# Parent 8a2486514f1adda5b35acc39ccc71c7aa0348420
x86 mce: Dump the MCE information in mc_panic and softirq

We should not dump the mcinfo in the MCE handler; instead, we should do
that in mc_panic for fatal errors, or in the softirq for other errors.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
Acked-By: Christoph Egger <Christoph.Egger@amd.com>
---
xen/arch/x86/cpu/mcheck/mce.c | 24 ++++++++++++++++++++++++
xen/arch/x86/cpu/mcheck/mce_intel.c | 7 +++----
2 files changed, 27 insertions(+), 4 deletions(-)

diff -r 8a2486514f1a -r 875a7ba3247e xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Thu Jun 10 08:19:11 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c Thu Jun 10 08:19:58 2010 +0100
@@ -1509,15 +1509,39 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
return ret;
}

+int mcinfo_dumpped;
+static int x86_mcinfo_dump_panic(mctelem_cookie_t mctc)
+{
+ struct mc_info *mcip = mctelem_dataptr(mctc);
+
+ x86_mcinfo_dump(mcip);
+ mcinfo_dumpped++;
+
+ return 0;
+}
+
+/* XXX shall we dump commited mc_info?? */
+static void mc_panic_dump(void)
+{
+ int cpu;
+
+ dprintk(XENLOG_ERR, "Begin dump mc_info\n");
+ for_each_online_cpu(cpu)
+ mctelem_process_deferred(cpu, x86_mcinfo_dump_panic);
+ dprintk(XENLOG_ERR, "End dump mc_info, %x mcinfo dumped\n", mcinfo_dumpped);
+}
+
void mc_panic(char *s)
{
is_mc_panic = 1;
console_force_unlock();
+
printk("Fatal machine check: %s\n", s);
printk("\n"
"****************************************\n"
"\n"
" The processor has reported a hardware error which cannot\n"
" be recovered from. Xen will now reboot the machine.\n");
+ mc_panic_dump();
panic("HARDWARE ERROR");
}
diff -r 8a2486514f1a -r 875a7ba3247e xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Jun 10 08:19:11 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Jun 10 08:19:58 2010 +0100
@@ -257,6 +257,8 @@ static int mce_delayed_action(mctelem_co
switch (result)
{
case MCER_RESET:
+ dprintk(XENLOG_ERR, "MCE delayed action failed\n");
+ x86_mcinfo_dump(mctelem_dataptr(mctc));
panic("MCE: Software recovery failed for the UCR\n");
break;
case MCER_RECOVERED:
@@ -266,6 +268,7 @@ static int mce_delayed_action(mctelem_co
case MCER_CONTINUE:
dprintk(XENLOG_INFO, "MCE: Error can't be recovered, "
"system is tainted\n");
+ x86_mcinfo_dump(mctelem_dataptr(mctc));
ret = 1;
break;
default:
@@ -755,10 +758,6 @@ static void intel_machine_check(struct c
mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, clear_bank);

if (bs.errcnt) {
- /* dump MCE error */
- if (mctc != NULL)
- x86_mcinfo_dump(mctelem_dataptr(mctc));
-
/*
* Uncorrected errors must be dealth with in softirq context.
*/

_______________________________________________
Xen-changelog mailing list
Xen-changelog@lists.xensource.com
http://lists.xensource.com/xen-changelog