Mailing List Archive

[PATCH] mce_cleanup
X86: mce cleanup for both Intel and AMD mce logic

c/s 22964 fixed an MCE memory leak bug which could trigger a Xen crash during cpu online.
However, there are 2 MCE memory leaks: 1 at the mce level (arch independent), and 1 at the mce_intel level (arch dependent).
c/s 22964 freed both at the mce_intel level, which would cause problems on AMD.

This patch fixes this issue.
It allocates/frees poll_bankmask (arch independent) at the mce level,
and adds a notifier block at the mce level to avoid the risk of calling xmalloc with irqs disabled.
With this patch, both Intel and AMD MCE work correctly in a clean way.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>

diff -r 84bacd800bf8 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Sat Mar 12 13:20:51 2011 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Mar 14 18:29:13 2011 +0800
@@ -16,6 +16,7 @@
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h> /* for do_mca */
+#include <xen/cpu.h>

#include <asm/processor.h>
#include <asm/system.h>
@@ -695,14 +696,13 @@ int show_mca_info(int inited, struct cpu
return 0;
}

-int set_poll_bankmask(struct cpuinfo_x86 *c)
+static void set_poll_bankmask(struct cpuinfo_x86 *c)
{
int cpu = smp_processor_id();
struct mca_banks *mb;

- mb = mcabanks_alloc();
- if (!mb)
- return -ENOMEM;
+ mb = per_cpu(poll_bankmask, cpu);
+ BUG_ON(!mb);

if (cmci_support && !mce_disabled) {
mb->num = per_cpu(no_cmci_banks, cpu)->num;
@@ -714,9 +714,6 @@ int set_poll_bankmask(struct cpuinfo_x86
if (mce_firstbank(c))
mcabanks_clear(0, mb);
}
- per_cpu(poll_bankmask, cpu) = mb;
-
- return 0;
}

/* The perbank ctl/status init is platform specific because of AMD's quirk */
@@ -749,6 +746,51 @@ int mca_cap_init(void)

return mca_allbanks ? 0:-ENOMEM;
}
+
+static void cpu_poll_bankmask_free(unsigned int cpu)
+{
+ struct mca_banks *mb = per_cpu(poll_bankmask, cpu);
+
+ mcabanks_free(mb);
+}
+
+static int cpu_poll_bankmask_alloc(unsigned int cpu)
+{
+ struct mca_banks *mb;
+
+ mb = mcabanks_alloc();
+ if ( !mb )
+ return -ENOMEM;
+
+ per_cpu(poll_bankmask, cpu) = mb;
+ return 0;
+}
+
+static int cpu_callback(
+ struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+ int rc = 0;
+
+ switch ( action )
+ {
+ case CPU_UP_PREPARE:
+ rc = cpu_poll_bankmask_alloc(cpu);
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+ cpu_poll_bankmask_free(cpu);
+ break;
+ default:
+ break;
+ }
+
+ return !rc ? NOTIFY_DONE : notifier_from_errno(rc);
+}
+
+static struct notifier_block cpu_nfb = {
+ .notifier_call = cpu_callback
+};

/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
@@ -802,6 +844,13 @@ void mcheck_init(struct cpuinfo_x86 *c)
/* Turn on MCE now */
set_in_cr4(X86_CR4_MCE);

+ if ( smp_processor_id() == 0 )
+ {
+ /* Early MCE initialisation for BSP. */
+ if ( cpu_poll_bankmask_alloc(0) )
+ BUG();
+ register_cpu_notifier(&cpu_nfb);
+ }
set_poll_bankmask(c);

return;
diff -r 84bacd800bf8 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Sat Mar 12 13:20:51 2011 +0000
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Mar 14 18:29:13 2011 +0800
@@ -1233,17 +1233,15 @@ static void intel_init_mce(void)

static void cpu_mcabank_free(unsigned int cpu)
{
- struct mca_banks *mb1, *mb2, *mb3, *mb4;
+ struct mca_banks *mb1, *mb2, *mb3;

mb1 = per_cpu(mce_clear_banks, cpu);
mb2 = per_cpu(no_cmci_banks, cpu);
mb3 = per_cpu(mce_banks_owned, cpu);
- mb4 = per_cpu(poll_bankmask, cpu);

mcabanks_free(mb1);
mcabanks_free(mb2);
mcabanks_free(mb3);
- mcabanks_free(mb4);
}

static int cpu_mcabank_alloc(unsigned int cpu)