
[xen-unstable] mce: Clean-up mcheck_init handler
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1276065739 -3600
# Node ID b04b812480782034b34d759401ab11e95a86cf71
# Parent f425b2c7834048a8f5ea5186ebcd9031b9d14a27
mce: Clean-up mcheck_init handler

Firstly and most importantly, the maximum number of MCA banks is
hard-coded as MAX_NR_BANKS (30), which is not architecturally
correct. This patch removes that definition, replaces cpu_banks_t
with struct mca_banks, and provides basic set/clear/test/alloc/free
helpers for the new type.
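
For illustration, a minimal usage sketch of the new interface (all
names are taken from the patch below; error handling is elided):

    struct mca_banks *mb = mcabanks_alloc(); /* sized by nr_mce_banks */
    if (mb != NULL) {
        mcabanks_set(0, mb);                 /* mark bank 0 */
        if (mcabanks_test(0, mb))
            mcabanks_clear(0, mb);           /* and unmark it again */
        mcabanks_free(mb);                   /* also frees mb->bank_map */
    }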

Secondly, move the broadcast_check code into the Intel-specific
file, since only Intel platforms support MCE broadcast at present.

Thirdly, the X86_FEATURE_MCA check and the CR4.MCE enable were done
in every vendor-specific callback, which was redundant; move them to
mcheck_init. Also, CR4.MCE should be enabled only at the end of
mcheck_init, to close the small window between enabling machine
checks in CR4 and completing the MCA setup.
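
The resulting per-CPU flow in mcheck_init looks roughly like the
sketch below (condensed from the mce.c changes in this patch; the
vendor dispatch is abbreviated):

    if (!mce_available(c))      /* one MCE+MCA feature check for all */
        return;
    if (mca_cap_init())         /* read MCG_CAP, set up mca_allbanks */
        return;
    inited = ...;               /* vendor-specific bank/handler setup */
    intpose_init();
    mctelem_init(sizeof(struct mc_info));
    vmce_init(c);               /* guest vMCE state, now in vmce.c */
    set_in_cr4(X86_CR4_MCE);    /* enabled last, once setup is done */
    set_poll_bankmask(c);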

We also move the vMCE-specific code to vmce.c, as vmce_init, to keep
the code clean.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
Acked-By: Christoph Egger <Christoph.Egger@amd.com>
---
xen/arch/x86/cpu/mcheck/amd_k8.c | 7
xen/arch/x86/cpu/mcheck/k7.c | 12 -
xen/arch/x86/cpu/mcheck/mce.c | 294 ++++++++++++++++++------------------
xen/arch/x86/cpu/mcheck/mce.h | 20 --
xen/arch/x86/cpu/mcheck/mce_intel.c | 126 ++++++++++-----
xen/arch/x86/cpu/mcheck/non-fatal.c | 5
xen/arch/x86/cpu/mcheck/vmce.c | 53 ++++++
xen/arch/x86/cpu/mcheck/x86_mca.h | 33 +++-
xen/include/asm-x86/mce.h | 2
9 files changed, 334 insertions(+), 218 deletions(-)

diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/amd_k8.c
--- a/xen/arch/x86/cpu/mcheck/amd_k8.c Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Wed Jun 09 07:42:19 2010 +0100
@@ -81,13 +81,8 @@ enum mcheck_type amd_k8_mcheck_init(stru
uint32_t i;
enum mcequirk_amd_flags quirkflag;

- /* Check for PPro style MCA; our caller has confirmed MCE support. */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return mcheck_none;
-
quirkflag = mcequirk_lookup_amd_quirkdata(c);

- mce_cap_init();
x86_mce_vector_register(k8_machine_check);

for (i = 0; i < nr_mce_banks; i++) {
@@ -101,7 +96,5 @@ enum mcheck_type amd_k8_mcheck_init(stru
}
}

- set_in_cr4(X86_CR4_MCE);
-
return mcheck_amd_k8;
}
diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/k7.c
--- a/xen/arch/x86/cpu/mcheck/k7.c Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/k7.c Wed Jun 09 07:42:19 2010 +0100
@@ -70,19 +70,9 @@ static fastcall void k7_machine_check(st
/* AMD K7 machine check */
enum mcheck_type amd_k7_mcheck_init(struct cpuinfo_x86 *c)
{
- u32 l, h;
int i;

- /* Check for PPro style MCA; our caller has confirmed MCE support. */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return mcheck_none;
-
x86_mce_vector_register(k7_machine_check);
-
- rdmsr (MSR_IA32_MCG_CAP, l, h);
- if (l & (1<<8)) /* Control register present ? */
- wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- nr_mce_banks = l & 0xff;

/* Clear status for MC index 0 separately, we don't touch CTL,
* as some Athlons cause spurious MCEs when its enabled. */
@@ -92,7 +82,5 @@ enum mcheck_type amd_k7_mcheck_init(stru
wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}

- set_in_cr4 (X86_CR4_MCE);
-
return mcheck_amd_k7;
}
diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.c Wed Jun 09 07:42:19 2010 +0100
@@ -25,21 +25,15 @@

int mce_disabled;
invbool_param("mce", mce_disabled);
-static int mce_force_broadcast;
-boolean_param("mce_fb", mce_force_broadcast);
int is_mc_panic;
unsigned int nr_mce_banks;

int mce_broadcast = 0;
-uint64_t g_mcg_cap;
-
-/* Real value in physical CTL MSR */
-uint64_t h_mcg_ctl = 0UL;
-uint64_t *h_mci_ctrl;
int firstbank;

static void intpose_init(void);
static void mcinfo_clear(struct mc_info *);
+struct mca_banks *mca_allbanks;

#define SEG_PL(segsel) ((segsel) & 0x3)
#define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16)
@@ -54,8 +48,6 @@ static int x86_mcerr(const char *msg, in
#else
#define x86_mcerr(msg, err) (err)
#endif
-
-cpu_banks_t mca_allbanks;

int mce_verbosity;
static void __init mce_set_verbosity(char *str)
@@ -113,6 +105,36 @@ void mce_recoverable_register(mce_recove
mc_recoverable_scan = cbfunc;
}

+struct mca_banks *mcabanks_alloc(void)
+{
+ struct mca_banks *mb;
+
+ mb = xmalloc(struct mca_banks);
+ if (!mb)
+ return NULL;
+
+ mb->bank_map = xmalloc_array(unsigned long,
+ BITS_TO_LONGS(nr_mce_banks));
+ if (!mb->bank_map)
+ {
+ xfree(mb);
+ return NULL;
+ }
+
+ mb->num = nr_mce_banks;
+ memset(mb->bank_map, 0, sizeof(long) * BITS_TO_LONGS(nr_mce_banks));
+
+ return mb;
+}
+
+void mcabanks_free(struct mca_banks *banks)
+{
+ if (banks == NULL)
+ return;
+ if (banks->bank_map)
+ xfree(banks->bank_map);
+ xfree(banks);
+}
/* Judging whether to Clear Machine Check error bank callback handler
* According to Intel latest MCA OS Recovery Writer's Guide,
* whether the error MCA bank needs to be cleared is decided by the mca_source
@@ -218,8 +240,8 @@ static int mca_init_global(uint32_t flag
* For Intel latest CPU, whether to clear the error bank status needs to
* be judged by the callback function defined above.
*/
-mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask,
- struct mca_summary *sp, cpu_banks_t* clear_bank)
+mctelem_cookie_t mcheck_mca_logout(enum mca_source who, struct mca_banks *bankmask,
+ struct mca_summary *sp, struct mca_banks* clear_bank)
{
uint64_t gstatus, status;
struct mcinfo_global *mig = NULL; /* on stack */
@@ -262,7 +284,7 @@ mctelem_cookie_t mcheck_mca_logout(enum
struct mcinfo_bank *mib; /* on stack */

/* Skip bank if corresponding bit in bankmask is clear */
- if (!test_bit(i, bankmask))
+ if (!mcabanks_test(i, bankmask))
continue;

mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
@@ -325,7 +347,7 @@ mctelem_cookie_t mcheck_mca_logout(enum
/* Clear status */
mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
else if ( who == MCA_MCE_SCAN && need_clear)
- set_bit(i, clear_bank);
+ mcabanks_set(i, clear_bank);

wmb();
}
@@ -359,7 +381,7 @@ mctelem_cookie_t mcheck_mca_logout(enum

/* Shared #MC handler. */
void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code,
- cpu_banks_t bankmask)
+ struct mca_banks *bankmask)
{
int xen_state_lost, dom0_state_lost, domU_state_lost;
struct vcpu *v = current;
@@ -575,13 +597,13 @@ cmn_handler_done:
}
}

-void mcheck_mca_clearbanks(cpu_banks_t bankmask)
+void mcheck_mca_clearbanks(struct mca_banks *bankmask)
{
int i;
uint64_t status;

for (i = 0; i < 32 && i < nr_mce_banks; i++) {
- if (!test_bit(i, bankmask))
+ if (!mcabanks_test(i, bankmask))
continue;
mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
if (!(status & MCi_STATUS_VAL))
@@ -620,21 +642,6 @@ int mce_available(struct cpuinfo_x86 *c)
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}

-static int mce_is_broadcast(struct cpuinfo_x86 *c)
-{
- if (mce_force_broadcast)
- return 1;
-
- /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with
- * DisplayFamily_DisplayModel encoding of 06H_EH and above,
- * a MCA signal is broadcast to all logical processors in the system
- */
- if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
- c->x86_model >= 0xe)
- return 1;
- return 0;
-}
-
/*
* Check if bank 0 is usable for MCE. It isn't for AMD K7,
* and Intel P6 family before model 0x1a.
@@ -652,77 +659,9 @@ int mce_firstbank(struct cpuinfo_x86 *c)
return 0;
}

-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
- int i, broadcast;
- enum mcheck_type inited = mcheck_none;
+int show_mca_info(int inited, struct cpuinfo_x86 *c)
+{
static enum mcheck_type g_type = mcheck_unset;
- static int broadcast_check;
-
- if (mce_disabled == 1) {
- dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
- return;
- }
-
- broadcast = mce_is_broadcast(c);
- if (broadcast_check && (broadcast != mce_broadcast) )
- dprintk(XENLOG_INFO,
- "CPUs have mixed broadcast support"
- "may cause undetermined result!!!\n");
-
- broadcast_check = 1;
- if (broadcast)
- mce_broadcast = broadcast;
-
- for (i = 0; i < MAX_NR_BANKS; i++)
- set_bit(i,mca_allbanks);
-
- /* Enforce at least MCE support in CPUID information. Individual
- * families may also need to enforce a check for MCA support. */
- if (!cpu_has(c, X86_FEATURE_MCE)) {
- printk(XENLOG_INFO "CPU%i: No machine check support available\n",
- smp_processor_id());
- return;
- }
-
- intpose_init();
- mctelem_init(sizeof (struct mc_info));
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- inited = amd_mcheck_init(c);
- break;
-
- case X86_VENDOR_INTEL:
- switch (c->x86) {
- case 6:
- case 15:
- inited = intel_mcheck_init(c);
- break;
- }
- break;
-
- default:
- break;
- }
-
- if ( !h_mci_ctrl )
- {
- h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks);
- if (!h_mci_ctrl)
- {
- dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n");
- return;
- }
- /* Don't care banks before firstbank */
- memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl));
- for (i = firstbank; i < nr_mce_banks; i++)
- rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]);
- }
- if (g_mcg_cap & MCG_CTL_P)
- rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl);
- set_poll_bankmask(c);

if (inited != g_type) {
char prefix[20];
@@ -751,32 +690,130 @@ void mcheck_init(struct cpuinfo_x86 *c)
printk("%sNo machine check initialization\n", prefix);
break;
}
-
- g_type = inited;
- }
-}
-
-u64 mce_cap_init(void)
+ g_type = inited;
+ }
+
+ return 0;
+}
+
+int set_poll_bankmask(struct cpuinfo_x86 *c)
+{
+ int cpu = smp_processor_id();
+ struct mca_banks *mb;
+
+ mb = mcabanks_alloc();
+ if (!mb)
+ return -ENOMEM;
+
+ if (cmci_support && !mce_disabled) {
+ mb->num = per_cpu(no_cmci_banks, cpu)->num;
+ bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
+ nr_mce_banks);
+ }
+ else {
+ bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
+ if (mce_firstbank(c))
+ mcabanks_clear(0, mb);
+ }
+ per_cpu(poll_bankmask, cpu) = mb;
+
+ return 0;
+}
+
+/* The perbank ctl/status init is platform specific because of AMD's quirk */
+int mca_cap_init(void)
{
u32 l, h;
u64 value;

rdmsr(MSR_IA32_MCG_CAP, l, h);
value = ((u64)h << 32) | l;
- /* For Guest vMCE usage */
- g_mcg_cap = value & ~MCG_CMCI_P;

if (l & MCG_CTL_P) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

+ if (nr_mce_banks && (l & MCG_CAP_COUNT) != nr_mce_banks)
+ {
+ dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
+ smp_processor_id());
+ return -ENODEV;
+ }
nr_mce_banks = l & MCG_CAP_COUNT;
- if ( nr_mce_banks > MAX_NR_BANKS )
+
+ /* mcabanks_alloc depends on nr_mcebanks */
+ if (!mca_allbanks)
{
- printk(KERN_WARNING "MCE: exceed max mce banks\n");
- g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
+ int i;
+
+ mca_allbanks = mcabanks_alloc();
+ for ( i = 0; i < nr_mce_banks; i++)
+ mcabanks_set(i, mca_allbanks);
}

- return value;
+ return mca_allbanks ? 0:-ENOMEM;
+}
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+ enum mcheck_type inited = mcheck_none;
+
+ if (mce_disabled == 1) {
+ dprintk(XENLOG_INFO, "MCE support disabled by bootparam\n");
+ return;
+ }
+
+ if (!mce_available(c))
+ {
+ printk(XENLOG_INFO "CPU%i: No machine check support available\n",
+ smp_processor_id());
+ return;
+ }
+
+ /*Hardware Enable */
+ if (mca_cap_init())
+ return;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ inited = amd_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ switch (c->x86) {
+ case 6:
+ case 15:
+ inited = intel_mcheck_init(c);
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ show_mca_info(inited, c);
+ if (inited == mcheck_none || inited == mcheck_unset)
+ goto out;
+
+ intpose_init();
+
+ mctelem_init(sizeof(struct mc_info));
+
+ vmce_init(c);
+
+ /* Turn on MCE now */
+ set_in_cr4(X86_CR4_MCE);
+
+ set_poll_bankmask(c);
+
+ return;
+out:
+ if (smp_processor_id() == 0)
+ {
+ mcabanks_free(mca_allbanks);
+ mca_allbanks = NULL;
+ }
}

static void mcinfo_clear(struct mc_info *mi)
@@ -1047,23 +1084,6 @@ void intpose_inval(unsigned int cpu_nr,
(r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \
((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */

-int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d)
-{
- int bank_nr;
-
- if ( !bank || !d || !h_mci_ctrl )
- return 1;
-
- /* Will MCE happen in host if If host mcg_ctl is 0? */
- if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
- return 1;
-
- bank_nr = bank->mc_bank;
- if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
- return 1;
- return 0;
-}
-
static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
{
struct cpuinfo_x86 *c;
@@ -1435,19 +1455,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u

return ret;
}
-void set_poll_bankmask(struct cpuinfo_x86 *c)
-{
-
- if (cmci_support && !mce_disabled) {
- memcpy(&(__get_cpu_var(poll_bankmask)),
- &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t));
- }
- else {
- memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, sizeof(cpu_banks_t));
- if (mce_firstbank(c))
- clear_bit(0, get_cpu_var(poll_bankmask));
- }
-}
+
void mc_panic(char *s)
{
is_mc_panic = 1;
diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce.h Wed Jun 09 07:42:19 2010 +0100
@@ -72,7 +72,7 @@ extern void x86_mce_vector_register(x86_

/* Common generic MCE handler that implementations may nominate
* via x86_mce_vector_register. */
-extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t);
+extern void mcheck_cmn_handler(struct cpu_user_regs *, long, struct mca_banks *);

/* Register a handler for judging whether mce is recoverable. */
typedef int (*mce_recoverable_t)(u64 status);
@@ -120,18 +120,17 @@ struct mca_summary {
uint32_t recoverable;
};

-extern cpu_banks_t mca_allbanks;
-void set_poll_bankmask(struct cpuinfo_x86 *c);
-DECLARE_PER_CPU(cpu_banks_t, poll_bankmask);
-DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks);
+DECLARE_PER_CPU(struct mca_banks *, poll_bankmask);
+DECLARE_PER_CPU(struct mca_banks *, no_cmci_banks);
+
extern int cmci_support;
extern int ser_support;
extern int is_mc_panic;
extern int mce_broadcast;
-extern void mcheck_mca_clearbanks(cpu_banks_t);
+extern void mcheck_mca_clearbanks(struct mca_banks *);

-extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t,
- struct mca_summary *, cpu_banks_t*);
+extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, struct mca_banks *,
+ struct mca_summary *, struct mca_banks *);

/* Register a callback to be made during bank telemetry logout.
* This callback is only available to those machine check handlers
@@ -164,10 +163,7 @@ int inject_vmce(struct domain *d);
int inject_vmce(struct domain *d);
int vmce_domain_inject(struct mcinfo_bank *bank, struct domain *d, struct mcinfo_global *global);

-extern uint64_t g_mcg_cap;
-/* Real value in physical CTL MSR */
-extern uint64_t h_mcg_ctl;
-extern uint64_t *h_mci_ctrl;
+extern int vmce_init(struct cpuinfo_x86 *c);

extern unsigned int nr_mce_banks;

diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Wed Jun 09 07:42:19 2010 +0100
@@ -16,10 +16,13 @@
#include "mce.h"
#include "x86_mca.h"

-DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
-DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
+DEFINE_PER_CPU(struct mca_banks *, mce_banks_owned);
+DEFINE_PER_CPU(struct mca_banks *, no_cmci_banks);
+DEFINE_PER_CPU(struct mca_banks *, mce_clear_banks);
int cmci_support = 0;
int ser_support = 0;
+static int mce_force_broadcast;
+boolean_param("mce_fb", mce_force_broadcast);

static int nr_intel_ext_msrs = 0;

@@ -528,12 +531,14 @@ static void intel_machine_check(struct c
uint64_t gstatus;
mctelem_cookie_t mctc = NULL;
struct mca_summary bs;
- cpu_banks_t clear_bank;
+ struct mca_banks *clear_bank;

mce_spin_lock(&mce_logout_lock);

- memset( &clear_bank, 0x0, sizeof(cpu_banks_t));
- mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, &clear_bank);
+ clear_bank = __get_cpu_var(mce_clear_banks);
+ memset( clear_bank->bank_map, 0x0,
+ sizeof(long) * BITS_TO_LONGS(clear_bank->num));
+ mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, clear_bank);

if (bs.errcnt) {
/* dump MCE error */
@@ -699,7 +704,7 @@ static int do_cmci_discover(int i)
rdmsrl(msr, val);
/* Some other CPU already owns this bank. */
if (val & CMCI_EN) {
- clear_bit(i, __get_cpu_var(mce_banks_owned));
+ mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
goto out;
}

@@ -709,12 +714,12 @@ static int do_cmci_discover(int i)

if (!(val & CMCI_EN)) {
/* This bank does not support CMCI. Polling timer has to handle it. */
- set_bit(i, __get_cpu_var(no_cmci_banks));
+ mcabanks_set(i, __get_cpu_var(no_cmci_banks));
return 0;
}
- set_bit(i, __get_cpu_var(mce_banks_owned));
+ mcabanks_set(i, __get_cpu_var(mce_banks_owned));
out:
- clear_bit(i, __get_cpu_var(no_cmci_banks));
+ mcabanks_clear(i, __get_cpu_var(no_cmci_banks));
return 1;
}

@@ -730,7 +735,7 @@ static void cmci_discover(void)
spin_lock_irqsave(&cmci_discover_lock, flags);

for (i = 0; i < nr_mce_banks; i++)
- if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+ if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned)))
do_cmci_discover(i);

spin_unlock_irqrestore(&cmci_discover_lock, flags);
@@ -757,8 +762,8 @@ static void cmci_discover(void)

mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
smp_processor_id(),
- *((unsigned long *)__get_cpu_var(mce_banks_owned)),
- *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+ *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
+ *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
}

/*
@@ -804,12 +809,12 @@ static void clear_cmci(void)
for (i = 0; i < nr_mce_banks; i++) {
unsigned msr = MSR_IA32_MC0_CTL2 + i;
u64 val;
- if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+ if (!mcabanks_test(i, __get_cpu_var(mce_banks_owned)))
continue;
rdmsrl(msr, val);
if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
- clear_bit(i, __get_cpu_var(mce_banks_owned));
+ mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
}
}

@@ -878,16 +883,44 @@ fastcall void smp_cmci_interrupt(struct

void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
-
#ifdef CONFIG_X86_MCE_THERMAL
intel_init_thermal(c);
#endif
intel_init_cmci(c);
}

-static void _mce_cap_init(struct cpuinfo_x86 *c)
-{
- u32 l = mce_cap_init();
+static int mce_is_broadcast(struct cpuinfo_x86 *c)
+{
+ if (mce_force_broadcast)
+ return 1;
+
+ /* According to Intel SDM Dec, 2009, 15.10.4.1, For processors with
+ * DisplayFamily_DisplayModel encoding of 06H_EH and above,
+ * a MCA signal is broadcast to all logical processors in the system
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6 &&
+ c->x86_model >= 0xe)
+ return 1;
+ return 0;
+}
+
+static void intel_mca_cap_init(struct cpuinfo_x86 *c)
+{
+ static int broadcast_check;
+ int broadcast;
+ u32 l, h;
+
+ broadcast = mce_is_broadcast(c);
+ if (broadcast_check && (broadcast != mce_broadcast) )
+ dprintk(XENLOG_INFO,
+ "CPUs have mixed broadcast support"
+ "may cause undetermined result!!!\n");
+
+ broadcast_check = 1;
+ if (broadcast)
+ mce_broadcast = broadcast;
+
+ rdmsr(MSR_IA32_MCG_CAP, l, h);

if ((l & MCG_CMCI_P) && cpu_has_apic)
cmci_support = 1;
@@ -912,8 +945,6 @@ static void mce_init(void)
mctelem_cookie_t mctc;
struct mca_summary bs;

- clear_in_cr4(X86_CR4_MCE);
-
mce_barrier_init(&mce_inside_bar);
mce_barrier_init(&mce_severity_bar);
mce_barrier_init(&mce_trap_bar);
@@ -929,8 +960,6 @@ static void mce_init(void)
x86_mcinfo_dump(mctelem_dataptr(mctc));
mctelem_commit(mctc);
}
-
- set_in_cr4(X86_CR4_MCE);

for (i = firstbank; i < nr_mce_banks; i++)
{
@@ -949,10 +978,35 @@ static void mce_init(void)
wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
}

+static int init_mca_banks(void)
+{
+ struct mca_banks *mb1, *mb2, * mb3;
+
+ mb1 = mcabanks_alloc();
+ mb2 = mcabanks_alloc();
+ mb3 = mcabanks_alloc();
+ if (!mb1 || !mb2 || !mb3)
+ goto out;
+
+ __get_cpu_var(mce_clear_banks) = mb1;
+ __get_cpu_var(no_cmci_banks) = mb2;
+ __get_cpu_var(mce_banks_owned) = mb3;
+
+ return 0;
+out:
+ mcabanks_free(mb1);
+ mcabanks_free(mb2);
+ mcabanks_free(mb3);
+ return -ENOMEM;
+}
+
/* p4/p6 family have similar MCA initialization process */
enum mcheck_type intel_mcheck_init(struct cpuinfo_x86 *c)
{
- _mce_cap_init(c);
+ if (init_mca_banks())
+ return mcheck_none;
+
+ intel_mca_cap_init(c);

/* machine check is available */
x86_mce_vector_register(intel_machine_check);
@@ -969,17 +1023,14 @@ enum mcheck_type intel_mcheck_init(struc

int intel_mce_wrmsr(uint32_t msr, uint64_t val)
{
- int ret = 1;
-
- switch ( msr )
+ int ret = 0;
+
+ if (msr > MSR_IA32_MC0_CTL2 &&
+ msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1))
{
- case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
"Guest should not write this MSR!\n");
- break;
- default:
- ret = 0;
- break;
+ ret = 1;
}

return ret;
@@ -987,17 +1038,14 @@ int intel_mce_wrmsr(uint32_t msr, uint64

int intel_mce_rdmsr(uint32_t msr, uint64_t *val)
{
- int ret = 1;
-
- switch ( msr )
+ int ret = 0;
+
+ if (msr > MSR_IA32_MC0_CTL2 &&
+ msr < (MSR_IA32_MC0_CTL2 + nr_mce_banks - 1))
{
- case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
mce_printk(MCE_QUIET, "We have disabled CMCI capability, "
"Guest should not read this MSR!\n");
- break;
- default:
- ret = 0;
- break;
+ ret = 1;
}

return ret;
diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/non-fatal.c
--- a/xen/arch/x86/cpu/mcheck/non-fatal.c Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Wed Jun 09 07:42:19 2010 +0100
@@ -22,7 +22,7 @@

#include "mce.h"

-DEFINE_PER_CPU(cpu_banks_t, poll_bankmask);
+DEFINE_PER_CPU(struct mca_banks *, poll_bankmask);
static struct timer mce_timer;

#define MCE_PERIOD MILLISECS(8000)
@@ -94,6 +94,9 @@ static int __init init_nonfatal_mce_chec
if (mce_disabled || !mce_available(c))
return -ENODEV;

+ if ( __get_cpu_var(poll_bankmask) == NULL )
+ return -EINVAL;
+
/*
* Check for non-fatal errors every MCE_RATE s
*/
diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/vmce.c
--- a/xen/arch/x86/cpu/mcheck/vmce.c Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/vmce.c Wed Jun 09 07:42:19 2010 +0100
@@ -20,6 +20,12 @@

#define dom_vmce(x) ((x)->arch.vmca_msrs)

+uint64_t g_mcg_cap;
+
+/* Real value in physical CTL MSR */
+uint64_t h_mcg_ctl = 0UL;
+uint64_t *h_mci_ctrl;
+
int vmce_init_msr(struct domain *d)
{
dom_vmce(d) = xmalloc(struct domain_mca_msrs);
@@ -431,3 +437,50 @@ int vmce_domain_inject(
return inject_vmce(d);
}

+int vmce_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ u64 value;
+ int i;
+
+ if ( !h_mci_ctrl )
+ {
+ h_mci_ctrl = xmalloc_array(uint64_t, nr_mce_banks);
+ if (!h_mci_ctrl)
+ {
+ dprintk(XENLOG_INFO, "Failed to alloc h_mci_ctrl\n");
+ return -ENOMEM;
+ }
+ /* Don't care banks before firstbank */
+ memset(h_mci_ctrl, 0xff, sizeof(h_mci_ctrl));
+ for (i = firstbank; i < nr_mce_banks; i++)
+ rdmsrl(MSR_IA32_MC0_CTL + 4*i, h_mci_ctrl[i]);
+ }
+
+ if (g_mcg_cap & MCG_CTL_P)
+ rdmsrl(MSR_IA32_MCG_CTL, h_mcg_ctl);
+
+ rdmsr(MSR_IA32_MCG_CAP, l, h);
+ value = ((u64)h << 32) | l;
+ /* For Guest vMCE usage */
+ g_mcg_cap = value & ~MCG_CMCI_P;
+
+ return 0;
+}
+
+int mca_ctl_conflict(struct mcinfo_bank *bank, struct domain *d)
+{
+ int bank_nr;
+
+ if ( !bank || !d || !h_mci_ctrl )
+ return 1;
+
+ /* Will MCE happen in host if If host mcg_ctl is 0? */
+ if ( ~d->arch.vmca_msrs->mcg_ctl & h_mcg_ctl )
+ return 1;
+
+ bank_nr = bank->mc_bank;
+ if (~d->arch.vmca_msrs->mci_ctl[bank_nr] & h_mci_ctrl[bank_nr] )
+ return 1;
+ return 0;
+}
diff -r f425b2c78340 -r b04b81248078 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Wed Jun 09 07:42:19 2010 +0100
@@ -89,8 +89,37 @@
#define CMCI_THRESHOLD 0x2

#include <asm/domain.h>
-typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
-DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+struct mca_banks
+{
+ int num;
+ unsigned long *bank_map;
+};
+
+static inline void mcabanks_clear(int bit, struct mca_banks *banks) \
+{
+ if (!banks || !banks->bank_map || bit >= banks->num)
+ return ;
+ clear_bit(bit, banks->bank_map);
+}
+
+static inline void mcabanks_set(int bit, struct mca_banks* banks)
+{
+ if (!banks || !banks->bank_map || bit >= banks->num)
+ return;
+ set_bit(bit, banks->bank_map);
+}
+
+static inline int mcabanks_test(int bit, struct mca_banks* banks)
+{
+ if (!banks || !banks->bank_map || bit >= banks->num)
+ return 0;
+ return test_bit(bit, banks->bank_map);
+}
+
+struct mca_banks *mcabanks_alloc(void);
+void mcabanks_free(struct mca_banks *banks);
+extern struct mca_banks *mca_allbanks;

/* Below interfaces are defined for MCA internal processing:
* a. pre_handler will be called early in MCA ISR context, mainly for early
diff -r f425b2c78340 -r b04b81248078 xen/include/asm-x86/mce.h
--- a/xen/include/asm-x86/mce.h Wed Jun 09 07:29:10 2010 +0100
+++ b/xen/include/asm-x86/mce.h Wed Jun 09 07:42:19 2010 +0100
@@ -2,8 +2,6 @@
#include <public/arch-x86/xen-mca.h>
#ifndef _XEN_X86_MCE_H
#define _XEN_X86_MCE_H
-/* Define for GUEST MCA handling */
-#define MAX_NR_BANKS 30

/* This entry is for recording bank nodes for the impacted domain,
* put into impact_header list. */
