[PATCH 3/3] x86: decouple pat and mtrr handling
Today PAT is usable only with MTRR being active, with some nasty tweaks
to make PAT usable when running as a Xen PV guest, which doesn't support
MTRR.

The reason for this coupling is that both PAT MSR changes and MTRR
changes require a similar sequence, and so full PAT support was added
using the already available MTRR handling.

Xen PV PAT handling can work without MTRR, as it just needs to consume
the PAT MSR setting done by the hypervisor without the ability or need
to change it. This in turn has resulted in a convoluted initialization
sequence and wrong decisions regarding cache mode availability due to
misleading PAT availability flags.

Fix all of that by allowing PAT to be used without MTRR and by adding
an environment-dependent PAT init function.

Cc: <stable@vger.kernel.org> # 5.17
Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()")
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/include/asm/memtype.h     |  13 ++-
 arch/x86/include/asm/mtrr.h        |  21 +++--
 arch/x86/kernel/cpu/common.c       |  13 +--
 arch/x86/kernel/cpu/mtrr/generic.c |  14 ----
 arch/x86/kernel/cpu/mtrr/mtrr.c    |  33 ++++----
 arch/x86/kernel/cpu/mtrr/mtrr.h    |   1 -
 arch/x86/kernel/setup.c            |   7 --
 arch/x86/mm/pat/memtype.c          | 127 +++++++++++++++++++++--------
 arch/x86/xen/enlighten_pv.c        |   4 +
 9 files changed, 146 insertions(+), 87 deletions(-)

diff --git a/arch/x86/include/asm/memtype.h b/arch/x86/include/asm/memtype.h
index 9ca760e430b9..93ad980631dc 100644
--- a/arch/x86/include/asm/memtype.h
+++ b/arch/x86/include/asm/memtype.h
@@ -8,7 +8,18 @@
extern bool pat_enabled(void);
extern void pat_disable(const char *reason);
extern void pat_init(void);
-extern void init_cache_modes(void);
+#ifdef CONFIG_X86_PAT
+void pat_init_set(void (*func)(void));
+void pat_init_noset(void);
+void pat_cpu_init(void);
+void pat_ap_init_nomtrr(void);
+void pat_aps_init_nomtrr(void);
+#else
+#define pat_init_set(f) do { } while (0)
+#define pat_cpu_init() do { } while (0)
+#define pat_ap_init_nomtrr() do { } while (0)
+#define pat_aps_init_nomtrr() do { } while (0)
+#endif

extern int memtype_reserve(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 900083ac9f60..bb76e5c6e21d 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -42,9 +42,9 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
-extern void mtrr_ap_init(void);
-extern void mtrr_aps_init(void);
-extern void mtrr_bp_restore(void);
+extern bool mtrr_ap_init(void);
+extern bool mtrr_aps_init(void);
+extern bool mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
void mtrr_disable(void);
@@ -84,9 +84,18 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
-#define mtrr_ap_init() do {} while (0)
-#define mtrr_aps_init() do {} while (0)
-#define mtrr_bp_restore() do {} while (0)
+static inline bool mtrr_ap_init(void)
+{
+ return false;
+}
+static inline bool mtrr_aps_init(void)
+{
+ return false;
+}
+static inline bool mtrr_bp_restore(void)
+{
+ return false;
+}
#define mtrr_disable() do {} while (0)
#define mtrr_enable() do {} while (0)
# endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0a1bd14f7966..3edfb779dab5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2408,8 +2408,8 @@ void __init cache_bp_init(void)
{
if (IS_ENABLED(CONFIG_MTRR))
mtrr_bp_init();
- else
- pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
+
+ pat_cpu_init();
}

void cache_ap_init(void)
@@ -2417,7 +2417,8 @@ void cache_ap_init(void)
if (cache_aps_delayed_init)
return;

- mtrr_ap_init();
+ if (!mtrr_ap_init())
+ pat_ap_init_nomtrr();
}

bool cache_aps_delayed_init;
@@ -2437,11 +2438,13 @@ void cache_aps_init(void)
if (!cache_aps_delayed_init)
return;

- mtrr_aps_init();
+ if (!mtrr_aps_init())
+ pat_aps_init_nomtrr();
cache_aps_delayed_init = false;
}

void cache_bp_restore(void)
{
- mtrr_bp_restore();
+ if (!mtrr_bp_restore())
+ pat_cpu_init();
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 84732215b61d..bb6dd96923a4 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -441,20 +441,6 @@ static void __init print_mtrr_state(void)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}

-/* PAT setup for BP. We need to go through sync steps here */
-void __init mtrr_bp_pat_init(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- cache_disable();
-
- pat_init();
-
- cache_enable();
- local_irq_restore(flags);
-}
-
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index c1593cfae641..76b1553d90f9 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -762,9 +762,6 @@ void __init mtrr_bp_init(void)
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();

- if (mtrr_enabled())
- mtrr_bp_pat_init();
-
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -772,25 +769,17 @@ void __init mtrr_bp_init(void)
}
}

- if (!mtrr_enabled()) {
+ if (!mtrr_enabled())
pr_info("Disabled\n");
-
- /*
- * PAT initialization relies on MTRR's rendezvous handler.
- * Skip PAT init until the handler can initialize both
- * features independently.
- */
- pat_disable("MTRRs disabled, skipping PAT initialization too.");
- }
}

-void mtrr_ap_init(void)
+bool mtrr_ap_init(void)
{
if (!mtrr_enabled())
- return;
+ return false;

if (!use_intel())
- return;
+ return false;

/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
@@ -806,6 +795,8 @@ void mtrr_ap_init(void)
* lock to prevent mtrr entry changes
*/
set_mtrr_from_inactive_cpu(~0U, 0, 0, 0);
+
+ return true;
}

/**
@@ -826,20 +817,24 @@ void mtrr_save_state(void)
/*
* Delayed MTRR initialization for all AP's
*/
-void mtrr_aps_init(void)
+bool mtrr_aps_init(void)
{
if (!use_intel() || !mtrr_enabled())
- return;
+ return false;

set_mtrr(~0U, 0, 0, 0);
+
+ return true;
}

-void mtrr_bp_restore(void)
+bool mtrr_bp_restore(void)
{
if (!use_intel() || !mtrr_enabled())
- return;
+ return false;

mtrr_if->set_all();
+
+ return true;
}

static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 2ac99e561181..f6135a539073 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -53,7 +53,6 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
-void mtrr_bp_pat_init(void);

extern void __init set_mtrr_ops(const struct mtrr_ops *ops);

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 27d61f73c68a..14bb40cd22c6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1008,13 +1008,6 @@ void __init setup_arch(char **cmdline_p)

max_possible_pfn = max_pfn;

- /*
- * This call is required when the CPU does not support PAT. If
- * mtrr_bp_init() invoked it already via pat_init() the call has no
- * effect.
- */
- init_cache_modes();
-
/*
* Define random base addresses for memory sections after max_pfn is
* defined and before each memory section base is used.
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index d5ef64ddd35e..3d4bc27ffebb 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -41,6 +41,7 @@
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
+#include <linux/stop_machine.h>

#include <asm/cacheflush.h>
#include <asm/processor.h>
@@ -65,30 +66,23 @@ static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __read_mostly pat_bp_enabled;
static bool __read_mostly pat_cm_initialized;

-/*
- * PAT support is enabled by default, but can be disabled for
- * various user-requested or hardware-forced reasons:
- */
-void pat_disable(const char *msg_reason)
-{
- if (pat_disabled)
- return;
+static void init_cache_modes(void);

- if (pat_bp_initialized) {
- WARN_ONCE(1, "x86/PAT: PAT cannot be disabled after initialization\n");
- return;
- }
-
- pat_disabled = true;
- pr_info("x86/PAT: %s\n", msg_reason);
+#ifdef CONFIG_X86_PAT
+static void pat_init_nopat(void)
+{
+ init_cache_modes();
}

static int __init nopat(char *str)
{
- pat_disable("PAT support disabled via boot option.");
+ pat_disabled = true;
+ pr_info("PAT support disabled via boot option.");
+ pat_init_set(pat_init_nopat);
return 0;
}
early_param("nopat", nopat);
+#endif

bool pat_enabled(void)
{
@@ -243,13 +237,17 @@ static void pat_bp_init(u64 pat)
u64 tmp_pat;

if (!boot_cpu_has(X86_FEATURE_PAT)) {
- pat_disable("PAT not supported by the CPU.");
+ pr_info("PAT not supported by the CPU.");
+ pat_disabled = true;
+ pat_init_set(pat_init_nopat);
return;
}

rdmsrl(MSR_IA32_CR_PAT, tmp_pat);
if (!tmp_pat) {
- pat_disable("PAT support disabled by the firmware.");
+ pr_info("PAT support disabled by the firmware.");
+ pat_disabled = true;
+ pat_init_set(pat_init_nopat);
return;
}

@@ -272,7 +270,7 @@ static void pat_ap_init(u64 pat)
wrmsrl(MSR_IA32_CR_PAT, pat);
}

-void init_cache_modes(void)
+static void init_cache_modes(void)
{
u64 pat = 0;

@@ -318,25 +316,12 @@ void init_cache_modes(void)
__init_cache_modes(pat);
}

-/**
- * pat_init - Initialize the PAT MSR and PAT table on the current CPU
- *
- * This function initializes PAT MSR and PAT table with an OS-defined value
- * to enable additional cache attributes, WC, WT and WP.
- *
- * This function must be called on all CPUs using the specific sequence of
- * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
- * procedure for PAT.
- */
-void pat_init(void)
+#ifdef CONFIG_X86_PAT
+static void pat_init_native(void)
{
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;

-#ifndef CONFIG_X86_PAT
- pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
-#endif
-
if (pat_disabled)
return;

@@ -406,6 +391,80 @@ void pat_init(void)

#undef PAT

+void pat_init_noset(void)
+{
+ pat_bp_enabled = true;
+ init_cache_modes();
+}
+
+static void (*pat_init_func)(void) = pat_init_native;
+
+void __init pat_init_set(void (*func)(void))
+{
+ pat_init_func = func;
+}
+
+/**
+ * pat_init - Initialize the PAT MSR and PAT table on the current CPU
+ *
+ * This function initializes PAT MSR and PAT table with an OS-defined value
+ * to enable additional cache attributes, WC, WT and WP.
+ *
+ * This function must be called on all CPUs using the specific sequence of
+ * operations defined in Intel SDM. mtrr_rendezvous_handler() provides this
+ * procedure for PAT.
+ */
+void pat_init(void)
+{
+ pat_init_func();
+}
+
+static int __pat_cpu_init(void *data)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cache_disable();
+
+ pat_init();
+
+ cache_enable();
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+void pat_cpu_init(void)
+{
+ if (pat_init_func != pat_init_native)
+ pat_init();
+ else
+ __pat_cpu_init(NULL);
+}
+
+void pat_ap_init_nomtrr(void)
+{
+ if (pat_init_func != pat_init_native)
+ return;
+
+ stop_machine_from_inactive_cpu(__pat_cpu_init, NULL, cpu_callout_mask);
+}
+
+void pat_aps_init_nomtrr(void)
+{
+ if (pat_init_func != pat_init_native)
+ return;
+
+ stop_machine(__pat_cpu_init, NULL, cpu_online_mask);
+}
+#else
+void pat_init(void)
+{
+ pr_info_once("x86/PAT: PAT support disabled because CONFIG_X86_PAT is disabled in the kernel.\n");
+ init_cache_modes();
+}
+#endif /* CONFIG_X86_PAT */
+
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */

/*
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 70fb2ea85e90..97831d822872 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -69,6 +69,7 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
+#include <asm/memtype.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif
@@ -1317,6 +1318,9 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
initrd_start = __pa(xen_start_info->mod_start);
}

+ /* Don't try to modify PAT MSR. */
+ pat_init_set(pat_init_noset);
+
/* Poke various useful things into boot_params */
boot_params.hdr.type_of_loader = (9 << 4) | 0;
boot_params.hdr.ramdisk_image = initrd_start;
--
2.35.3
Re: [PATCH 3/3] x86: decouple pat and mtrr handling
On Fri, Jul 15, 2022 at 04:25:49PM +0200, Juergen Gross wrote:
> Today PAT is usable only with MTRR being active, with some nasty tweaks
> to make PAT usable when running as a Xen PV guest, which doesn't support
> MTRR.
>
> The reason for this coupling is that both PAT MSR changes and MTRR
> changes require a similar sequence, and so full PAT support was added
> using the already available MTRR handling.
>
> Xen PV PAT handling can work without MTRR, as it just needs to consume
> the PAT MSR setting done by the hypervisor without the ability or need
> to change it. This in turn has resulted in a convoluted initialization
> sequence and wrong decisions regarding cache mode availability due to
> misleading PAT availability flags.
>
> Fix all of that by allowing PAT to be used without MTRR and by adding
> an environment-dependent PAT init function.

Aha, there's the explanation I was looking for.

> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 0a1bd14f7966..3edfb779dab5 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -2408,8 +2408,8 @@ void __init cache_bp_init(void)
> {
> if (IS_ENABLED(CONFIG_MTRR))
> mtrr_bp_init();
> - else
> - pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
> +
> + pat_cpu_init();
> }
>
> void cache_ap_init(void)
> @@ -2417,7 +2417,8 @@ void cache_ap_init(void)
> if (cache_aps_delayed_init)
> return;
>
> - mtrr_ap_init();
> + if (!mtrr_ap_init())
> + pat_ap_init_nomtrr();
> }

So I'm reading this as: if it couldn't init AP's MTRRs, init its PAT.

But currently, the code sets the MTRRs for the delayed case or when the
CPU is not online by doing ->set_all and in there it sets first MTRRs
and then PAT.

I think the code above should simply try the two things, one after the
other, independently from one another.

And I see you've added another stomp machine call for PAT only.

Now, what I think the design of all this should be, is:

you have a bunch of things you need to do at each point:

* cache_ap_init

* cache_aps_init

* ...

Now, in each of those, you look at whether PAT or MTRR is supported and you
do only those which are supported.

Also, the rendezvous handler should do:

if MTRR:
do MTRR specific stuff

if PAT:
do PAT specific stuff

This way you have clean definitions of what needs to happen when and you
also do *only* the things that the platform supports, by keeping the
proper order of operations - I believe MTRRs first and then PAT.
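
IOW, a rough sketch (cache_rendezvous_handler(), the have_*() helpers
and mtrr_set_state() are made-up names here, not existing functions):

static int cache_rendezvous_handler(void *info)
{
	unsigned long flags;

	local_irq_save(flags);
	cache_disable();

	/* MTRRs first ... */
	if (have_mtrr())
		mtrr_set_state();

	/* ... then PAT. */
	if (have_pat())
		pat_init();

	cache_enable();
	local_irq_restore(flags);

	return 0;
}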

This way we'll get rid of that crazy maze of who calls what and when.

But first we need to define those points where stuff needs to happen and
then for each point define what stuff needs to happen.

How does that sound?

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH 3/3] x86: decouple pat and mtrr handling
On 7/15/22 10:25 AM, Juergen Gross wrote:
> Today PAT is usable only with MTRR being active, with some nasty tweaks
> to make PAT usable when running as a Xen PV guest, which doesn't support
> MTRR.
>
> The reason for this coupling is that both PAT MSR changes and MTRR
> changes require a similar sequence, and so full PAT support was added
> using the already available MTRR handling.
>
> Xen PV PAT handling can work without MTRR, as it just needs to consume
> the PAT MSR setting done by the hypervisor without the ability or need
> to change it. This in turn has resulted in a convoluted initialization
> sequence and wrong decisions regarding cache mode availability due to
> misleading PAT availability flags.
>
> Fix all of that by allowing PAT to be used without MTRR and by adding
> an environment-dependent PAT init function.
>
> Cc: <stable@vger.kernel.org> # 5.17
> Fixes: bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT with pat_enabled()")
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
...
> diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
> index d5ef64ddd35e..3d4bc27ffebb 100644
> --- a/arch/x86/mm/pat/memtype.c
> +++ b/arch/x86/mm/pat/memtype.c
> ...
>
> +void pat_init_noset(void)
> +{
> + pat_bp_enabled = true;
> + init_cache_modes();
> +}

This is what should fix the regression caused by commit
bdd8b6c98239 ("drm/i915: replace X86_FEATURE_PAT
with pat_enabled()"). Thanks for including this.

This function might need a better name. Does noset
refer to the fact that when we use this function, we do
not set or write to the PAT MSR? Maybe it should be
pat_init_noset_msr. Is Xen PV Dom0 the only case when
this function will be called or is it also for unprivileged
Xen PV domains? Then maybe it should be named
pat_init_xen_pv_dom0 or maybe just pat_init_xen_pv
if it is also used with unprivileged Xen PV domains. Or,
if you want to keep the name as pat_init_noset, maybe
it should be preceded by a comment clearly explaining
this function is currently only for the Xen PV and/or the Xen
PV Dom0 case when we don't write to the PAT MSR and we
still want to report PAT as enabled in those cases.
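
For example (just a suggested wording, not from the patch, to be
adjusted depending on the answers to the questions above):

/*
 * pat_init_noset() - PAT init when the PAT MSR cannot be written.
 *
 * Used for environments where the PAT MSR is under control of the
 * hypervisor (Xen PV guests): consume the MSR value set by the
 * hypervisor as-is and report PAT as enabled, without ever writing
 * the MSR.
 */
void pat_init_noset(void)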

Chuck
Re: PING [PATCH 3/3] x86: decouple pat and mtrr handling
On 7/19/22 11:15 AM, Borislav Petkov wrote:
> On Fri, Jul 15, 2022 at 04:25:49PM +0200, Juergen Gross wrote:
> > Today PAT is usable only with MTRR being active, with some nasty tweaks
> > to make PAT usable when running as a Xen PV guest, which doesn't support
> > MTRR.
> >
> > The reason for this coupling is that both PAT MSR changes and MTRR
> > changes require a similar sequence, and so full PAT support was added
> > using the already available MTRR handling.
> >
> > Xen PV PAT handling can work without MTRR, as it just needs to consume
> > the PAT MSR setting done by the hypervisor without the ability or need
> > to change it. This in turn has resulted in a convoluted initialization
> > sequence and wrong decisions regarding cache mode availability due to
> > misleading PAT availability flags.
> >
> > Fix all of that by allowing PAT to be used without MTRR and by adding
> > an environment-dependent PAT init function.
>
> Aha, there's the explanation I was looking for.
>
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index 0a1bd14f7966..3edfb779dab5 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -2408,8 +2408,8 @@ void __init cache_bp_init(void)
> > {
> > if (IS_ENABLED(CONFIG_MTRR))
> > mtrr_bp_init();
> > - else
> > - pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
> > +
> > + pat_cpu_init();
> > }
> >
> > void cache_ap_init(void)
> > @@ -2417,7 +2417,8 @@ void cache_ap_init(void)
> > if (cache_aps_delayed_init)
> > return;
> >
> > - mtrr_ap_init();
> > + if (!mtrr_ap_init())
> > + pat_ap_init_nomtrr();
> > }
>
> So I'm reading this as: if it couldn't init AP's MTRRs, init its PAT.
>
> But currently, the code sets the MTRRs for the delayed case or when the
> CPU is not online by doing ->set_all and in there it sets first MTRRs
> and then PAT.
>
> I think the code above should simply try the two things, one after the
> other, independently from one another.
>
> And I see you've added another stomp machine call for PAT only.
>
> Now, what I think the design of all this should be, is:
>
> you have a bunch of things you need to do at each point:
>
> * cache_ap_init
>
> * cache_aps_init
>
> * ...
>
> Now, in each of those, you look at whether PAT or MTRR is supported and you
> do only those which are supported.
>
> Also, the rendezvous handler should do:
>
> if MTRR:
> do MTRR specific stuff
>
> if PAT:
> do PAT specific stuff
>
> This way you have clean definitions of what needs to happen when and you
> also do *only* the things that the platform supports, by keeping the
> proper order of operations - I believe MTRRs first and then PAT.
>
> This way we'll get rid of that crazy maze of who calls what and when.
>
> But first we need to define those points where stuff needs to happen and
> then for each point define what stuff needs to happen.
>
> How does that sound?
>
> Thx.
>

Hi Thorsten,

IMHO, silence here is unacceptable given that this is supposed to
be fixing a regression and not just adding a new feature or
re-working the code in a case where there is no regression.

The regression was first reported on May 4, 2022, now over
three months ago:

https://lore.kernel.org/regressions/YnHK1Z3o99eMXsVK@mail-itl/

Why has Juergen not at least responded in some way to the
comments that Boris has made here? Why has Boris not
pinged Juergen by now, almost four weeks after his comment and
over three months after the first report of the
regression? IMHO, both Juergen and Boris are not treating
this with the priority of a regression fix. At the very least,
they should reaffirm their commitment to fix the regression
in a timely manner or explain what factors demand that the
Linus regression rule be set aside in this case.

There are valid reasons to delay a fix, but in all the discussion
of the various patches that have been proposed to fix this
regression, no maintainer has yet given a clear and reasonable
explanation for why this is not getting a higher priority from the
developers.

Some developers (Dave, Luto, and Peter) have ignored a fix
proposed by Jan Beulich, as you pointed out in an earlier message
to Jan Beulich here:

https://lore.kernel.org/lkml/4c8c9d4c-1c6b-8e9f-fa47-918a64898a28@leemhuis.info/

To his credit, Jan Beulich replied to your message in a reasonable
manner, but he also could not explain why Dave, Luto, and Peter
ignored Jan's patch and remain silent in the discussion of the possible
fixes for this regression. I note also that the original report of the
regression identified a specific commit whose revert also fixes the
regression, and that commit is also mentioned in the aforementioned
message to Jan about his proposed fix. It is a commit that lives in the
i915 Intel GPU/DRM driver, commit bdd8b6c98239, and my testing confirms
the regression can also be fixed by reverting bdd8b6c98239 instead of
applying Jan Beulich's patch. That patch was the subject of the
aforementioned message from you to Jan Beulich, where you also
expressed your dissatisfaction with the silence of some developers
(Dave, Luto, and Peter) when there is a regression that needs fixing.

Why their silence? In that same message, you pondered that it might
be necessary to bring this matter to Linus' attention. The developers'
silence makes me think this regression is a regression the developers
do not want to fix. And that would be a clear violation of the Linux
regression rule if it were true. So, Thorsten, I think it is time for you to
elevate this to Linus if the developers do not clearly explain why they
are ignoring this again.

Best regards,

Chuck
Re: PING [PATCH 3/3] x86: decouple pat and mtrr handling
On Sat, Aug 13, 2022 at 12:56:44PM -0400, Chuck Zmudzinski wrote:
> Why has Juergen not at least responded in some way to the
> comments that Boris has made here? Why has Boris not
> pinged Juergen by now,

How about: it is summer here and people usually take their vacations
during that time and everything takes a bit longer than usual?

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: PING [PATCH 3/3] x86: decouple pat and mtrr handling
On 8/13/2022 1:20 PM, Borislav Petkov wrote:
> On Sat, Aug 13, 2022 at 12:56:44PM -0400, Chuck Zmudzinski wrote:
> > Why has Juergen not at least responded in some way to the
> > comments that Boris has made here? Why has Boris not
> > pinged Juergen by now,
>
> How about: it is summer here and people usually take their vacations
> during that time and everything takes a bit longer than usual?
>

I did a search for Juergen Gross on lkml and he has been actively
submitting and reviewing patches during the past few weeks. However,
he is ignoring comments on his patch to fix this regression.

Chuck
Re: PING [PATCH 3/3] x86: decouple pat and mtrr handling
On Sat, Aug 13, 2022 at 05:40:34PM -0400, Chuck Zmudzinski wrote:
> I did a search for Juergen Gross on lkml and he has been actively
> submitting and reviewing patches during the past few weeks. However,
> he is ignoring comments on his patch to fix this regression.

Please stop this nonsense and be patient. We will fix this soon. For
the time being you can use Jan's patch locally.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: PING [PATCH 3/3] x86: decouple pat and mtrr handling
On 8/13/2022 5:48 PM, Borislav Petkov wrote:
> On Sat, Aug 13, 2022 at 05:40:34PM -0400, Chuck Zmudzinski wrote:
> > I did a search for Juergen Gross on lkml and he has been actively
> > submitting and reviewing patches during the past few weeks. However,
> > he is ignoring comments on his patch to fix this regression.
>
> Please stop this nonsense and be patient. We will fix this soon. For
> the time being you can use Jan's patch locally.
>

Hello Boris,

I am grateful that you took the time to respond and say it will be fixed soon.
By soon, I presume that means within two weeks as the guidance for
fixing regressions recommends:

https://www.kernel.org/doc/html/latest/process/handling-regressions.html

Quoting from that page: "Try to fix regressions quickly once the culprit has
been identified; fixes for most regressions should be merged within two
weeks, but some need to be resolved within two or three days."

If the regression is not fixed by the end of August, I don't think it would
be "nonsense" for me to send another PING at that time. I also think the
PING I sent earlier today is not "nonsense," given that this regression has been
waiting for a fix for over three months, which is much longer than the
two weeks normally expected for fixing a regression.

Best regards,

Chuck
Re: PING [PATCH 3/3] x86: decouple pat and mtrr handling
On 8/13/2022 6:41 PM, Chuck Zmudzinski wrote:
> On 8/13/2022 5:48 PM, Borislav Petkov wrote:
> > On Sat, Aug 13, 2022 at 05:40:34PM -0400, Chuck Zmudzinski wrote:
> > > I did a search for Juergen Gross on lkml and he is active submitting and
> > > reviewing patches during the past few weeks. However, he is ignoring
> > > comments on his patch to fix this regression.
> >
> > Please stop this nonsense and be patient. We will fix this soon. For
> > the time being you can use Jan's patch locally.
> >

Hi Boris,

I see you have signed off on Jan's patch with a slight modification as
the short-term fix that is also tagged to be backported to stable. Thank you!
I hope no other problems or objections pop up and Linus will merge it into
the mainline kernel soon. Please forgive me for my impatience.

Best regards,

Chuck
Re: [PATCH 3/3] x86: decouple pat and mtrr handling
On 19.07.22 17:15, Borislav Petkov wrote:
> On Fri, Jul 15, 2022 at 04:25:49PM +0200, Juergen Gross wrote:
>> Today PAT is usable only with MTRR being active, with some nasty tweaks
>> to make PAT usable when running as a Xen PV guest, which doesn't support
>> MTRR.
>>
>> The reason for this coupling is that both PAT MSR changes and MTRR
>> changes require a similar sequence, and so full PAT support was added
>> using the already available MTRR handling.
>>
>> Xen PV PAT handling can work without MTRR, as it just needs to consume
>> the PAT MSR setting done by the hypervisor without the ability or need
>> to change it. This in turn has resulted in a convoluted initialization
>> sequence and wrong decisions regarding cache mode availability due to
>> misleading PAT availability flags.
>>
>> Fix all of that by allowing PAT to be used without MTRR and by adding
>> an environment-dependent PAT init function.
>
> Aha, there's the explanation I was looking for.
>
>> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
>> index 0a1bd14f7966..3edfb779dab5 100644
>> --- a/arch/x86/kernel/cpu/common.c
>> +++ b/arch/x86/kernel/cpu/common.c
>> @@ -2408,8 +2408,8 @@ void __init cache_bp_init(void)
>> {
>> if (IS_ENABLED(CONFIG_MTRR))
>> mtrr_bp_init();
>> - else
>> - pat_disable("PAT support disabled because CONFIG_MTRR is disabled in the kernel.");
>> +
>> + pat_cpu_init();
>> }
>>
>> void cache_ap_init(void)
>> @@ -2417,7 +2417,8 @@ void cache_ap_init(void)
>> if (cache_aps_delayed_init)
>> return;
>>
>> - mtrr_ap_init();
>> + if (!mtrr_ap_init())
>> + pat_ap_init_nomtrr();
>> }
>
> So I'm reading this as: if it couldn't init AP's MTRRs, init its PAT.
>
> But currently, the code sets the MTRRs for the delayed case or when the
> CPU is not online by doing ->set_all and in there it sets first MTRRs
> and then PAT.
>
> I think the code above should simply try the two things, one after the
> other, independently from one another.
>
> And I see you've added another stomp machine call for PAT only.
>
> Now, what I think the design of all this should be, is:
>
> you have a bunch of things you need to do at each point:
>
> * cache_ap_init
>
> * cache_aps_init
>
> * ...
>
> Now, in each of those, you look at whether PAT or MTRR is supported and you
> do only those which are supported.
>
> Also, the rendezvous handler should do:
>
> if MTRR:
> do MTRR specific stuff
>
> if PAT:
> do PAT specific stuff
>
> This way you have clean definitions of what needs to happen when and you
> also do *only* the things that the platform supports, by keeping the
> proper order of operations - I believe MTRRs first and then PAT.
>
> This way we'll get rid of that crazy maze of who calls what and when.
>
> But first we need to define those points where stuff needs to happen and
> then for each point define what stuff needs to happen.
>
> How does that sound?

This asks for some more cleanup in the MTRR code:

mtrr_if->set_all() is the relevant callback, and it will only ever be called
for the generic case (use_intel() == true), so I think we want to:

- remove the cyrix specific set_all() function
- split the set_all() callback case from mtrr_rendezvous_handler() into a
dedicated rendezvous handler
- remove the set_all() member from struct mtrr_ops and directly call
generic_set_all() from the new rendezvous handler
- optional: rename use_intel() to use_generic(), or even introduce just
a static bool for that purpose

Then the new rendezvous handler can be modified as you suggested.
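
Just to sketch it (cache_set_all_handler() is a made-up name,
generic_set_all() is the existing function):

static int cache_set_all_handler(void *info)
{
	/*
	 * Dedicated rendezvous handler for the former set_all() case,
	 * leaving mtrr_rendezvous_handler() to deal only with single
	 * register updates.
	 */
	generic_set_all();

	return 0;
}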

Are you okay with that route?


Juergen