
[PATCH v2 07/12] mm: allow page scrubbing routine(s) to be arch controlled
Especially when dealing with large amounts of memory, memset() may not
be very efficient; this can be bad enough that even for debug builds a
custom function is warranted. We additionally want to distinguish "hot"
and "cold" cases.

Keep the default fallback to clear_page_*() in common code; this may
want to be revisited down the road.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v2: New.
---
The choice between hot and cold in scrub_one_page()'s callers is
certainly up for discussion / improvement.

--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -55,6 +55,7 @@ obj-y += percpu.o
obj-y += physdev.o
obj-$(CONFIG_COMPAT) += x86_64/physdev.o
obj-y += psr.o
+obj-bin-$(CONFIG_DEBUG) += scrub_page.o
obj-y += setup.o
obj-y += shutdown.o
obj-y += smp.o
--- /dev/null
+++ b/xen/arch/x86/scrub_page.S
@@ -0,0 +1,41 @@
+ .file __FILE__
+
+#include <asm/asm_defns.h>
+#include <xen/page-size.h>
+#include <xen/scrub.h>
+
+ENTRY(scrub_page_cold)
+ mov $PAGE_SIZE/32, %ecx
+ mov $SCRUB_PATTERN, %rax
+
+0: movnti %rax, (%rdi)
+ movnti %rax, 8(%rdi)
+ movnti %rax, 16(%rdi)
+ movnti %rax, 24(%rdi)
+ add $32, %rdi
+ sub $1, %ecx
+ jnz 0b
+
+ sfence
+ ret
+ .type scrub_page_cold, @function
+ .size scrub_page_cold, . - scrub_page_cold
+
+ .macro scrub_page_stosb
+ mov $PAGE_SIZE, %ecx
+ mov $SCRUB_BYTE_PATTERN, %eax
+ rep stosb
+ ret
+ .endm
+
+ .macro scrub_page_stosq
+ mov $PAGE_SIZE/8, %ecx
+ mov $SCRUB_PATTERN, %rax
+ rep stosq
+ ret
+ .endm
+
+ENTRY(scrub_page_hot)
+ ALTERNATIVE scrub_page_stosq, scrub_page_stosb, X86_FEATURE_ERMS
+ .type scrub_page_hot, @function
+ .size scrub_page_hot, . - scrub_page_hot
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -124,6 +124,7 @@
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/scrub.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/param.h>
@@ -750,27 +751,31 @@ static void page_list_add_scrub(struct p
page_list_add(pg, &heap(node, zone, order));
}

-/* SCRUB_PATTERN needs to be a repeating series of bytes. */
-#ifndef NDEBUG
-#define SCRUB_PATTERN 0xc2c2c2c2c2c2c2c2ULL
-#else
-#define SCRUB_PATTERN 0ULL
+/*
+ * While in debug builds we want callers to avoid relying on allocations
+ * returning zeroed pages, for a production build, clear_page_*() is the
+ * fastest way to scrub.
+ */
+#ifndef CONFIG_DEBUG
+# undef scrub_page_hot
+# define scrub_page_hot clear_page_hot
+# undef scrub_page_cold
+# define scrub_page_cold clear_page_cold
#endif
-#define SCRUB_BYTE_PATTERN (SCRUB_PATTERN & 0xff)

-static void scrub_one_page(const struct page_info *pg)
+static void scrub_one_page(const struct page_info *pg, bool cold)
{
+ void *ptr;
+
if ( unlikely(pg->count_info & PGC_broken) )
return;

-#ifndef NDEBUG
- /* Avoid callers relying on allocations returning zeroed pages. */
- unmap_domain_page(memset(__map_domain_page(pg),
- SCRUB_BYTE_PATTERN, PAGE_SIZE));
-#else
- /* For a production build, clear_page() is the fastest way to scrub. */
- clear_domain_page(_mfn(page_to_mfn(pg)));
-#endif
+ ptr = __map_domain_page(pg);
+ if ( cold )
+ scrub_page_cold(ptr);
+ else
+ scrub_page_hot(ptr);
+ unmap_domain_page(ptr);
}

static void poison_one_page(struct page_info *pg)
@@ -1046,12 +1051,14 @@ static struct page_info *alloc_heap_page
if ( first_dirty != INVALID_DIRTY_IDX ||
(scrub_debug && !(memflags & MEMF_no_scrub)) )
{
+ bool cold = d && d != current->domain;
+
for ( i = 0; i < (1U << order); i++ )
{
if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
{
if ( !(memflags & MEMF_no_scrub) )
- scrub_one_page(&pg[i]);
+ scrub_one_page(&pg[i], cold);

dirty_cnt++;
}
@@ -1308,7 +1315,7 @@ bool scrub_free_pages(void)
{
if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
{
- scrub_one_page(&pg[i]);
+ scrub_one_page(&pg[i], true);
/*
* We can modify count_info without holding heap
* lock since we effectively locked this buddy by
@@ -1947,7 +1954,7 @@ static void __init smp_scrub_heap_pages(
if ( !mfn_valid(_mfn(mfn)) || !page_state_is(pg, free) )
continue;

- scrub_one_page(pg);
+ scrub_one_page(pg, true);
}
}

--- a/xen/include/asm-arm/page.h
+++ b/xen/include/asm-arm/page.h
@@ -135,6 +135,12 @@ extern size_t dcache_line_bytes;

#define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)

+#define clear_page_hot clear_page
+#define clear_page_cold clear_page
+
+#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
+#define scrub_page_cold scrub_page_hot
+
static inline size_t read_dcache_line_bytes(void)
{
register_t ctr;
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -239,6 +239,11 @@ void copy_page_sse2(void *, const void *
#define clear_page(_p) clear_page_cold(_p)
#define copy_page(_t, _f) copy_page_sse2(_t, _f)

+#ifdef CONFIG_DEBUG
+void scrub_page_hot(void *);
+void scrub_page_cold(void *);
+#endif
+
/* Convert between Xen-heap virtual addresses and machine addresses. */
#define __pa(x) (virt_to_maddr(x))
#define __va(x) (maddr_to_virt(x))
--- /dev/null
+++ b/xen/include/xen/scrub.h
@@ -0,0 +1,24 @@
+#ifndef __XEN_SCRUB_H__
+#define __XEN_SCRUB_H__
+
+#include <xen/const.h>
+
+/* SCRUB_PATTERN needs to be a repeating series of bytes. */
+#ifdef CONFIG_DEBUG
+# define SCRUB_PATTERN _AC(0xc2c2c2c2c2c2c2c2,ULL)
+#else
+# define SCRUB_PATTERN _AC(0,ULL)
+#endif
+#define SCRUB_BYTE_PATTERN (SCRUB_PATTERN & 0xff)
+
+#endif /* __XEN_SCRUB_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Re: [PATCH v2 07/12] mm: allow page scrubbing routine(s) to be arch controlled
Hi Jan,

On 27/05/2021 13:33, Jan Beulich wrote:
> Especially when dealing with large amounts of memory, memset() may not
> be very efficient; this can be bad enough that even for debug builds a
> custom function is warranted. We additionally want to distinguish "hot"
> and "cold" cases.

Do you have any benchmark showing the performance improvement?

>
> Keep the default fallback to clear_page_*() in common code; this may
> want to be revisited down the road.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> v2: New.
> ---
> The choice between hot and cold in scrub_one_page()'s callers is
> certainly up for discussion / improvement.

To get the discussion started, can you explain how you made the decision
between hot/cold? This will also want to be written down in the commit
message.

>
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -55,6 +55,7 @@ obj-y += percpu.o
> obj-y += physdev.o
> obj-$(CONFIG_COMPAT) += x86_64/physdev.o
> obj-y += psr.o
> +obj-bin-$(CONFIG_DEBUG) += scrub_page.o
> obj-y += setup.o
> obj-y += shutdown.o
> obj-y += smp.o
> --- /dev/null
> +++ b/xen/arch/x86/scrub_page.S
> @@ -0,0 +1,41 @@
> + .file __FILE__
> +
> +#include <asm/asm_defns.h>
> +#include <xen/page-size.h>
> +#include <xen/scrub.h>
> +
> +ENTRY(scrub_page_cold)
> + mov $PAGE_SIZE/32, %ecx
> + mov $SCRUB_PATTERN, %rax
> +
> +0: movnti %rax, (%rdi)
> + movnti %rax, 8(%rdi)
> + movnti %rax, 16(%rdi)
> + movnti %rax, 24(%rdi)
> + add $32, %rdi
> + sub $1, %ecx
> + jnz 0b
> +
> + sfence
> + ret
> + .type scrub_page_cold, @function
> + .size scrub_page_cold, . - scrub_page_cold
> +
> + .macro scrub_page_stosb
> + mov $PAGE_SIZE, %ecx
> + mov $SCRUB_BYTE_PATTERN, %eax
> + rep stosb
> + ret
> + .endm
> +
> + .macro scrub_page_stosq
> + mov $PAGE_SIZE/8, %ecx
> + mov $SCRUB_PATTERN, %rax
> + rep stosq
> + ret
> + .endm
> +
> +ENTRY(scrub_page_hot)
> + ALTERNATIVE scrub_page_stosq, scrub_page_stosb, X86_FEATURE_ERMS
> + .type scrub_page_hot, @function
> + .size scrub_page_hot, . - scrub_page_hot

From the commit message, it is not clear how the implementation for
hot/cold was chosen. Can you outline in the commit message what the
assumptions are for each helper?

This will be helpful for anyone who may notice a regression, and even
for other arches if they need to implement it.

> --- a/xen/common/page_alloc.c
> +++ b/xen/common/page_alloc.c
> @@ -124,6 +124,7 @@
> #include <xen/types.h>
> #include <xen/lib.h>
> #include <xen/sched.h>
> +#include <xen/scrub.h>
> #include <xen/spinlock.h>
> #include <xen/mm.h>
> #include <xen/param.h>
> @@ -750,27 +751,31 @@ static void page_list_add_scrub(struct p
> page_list_add(pg, &heap(node, zone, order));
> }
>
> -/* SCRUB_PATTERN needs to be a repeating series of bytes. */
> -#ifndef NDEBUG
> -#define SCRUB_PATTERN 0xc2c2c2c2c2c2c2c2ULL
> -#else
> -#define SCRUB_PATTERN 0ULL
> +/*
> + * While in debug builds we want callers to avoid relying on allocations
> + * returning zeroed pages, for a production build, clear_page_*() is the
> + * fastest way to scrub.
> + */
> +#ifndef CONFIG_DEBUG
> +# undef scrub_page_hot
> +# define scrub_page_hot clear_page_hot
> +# undef scrub_page_cold
> +# define scrub_page_cold clear_page_cold
> #endif
> -#define SCRUB_BYTE_PATTERN (SCRUB_PATTERN & 0xff)
>
> -static void scrub_one_page(const struct page_info *pg)
> +static void scrub_one_page(const struct page_info *pg, bool cold)
> {
> + void *ptr;
> +
> if ( unlikely(pg->count_info & PGC_broken) )
> return;
>
> -#ifndef NDEBUG
> - /* Avoid callers relying on allocations returning zeroed pages. */
> - unmap_domain_page(memset(__map_domain_page(pg),
> - SCRUB_BYTE_PATTERN, PAGE_SIZE));
> -#else
> - /* For a production build, clear_page() is the fastest way to scrub. */
> - clear_domain_page(_mfn(page_to_mfn(pg)));
> -#endif
> + ptr = __map_domain_page(pg);
> + if ( cold )
> + scrub_page_cold(ptr);
> + else
> + scrub_page_hot(ptr);
> + unmap_domain_page(ptr);
> }
>
> static void poison_one_page(struct page_info *pg)
> @@ -1046,12 +1051,14 @@ static struct page_info *alloc_heap_page
> if ( first_dirty != INVALID_DIRTY_IDX ||
> (scrub_debug && !(memflags & MEMF_no_scrub)) )
> {
> + bool cold = d && d != current->domain;

So the assumption is if the domain is not running, then the content is
not in the cache. Is that correct?

> +
> for ( i = 0; i < (1U << order); i++ )
> {
> if ( test_and_clear_bit(_PGC_need_scrub, &pg[i].count_info) )
> {
> if ( !(memflags & MEMF_no_scrub) )
> - scrub_one_page(&pg[i]);
> + scrub_one_page(&pg[i], cold);
>
> dirty_cnt++;
> }
> @@ -1308,7 +1315,7 @@ bool scrub_free_pages(void)
> {
> if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
> {
> - scrub_one_page(&pg[i]);
> + scrub_one_page(&pg[i], true);
> /*
> * We can modify count_info without holding heap
> * lock since we effectively locked this buddy by
> @@ -1947,7 +1954,7 @@ static void __init smp_scrub_heap_pages(
> if ( !mfn_valid(_mfn(mfn)) || !page_state_is(pg, free) )
> continue;
>
> - scrub_one_page(pg);
> + scrub_one_page(pg, true);
> }
> }
>
> --- a/xen/include/asm-arm/page.h
> +++ b/xen/include/asm-arm/page.h
> @@ -135,6 +135,12 @@ extern size_t dcache_line_bytes;
>
> #define copy_page(dp, sp) memcpy(dp, sp, PAGE_SIZE)
>
> +#define clear_page_hot clear_page
> +#define clear_page_cold clear_page
> +
> +#define scrub_page_hot(page) memset(page, SCRUB_BYTE_PATTERN, PAGE_SIZE)
> +#define scrub_page_cold scrub_page_hot
> +
> static inline size_t read_dcache_line_bytes(void)
> {
> register_t ctr;
> --- a/xen/include/asm-x86/page.h
> +++ b/xen/include/asm-x86/page.h
> @@ -239,6 +239,11 @@ void copy_page_sse2(void *, const void *
> #define clear_page(_p) clear_page_cold(_p)
> #define copy_page(_t, _f) copy_page_sse2(_t, _f)
>
> +#ifdef CONFIG_DEBUG
> +void scrub_page_hot(void *);
> +void scrub_page_cold(void *);
> +#endif
> +
> /* Convert between Xen-heap virtual addresses and machine addresses. */
> #define __pa(x) (virt_to_maddr(x))
> #define __va(x) (maddr_to_virt(x))
> --- /dev/null
> +++ b/xen/include/xen/scrub.h
> @@ -0,0 +1,24 @@
> +#ifndef __XEN_SCRUB_H__
> +#define __XEN_SCRUB_H__
> +
> +#include <xen/const.h>
> +
> +/* SCRUB_PATTERN needs to be a repeating series of bytes. */
> +#ifdef CONFIG_DEBUG
> +# define SCRUB_PATTERN _AC(0xc2c2c2c2c2c2c2c2,ULL)
> +#else
> +# define SCRUB_PATTERN _AC(0,ULL)
> +#endif
> +#define SCRUB_BYTE_PATTERN (SCRUB_PATTERN & 0xff)
> +
> +#endif /* __XEN_SCRUB_H__ */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
>
>

Cheers,

--
Julien Grall
Re: [PATCH v2 07/12] mm: allow page scrubbing routine(s) to be arch controlled
On 27.05.2021 15:06, Julien Grall wrote:
> On 27/05/2021 13:33, Jan Beulich wrote:
>> Especially when dealing with large amounts of memory, memset() may not
>> be very efficient; this can be bad enough that even for debug builds a
>> custom function is warranted. We additionally want to distinguish "hot"
>> and "cold" cases.
>
> Do you have any benchmark showing the performance improvement?

This is based on the numbers provided at
https://lists.xen.org/archives/html/xen-devel/2021-04/msg00716.html
with the thread containing some of the prior discussion rooted at
https://lists.xen.org/archives/html/xen-devel/2021-04/msg00425.html

I'm afraid I lack ideas on how to sensibly measure _all_ of the
effects (i.e. including the amount of disturbing of caches).

>> ---
>> The choice between hot and cold in scrub_one_page()'s callers is
>> certainly up for discussion / improvement.
>
> To get the discussion started, can you explain how you made the decision
> between hot/cold? This will also want to be written down in the commit
> message.

Well, the initial trivial heuristic is "allocation for oneself" vs
"allocation for someone else, or freeing, or scrubbing", i.e. whether
it would be likely that the page will soon be accessed again (or for
the first time).
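
Expressed as a stand-alone predicate (just a sketch to illustrate the
heuristic - the patch open-codes this in alloc_heap_pages(), and the
helper name here is made up):

    #include <stdbool.h>

    struct domain;

    /*
     * Hot:  d is NULL (Xen's own / anonymous allocations) or the domain
     *       is allocating for itself - the page is likely to be accessed
     *       soon.
     * Cold: allocating on behalf of another domain (and, at the other
     *       call sites, freeing / background scrubbing) - the page is
     *       unlikely to be accessed (again) soon.
     */
    static bool scrub_wants_cold(const struct domain *d,
                                 const struct domain *curr)
    {
        return d && d != curr;
    }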

>> --- /dev/null
>> +++ b/xen/arch/x86/scrub_page.S
>> @@ -0,0 +1,41 @@
>> + .file __FILE__
>> +
>> +#include <asm/asm_defns.h>
>> +#include <xen/page-size.h>
>> +#include <xen/scrub.h>
>> +
>> +ENTRY(scrub_page_cold)
>> + mov $PAGE_SIZE/32, %ecx
>> + mov $SCRUB_PATTERN, %rax
>> +
>> +0: movnti %rax, (%rdi)
>> + movnti %rax, 8(%rdi)
>> + movnti %rax, 16(%rdi)
>> + movnti %rax, 24(%rdi)
>> + add $32, %rdi
>> + sub $1, %ecx
>> + jnz 0b
>> +
>> + sfence
>> + ret
>> + .type scrub_page_cold, @function
>> + .size scrub_page_cold, . - scrub_page_cold
>> +
>> + .macro scrub_page_stosb
>> + mov $PAGE_SIZE, %ecx
>> + mov $SCRUB_BYTE_PATTERN, %eax
>> + rep stosb
>> + ret
>> + .endm
>> +
>> + .macro scrub_page_stosq
>> + mov $PAGE_SIZE/8, %ecx
>> + mov $SCRUB_PATTERN, %rax
>> + rep stosq
>> + ret
>> + .endm
>> +
>> +ENTRY(scrub_page_hot)
>> + ALTERNATIVE scrub_page_stosq, scrub_page_stosb, X86_FEATURE_ERMS
>> + .type scrub_page_hot, @function
>> + .size scrub_page_hot, . - scrub_page_hot
>
> From the commit message, it is not clear how the implementation for
> hot/cold was chosen. Can you outline in the commit message what the
> assumptions are for each helper?

I've added 'The goal is for accesses of "cold" pages to not
disturb caches (albeit finding a good balance between this
and the higher latency looks to be difficult).'
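
For reference, the cold path is roughly equivalent to the following C
(a sketch using SSE2 intrinsics, assuming a 64-bit build; the actual
implementation is the movnti loop in scrub_page.S):

    #include <emmintrin.h> /* _mm_stream_si64(), _mm_sfence() */

    #define PAGE_SIZE     4096
    #define SCRUB_PATTERN 0xc2c2c2c2c2c2c2c2ULL

    static void scrub_page_cold_c(void *ptr)
    {
        long long *p = ptr;
        unsigned int i;

        /* Non-temporal stores write the pattern without pulling the
         * page's cache lines into the cache hierarchy. */
        for ( i = 0; i < PAGE_SIZE / sizeof(*p); i++ )
            _mm_stream_si64(p + i, (long long)SCRUB_PATTERN);

        /* Order the non-temporal stores before the page gets handed on. */
        _mm_sfence();
    }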

>> @@ -1046,12 +1051,14 @@ static struct page_info *alloc_heap_page
>> if ( first_dirty != INVALID_DIRTY_IDX ||
>> (scrub_debug && !(memflags & MEMF_no_scrub)) )
>> {
>> + bool cold = d && d != current->domain;
>
> So the assumption is if the domain is not running, then the content is
> not in the cache. Is that correct?

Not exactly: For one, instead of "not running" it is "is not the current
domain", i.e. there may still be vCPU-s of the domain running elsewhere.
And for the cache the question isn't so much of "is in cache", but to
avoid needlessly bringing contents into the cache when the data is
unlikely to be used again soon.
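
As a concrete example: when the toolstack domain populates a new guest's
memory, the pages just scrubbed won't be touched again by the scrubbing
CPU any time soon, so pulling them through the cache would merely evict
(likely) useful lines. That's what the non-temporal stores in
scrub_page_cold() are meant to avoid.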

Jan
Re: [PATCH v2 07/12] mm: allow page scrubbing routine(s) to be arch controlled
Hi Jan,

On 27/05/2021 14:58, Jan Beulich wrote:
> On 27.05.2021 15:06, Julien Grall wrote:
>> On 27/05/2021 13:33, Jan Beulich wrote:
>>> Especially when dealing with large amounts of memory, memset() may not
>>> be very efficient; this can be bad enough that even for debug builds a
>>> custom function is warranted. We additionally want to distinguish "hot"
>>> and "cold" cases.
>>
>> Do you have any benchmark showing the performance improvement?
>
> This is based on the numbers provided at
> https://lists.xen.org/archives/html/xen-devel/2021-04/msg00716.html
> with the thread containing some of the prior discussion rooted at
> https://lists.xen.org/archives/html/xen-devel/2021-04/msg00425.html

Thanks for the pointer!

> I'm afraid I lack ideas on how to sensibly measure _all_ of the
> effects (i.e. including the amount of disturbing of caches).

I think it is quite important to provide some benchmark (or at least
rationale) in the commit message.

We had a similar situation in the past (see the discussion [1]) where a
commit message claimed it would improve performance but in reality
also added a regression. Unfortunately, there is no easy way forward as
the rationale is now forgotten...

>>> ---
>>> The choice between hot and cold in scrub_one_page()'s callers is
>>> certainly up for discussion / improvement.
>>
>> To get the discussion started, can you explain how you made the decision
>> between hot/cold? This will also want to be written down in the commit
>> message.
>
> Well, the initial trivial heuristic is "allocation for oneself" vs
> "allocation for someone else, or freeing, or scrubbing", i.e. whether
> it would be likely that the page will soon be accessed again (or for
> the first time).
>
>>> --- /dev/null
>>> +++ b/xen/arch/x86/scrub_page.S
>>> @@ -0,0 +1,41 @@
>>> + .file __FILE__
>>> +
>>> +#include <asm/asm_defns.h>
>>> +#include <xen/page-size.h>
>>> +#include <xen/scrub.h>
>>> +
>>> +ENTRY(scrub_page_cold)
>>> + mov $PAGE_SIZE/32, %ecx
>>> + mov $SCRUB_PATTERN, %rax
>>> +
>>> +0: movnti %rax, (%rdi)
>>> + movnti %rax, 8(%rdi)
>>> + movnti %rax, 16(%rdi)
>>> + movnti %rax, 24(%rdi)
>>> + add $32, %rdi
>>> + sub $1, %ecx
>>> + jnz 0b
>>> +
>>> + sfence
>>> + ret
>>> + .type scrub_page_cold, @function
>>> + .size scrub_page_cold, . - scrub_page_cold
>>> +
>>> + .macro scrub_page_stosb
>>> + mov $PAGE_SIZE, %ecx
>>> + mov $SCRUB_BYTE_PATTERN, %eax
>>> + rep stosb
>>> + ret
>>> + .endm
>>> +
>>> + .macro scrub_page_stosq
>>> + mov $PAGE_SIZE/8, %ecx
>>> + mov $SCRUB_PATTERN, %rax
>>> + rep stosq
>>> + ret
>>> + .endm
>>> +
>>> +ENTRY(scrub_page_hot)
>>> + ALTERNATIVE scrub_page_stosq, scrub_page_stosb, X86_FEATURE_ERMS
>>> + .type scrub_page_hot, @function
>>> + .size scrub_page_hot, . - scrub_page_hot
>>
>> From the commit message, it is not clear how the implementation for
>> hot/cold was chosen. Can you outline in the commit message what the
>> assumptions are for each helper?
>
> I've added 'The goal is for accesses of "cold" pages to not
> disturb caches (albeit finding a good balance between this
> and the higher latency looks to be difficult).'
>
>>> @@ -1046,12 +1051,14 @@ static struct page_info *alloc_heap_page
>>> if ( first_dirty != INVALID_DIRTY_IDX ||
>>> (scrub_debug && !(memflags & MEMF_no_scrub)) )
>>> {
>>> + bool cold = d && d != current->domain;
>>
>> So the assumption is if the domain is not running, then the content is
>> not in the cache. Is that correct?
>
> Not exactly: For one, instead of "not running" it is "is not the current
> domain", i.e. there may still be vCPU-s of the domain running elsewhere.
> And for the cache the question isn't so much of "is in cache", but to
> avoid needlessly bringing contents into the cache when the data is
> unlikely to be used again soon.

Ok. Can this be clarified in the commit message?

As to the approach itself, I'd like an ack from one of the x86
maintainers to confirm that distinguishing cold vs hot pages is worth it.

Cheers,

[1]
<de46590ad566d9be55b26eaca0bc4dc7fbbada59.1585063311.git.hongyxia@amazon.com>

--
Julien Grall
Re: [PATCH v2 07/12] mm: allow page scrubbing routine(s) to be arch controlled
On 03.06.2021 11:39, Julien Grall wrote:
> On 27/05/2021 14:58, Jan Beulich wrote:
>> On 27.05.2021 15:06, Julien Grall wrote:
>>> On 27/05/2021 13:33, Jan Beulich wrote:
>>>> @@ -1046,12 +1051,14 @@ static struct page_info *alloc_heap_page
>>>> if ( first_dirty != INVALID_DIRTY_IDX ||
>>>> (scrub_debug && !(memflags & MEMF_no_scrub)) )
>>>> {
>>>> + bool cold = d && d != current->domain;
>>>
>>> So the assumption is if the domain is not running, then the content is
>>> not in the cache. Is that correct?
>>
>> Not exactly: For one, instead of "not running" it is "is not the current
>> domain", i.e. there may still be vCPU-s of the domain running elsewhere.
>> And for the cache the question isn't so much of "is in cache", but to
>> avoid needlessly bringing contents into the cache when the data is
>> unlikely to be used again soon.
>
> Ok. Can this be clarified in the commit message?

I had updated it already the other day to

"Especially when dealing with large amounts of memory, memset() may not
be very efficient; this can be bad enough that even for debug builds a
custom function is warranted. We additionally want to distinguish "hot"
and "cold" cases (with, as initial heuristic, "hot" being for any
allocations a domain does for itself, assuming that in all other cases
the page wouldn't be accessed [again] soon). The goal is for accesses
of "cold" pages to not disturb caches (albeit finding a good balance
between this and the higher latency looks to be difficult)."

Is this good enough?

Jan
Re: [PATCH v2 07/12] mm: allow page scrubbing routine(s) to be arch controlled
Hi Jan,

On 04/06/2021 14:23, Jan Beulich wrote:
> On 03.06.2021 11:39, Julien Grall wrote:
>> On 27/05/2021 14:58, Jan Beulich wrote:
>>> On 27.05.2021 15:06, Julien Grall wrote:
>>>> On 27/05/2021 13:33, Jan Beulich wrote:
>>>>> @@ -1046,12 +1051,14 @@ static struct page_info *alloc_heap_page
>>>>> if ( first_dirty != INVALID_DIRTY_IDX ||
>>>>> (scrub_debug && !(memflags & MEMF_no_scrub)) )
>>>>> {
>>>>> + bool cold = d && d != current->domain;
>>>>
>>>> So the assumption is if the domain is not running, then the content is
>>>> not in the cache. Is that correct?
>>>
>>> Not exactly: For one, instead of "not running" it is "is not the current
>>> domain", i.e. there may still be vCPU-s of the domain running elsewhere.
>>> And for the cache the question isn't so much of "is in cache", but to
>>> avoid needlessly bringing contents into the cache when the data is
>>> unlikely to be used again soon.
>>
>> Ok. Can this be clarified in the commit message?
>
> I had updated it already the other day to
>
> "Especially when dealing with large amounts of memory, memset() may not
> be very efficient; this can be bad enough that even for debug builds a
> custom function is warranted. We additionally want to distinguish "hot"
> and "cold" cases (with, as initial heuristic, "hot" being for any
> allocations a domain does for itself, assuming that in all other cases
> the page wouldn't be accessed [again] soon). The goal is for accesses
> of "cold" pages to not disturb caches (albeit finding a good balance
> between this and the higher latency looks to be difficult)."
>
> Is this good enough?

Yes. Thank you for proposing an update to the commit message!

Cheers,

--
Julien Grall