Mailing List Archive

[PATCH v3 11/19] x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read()
Depending on the number of monitors available, Arm's MPAM may need to
allocate a monitor prior to reading the counter value. Allocating a
contended resource may involve sleeping.

All callers of resctrl_arch_rmid_read() read the counter on more than
one domain. If the monitor is allocated globally, there is no need to
allocate and free it for each call to resctrl_arch_rmid_read().

Add arch hooks for this allocation, which need calling before
resctrl_arch_rmid_read(). The allocated monitor is passed to
resctrl_arch_rmid_read(), then freed again afterwards. The helper
can be called on any CPU, and can sleep.

Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Signed-off-by: James Morse <james.morse@arm.com>
---
arch/x86/include/asm/resctrl.h | 11 +++++++
arch/x86/kernel/cpu/resctrl/internal.h | 1 +
arch/x86/kernel/cpu/resctrl/monitor.c | 40 +++++++++++++++++++++++---
include/linux/resctrl.h | 4 +--
4 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 752123b0ce40..1c87f1626456 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -136,6 +136,17 @@ static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
return rmid;
}

+/* x86 can always read an rmid, nothing needs allocating */
+struct rdt_resource;
+static inline int resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
+{
+ might_sleep();
+ return 0;
+};
+
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
+ int ctx) { };
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index a07557390895..7262b355e128 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -135,6 +135,7 @@ struct rmid_read {
bool first;
int err;
u64 val;
+ int arch_mon_ctx;
};

extern bool rdt_alloc_capable;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index de72df06b37b..f38cd2f12285 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,7 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/

+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -271,7 +272,7 @@ static void smp_call_rmid_read(void *_arg)

int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
- u64 *val)
+ u64 *val, int ignored)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
@@ -317,9 +318,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
u32 idx, cur_idx = 1;
+ int arch_mon_ctx;
bool rmid_dirty;
u64 val = 0;

+ arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+ if (arch_mon_ctx < 0)
+ return;
+
/*
* Skip RMID 0 and start from RMID 1 and check all the RMIDs that
* are marked as busy for occupancy < threshold. If the occupancy
@@ -333,7 +339,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)

entry = __rmid_entry(idx);
if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, &val)) {
+ QOS_L3_OCCUP_EVENT_ID, &val,
+ arch_mon_ctx)) {
rmid_dirty = true;
} else {
rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
@@ -348,6 +355,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
}
cur_idx = idx + 1;
}
+
+ resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
@@ -444,16 +453,22 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_domain *d;
+ int arch_mon_ctx;
u64 val = 0;
u32 idx;
int err;

idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

+ arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+ if (arch_mon_ctx < 0)
+ return;
+
entry->busy = 0;
list_for_each_entry(d, &r->domains, list) {
err = resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, &val);
+ QOS_L3_OCCUP_EVENT_ID, &val,
+ arch_mon_ctx);
if (err || val <= resctrl_rmid_realloc_threshold)
continue;

@@ -466,6 +481,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
+ resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);

if (entry->busy)
rmid_limbo_count++;
@@ -502,7 +518,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);

rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
- &tval);
+ &tval, rr->arch_mon_ctx);
if (rr->err)
return rr->err;

@@ -575,6 +591,9 @@ int mon_event_count(void *info)
int ret;

rdtgrp = rr->rgrp;
+ rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr->r, rr->evtid);
+ if (rr->arch_mon_ctx < 0)
+ return rr->arch_mon_ctx;

ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);

@@ -601,6 +620,8 @@ int mon_event_count(void *info)
if (ret == 0)
rr->err = 0;

+ resctrl_arch_mon_ctx_free(rr->r, rr->evtid, rr->arch_mon_ctx);
+
return 0;
}

@@ -737,11 +758,21 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
if (is_mbm_total_enabled()) {
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (rr.arch_mon_ctx < 0)
+ return;
+
__mon_event_count(closid, rmid, &rr);
+
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (rr.arch_mon_ctx < 0)
+ return;
+
__mon_event_count(closid, rmid, &rr);

/*
@@ -751,6 +782,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
*/
if (is_mba_sc(NULL))
mbm_bw_count(closid, rmid, &rr);
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
}

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index ff7452f644e4..03e4f41cd336 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -233,6 +233,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
* @rmid: rmid of the counter to read.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
+ * @arch_mon_ctx: An allocated context from resctrl_arch_mon_ctx_alloc().
*
* Call from process context on a CPU that belongs to domain @d.
*
@@ -241,8 +242,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
- u64 *val);
-
+ u64 *val, int arch_mon_ctx);

/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
--
2.39.2
Re: [PATCH v3 11/19] x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read() [ In reply to ]
Hi James,

On 3/20/2023 10:26 AM, James Morse wrote:
> Depending on the number of monitors available, Arm's MPAM may need to
> allocate a monitor prior to reading the counter value. Allocating a
> contended resource may involve sleeping.
>
> All callers of resctrl_arch_rmid_read() read the counter on more than
> one domain. If the monitor is allocated globally, there is no need to

This does not seem accurate considering the __check_limbo() call that
is called for a single domain.

> allocate and free it for each call to resctrl_arch_rmid_read().
>
> Add arch hooks for this allocation, which need calling before
> resctrl_arch_rmid_read(). The allocated monitor is passed to
> resctrl_arch_rmid_read(), then freed again afterwards. The helper
> can be called on any CPU, and can sleep.
>
> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> arch/x86/include/asm/resctrl.h | 11 +++++++
> arch/x86/kernel/cpu/resctrl/internal.h | 1 +
> arch/x86/kernel/cpu/resctrl/monitor.c | 40 +++++++++++++++++++++++---
> include/linux/resctrl.h | 4 +--
> 4 files changed, 50 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index 752123b0ce40..1c87f1626456 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -136,6 +136,17 @@ static inline u32 resctrl_arch_rmid_idx_encode(u32 ignored, u32 rmid)
> return rmid;
> }
>
> +/* x86 can always read an rmid, nothing needs allocating */
> +struct rdt_resource;
> +static inline int resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
> +{
> + might_sleep();
> + return 0;
> +};
> +
> +static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
> + int ctx) { };
> +
> void resctrl_cpu_detect(struct cpuinfo_x86 *c);
>
> #else
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index a07557390895..7262b355e128 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -135,6 +135,7 @@ struct rmid_read {
> bool first;
> int err;
> u64 val;
> + int arch_mon_ctx;
> };
>
> extern bool rdt_alloc_capable;
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index de72df06b37b..f38cd2f12285 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -15,6 +15,7 @@
> * Software Developer Manual June 2016, volume 3, section 17.17.
> */
>
> +#include <linux/cpu.h>

Why is this needed?

> #include <linux/module.h>
> #include <linux/sizes.h>
> #include <linux/slab.h>
> @@ -271,7 +272,7 @@ static void smp_call_rmid_read(void *_arg)
>
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> u32 closid, u32 rmid, enum resctrl_event_id eventid,
> - u64 *val)
> + u64 *val, int ignored)
> {
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> @@ -317,9 +318,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
> u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> struct rmid_entry *entry;
> u32 idx, cur_idx = 1;
> + int arch_mon_ctx;
> bool rmid_dirty;
> u64 val = 0;
>
> + arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
> + if (arch_mon_ctx < 0)
> + return;
> +

The vision for this is not clear to me. When I read that context needs to be allocated
I expect it to return a pointer to some new context, not an int. What would the
"context" consist of?


...

> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index ff7452f644e4..03e4f41cd336 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -233,6 +233,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> * @rmid: rmid of the counter to read.
> * @eventid: eventid to read, e.g. L3 occupancy.
> * @val: result of the counter read in bytes.
> + * @arch_mon_ctx: An allocated context from resctrl_arch_mon_ctx_alloc().
> *

Could this description be expanded to indicate what this context is used for?

> * Call from process context on a CPU that belongs to domain @d.
> *


Reinette
Re: [PATCH v3 11/19] x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read() [ In reply to ]
Hi Reinette,

On 01/04/2023 00:27, Reinette Chatre wrote:
> On 3/20/2023 10:26 AM, James Morse wrote:
>> Depending on the number of monitors available, Arm's MPAM may need to
>> allocate a monitor prior to reading the counter value. Allocating a
>> contended resource may involve sleeping.
>>
>> All callers of resctrl_arch_rmid_read() read the counter on more than
>> one domain. If the monitor is allocated globally, there is no need to
>
> This does not seem accurate considering the __check_limbo() call that
> is called for a single domain.

True, it was add_rmid_to_limbo() that motivated this being global, but it's conflated with
holding the allocation for multiple invocations of resctrl_arch_rmid_read() for the
multiple groups that __check_limbo() walks over, and the calls for each monitor group that
mon_event_count() makes.


>> allocate and free it for each call to resctrl_arch_rmid_read().
>>
>> Add arch hooks for this allocation, which need calling before
>> resctrl_arch_rmid_read(). The allocated monitor is passed to
>> resctrl_arch_rmid_read(), then freed again afterwards. The helper
>> can be called on any CPU, and can sleep.


>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index de72df06b37b..f38cd2f12285 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -15,6 +15,7 @@
>> * Software Developer Manual June 2016, volume 3, section 17.17.
>> */
>>
>> +#include <linux/cpu.h>
>
> Why is this needed?

lockdep_assert_cpus_held(), but that got folded out to a later patch. I've moved it there.


>> #include <linux/module.h>
>> #include <linux/sizes.h>
>> #include <linux/slab.h>
>> @@ -271,7 +272,7 @@ static void smp_call_rmid_read(void *_arg)
>>
>> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>> u32 closid, u32 rmid, enum resctrl_event_id eventid,
>> - u64 *val)
>> + u64 *val, int ignored)
>> {
>> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
>> @@ -317,9 +318,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>> u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>> struct rmid_entry *entry;
>> u32 idx, cur_idx = 1;
>> + int arch_mon_ctx;
>> bool rmid_dirty;
>> u64 val = 0;
>>
>> + arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
>> + if (arch_mon_ctx < 0)
>> + return;

> The vision for this is not clear to me. When I read that context needs to be allocated
> I expect it to return a pointer to some new context, not an int. What would the
> "context" consist of?

It might just need a different name.

For MPAM, this is allocating a monitor, which is the hardware that does the counting in
the cache or the memory controller. The number of monitors is an implementation choice,
and may not match the number of CLOSID/RMID that are in use. There aren't guaranteed to be
enough to allocate one for every control or monitor group up front.

The int being returned is the allocated monitor's index. It identifies which monitor needs
programming to read the provided CLOSID/RMID, and the counter register to read with the value.

I can allocate memory for an int if you think that is clearer.
(I was hoping to leave that for whoever needs something bigger than a pointer)


>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>> index ff7452f644e4..03e4f41cd336 100644
>> --- a/include/linux/resctrl.h
>> +++ b/include/linux/resctrl.h
>> @@ -233,6 +233,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
>> * @rmid: rmid of the counter to read.
>> * @eventid: eventid to read, e.g. L3 occupancy.
>> * @val: result of the counter read in bytes.
>> + * @arch_mon_ctx: An allocated context from resctrl_arch_mon_ctx_alloc().
>> *

> Could this description be expanded to indicate what this context is used for?

Sure,
"An architecture specific value from resctrl_arch_mon_ctx_alloc(), for MPAM this
identifies the hardware monitor allocated for this read request".



Thanks,

James
Re: [PATCH v3 11/19] x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read() [ In reply to ]
Hi James,

On 4/27/2023 7:19 AM, James Morse wrote:
> On 01/04/2023 00:27, Reinette Chatre wrote:
>> On 3/20/2023 10:26 AM, James Morse wrote:

...

>>> #include <linux/module.h>
>>> #include <linux/sizes.h>
>>> #include <linux/slab.h>
>>> @@ -271,7 +272,7 @@ static void smp_call_rmid_read(void *_arg)
>>>
>>> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>>> u32 closid, u32 rmid, enum resctrl_event_id eventid,
>>> - u64 *val)
>>> + u64 *val, int ignored)
>>> {
>>> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>>> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
>>> @@ -317,9 +318,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>>> u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>>> struct rmid_entry *entry;
>>> u32 idx, cur_idx = 1;
>>> + int arch_mon_ctx;
>>> bool rmid_dirty;
>>> u64 val = 0;
>>>
>>> + arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
>>> + if (arch_mon_ctx < 0)
>>> + return;
>
>> The vision for this is not clear to me. When I read that context needs to be allocated
>> I expect it to return a pointer to some new context, not an int. What would the
>> "context" consist of?
>
> It might just need a different name.
>
> For MPAM, this is allocating a monitor, which is the hardware that does the counting in
> the cache or the memory controller. The number of monitors is an implementation choice,
> and may not match the number of CLOSID/RMID that are in use. There aren't guaranteed to be
> enough to allocate one for every control or monitor group up front.
>
> The int being returned is the allocated monitor's index. It identifies which monitor needs
> programming to read the provided CLOSID/RMID, and the counter register to read with the value.

I see.

>
> I can allocate memory for an int if you think that is clearer.
> (I was hoping to leave that for whoever needs something bigger than a pointer)

I'd rather not complicate it in this way.

>>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>>> index ff7452f644e4..03e4f41cd336 100644
>>> --- a/include/linux/resctrl.h
>>> +++ b/include/linux/resctrl.h
>>> @@ -233,6 +233,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
>>> * @rmid: rmid of the counter to read.
>>> * @eventid: eventid to read, e.g. L3 occupancy.
>>> * @val: result of the counter read in bytes.
>>> + * @arch_mon_ctx: An allocated context from resctrl_arch_mon_ctx_alloc().
>>> *
>
>> Could this description be expanded to indicate what this context is used for?
>
> Sure,
> "An architecture specific value from resctrl_arch_mon_ctx_alloc(), for MPAM this
> identifies the hardware monitor allocated for this read request".

This helps. Thank you.

Reinette
Re: [PATCH v3 11/19] x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read() [ In reply to ]
Hi Reinette,

On 28/04/2023 00:40, Reinette Chatre wrote:
> On 4/27/2023 7:19 AM, James Morse wrote:
>> On 01/04/2023 00:27, Reinette Chatre wrote:
>>> On 3/20/2023 10:26 AM, James Morse wrote:

>>>> @@ -317,9 +318,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>>>> u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>>>> struct rmid_entry *entry;
>>>> u32 idx, cur_idx = 1;
>>>> + int arch_mon_ctx;
>>>> bool rmid_dirty;
>>>> u64 val = 0;
>>>>
>>>> + arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
>>>> + if (arch_mon_ctx < 0)
>>>> + return;
>>
>>> The vision for this is not clear to me. When I read that context needs to be allocated
>>> I expect it to return a pointer to some new context, not an int. What would the
>>> "context" consist of?
>>
>> It might just need a different name.
>>
>> For MPAM, this is allocating a monitor, which is the hardware that does the counting in
>> the cache or the memory controller. The number of monitors is an implementation choice,
>> and may not match the number of CLOSID/RMID that are in use. There aren't guaranteed to be
>> enough to allocate one for every control or monitor group up front.
>>
>> The int being returned is the allocated monitor's index. It identifies which monitor needs
>> programming to read the provided CLOSID/RMID, and the counter register to read with the value.
>
> I see.
>
>>
>> I can allocate memory for an int if you think that is clearer.
>> (I was hoping to leave that for whoever needs something bigger than a pointer)

> I'd rather not complicate it in this way.

It's a no-op for x86 as these calls get optimised out, but more annoying for MPAM (I've
done it now). I think the result is more intuitive, but see what you think.


Thanks,

James