Mailing List Archive

[PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents
xen/arch/x86/mm/mem_event.c | 2 +-
xen/arch/x86/mm/mem_paging.c | 2 +-
xen/arch/x86/mm/p2m.c | 52 +++++++++++++++++++++++++++++++++++++++++--
xen/include/asm-x86/p2m.h | 2 +-
xen/include/public/domctl.h | 8 +++++-
5 files changed, 58 insertions(+), 8 deletions(-)


p2m_mem_paging_prep ensures that an mfn is backing the paged-out gfn, and
transitions to the next state in the paging state machine for that page.
Foreign mappings of the gfn will now succeed. This is the key idea, as
it allows the pager to now map the gfn and fill in its contents.

Unfortunately, it also allows any other foreign mapper to map the gfn and read
its contents. This is particularly dangerous when the populate is launched
by a foreign mapper in the first place, which will be actively retrying the
map operation and might race with the pager. Qemu-dm is a prime example.

Fix the race by allowing a buffer to be optionally passed in the prep
operation, and having the hypervisor memcpy from that buffer into the newly
prepped page before promoting the gfn type.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/arch/x86/mm/mem_event.c
--- a/xen/arch/x86/mm/mem_event.c
+++ b/xen/arch/x86/mm/mem_event.c
@@ -45,7 +45,7 @@ static int mem_event_enable(struct domai
struct domain *dom_mem_event = current->domain;
struct vcpu *v = current;
unsigned long ring_addr = mec->ring_addr;
- unsigned long shared_addr = mec->shared_addr;
+ unsigned long shared_addr = mec->u.shared_addr;
l1_pgentry_t l1e;
unsigned long shared_gfn = 0, ring_gfn = 0; /* gcc ... */
p2m_type_t p2mt;
diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/arch/x86/mm/mem_paging.c
--- a/xen/arch/x86/mm/mem_paging.c
+++ b/xen/arch/x86/mm/mem_paging.c
@@ -47,7 +47,7 @@ int mem_paging_domctl(struct domain *d,
case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP:
{
unsigned long gfn = mec->gfn;
- return p2m_mem_paging_prep(d, gfn);
+ return p2m_mem_paging_prep(d, gfn, mec->u.buffer);
}
break;

diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -974,14 +974,43 @@ void p2m_mem_paging_populate(struct doma
* mfn if populate was called for gfn which was nominated but not evicted. In
* this case only the p2mt needs to be forwarded.
*/
-int p2m_mem_paging_prep(struct domain *d, unsigned long gfn)
+int p2m_mem_paging_prep(struct domain *d, unsigned long gfn, uint64_t buffer)
{
struct page_info *page;
p2m_type_t p2mt;
p2m_access_t a;
- mfn_t mfn;
+ mfn_t mfn, buf_mfn = _mfn(INVALID_MFN);
struct p2m_domain *p2m = p2m_get_hostp2m(d);
- int ret;
+ int ret, page_extant = 1;
+ void *buf_map = NULL;
+
+ /* Map buffer page, if any, and get a reference */
+ if ( buffer )
+ {
+ l1_pgentry_t l1e;
+ unsigned long buf_gfn;
+ p2m_type_t buf_p2mt;
+
+ if ( (buffer & (PAGE_SIZE - 1)) ||
+ (!access_ok(buffer, PAGE_SIZE)) )
+ return -EINVAL;
+
+ guest_get_eff_l1e(current, buffer, &l1e);
+ buf_gfn = l1e_get_pfn(l1e);
+ buf_mfn = get_gfn(current->domain, buf_gfn,
+ &buf_p2mt);
+
+ if ( likely( mfn_valid(buf_mfn) &&
+ p2m_is_ram(buf_p2mt) ) )
+ {
+ get_page(mfn_to_page(buf_mfn), current->domain);
+ buf_map = map_domain_page(mfn_x(buf_mfn));
+ put_gfn(current->domain, buf_gfn);
+ } else {
+ put_gfn(current->domain, buf_gfn);
+ return -EINVAL;
+ }
+ }

p2m_lock(p2m);

@@ -1001,6 +1030,18 @@ int p2m_mem_paging_prep(struct domain *d
if ( unlikely(page == NULL) )
goto out;
mfn = page_to_mfn(page);
+ page_extant = 0;
+ }
+
+ /* If we were given a buffer, now is the time to use it */
+ if ( !page_extant && buffer )
+ {
+ void *guest_map;
+
+ ASSERT( mfn_valid(mfn) );
+ guest_map = map_domain_page(mfn_x(mfn));
+ memcpy(guest_map, buf_map, PAGE_SIZE);
+ unmap_domain_page(guest_map);
}

/* Fix p2m mapping */
@@ -1012,6 +1053,11 @@ int p2m_mem_paging_prep(struct domain *d

out:
p2m_unlock(p2m);
+ if ( buffer )
+ {
+ unmap_domain_page(buf_map);
+ put_page(mfn_to_page(buf_mfn));
+ }
return ret;
}

diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -479,7 +479,7 @@ void p2m_mem_paging_drop_page(struct dom
/* Start populating a paged out frame */
void p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
/* Prepare the p2m for paging a frame in */
-int p2m_mem_paging_prep(struct domain *d, unsigned long gfn);
+int p2m_mem_paging_prep(struct domain *d, unsigned long gfn, uint64_t buffer);
/* Resume normal operation (in case a domain was paused) */
void p2m_mem_paging_resume(struct domain *d);
#else
diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/include/public/domctl.h
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -742,8 +742,12 @@ struct xen_domctl_mem_event_op {
uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */
uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */

- /* OP_ENABLE */
- uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */
+ union {
+ /* OP_ENABLE IN: Virtual address of shared page */
+ uint64_aligned_t shared_addr;
+ /* PAGING_PREP IN: buffer to immediately fill page in */
+ uint64_aligned_t buffer;
+ } u;
uint64_aligned_t ring_addr; /* IN: Virtual address of ring page */

/* Other OPs */

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents [ In reply to ]
Andres Lagar-Cavilla writes ("[PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents"):
> - /* OP_ENABLE */
> - uint64_aligned_t shared_addr; /* IN: Virtual address of shared page */
> + union {
> + /* OP_ENABLE IN: Virtual address of shared page */
> + uint64_aligned_t shared_addr;
> + /* PAGING_PREP IN: buffer to immediately fill page in */
> + uint64_aligned_t buffer;
> + } u;

Do we care that this interface is very binary-incompatible ? Is there
a flag or version somewhere where we can at least arrange for this to
be detected ? Perhaps we should allocate a new domctl number for this
version, so old code gets "no idea what you're talking about" rather
than wrong behaviour ?

Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents [ In reply to ]
> Andres Lagar-Cavilla writes ("[PATCH 1 of 2] After preparing a page for
> page-in, allow immediate fill-in of the page contents"):
>> - /* OP_ENABLE */
>> - uint64_aligned_t shared_addr; /* IN: Virtual address of shared
>> page */
>> + union {
>> + /* OP_ENABLE IN: Virtual address of shared page */
>> + uint64_aligned_t shared_addr;
>> + /* PAGING_PREP IN: buffer to immediately fill page in */
>> + uint64_aligned_t buffer;
>> + } u;
>
> Do we care that this interface is very binary-incompatible ? Is there
> a flag or version somewhere where we can at least arrange for this to
> be detected ? Perhaps we should allocate a new domctl number for this
> version, so old code gets "no idea what you're talking about" rather
> than wrong behaviour ?

I turned the field into a union of the same size, so it should be binary
compatible. Should...

There is no reason to use a union other than clarity: "this field is used
for different purposes in different domctls".

I think this is fine, but your call.

Andres
>
> Ian.
>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents [ In reply to ]
Hi,

This looks good to me. I think it needs two more things to make it
correct (as well as the tools patch 2/2):
- an update to the xenpaging tool to use the new interface; and
- a possible update to the paging state machine --- after all, if the
prep call allocates the page and fills its contents, do we need
any more stages on the page-in path?

One more comment below:

At 16:52 -0500 on 29 Nov (1322585560), Andres Lagar-Cavilla wrote:
> diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/arch/x86/mm/p2m.c
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -974,14 +974,43 @@ void p2m_mem_paging_populate(struct doma
> * mfn if populate was called for gfn which was nominated but not evicted. In
> * this case only the p2mt needs to be forwarded.
> */
> -int p2m_mem_paging_prep(struct domain *d, unsigned long gfn)
> +int p2m_mem_paging_prep(struct domain *d, unsigned long gfn, uint64_t buffer)
> {
> struct page_info *page;
> p2m_type_t p2mt;
> p2m_access_t a;
> - mfn_t mfn;
> + mfn_t mfn, buf_mfn = _mfn(INVALID_MFN);
> struct p2m_domain *p2m = p2m_get_hostp2m(d);
> - int ret;
> + int ret, page_extant = 1;
> + void *buf_map = NULL;
> +
> + /* Map buffer page, if any, and get a reference */
> + if ( buffer )
> + {
> + l1_pgentry_t l1e;
> + unsigned long buf_gfn;
> + p2m_type_t buf_p2mt;
> +
> + if ( (buffer & (PAGE_SIZE - 1)) ||
> + (!access_ok(buffer, PAGE_SIZE)) )
> + return -EINVAL;
> +
> + guest_get_eff_l1e(current, buffer, &l1e);
> + buf_gfn = l1e_get_pfn(l1e);
> + buf_mfn = get_gfn(current->domain, buf_gfn,
> + &buf_p2mt);
> +
> + if ( likely( mfn_valid(buf_mfn) &&
> + p2m_is_ram(buf_p2mt) ) )
> + {
> + get_page(mfn_to_page(buf_mfn), current->domain);
> + buf_map = map_domain_page(mfn_x(buf_mfn));
> + put_gfn(current->domain, buf_gfn);
> + } else {
> + put_gfn(current->domain, buf_gfn);
> + return -EINVAL;
> + }
> + }

We could maybe avoid all this mechanism by doing a copy_from_user() of
the buffer contents directly into the new page, instead of an explicit
map-and-memcpy().

Cheers,

Tim.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents [ In reply to ]
> Hi,
>
> This looks good to me. I think it needs two more things to make it
> correct (as well as the tools patch 2/2):
> - an update to the xenpaging tool to use the new interface; and
Sure, have it ready, will definitely cc Olaf for his ack on that one.
> - a possible update to the paging state machine --- after all, if the
> prep call allocates the page and fills its contents, do we need
> any more stages on the page-in path?
I am kind of torn about this. Maybe the pager wants to do a set of loads,
and then fire off many vcpu unpauses in a batched fashion (which is
possible with patches I submitted later).

This isn't a necessity for correctness, though. And we still need the
resume kick for cases like a guest accessing a page that has not been
paged out yet (p2m_ram_paging_out).

Andres

>
> One more comment below:
>
> At 16:52 -0500 on 29 Nov (1322585560), Andres Lagar-Cavilla wrote:
>> diff -r 4ee6d40edc2c -r 0ce71e5bfaac xen/arch/x86/mm/p2m.c
>> --- a/xen/arch/x86/mm/p2m.c
>> +++ b/xen/arch/x86/mm/p2m.c
>> @@ -974,14 +974,43 @@ void p2m_mem_paging_populate(struct doma
>> * mfn if populate was called for gfn which was nominated but not
>> evicted. In
>> * this case only the p2mt needs to be forwarded.
>> */
>> -int p2m_mem_paging_prep(struct domain *d, unsigned long gfn)
>> +int p2m_mem_paging_prep(struct domain *d, unsigned long gfn, uint64_t
>> buffer)
>> {
>> struct page_info *page;
>> p2m_type_t p2mt;
>> p2m_access_t a;
>> - mfn_t mfn;
>> + mfn_t mfn, buf_mfn = _mfn(INVALID_MFN);
>> struct p2m_domain *p2m = p2m_get_hostp2m(d);
>> - int ret;
>> + int ret, page_extant = 1;
>> + void *buf_map = NULL;
>> +
>> + /* Map buffer page, if any, and get a reference */
>> + if ( buffer )
>> + {
>> + l1_pgentry_t l1e;
>> + unsigned long buf_gfn;
>> + p2m_type_t buf_p2mt;
>> +
>> + if ( (buffer & (PAGE_SIZE - 1)) ||
>> + (!access_ok(buffer, PAGE_SIZE)) )
>> + return -EINVAL;
>> +
>> + guest_get_eff_l1e(current, buffer, &l1e);
>> + buf_gfn = l1e_get_pfn(l1e);
>> + buf_mfn = get_gfn(current->domain, buf_gfn,
>> + &buf_p2mt);
>> +
>> + if ( likely( mfn_valid(buf_mfn) &&
>> + p2m_is_ram(buf_p2mt) ) )
>> + {
>> + get_page(mfn_to_page(buf_mfn), current->domain);
>> + buf_map = map_domain_page(mfn_x(buf_mfn));
>> + put_gfn(current->domain, buf_gfn);
>> + } else {
>> + put_gfn(current->domain, buf_gfn);
>> + return -EINVAL;
>> + }
>> + }
>
> We could maybe avoid all this mechanism by doing a copy_from_user() of
> the buffer contents directly into the new page, instead of an explicit
> map-and-memcpy().
>
> Cheers,
>
> Tim.
>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
Re: [PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents [ In reply to ]
Andres Lagar-Cavilla writes ("Re: [Xen-devel] [PATCH 1 of 2] After preparing a page for page-in, allow immediate fill-in of the page contents"):
> I turned the field into a union of the same size, so it should be binary
> compatible. Should...

So you did. Yesterday I thought it was a struct. Sorry for being
confused.

Ian.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel