Mailing List Archive

[PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

Introduce a new x86_init hook called pagetable_reserve that is used during
the initial memory mapping to reserve a range of memory addresses for
kernel pagetable usage.

On native it just calls memblock_x86_reserve_range while on xen it also
takes care of setting the spare memory previously allocated
for kernel pagetable pages from RO to RW, so that it can be used for
other purposes.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
arch/x86/include/asm/pgtable_types.h | 1 +
arch/x86/include/asm/x86_init.h | 9 +++++++++
arch/x86/kernel/x86_init.c | 4 ++++
arch/x86/mm/init.c | 9 +++++++--
arch/x86/xen/mmu.c | 15 +++++++++++++++
5 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723..d56187c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);

+extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2..d66b3a2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,14 @@ struct x86_init_oem {
};

/**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
+ */
+struct x86_init_mapping {
+ void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
* @pagetable_setup_done: platform specific post paging_init() call
@@ -123,6 +131,7 @@ struct x86_init_ops {
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
+ struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e..75ef4b1 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},

+ .mapping = {
+ .pagetable_reserve = native_pagetable_reserve,
+ },
+
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289..ed0650b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}

+void native_pagetable_reserve(u64 start, u64 end)
+{
+ memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
struct map_range {
unsigned long start;
unsigned long end;
@@ -273,8 +278,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
__flush_tlb_all();

if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
- pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+ PFN_PHYS(pgt_buf_end));

if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6b833db..fec8680 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
{
}

+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+ /* reserve the range used */
+ memblock_x86_reserve_range(start, end, "PGTABLE");
+
+ /* set as RW the rest */
+ printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+ PFN_PHYS(pgt_buf_top));
+ while (end < PFN_PHYS(pgt_buf_top)) {
+ make_lowmem_page_readwrite(__va(end));
+ end += PAGE_SIZE;
+ }
+}
+
static void xen_post_allocator_init(void);

static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -2100,6 +2114,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {

void __init xen_init_mmu_ops(void)
{
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
--
1.7.2.3
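
The mechanism the patch builds on is the x86_init table of function
pointers: the table is initialized with native defaults, and a subarch
such as Xen overrides individual pointers early at boot. A minimal
user-space sketch of that pattern (hypothetical names, not kernel code):

#include <stdio.h>

typedef unsigned long long u64;

/* A table of hooks with a native default, mirroring struct x86_init_ops. */
struct init_mapping_ops {
        void (*pagetable_reserve)(u64 start, u64 end);
};

static void native_reserve(u64 start, u64 end)
{
        printf("native: reserve %#llx-%#llx\n", start, end);
}

static void xen_reserve(u64 start, u64 end)
{
        printf("xen: reserve %#llx-%#llx, then hand the spare tail back RW\n",
               start, end);
}

/* The table defaults to the native implementation... */
static struct init_mapping_ops mapping_ops = {
        .pagetable_reserve = native_reserve,
};

int main(void)
{
        /* ...and a subarch overrides the pointer before it is first used. */
        mapping_ops.pagetable_reserve = xen_reserve;
        mapping_ops.pagetable_reserve(0x100000, 0x110000);
        return 0;
}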


Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
>>> On 12.04.11 at 13:19, <stefano.stabellini@eu.citrix.com> wrote:
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 6b833db..fec8680 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
> {
> }
>
> +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
> +{
> + /* reserve the range used */
> + memblock_x86_reserve_range(start, end, "PGTABLE");

Wouldn't it be more natural (and involve fewer future changes) if
you called native_pagetable_reserve() here?

Jan

> +
> + /* set as RW the rest */
> + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
> + PFN_PHYS(pgt_buf_top));
> + while (end < PFN_PHYS(pgt_buf_top)) {
> + make_lowmem_page_readwrite(__va(end));
> + end += PAGE_SIZE;
> + }
> +}
> +
> static void xen_post_allocator_init(void);
>
> static __init void xen_pagetable_setup_done(pgd_t *base)



Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/12/2011 04:19 AM, stefano.stabellini@eu.citrix.com wrote:
> From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>
> Introduce a new x86_init hook called pagetable_reserve that is used during
> the initial memory mapping to reserve a range of memory addresses for
> kernel pagetable usage.
>
> On native it just calls memblock_x86_reserve_range while on xen it also
> takes care of setting the spare memory previously allocated
> for kernel pagetable pages from RO to RW, so that it can be used for
> other purposes.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> Cc: Yinghai Lu <yinghai@kernel.org>
> Cc: H. Peter Anvin <hpa@zytor.com>
> Cc: Ingo Molnar <mingo@elte.hu>

Acked-by: Yinghai Lu <yinghai@kernel.org>

> [...]
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 6b833db..fec8680 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
> {
> }
>
> +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
> +{
> + /* reserve the range used */
> + memblock_x86_reserve_range(start, end, "PGTABLE");
> +
> + /* set as RW the rest */
> + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
> + PFN_PHYS(pgt_buf_top));
> + while (end < PFN_PHYS(pgt_buf_top)) {
> + make_lowmem_page_readwrite(__va(end));
> + end += PAGE_SIZE;
> + }
> +}
> +

It would be cleaner for the xen code if you marked just the start-end range RO, instead of marking the whole range RO beforehand.

Thanks

Yinghai Lu


Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On Tue, 12 Apr 2011, Jan Beulich wrote:
> >>> On 12.04.11 at 13:19, <stefano.stabellini@eu.citrix.com> wrote:
> > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> > index 6b833db..fec8680 100644
> > --- a/arch/x86/xen/mmu.c
> > +++ b/arch/x86/xen/mmu.c
> > @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
> > {
> > }
> >
> > +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
> > +{
> > + /* reserve the range used */
> > + memblock_x86_reserve_range(start, end, "PGTABLE");
>
> Wouldn't it be more natural (and involving less future changes) if
> you called native_pagetable_reserve() here?

Good point.
Patch update below:

---

commit fa4eeb1d4213fee248e8d141bb8d1504aae457ab
Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed Mar 30 16:17:33 2011 +0000

x86,xen: introduce x86_init.mapping.pagetable_reserve

Introduce a new x86_init hook called pagetable_reserve that is used during
the initial memory mapping to reserve a range of memory addresses for
kernel pagetable usage.

On native it just calls memblock_x86_reserve_range while on xen it also
takes care of setting the spare memory previously allocated
for kernel pagetable pages from RO to RW, so that it can be used for
other purposes.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723..d56187c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);

+extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2..d66b3a2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,14 @@ struct x86_init_oem {
};

/**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
+ */
+struct x86_init_mapping {
+ void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
* @pagetable_setup_done: platform specific post paging_init() call
@@ -123,6 +131,7 @@ struct x86_init_ops {
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
+ struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e..75ef4b1 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},

+ .mapping = {
+ .pagetable_reserve = native_pagetable_reserve,
+ },
+
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289..ed0650b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}

+void native_pagetable_reserve(u64 start, u64 end)
+{
+ memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
struct map_range {
unsigned long start;
unsigned long end;
@@ -273,8 +278,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
__flush_tlb_all();

if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
- pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+ PFN_PHYS(pgt_buf_end));

if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6b833db..7ad0292 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
{
}

+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+ /* reserve the range used */
+ native_pagetable_reserve(start, end);
+
+ /* set as RW the rest */
+ printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+ PFN_PHYS(pgt_buf_top));
+ while (end < PFN_PHYS(pgt_buf_top)) {
+ make_lowmem_page_readwrite(__va(end));
+ end += PAGE_SIZE;
+ }
+}
+
static void xen_post_allocator_init(void);

static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -2100,6 +2114,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {

void __init xen_init_mmu_ops(void)
{
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;

Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Tue, 12 Apr 2011, Yinghai Lu wrote:
> > diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> > index 6b833db..fec8680 100644
> > --- a/arch/x86/xen/mmu.c
> > +++ b/arch/x86/xen/mmu.c
> > @@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
> > {
> > }
> >
> > +static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
> > +{
> > + /* reserve the range used */
> > + memblock_x86_reserve_range(start, end, "PGTABLE");
> > +
> > + /* set as RW the rest */
> > + printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
> > + PFN_PHYS(pgt_buf_top));
> > + while (end < PFN_PHYS(pgt_buf_top)) {
> > + make_lowmem_page_readwrite(__va(end));
> > + end += PAGE_SIZE;
> > + }
> > +}
> > +
>
> It would be cleaner for the xen code if you marked just the start-end range RO, instead of marking the whole range RO beforehand.

Yes, that would be ideal, but we cannot do that because we don't know
exactly where pgt_buf_end is before allocating the pagetable pages, and
the pagetable pages need to be marked RO before being hooked into the
pagetable. This is why we mark the whole range RO and, after the
pagetable allocation, when we know for sure where pgt_buf_end is, we
modify the range pgt_buf_end-pgt_buf_top to RW.
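
To make the ordering constraint concrete, here is a toy user-space model
of the sequence described above (the helpers and the pfn values are made
up; they stand in for the real hypercall-backed primitives):

#include <stdio.h>

typedef unsigned long long u64;
#define PAGE_SHIFT 12

/* hypothetical stand-ins for the real Xen page-protection primitives */
static void mark_ro(u64 s, u64 e) { printf("RO %#llx-%#llx\n", s, e); }
static void mark_rw(u64 s, u64 e) { printf("RW %#llx-%#llx\n", s, e); }

int main(void)
{
        u64 pgt_buf_start = 0x100, pgt_buf_top = 0x140; /* pfns; an overestimate */
        u64 pgt_buf_end;

        /* The whole buffer must be RO before any page is hooked in... */
        mark_ro(pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);

        /* ...and only after the tables are built is the real end known. */
        pgt_buf_end = 0x118;

        /* The pagetable_reserve hook then hands the unused tail back RW. */
        mark_rw(pgt_buf_end << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
        return 0;
}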

Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Tue, Apr 12, 2011 at 10:40:57AM -0700, Yinghai Lu wrote:
> On 04/12/2011 04:19 AM, stefano.stabellini@eu.citrix.com wrote:
> > From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> >
> > Introduce a new x86_init hook called pagetable_reserve that is used during
> > the initial memory mapping to reserve a range of memory addresses for
> > kernel pagetable usage.
> >
> > On native it just calls memblock_x86_reserve_range while on xen it also
> > takes care of setting the spare memory previously allocated
> > for kernel pagetable pages from RO to RW, so that it can be used for
> > other purposes.
> >
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > Cc: Yinghai Lu <yinghai@kernel.org>
> > Cc: H. Peter Anvin <hpa@zytor.com>
> > Cc: Ingo Molnar <mingo@elte.hu>
>
> Acked-by: Yinghai Lu <yinghai@kernel.org>

Peter,

I stuck this and some of the others in this thread on 'stable/bug-fixes-rc3'
(git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git). But before
I send them to Linus to pull I need your OK. Would it be OK if I stuck
your Ack on the patch?

Or you could take this patch (the one I stuck in my tree has the description
a bit modified so I recommend that one).

Thanks,
Konrad

Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/12/2011 04:19 AM, stefano.stabellini@eu.citrix.com wrote:
> From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
>
> Introduce a new x86_init hook called pagetable_reserve that is used during
> the initial memory mapping to reserve a range of memory addresses for
> kernel pagetable usage.
>
> On native it just calls memblock_x86_reserve_range while on xen it also
> takes care of setting the spare memory previously allocated
> for kernel pagetable pages from RO to RW, so that it can be used for
> other purposes.
>

What are the *semantics* of this hook?

Hooks are insanely nasty if they are just defined by a particular code
flow, as evidenced by the royal mess called paravirt_ops.

-hpa

Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/13/2011 03:35 AM, Stefano Stabellini wrote:
> Yes, that would be ideal, but we cannot do that because we don't know
> exactly where pgt_buf_end is before allocating the pagetable pages, and
> the pagetable pages need to be marked RO before being hooked into the
> pagetable. This is why we mark the whole range RO and, after the
> pagetable allocation, when we know for sure where pgt_buf_end is, we
> modify the range pgt_buf_end-pgt_buf_top to RW.

The hell? You have to fill the pages before you hook them into the page
tables anyway (this means writing!) and then you have to mark them RO as
you add them to the page tables... anything else doesn't make any sense
at all.

-hpa

Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/13/2011 11:03 AM, Konrad Rzeszutek Wilk wrote:
>
> Peter,
>
> I stuck this and some of the others in this thread on 'stable/bug-fixes-rc3'
> (git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git). But before
> I send them to Linus to pull I need your OK. Would it be OK if I stuck
> your Ack on the patch?
>
> Or you could take this patch (the one I stuck in my tree has the description
> a bit modified so I recommend that one).
>

You have my extremely firm NAK until the intended semantics of the new
hook is explained in detail.

-hpa

Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Wed, Apr 13, 2011 at 11:35:02AM -0700, H. Peter Anvin wrote:
> On 04/13/2011 11:03 AM, Konrad Rzeszutek Wilk wrote:
> >
> > Peter,
> >
> > I stuck this and some of the others in this thread on 'stable/bug-fixes-rc3'
> > (git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git). But before
> > I send them to Linus to pull I need your OK. Would it be OK if I stuck
> > your Ack on the patch?
> >
> > Or you could take this patch (the one I stuck in my tree has the description
> > a bit modified so I recommend that one).
> >
>
> You have my extremely firm NAK until the intended semantics of the new
> hook is explained in detail.

<nods> I will let Stefano answer your questions as he has been battling
this particular regression and trying to find an acceptable solution
to the problem.

Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Wed, 13 Apr 2011, H. Peter Anvin wrote:
> On 04/13/2011 03:35 AM, Stefano Stabellini wrote:
> > Yes, that would be ideal, but we cannot do that because we don't know
> > exactly where pgt_buf_end is before allocating the pagetable pages, and
> > the pagetable pages need to be marked RO before being hooked into the
> > pagetable. This is why we mark the whole range RO and, after the
> > pagetable allocation, when we know for sure where pgt_buf_end is, we
> > modify the range pgt_buf_end-pgt_buf_top to RW.
>
> The hell? You have to fill the pages before you hook them into the page
> tables anyway (this means writing!) and then you have to mark them RO as
> you add them to the page tables... anything else doesn't make any sense
> at all.

Right.
The problem is that at some point init_memory_mapping is going to reach
the pagetable pages area and map those pages too (I don't mean hooking the
pagetable pages in the pagetable, I mean mapping them as normal memory
that falls in the range of addresses passed to init_memory_mapping as
argument).
Some of those pages are already pagetable pages (they are in the range
pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
everything is fine.
Some of these pages are not pagetable pages yet (they fall in the range
pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
are going to be mapped RW. When these pages become pagetable pages and
are hooked into the pagetable, xen will find that the guest already has
an RW mapping of them somewhere and will fail the operation.
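
As a toy model of the mixed mappings just described (illustrative only,
not kernel code), the protection init_memory_mapping gives a page in the
buffer depends solely on whether it lies below pgt_buf_end at that moment:

#include <stdio.h>

typedef unsigned long long u64;
enum prot { RW, RO };

/* Which protection a pfn inside the pagetable buffer ends up with. */
static enum prot early_prot(u64 pfn, u64 pgt_buf_start, u64 pgt_buf_end)
{
        if (pfn >= pgt_buf_start && pfn < pgt_buf_end)
                return RO;      /* already a pagetable page */
        return RW;              /* spare page: Xen refuses it if hooked in later */
}

int main(void)
{
        u64 start = 0x100, end = 0x118, top = 0x140;    /* made-up pfns */
        u64 pfn;

        for (pfn = start; pfn < top; pfn += 8)
                printf("pfn %#llx -> %s\n", pfn,
                       early_prot(pfn, start, end) == RO ? "RO" : "RW");
        return 0;
}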

In order to fix the issue I could mark all the pages in the entire range
pgt_buf_start-pgt_buf_top as RO, but then, once the pagetable allocation
is completed, only the range pgt_buf_start-pgt_buf_end is reserved by
init_memory_mapping, so the kernel is going to crash as soon as
one of the pages in the range pgt_buf_end-pgt_buf_top is reused.


Initially I suggested adding two hooks: one to allocate the memory for
the pagetable pages and one to reserve that memory after the
allocation:

http://marc.info/?l=linux-kernel&m=130141955626268

Following Yinghai's suggestion I removed the first hook (currently
unnecessary because we would use the same implementation on native and
on xen) and modified the second one, which became
x86_init.mapping.pagetable_reserve.

Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Wed, 13 Apr 2011, H. Peter Anvin wrote:
> On 04/12/2011 04:19 AM, stefano.stabellini@eu.citrix.com wrote:
> > From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> >
> > Introduce a new x86_init hook called pagetable_reserve that is used during
> > the initial memory mapping to reserve a range of memory addresses for
> > kernel pagetable usage.
> >
> > On native it just calls memblock_x86_reserve_range while on xen it also
> > takes care of setting the spare memory previously allocated
> > for kernel pagetable pages from RO to RW, so that it can be used for
> > other purposes.
> >
>
> What are the *semantics* of this hook?
>
> Hooks are insanely nasty if they are just defined by a particular code
> flow, as evidenced by the royal mess called paravirt_ops.

I hope that the other email I have just sent clarifies the purpose of
the hook.
I admit that as it is it wouldn't find much usage outside
init_memory_mapping.
Maybe adding the corresponding hook to allocate the initial kernel
pagetable pages would help generalize it. Or maybe we just need a
better comment in the code:


/* Reserve the kernel pagetable pages we used and free the other ones so
* that they can be reused for other purposes.
*
* On native it just means calling memblock_x86_reserve_range, on Xen it
* also means marking RW the pagetable pages that we allocated before
* but that haven't been used here.
*/
if (!after_bootmem && pgt_buf_end > pgt_buf_start)
x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
PFN_PHYS(pgt_buf_end));


Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Thu, 14 Apr 2011, Stefano Stabellini wrote:
> On Wed, 13 Apr 2011, H. Peter Anvin wrote:
> > On 04/12/2011 04:19 AM, stefano.stabellini@eu.citrix.com wrote:
> > > From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > >
> > > Introduce a new x86_init hook called pagetable_reserve that is used during
> > > the initial memory mapping to reserve a range of memory addresses for
> > > kernel pagetable usage.
> > >
> > > On native it just calls memblock_x86_reserve_range while on xen it also
> > > takes care of setting the spare memory previously allocated
> > > for kernel pagetable pages from RO to RW, so that it can be used for
> > > other purposes.
> > >
> >
> > What are the *semantics* of this hook?
> >
> > Hooks are insanely nasty if they are just defined by a particular code
> > flow, as evidenced by the royal mess called paravirt_ops.
>
> I hope that the other email I have just sent clarifies the purpose of
> the hook.
> I admit that as it is it wouldn't find much usage outside
> init_memory_mapping.
> Maybe adding the corresponding hook to allocate the initial kernel
> pagetable pages would help generalize it. Or maybe we just need a
> better comment in the code:
>
>
> /* Reserve the kernel pagetable pages we used and free the other ones so
> * that they can be reused for other purposes.
> *
> * On native it just means calling memblock_x86_reserve_range, on Xen it
> * also means marking RW the pagetable pages that we allocated before
> * but that haven't been used here.
> */
> if (!after_bootmem && pgt_buf_end > pgt_buf_start)
> x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
> PFN_PHYS(pgt_buf_end));


I added a detailed explanation to the commit message and in the code; I
hope this version is better:


commit 6f97ad736f304d600669cba1498e788099cea2cd
Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed Mar 30 16:17:33 2011 +0000

x86,xen: introduce x86_init.mapping.pagetable_reserve

Introduce a new x86_init hook called pagetable_reserve that is used at the
end of init_memory_mapping to reserve the range of memory addresses used
for the kernel pagetable pages and to free the remaining ones.

On native it just calls memblock_x86_reserve_range while on xen it also
takes care of setting the spare memory previously allocated
for kernel pagetable pages from RO to RW, so that it can be used for
other purposes.

A detailed explanation of the reason why this hook is needed follows.

As a consequence of the commit:

commit 4b239f458c229de044d6905c2b0f9fe16ed9e01e
Author: Yinghai Lu <yinghai@kernel.org>
Date: Fri Dec 17 16:58:28 2010 -0800

x86-64, mm: Put early page table high

at some point init_memory_mapping is going to reach the pagetable pages
area and map those pages too (mapping them as normal memory that falls
in the range of addresses passed to init_memory_mapping as argument).
Some of those pages are already pagetable pages (they are in the range
pgt_buf_start-pgt_buf_end) therefore they are going to be mapped RO and
everything is fine.
Some of these pages are not pagetable pages yet (they fall in the range
pgt_buf_end-pgt_buf_top; for example the page at pgt_buf_end) so they
are going to be mapped RW. When these pages become pagetable pages and
are hooked into the pagetable, xen will find that the guest already has
an RW mapping of them somewhere and will fail the operation.
The reason Xen requires pagetables to be RO is that the hypervisor needs
to verify that the pagetables are valid before using them. The validation
operations are called "pinning" (more details in arch/x86/xen/mmu.c).

In order to fix the issue we mark all the pages in the entire range
pgt_buf_start-pgt_buf_top as RO; however, when the pagetable allocation
is completed, only the range pgt_buf_start-pgt_buf_end is reserved by
init_memory_mapping. Hence the kernel is going to crash as soon as one
of the pages in the range pgt_buf_end-pgt_buf_top is reused (because
that range is RO).

For this reason we need a hook to reserve the kernel pagetable pages we
used and free the other ones so that they can be reused for other
purposes.
On native it just means calling memblock_x86_reserve_range; on Xen it
also means marking RW the pagetable pages that we allocated before but
that haven't been used.

Another way to fix this without using the hook is by adding an 'if
(xen_pv_domain)' check in the 'init_memory_mapping' code and calling the Xen
counterpart, but that is just nasty.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7db7723..d56187c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,6 +299,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);

+extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 643ebf2..d3d8590 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,6 +68,17 @@ struct x86_init_oem {
};

/**
+ * struct x86_init_mapping - platform specific initial kernel pagetable setup
+ * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
+ *
+ * For more details on the purpose of this hook, look in
+ * init_memory_mapping and the commit that added it.
+ */
+struct x86_init_mapping {
+ void (*pagetable_reserve)(u64 start, u64 end);
+};
+
+/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
* @pagetable_setup_done: platform specific post paging_init() call
@@ -123,6 +134,7 @@ struct x86_init_ops {
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
+ struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c11514e..75ef4b1 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,6 +61,10 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},

+ .mapping = {
+ .pagetable_reserve = native_pagetable_reserve,
+ },
+
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 286d289..08fee27 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -81,6 +81,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}

+void native_pagetable_reserve(u64 start, u64 end)
+{
+ memblock_x86_reserve_range(start, end, "PGTABLE");
+}
+
struct map_range {
unsigned long start;
unsigned long end;
@@ -272,9 +277,24 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,

__flush_tlb_all();

+ /*
+ * Reserve the kernel pagetable pages we used (pgt_buf_start -
+ * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
+ * so that they can be reused for other purposes.
+ *
+ * On native it just means calling memblock_x86_reserve_range, on Xen it
+ * also means marking RW the pagetable pages that we allocated before
+ * but that haven't been used.
+ *
+ * In fact on xen we mark RO the whole range pgt_buf_start -
+ * pgt_buf_top, because we have to make sure that when
+ * init_memory_mapping reaches the pagetable pages area, it maps
+ * RO all the pagetable pages, including the ones that are beyond
+ * pgt_buf_end at that time.
+ */
if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
- pgt_buf_end << PAGE_SHIFT, "PGTABLE");
+ x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
+ PFN_PHYS(pgt_buf_end));

if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6b833db..7ad0292 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1275,6 +1275,20 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
{
}

+static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
+{
+ /* reserve the range used */
+ native_pagetable_reserve(start, end);
+
+ /* set as RW the rest */
+ printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
+ PFN_PHYS(pgt_buf_top));
+ while (end < PFN_PHYS(pgt_buf_top)) {
+ make_lowmem_page_readwrite(__va(end));
+ end += PAGE_SIZE;
+ }
+}
+
static void xen_post_allocator_init(void);

static __init void xen_pagetable_setup_done(pgd_t *base)
@@ -2100,6 +2114,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {

void __init xen_init_mmu_ops(void)
{
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;

Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/14/2011 04:30 AM, Stefano Stabellini wrote:
>
> I hope that the other email I have just sent clarifies the purpose of
> the hook.
>

You're commingling rationale and (proposed) semantics. BOTH need to be
stated clearly.

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.


Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Thu, 14 Apr 2011, H. Peter Anvin wrote:
> On 04/14/2011 04:30 AM, Stefano Stabellini wrote:
> >
> > I hope that the other email I have just sent clarifies the purpose of
> > the hook.
> >
>
> You're commingling rationale and (proposed) semantics. BOTH need to be
> stated clearly.

Sorry, I didn't answer the right question straight away.
The semantics follow:



It is as if there is an implicit pvop after find_early_table_space
that exports the range pgt_buf_start - pgt_buf_top.
The range indicated is used, and it is the only range used, for the initial
kernel pagetable pages.
The range indicated is an overestimate.
It is implicit because we use the pgt_buf_* variables directly, but an
explicit version of it was in the first version of the patch.

After the pagetable setup done by kernel_physical_mapping_init, we know
exactly how many pages we actually used for the initial kernel
pagetables and how many are left unused.

The purpose of the second pvop (x86_init.mapping.pagetable_reserve) is
to notify sub-architectures of the actual range used for initial kernel
pagetable pages and partially revoke the previous indication.



If you agree with the proposed semantics above, I'll add the description
to the commit message as well as the code. I could also modify the
syntax of the new pvop call to better suit the semantics and/or restore
the first pvop call.
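
Written out, the pair of notifications described above would look roughly
like the following sketch (the name pagetable_buffer is hypothetical; only
pagetable_reserve exists in the posted patch):

typedef unsigned long long u64;

struct x86_init_mapping {
        /* after find_early_table_space: start..max_end is the overestimated
         * range set aside for initial kernel pagetable pages */
        void (*pagetable_buffer)(u64 start, u64 max_end);
        /* after kernel_physical_mapping_init: start..end was actually used;
         * end..max_end is given back for other purposes */
        void (*pagetable_reserve)(u64 start, u64 end);
};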

Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Thu, 14 Apr 2011, Stefano Stabellini wrote:
> On Thu, 14 Apr 2011, H. Peter Anvin wrote:
> > On 04/14/2011 04:30 AM, Stefano Stabellini wrote:
> > >
> > > I hope that the other email I have just sent clarifies the purpose of
> > > the hook.
> > >
> >
> > You're commingling rationale and (proposed) semantics. BOTH need to be
> > stated clearly.
>
> Sorry, I didn't answer the right question straight away.
> The semantics follow:
>
>
>
> It is as if there is an implicit pvop after find_early_table_space
> that exports the range pgt_buf_start - pgt_buf_top.
> The range indicated is used, and it is the only range used, for the initial
> kernel pagetable pages.
> The range indicated is an overestimate.
> It is implicit because we use the pgt_buf_* variables directly, but an
> explicit version of it was in the first version of the patch.
>
> After the pagetable setup done by kernel_physical_mapping_init, we know
> exactly how many pages we actually used for the initial kernel
> pagetables and how many are left unused.
>
> The purpose of the second pvop (x86_init.mapping.pagetable_reserve) is
> to notify sub-architectures of the actual range used for initial kernel
> pagetable pages and partially revoke the previous indication.
>
>
>
> If you agree with the proposed semantics above, I'll add the description
> to the commit message as well as the code. I could also modify the
> syntax of the new pvop call to better suit the semantics and/or restore
> the first pvop call.
>

ping?


Sorry, I don't mean to be pushy, it is just that I'll be AFK for 12 days
starting from next Friday and I think that this issue should really be
fixed in time for the 2.6.39 release, otherwise no 2.6.39 kernels will
be able to boot on any xen system.

Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/18/2011 07:09 AM, Stefano Stabellini wrote:
>
> Sorry, I don't mean to be pushy, it is just that I'll be AFK for 12 days
> starting from next Friday and I think that this issue should really be
> fixed in time for the 2.6.39 release, otherwise no 2.6.39 kernels will
> be able to boot on any xen system.

YOU STILL HAVEN'T PROPOSED ANY SEMANTICS.

The semantics of a hook is a description of what the preconditions are,
what the postconditions are, and what exactly they are allowed or not
allowed to do.

This is a real pain to do, *exactly because hooks are a real pain*.
Most hooks that have been put in have been "oh, just do something at
point X in the code", which is a case of definition by implementation,
which is exactly how we ended up with the current mess.

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.


Re: [PATCH 2/4] x86,xen: introduce x86_init.mapping.pagetable_reserve
On Mon, 18 Apr 2011, H. Peter Anvin wrote:
> On 04/18/2011 07:09 AM, Stefano Stabellini wrote:
> >
> > Sorry, I don't mean to be pushy, it is just that I'll be AFK for 12 days
> > starting from next Friday and I think that this issue should really be
> > fixed in time for the 2.6.39 release, otherwise no 2.6.39 kernels will
> > be able to boot on any xen system.
>
> YOU STILL HAVEN'T PROPOSED ANY SEMANTICS.
>
> The semantics of a hook is a description of what the preconditions are,
> what the postconditions are, and what exactly they are allowed or not
> allowed to do.
>
> This is a real pain to do, *exactly because hooks are a real pain*.
> Most hooks that have been put in have been "oh, just do something at
> point X in the code", which is a case of definition by implementation,
> which is exactly how we ended up with the current mess.
>


x86_init.mapping.pagetable_setup_start(start, max_end)

Notifies the subarch that the kernel early startup intends to
use pages as page table pages.
These pages will have physical addresses in the range
start..max_end.

Can be called multiple times to set up kernel page table pages,
always in a pair with x86_init.mapping.pagetable_setup_done.
It must be called before x86_init.paging.pagetable_setup_start().

Preconditions: no kernel code after the 32-bit kernel entry point
has hooked any new page table pages into the active page table
except between a pair of
x86_init.mapping.pagetable_setup_start and
x86_init.mapping.pagetable_setup_done.

Postcondition: pages in the range start..max_end may be used as
page table pages and hooked into active page tables.
Pages in this range may NOT be used for any other purpose.


x86_init.mapping.pagetable_setup_done(real_end)

Notifies the subarch that the initial memory mapping is complete,
and of the actual range of pages used.

Preconditions:

x86_init.mapping.pagetable_setup_start was previously called; the
arguments to the calls must satisfy start <= real_end <= max_end

The pages in the range start..real_end have been hooked into the
active page table.
The pages in the range real_end..max_end are not currently in use.

Postconditions:

Pages in the range real_end..max_end may be used as normal
memory.

Pages in the range start..real_end might be hooked into the active
page table and cannot be used for other purposes.

New page table pages must be allocated using
pv_mmu_ops.alloc_pte/pmd/pud.
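
A Xen-side sketch satisfying that contract might look like the following
(illustrative only: make_lowmem_page_readonly/readwrite are the existing
helpers in arch/x86/xen/mmu.c, while the hook names and the saved max_end
are assumptions of the sketch; the memblock reservation of start..real_end
is omitted):

static u64 xen_pt_max_end;

static __init void xen_mapping_pt_setup_start(u64 start, u64 max_end)
{
        u64 pa;

        xen_pt_max_end = max_end;
        /* precondition: nothing in start..max_end is hooked in yet, so the
         * whole overestimated buffer can be made RO up front */
        for (pa = start; pa < max_end; pa += PAGE_SIZE)
                make_lowmem_page_readonly(__va(pa));
}

static __init void xen_mapping_pt_setup_done(u64 real_end)
{
        u64 pa;

        /* postcondition: real_end..max_end becomes normal memory again and
         * may be handed back for any other use */
        for (pa = real_end; pa < xen_pt_max_end; pa += PAGE_SIZE)
                make_lowmem_page_readwrite(__va(pa));
}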

Re: [PATCH 2/4] x86, xen: introduce x86_init.mapping.pagetable_reserve
On 04/18/2011 07:42 AM, H. Peter Anvin wrote:
> On 04/18/2011 07:09 AM, Stefano Stabellini wrote:
>> Sorry, I don't mean to be pushy, it is just that I'll be AFK for 12 days
>> starting from next Friday and I think that this issue should really be
>> fixed in time for the 2.6.39 release, otherwise no 2.6.39 kernels will
>> be able to boot on any xen system.
> YOU STILL HAVEN'T PROPOSED ANY SEMANTICS.
>
> The semantics of a hook is a description of what the preconditions are,
> what the postconditions are, and what exactly they are allowed or not
> allowed to do.
>
> This is a real pain to do, *exactly because hooks are a real pain*.
> Most hooks that have been put in has been "oh, just do something at
> point X in the code", which is a case of definition by implementation,
> which is exactly how we ended up with the current mess.

Yeah. The basic problem is that the change to pagetable setup
introduced a regression when running under Xen. The foremost
consideration is to fix that regression.

I think the most pragmatic thing to do at this point - as much as it
pains me to say so - is just to put in an explicit Xen hook which does
the right thing, rather than try to prettify it as a general hook, since
it only has one user anyway.

If a second user comes along, then we can talk about generalizing it.

But it would be nice if we could come up with an initial pagetable
construction algorithm that follows the same rules as normal pagetable
management and uses the normal pvops hooks in the normal way so that we
don't need to special case it at all.

J
