Mailing List Archive

[PATCH 2/5] x86/pvh: Make PVH entrypoint PIC for x86-64
The PVH entrypoint is 32bit non-PIC code running the uncompressed
vmlinux at its load address CONFIG_PHYSICAL_START - default 0x1000000
(16MB). The kernel is loaded at that physical address inside the VM by
the VMM software (Xen/QEMU).

When running a Xen PVH Dom0, the host reserved addresses are mapped 1-1
into the PVH container. There exist system firmwares (Coreboot/EDK2)
with reserved memory at 16MB. This creates a conflict where the PVH
kernel cannot be loaded at that address.

Modify the PVH entrypoint to be position-independent to allow flexibility
in load address. Only the 64bit entry path is converted. A 32bit
kernel is not PIC, so calling into other parts of the kernel, like
xen_prepare_pvh() and mk_pgtable_32(), doesn't work properly when
relocated.

This makes the code PIC, but the page tables need to be updated as well
to handle running from the kernel high map.

The UNWIND_HINT_END_OF_STACK is to silence:
vmlinux.o: warning: objtool: pvh_start_xen+0x7f: unreachable instruction
after the lret into 64bit code.

Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
---
---
arch/x86/platform/pvh/head.S | 44 ++++++++++++++++++++++++++++--------
1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index f7235ef87bc3..bb1e582e32b1 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -7,6 +7,7 @@
.code32
.text
#define _pa(x) ((x) - __START_KERNEL_map)
+#define rva(x) ((x) - pvh_start_xen)

#include <linux/elfnote.h>
#include <linux/init.h>
@@ -54,7 +55,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
UNWIND_HINT_END_OF_STACK
cld

- lgdt (_pa(gdt))
+ /*
+ * See the comment for startup_32 for more details. We need to
+ * execute a call to get the execution address to be position
+ * independent, but we don't have a stack. Save and restore the
+ * magic field of start_info in ebx, and use that as the stack.
+ */
+ mov (%ebx), %eax
+ leal 4(%ebx), %esp
+ ANNOTATE_INTRA_FUNCTION_CALL
+ call 1f
+1: popl %ebp
+ mov %eax, (%ebx)
+ subl $rva(1b), %ebp
+ movl $0, %esp
+
+ leal rva(gdt)(%ebp), %eax
+ leal rva(gdt_start)(%ebp), %ecx
+ movl %ecx, 2(%eax)
+ lgdt (%eax)

mov $PVH_DS_SEL,%eax
mov %eax,%ds
@@ -62,14 +81,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
mov %eax,%ss

/* Stash hvm_start_info. */
- mov $_pa(pvh_start_info), %edi
+ leal rva(pvh_start_info)(%ebp), %edi
mov %ebx, %esi
- mov _pa(pvh_start_info_sz), %ecx
+ movl rva(pvh_start_info_sz)(%ebp), %ecx
shr $2,%ecx
rep
movsl

- mov $_pa(early_stack_end), %esp
+ leal rva(early_stack_end)(%ebp), %esp

/* Enable PAE mode. */
mov %cr4, %eax
@@ -84,28 +103,33 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
wrmsr

/* Enable pre-constructed page tables. */
- mov $_pa(init_top_pgt), %eax
+ leal rva(init_top_pgt)(%ebp), %eax
mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0

/* Jump to 64-bit mode. */
- ljmp $PVH_CS_SEL, $_pa(1f)
+ pushl $PVH_CS_SEL
+ leal rva(1f)(%ebp), %eax
+ pushl %eax
+ lretl

/* 64-bit entry point. */
.code64
1:
+ UNWIND_HINT_END_OF_STACK
+
/* Set base address in stack canary descriptor. */
mov $MSR_GS_BASE,%ecx
- mov $_pa(canary), %eax
+ leal rva(canary)(%ebp), %eax
xor %edx, %edx
wrmsr

call xen_prepare_pvh

/* startup_64 expects boot_params in %rsi. */
- mov $_pa(pvh_bootparams), %rsi
- mov $_pa(startup_64), %rax
+ lea rva(pvh_bootparams)(%ebp), %rsi
+ lea rva(startup_64)(%ebp), %rax
ANNOTATE_RETPOLINE_SAFE
jmp *%rax

@@ -143,7 +167,7 @@ SYM_CODE_END(pvh_start_xen)
.balign 8
SYM_DATA_START_LOCAL(gdt)
.word gdt_end - gdt_start
- .long _pa(gdt_start)
+ .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
.word 0
SYM_DATA_END(gdt)
SYM_DATA_START_LOCAL(gdt_start)
--
2.44.0
Re: [PATCH 2/5] x86/pvh: Make PVH entrypoint PIC for x86-64 [ In reply to ]
On Wed, Apr 10, 2024 at 3:50 PM Jason Andryuk <jason.andryuk@amd.com> wrote:
>
> The PVH entrypoint is 32bit non-PIC code running the uncompressed
> vmlinux at its load address CONFIG_PHYSICAL_START - default 0x1000000
> (16MB). The kernel is loaded at that physical address inside the VM by
> the VMM software (Xen/QEMU).
>
> When running a Xen PVH Dom0, the host reserved addresses are mapped 1-1
> into the PVH container. There exist system firmwares (Coreboot/EDK2)
> with reserved memory at 16MB. This creates a conflict where the PVH
> kernel cannot be loaded at that address.
>
> Modify the PVH entrypoint to be position-independent to allow flexibility
> in load address. Only the 64bit entry path is converted. A 32bit
> kernel is not PIC, so calling into other parts of the kernel, like
> xen_prepare_pvh() and mk_pgtable_32(), doesn't work properly when
> relocated.
>
> This makes the code PIC, but the page tables need to be updated as well
> to handle running from the kernel high map.
>
> The UNWIND_HINT_END_OF_STACK is to silence:
> vmlinux.o: warning: objtool: pvh_start_xen+0x7f: unreachable instruction
> after the lret into 64bit code.
>
> Signed-off-by: Jason Andryuk <jason.andryuk@amd.com>
> ---
> ---
> arch/x86/platform/pvh/head.S | 44 ++++++++++++++++++++++++++++--------
> 1 file changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
> index f7235ef87bc3..bb1e582e32b1 100644
> --- a/arch/x86/platform/pvh/head.S
> +++ b/arch/x86/platform/pvh/head.S
> @@ -7,6 +7,7 @@
> .code32
> .text
> #define _pa(x) ((x) - __START_KERNEL_map)
> +#define rva(x) ((x) - pvh_start_xen)
>
> #include <linux/elfnote.h>
> #include <linux/init.h>
> @@ -54,7 +55,25 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> UNWIND_HINT_END_OF_STACK
> cld
>
> - lgdt (_pa(gdt))
> + /*
> + * See the comment for startup_32 for more details. We need to
> + * execute a call to get the execution address to be position
> + * independent, but we don't have a stack. Save and restore the
> + * magic field of start_info in ebx, and use that as the stack.
> + */
> + mov (%ebx), %eax
> + leal 4(%ebx), %esp
> + ANNOTATE_INTRA_FUNCTION_CALL
> + call 1f
> +1: popl %ebp
> + mov %eax, (%ebx)
> + subl $rva(1b), %ebp
> + movl $0, %esp
> +
> + leal rva(gdt)(%ebp), %eax
> + leal rva(gdt_start)(%ebp), %ecx
> + movl %ecx, 2(%eax)
> + lgdt (%eax)
>
> mov $PVH_DS_SEL,%eax
> mov %eax,%ds
> @@ -62,14 +81,14 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> mov %eax,%ss
>
> /* Stash hvm_start_info. */
> - mov $_pa(pvh_start_info), %edi
> + leal rva(pvh_start_info)(%ebp), %edi
> mov %ebx, %esi
> - mov _pa(pvh_start_info_sz), %ecx
> + movl rva(pvh_start_info_sz)(%ebp), %ecx
> shr $2,%ecx
> rep
> movsl
>
> - mov $_pa(early_stack_end), %esp
> + leal rva(early_stack_end)(%ebp), %esp
>
> /* Enable PAE mode. */
> mov %cr4, %eax
> @@ -84,28 +103,33 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
> wrmsr
>
> /* Enable pre-constructed page tables. */
> - mov $_pa(init_top_pgt), %eax
> + leal rva(init_top_pgt)(%ebp), %eax
> mov %eax, %cr3
> mov $(X86_CR0_PG | X86_CR0_PE), %eax
> mov %eax, %cr0
>
> /* Jump to 64-bit mode. */
> - ljmp $PVH_CS_SEL, $_pa(1f)
> + pushl $PVH_CS_SEL
> + leal rva(1f)(%ebp), %eax
> + pushl %eax
> + lretl
>
> /* 64-bit entry point. */
> .code64
> 1:
> + UNWIND_HINT_END_OF_STACK
> +
> /* Set base address in stack canary descriptor. */
> mov $MSR_GS_BASE,%ecx
> - mov $_pa(canary), %eax
> + leal rva(canary)(%ebp), %eax

Since this is in 64-bit mode, RIP-relative addressing can be used.

> xor %edx, %edx
> wrmsr
>
> call xen_prepare_pvh
>
> /* startup_64 expects boot_params in %rsi. */
> - mov $_pa(pvh_bootparams), %rsi
> - mov $_pa(startup_64), %rax
> + lea rva(pvh_bootparams)(%ebp), %rsi
> + lea rva(startup_64)(%ebp), %rax

RIP-relative here too.

> ANNOTATE_RETPOLINE_SAFE
> jmp *%rax
>
> @@ -143,7 +167,7 @@ SYM_CODE_END(pvh_start_xen)
> .balign 8
> SYM_DATA_START_LOCAL(gdt)
> .word gdt_end - gdt_start
> - .long _pa(gdt_start)
> + .long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
> .word 0
> SYM_DATA_END(gdt)
> SYM_DATA_START_LOCAL(gdt_start)
> --
> 2.44.0
>
>

Brian Gerst
Re: [PATCH 2/5] x86/pvh: Make PVH entrypoint PIC for x86-64 [ In reply to ]
On 2024-04-10 17:00, Brian Gerst wrote:
> On Wed, Apr 10, 2024 at 3:50 PM Jason Andryuk <jason.andryuk@amd.com> wrote:

>> /* 64-bit entry point. */
>> .code64
>> 1:
>> + UNWIND_HINT_END_OF_STACK
>> +
>> /* Set base address in stack canary descriptor. */
>> mov $MSR_GS_BASE,%ecx
>> - mov $_pa(canary), %eax
>> + leal rva(canary)(%ebp), %eax
>
> Since this is in 64-bit mode, RIP-relative addressing can be used.
>
>> xor %edx, %edx
>> wrmsr
>>
>> call xen_prepare_pvh
>>
>> /* startup_64 expects boot_params in %rsi. */
>> - mov $_pa(pvh_bootparams), %rsi
>> - mov $_pa(startup_64), %rax
>> + lea rva(pvh_bootparams)(%ebp), %rsi
>> + lea rva(startup_64)(%ebp), %rax
>
> RIP-relative here too.

Yes, thanks for catching that. With the RIP-relative conversion, there
is now:
vmlinux.o: warning: objtool: pvh_start_xen+0x10d: relocation to !ENDBR:
startup_64+0x0

I guess RIP-relative made it visible. That can be quieted by adding
ANNOTATE_NOENDBR to startup_64.

Thanks,
Jason
Re: [PATCH 2/5] x86/pvh: Make PVH entrypoint PIC for x86-64 [ In reply to ]
On Thu, Apr 11, 2024 at 11:26 AM Jason Andryuk <jason.andryuk@amd.com> wrote:
>
> On 2024-04-10 17:00, Brian Gerst wrote:
> > On Wed, Apr 10, 2024 at 3:50 PM Jason Andryuk <jason.andryuk@amd.com> wrote:
>
> >> /* 64-bit entry point. */
> >> .code64
> >> 1:
> >> + UNWIND_HINT_END_OF_STACK
> >> +
> >> /* Set base address in stack canary descriptor. */
> >> mov $MSR_GS_BASE,%ecx
> >> - mov $_pa(canary), %eax
> >> + leal rva(canary)(%ebp), %eax
> >
> > Since this is in 64-bit mode, RIP-relative addressing can be used.
> >
> >> xor %edx, %edx
> >> wrmsr
> >>
> >> call xen_prepare_pvh
> >>
> >> /* startup_64 expects boot_params in %rsi. */
> >> - mov $_pa(pvh_bootparams), %rsi
> >> - mov $_pa(startup_64), %rax
> >> + lea rva(pvh_bootparams)(%ebp), %rsi
> >> + lea rva(startup_64)(%ebp), %rax
> >
> > RIP-relative here too.
>
> Yes, thanks for catching that. With the RIP-relative conversion, there
> is now:
> vmlinux.o: warning: objtool: pvh_start_xen+0x10d: relocation to !ENDBR:
> startup_64+0x0
>
> I guess RIP-relative made it visible. That can be quieted by adding
> ANNOTATE_NOENDBR to startup_64.

Change it to a direct jump, since branches are always RIP-relative.

Brian Gerst