[PATCH 05 of 10] arm: More SMP bringup
# HG changeset patch
# User Tim Deegan <tim@xen.org>
# Date 1330018799 0
# Node ID 1e9c6bd7cc99d1af0107aa927ee2ba03721449b7
# Parent 8f322ab538572e1a12c8ed716ddd5cb4c122e9ed
arm: More SMP bringup

Bring non-boot CPUs up as far as running on the relocated pagetables,
one at a time, before the non-relocated copy of Xen gets reused for
general memory pools.

Don't yet bring them up into C; that will happen later when stacks are
allocated.

Signed-off-by: Tim Deegan <tim@xen.org>

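For orientation, the handshake this patch implements is roughly the following,
sketched here in C. The helper names are invented for illustration only; the
authoritative code is in the head.S, mm.c and setup.c changes below.

    /* Illustrative C sketch of the bring-up handshake; helper names invented. */
    extern unsigned long boot_gate;    /* 1 == locked; secondaries spin on it */
    extern unsigned long ready_cpus;   /* secondaries that have reached paging */

    void secondary_cpu_path(void)
    {
        wait_for_gate(&boot_gate);      /* wfe + ldrex/strex, one CPU at a time */
        enable_paging();                /* switch to boot_httbr stashed by CPU 0 */
        open_gate(&boot_gate);          /* release the next secondary */
        ready_cpus++;                   /* report in (see the thread below) */
        park();                         /* wfe loop until stacks are allocated */
    }

    void boot_cpu_path(void)
    {
        int cpus = gic_init();          /* now returns the number of CPUs attached */
        open_gate(&boot_gate);          /* via the unrelocated copy of the variable */
        while ( ready_cpus + 1 < cpus ) /* wait for every secondary to report */
            smp_rmb();
        /* ...the non-relocated copy of Xen can now be reused... */
    }
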
diff -r 8f322ab53857 -r 1e9c6bd7cc99 xen/arch/arm/gic.c
--- a/xen/arch/arm/gic.c Thu Feb 23 17:39:59 2012 +0000
+++ b/xen/arch/arm/gic.c Thu Feb 23 17:39:59 2012 +0000
@@ -248,7 +248,7 @@ static void __cpuinit gic_hyp_init(void)
}

/* Set up the GIC */
-void gic_init(void)
+int __init gic_init(void)
{
/* XXX FIXME get this from devicetree */
gic.dbase = GIC_BASE_ADDRESS + GIC_DR_OFFSET;
@@ -270,6 +270,8 @@ void gic_init(void)
gic_hyp_init();

spin_unlock(&gic.lock);
+
+ return gic.cpus;
}

void gic_route_irqs(void)
diff -r 8f322ab53857 -r 1e9c6bd7cc99 xen/arch/arm/gic.h
--- a/xen/arch/arm/gic.h Thu Feb 23 17:39:59 2012 +0000
+++ b/xen/arch/arm/gic.h Thu Feb 23 17:39:59 2012 +0000
@@ -138,8 +138,8 @@ extern int gic_route_irq_to_guest(struct

/* Accept an interrupt from the GIC and dispatch its handler */
extern void gic_interrupt(struct cpu_user_regs *regs, int is_fiq);
-/* Bring up the interrupt controller */
-extern void gic_init(void);
+/* Bring up the interrupt controller, and report # cpus attached */
+extern int gic_init(void);
/* setup the gic virtual interface for a guest */
extern void gicv_setup(struct domain *d);
#endif
diff -r 8f322ab53857 -r 1e9c6bd7cc99 xen/arch/arm/head.S
--- a/xen/arch/arm/head.S Thu Feb 23 17:39:59 2012 +0000
+++ b/xen/arch/arm/head.S Thu Feb 23 17:39:59 2012 +0000
@@ -62,22 +62,36 @@ start:
#endif

/* Are we the boot CPU? */
+ mov r12, #0 /* r12 := CPU ID */
mrc CP32(r0, MPIDR)
tst r0, #(1<<31) /* Multiprocessor extension supported? */
beq boot_cpu
tst r0, #(1<<30) /* Uniprocessor system? */
bne boot_cpu
- bics r0, r0, #(0xff << 24) /* Ignore flags */
- beq boot_cpu /* If all other fields are 0, we win */
+ bics r12, r0, #(0xff << 24) /* Mask out flags to get CPU ID */
+ beq boot_cpu /* If we're CPU 0, boot now */

-1: wfi
- b 1b
-
+ /* Non-boot CPUs wait here to be woken up one at a time.
+ * This is basically an open-coded spin-lock to serialize. */
+ ldr r0, =boot_gate /* VA of gate */
+ add r0, r0, r10 /* PA of gate */
+ mov r1, #1 /* (1 == locked) */
+1: wfe
+ ldrex r2, [r0] /* Linked read of current value */
+ teq r2, #0 /* (0 == unlocked) */
+ strexeq r2, r1, [r0] /* Matching update -> locked */
+ teq r2, #0 /* (0 == succeeded) */
+ bne 1b
+
boot_cpu:
#ifdef EARLY_UART_ADDRESS
- /* Say hello */
ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */
- bl init_uart
+ teq r12, #0 /* CPU 0 sets up the UART too */
+ bleq init_uart
+ PRINT("- CPU ")
+ mov r0, r12
+ bl putn
+ PRINT(" booting -\r\n")
#endif

/* Check that this CPU has Hyp mode */
@@ -85,7 +99,6 @@ boot_cpu:
and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */
teq r0, #0x1000 /* Must == 0x1 or may be incompatible */
beq 1f
- bl putn
PRINT("- CPU doesn't support the virtualization extensions -\r\n")
b fail
1:
@@ -185,6 +198,10 @@ hyp:
mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */
mcrr CP64(r4, r5, HTTBR)

+ /* Non-boot CPUs don't need to rebuild the pagetable */
+ teq r12, #0
+ bne pt_ready
+
/* Build the baseline idle pagetable's first-level entries */
ldr r1, =xen_second
add r1, r1, r10 /* r1 := paddr (xen_second) */
@@ -226,6 +243,7 @@ hyp:
add r4, r4, #8
strd r2, r3, [r1, r4] /* Map it in the early boot slot */

+pt_ready:
PRINT("- Turning on paging -\r\n")

ldr r1, =paging /* Explicit vaddr, not RIP-relative */
@@ -238,7 +256,7 @@ hyp:
paging:

#ifdef EARLY_UART_ADDRESS
- /* Recover the UART address in the new address space */
+ /* Recover the UART address in the new address space. */
lsl r11, #11
lsr r11, #11 /* UART base's offset from 2MB base */
adr r0, start
@@ -246,14 +264,57 @@ paging:
add r11, r11, r0 /* r11 := vaddr (UART base address) */
#endif

- PRINT("- Entering C -\r\n")
+ PRINT("- Ready -\r\n")

+ /* The boot CPU should go straight into C now */
+ teq r12, #0
+ beq launch
+
+ /* Signal the next non-boot CPU to come and join us here */
+ ldr r0, =boot_gate /* VA of gate */
+ add r0, r0, r10 /* PA of gate */
+ mov r1, #0 /* (0 == unlocked) */
+ str r1, [r0]
+ dsb
+ isb
+ sev
+
+ /* Move on to the relocated pagetables */
+ mov r0, #0
+ ldr r4, =boot_httbr /* VA of HTTBR value stashed by CPU 0 */
+ add r4, r4, r10 /* PA of it */
+ ldrd r4, r5, [r4] /* Actual value */
+ mcrr CP64(r4, r5, HTTBR)
+ mcr CP32(r0, TLBIALLH) /* Flush hypervisor TLB */
+ mcr CP32(r0, BPIALL) /* Flush branch predictor */
+ dsb /* Ensure completion of TLB+BP flush */
+ isb
+ /* Now, the UART is in its proper fixmap address */
+ ldrne r11, =FIXMAP_ADDR(FIXMAP_CONSOLE)
+
+ /* Non-boot CPUs report that they've got this far */
+ ldr r0, =ready_cpus
+ ldr r1, [r0] /* Read count of ready CPUs */
+ add r1, r1, #1 /* ++ */
+ str r1, [r0] /* Writeback */
+ dsb
+
+ /* Here, the non-boot CPUs must wait again -- they're now running on
+ * the boot CPU's pagetables so it's safe for the boot CPU to
+ * overwrite the non-relocated copy of Xen. Once it's done that,
+ * and brought up the memory allocator, non-boot CPUs can get their
+ * own stacks and enter C. */
+1: wfe
+ b 1b
+
+launch:
ldr sp, =init_stack /* Supply a stack */
add sp, #STACK_SIZE /* (which grows down from the top). */
sub sp, #CPUINFO_sizeof /* Make room for CPU save record */
mov r0, r10 /* Marshal args: - phys_offset */
mov r1, r7 /* - machine type */
mov r2, r8 /* - ATAG address */
+ mov r3, r12 /* - CPU ID */
b start_xen /* and disappear into the land of C */

/* Fail-stop
@@ -288,7 +349,7 @@ puts:
tst r2, #0x8 /* Check BUSY bit */
bne puts /* Wait for the UART to be ready */
ldrb r2, [r0], #1 /* Load next char */
- teq r2, #0 /* Exit on nul*/
+ teq r2, #0 /* Exit on nul */
moveq pc, lr
str r2, [r11] /* -> UARTDR (Data Register) */
b puts
@@ -308,10 +369,8 @@ 1: ldr r2, [r11, #0x18] /* <- UA
lsl r0, #4 /* Roll it through one nybble at a time */
subs r3, r3, #1
bne 1b
- adr r0, crlf /* Finish with a newline */
- b puts
+ mov pc, lr

-crlf: .asciz "\r\n"
hex: .ascii "0123456789abcdef"
.align 2

diff -r 8f322ab53857 -r 1e9c6bd7cc99 xen/arch/arm/mm.c
--- a/xen/arch/arm/mm.c Thu Feb 23 17:39:59 2012 +0000
+++ b/xen/arch/arm/mm.c Thu Feb 23 17:39:59 2012 +0000
@@ -36,6 +36,9 @@ lpae_t xen_second[LPAE_ENTRIES*4] __attr
static lpae_t xen_fixmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
static lpae_t xen_xenmap[LPAE_ENTRIES] __attribute__((__aligned__(4096)));

+/* Non-boot CPUs use this to find the correct pagetables. */
+uint64_t boot_httbr;
+
/* Limits of the Xen heap */
unsigned long xenheap_mfn_start, xenheap_mfn_end;
unsigned long xenheap_virt_end;
@@ -156,14 +159,6 @@ void __init setup_pagetables(unsigned lo
lpae_t pte, *p;
int i;

- if ( boot_phys_offset != 0 )
- {
- /* Remove the old identity mapping of the boot paddr */
- pte.bits = 0;
- dest_va = (unsigned long)_start + boot_phys_offset;
- write_pte(xen_second + second_linear_offset(dest_va), pte);
- }
-
xen_paddr = device_tree_get_xen_paddr();

/* Map the destination in the boot misc area. */
@@ -186,11 +181,19 @@ void __init setup_pagetables(unsigned lo
for ( i = 0; i < 4; i++)
p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;
p = (void *) xen_second + dest_va - (unsigned long) _start;
+ if ( boot_phys_offset != 0 )
+ {
+ /* Remove the old identity mapping of the boot paddr */
+ pte.bits = 0;
+ dest_va = (unsigned long)_start + boot_phys_offset;
+ p[second_linear_offset(dest_va)] = pte;
+ }
for ( i = 0; i < 4 * LPAE_ENTRIES; i++)
if ( p[i].pt.valid )
p[i].pt.base += (phys_offset - boot_phys_offset) >> PAGE_SHIFT;

/* Change pagetables to the copy in the relocated Xen */
+ boot_httbr = (unsigned long) xen_pgtable + phys_offset;
asm volatile (
STORE_CP64(0, HTTBR) /* Change translation base */
"dsb;" /* Ensure visibility of HTTBR update */
@@ -198,7 +201,7 @@ void __init setup_pagetables(unsigned lo
STORE_CP32(0, BPIALL) /* Flush branch predictor */
"dsb;" /* Ensure completion of TLB+BP flush */
"isb;"
- : : "r" ((unsigned long) xen_pgtable + phys_offset) : "memory");
+ : : "r" (boot_httbr) : "memory");

/* Undo the temporary map */
pte.bits = 0;
diff -r 8f322ab53857 -r 1e9c6bd7cc99 xen/arch/arm/setup.c
--- a/xen/arch/arm/setup.c Thu Feb 23 17:39:59 2012 +0000
+++ b/xen/arch/arm/setup.c Thu Feb 23 17:39:59 2012 +0000
@@ -44,7 +44,12 @@ static unsigned int __initdata max_cpus
/* Xen stack for bringing up the first CPU. */
unsigned char __initdata init_stack[STACK_SIZE] __attribute__((__aligned__(STACK_SIZE)));

-extern char __init_begin[], __init_end[], __bss_start[];
+extern const char __init_begin[], __init_end[], __bss_start[];
+
+/* Spinlock for serializing CPU bringup */
+unsigned long __initdata boot_gate = 1;
+/* Number of non-boot CPUs ready to enter C */
+unsigned long __initdata ready_cpus = 0;

static __attribute_used__ void init_done(void)
{
@@ -151,14 +156,17 @@ static void __init setup_mm(unsigned lon
end_boot_allocator();
}

+/* C entry point for boot CPU */
void __init start_xen(unsigned long boot_phys_offset,
unsigned long arm_type,
- unsigned long atag_paddr)
-
+ unsigned long atag_paddr,
+ unsigned long cpuid)
{
void *fdt;
size_t fdt_size;
- int i;
+ int cpus, i;
+ paddr_t gate_pa;
+ unsigned long *gate;

fdt = (void *)BOOT_MISC_VIRT_START
+ (atag_paddr & ((1 << SECOND_SHIFT) - 1));
@@ -174,6 +182,22 @@ void __init start_xen(unsigned long boot
console_init_preirq();
#endif

+ cpus = gic_init();
+
+ printk("Waiting for %i other CPUs to be ready\n", cpus - 1);
+ /* Bring the other CPUs up to paging before the original
+ * copy of .text gets overwritten. We need to use the unrelocated
+ * copy of boot_gate as that's the one the others can see. */
+ gate_pa = ((unsigned long) &boot_gate) + boot_phys_offset;
+ gate = map_domain_page(gate_pa >> PAGE_SHIFT) + (gate_pa & ~PAGE_MASK);
+ *gate = 0;
+ unmap_domain_page(gate);
+ /* Now send an event to wake the first non-boot CPU */
+ asm volatile("dsb; isb; sev");
+ /* And wait for them all to be ready. */
+ while ( ready_cpus + 1 < cpus )
+ smp_rmb();
+
__set_current((struct vcpu *)0xfffff000); /* debug sanity */
idle_vcpu[0] = current;
set_processor_id(0); /* needed early, for smp_processor_id() */
@@ -208,8 +232,6 @@ void __init start_xen(unsigned long boot

init_IRQ();

- gic_init();
-
gic_route_irqs();

init_maintenance_interrupt();

Re: [PATCH 05 of 10] arm: More SMP bringup
On Thu, 2012-02-23 at 17:40 +0000, Tim Deegan wrote:
[...]
> + /* Signal the next non-boot CPU to come and join us here */
> + ldr r0, =boot_gate /* VA of gate */
> + add r0, r0, r10 /* PA of gate */
> + mov r1, #0 /* (0 == unlocked) */
> + str r1, [r0]
> + dsb
> + isb
> + sev

Here we have released the next CPU from the holding pen...

[...]
> + /* Non-boot CPUs report that they've got this far */
> + ldr r0, =ready_cpus
> + ldr r1, [r0] /* Read count of ready CPUs */
> + add r1, r1, #1 /* ++ */
> + str r1, [r0] /* Writeback */
> + dsb

... and here we do a non-atomic update of a shared variable.

What prevents the following CPU from catching us up and conflicting
here?

Would we be better signalling the next CPU after the increment instead?

Ian.
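
(One way to make the update safe regardless of ordering would be an
exclusive-monitor increment, along the lines of the untested sketch below;
this is not what the patch does, and the simpler alternative is the
reordering suggested above, which keeps only one CPU past the gate at a time.)

    /* Hypothetical atomic increment using ARMv7 exclusive accesses. */
    static inline void atomic_inc(unsigned long *counter)
    {
        unsigned long newval, failed;

        asm volatile(
            "1: ldrex   %0, [%2]        \n" /* linked load of the counter */
            "   add     %0, %0, #1      \n" /* increment */
            "   strex   %1, %0, [%2]    \n" /* try to store; %1 == 0 on success */
            "   teq     %1, #0          \n"
            "   bne     1b              \n" /* another CPU got in first: retry */
            : "=&r" (newval), "=&r" (failed)
            : "r" (counter)
            : "memory", "cc");
    }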


Re: [PATCH 05 of 10] arm: More SMP bringup
At 19:16 +0000 on 23 Feb (1330024560), Ian Campbell wrote:
> On Thu, 2012-02-23 at 17:40 +0000, Tim Deegan wrote:
> [...]
> > + /* Signal the next non-boot CPU to come and join us here */
> > + ldr r0, =boot_gate /* VA of gate */
> > + add r0, r0, r10 /* PA of gate */
> > + mov r1, #0 /* (0 == unlocked) */
> > + str r1, [r0]
> > + dsb
> > + isb
> > + sev
>
> Here we have released the next CPU from the holding pen...
>
> [...]
> > + /* Non-boot CPUs report that they've got this far */
> > + ldr r0, =ready_cpus
> > + ldr r1, [r0] /* Read count of ready CPUs */
> > + add r1, r1, #1 /* ++ */
> > + str r1, [r0] /* Writeback */
> > + dsb
>
> ... and here we do a non-atomic update of a shared variable.
>
> What prevents the following CPU from catching us up and conflicting
> here?
>
> Would we be better signalling the next CPU after the increment instead?

Yes, we would. I'll fix that.

Tim.
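
(For reference, the fix agreed here amounts to keeping the CPUs serialized
through the ready_cpus update by only opening the gate afterwards, roughly as
in the pseudocode below; the helper names are invented and mirror the head.S
flow rather than quoting it.)

    /* Sketch of the reordered non-boot CPU path; helper names are invented. */
    void secondary_cpu_path_fixed(void)
    {
        wait_for_gate(&boot_gate);      /* still one CPU at a time */
        enable_paging();                /* switch to the relocated pagetables */

        ready_cpus++;                   /* safe: no other CPU is past the gate yet */
        dsb();                          /* make the new count visible... */

        open_gate(&boot_gate);          /* ...before the next CPU is released */
        dsb(); isb(); sev();

        park();                         /* wfe loop until stacks are allocated */
    }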
