Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[src/trunk]: src/sys/arch/arm/cortex Improve MP startup code. We now use a t...
details: https://anonhg.NetBSD.org/src/rev/819a41768f19
branches: trunk
changeset: 328298:819a41768f19
user: matt <matt%NetBSD.org@localhost>
date: Sun Mar 30 15:20:53 2014 +0000
description:
Improve MP startup code. We now use a two-stage startup: after creating
the initial L1PT and turning on the MMU/caches, we spin up the secondary CPUs,
waiting for them to reach the same state as the boot processor. Once the
real L1PT is initialized and in use, the secondary CPUs are kicked so they can
use it (and the initial L1PT is discarded). Finally, we wait until NetBSD
kicks the secondary CPUs, then load the stack from the idlelwp, hatch the
CPU, and jump to idle_loop.
diffstat:
sys/arch/arm/cortex/a9_mpsubr.S | 757 ++++++++++++++++++++++++---------------
1 files changed, 458 insertions(+), 299 deletions(-)
diffs (truncated from 950 to 300 lines):
diff -r ad3b45b85353 -r 819a41768f19 sys/arch/arm/cortex/a9_mpsubr.S
--- a/sys/arch/arm/cortex/a9_mpsubr.S Sun Mar 30 13:14:40 2014 +0000
+++ b/sys/arch/arm/cortex/a9_mpsubr.S Sun Mar 30 15:20:53 2014 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: a9_mpsubr.S,v 1.13 2014/02/21 22:22:48 matt Exp $ */
+/* $NetBSD: a9_mpsubr.S,v 1.14 2014/03/30 15:20:54 matt Exp $ */
/*-
* Copyright (c) 2012 The NetBSD Foundation, Inc.
* All rights reserved.
@@ -37,40 +37,48 @@
#include <arm/cortex/scu_reg.h>
#include "assym.h"
+//#define MPDEBUG
-/* We'll modify va and pa at run time so we can use relocatable addresses. */
+// We'll modify va and pa at run time so we can use relocatable addresses.
#define MMU_INIT(va,pa,n_sec,attr) \
- .word va ; \
- .word pa ; \
- .word n_sec ; \
- .word attr ;
+ .word (va)|(n_sec) ; \
+ .word (pa)|(attr) ; \
-/*
- * Set up a preliminary mapping in the MMU to allow us to run
- * at KERNEL_BASE with caches on.
- */
+// Set up a preliminary mapping in the MMU to allow us to run at KERNEL_BASE
+// with caches on. If we are MULTIPROCESSOR, save the TTB address.
+//
arm_boot_l1pt_init:
- mov ip, r1 @ save mmu table addr
- /* Build page table from scratch */
- mov r1, r0 /* Start address to clear memory. */
- /* Zero the entire table so all virtual addresses are invalid. */
- mov r2, #L1_TABLE_SIZE /* in bytes */
- mov r3, #0
- mov r4, r3
- mov r5, r3
- mov r6, r3
- mov r7, r3
- mov r8, r3
- mov r10, r3
- mov r11, r3
-1: stmia r1!, {r3-r8,r10-r11}
- stmia r1!, {r3-r8,r10-r11}
- stmia r1!, {r3-r8,r10-r11}
- stmia r1!, {r3-r8,r10-r11}
- subs r2, r2, #(4 * 4 * 8) /* bytes per loop */
- bne 1b
+#if defined(MULTIPROCESSOR)
+#if defined(KERNEL_BASES_EQUAL)
+ movw r3, #:lower16:cortex_mmuinfo
+ movt r3, #:upper16:cortex_mmuinfo
+#else
+ adr r3, arm_boot_l1pt_init
+ movw r2, #:lower16:cortex_mmuinfo
+ movt r2, #:upper16:cortex_mmuinfo
+ bfi r3, r2, #0, #28
+#endif
+ str r0, [r3]
- /* Now create our entries per the mmu_init_table. */
+ // Make sure the info makes into memory
+ mcr p15, 0, r3, c7, c10, 1 // writeback the cache line
+ dsb
+#endif
+
+ mov ip, r1 // save mmu table addr
+ // Build page table from scratch
+ mov r1, r0 // Start address to clear memory.
+ // Zero the entire table so all virtual addresses are invalid.
+ add r2, r1, #L1_TABLE_SIZE // Ending address
+ mov r4, #0
+ mov r5, #0
+ mov r6, #0
+ mov r7, #0
+1: stmia r1!, {r4-r7} // 16 bytes at a time
+ cmp r1, r2
+ blt 1b
+
+ // Now create our entries per the mmu_init_table.
l1table .req r0
va .req r1
pa .req r2
@@ -78,7 +86,11 @@
attr .req r4
itable .req r5
- mov itable, ip @ reclaim table address
+ mov attr, #0
+ mrc p15, 0, r3, c0, c0, 5 // MPIDR read
+ cmp r3, #0 // not zero?
+ movne attr, #L1_S_V6_S // yes, shareable attribute
+ mov itable, ip // reclaim table address
b 3f
2: str pa, [l1table, va, lsl #2]
@@ -87,20 +99,18 @@
subs n_sec, n_sec, #1
bhi 2b
-3: ldmia itable!, {va,pa,n_sec,attr}
- /* Convert va to l1 offset: va = 4 * (va >> L1_S_SHIFT) */
+3: ldmia itable!, {va, pa}
+ // Convert va to l1 offset: va = 4 * (va >> L1_S_SHIFT)
+ ubfx n_sec, va, #0, #L1_S_SHIFT
lsr va, va, #L1_S_SHIFT
- /* Convert pa to l1 entry: pa = (pa & L1_S_FRAME) | attr */
-#ifdef _ARM_ARCH_7
- bfc pa, #0, #L1_S_SHIFT
-#else
- lsr pa, pa, #L1_S_SHIFT
- lsl pa, pa, #L1_S_SHIFT
-#endif
- orr pa, pa, attr
- cmp n_sec, #0
+
+ // Do we need add sharing for this?
+ tst pa, #(L1_S_C|L1_S_B) // is this entry cacheable?
+ orrne pa, pa, attr // add sharing
+
+4: cmp n_sec, #0
bne 2b
- bx lr @ return
+ bx lr // return
.unreq va
.unreq pa
@@ -109,6 +119,9 @@
.unreq itable
.unreq l1table
+//
+// Coprocessor register initialization values
+//
#if defined(CPU_CORTEXA8)
#undef CPU_CONTROL_SWP_ENABLE // not present on A8
#define CPU_CONTROL_SWP_ENABLE 0
@@ -126,6 +139,8 @@
#define CPU_CONTROL_AFLT_ENABLE_SET CPU_CONTROL_AFLT_ENABLE
#endif
+// bits to set in the Control Register
+//
#define CPU_CONTROL_SET \
(CPU_CONTROL_MMU_ENABLE | \
CPU_CONTROL_AFLT_ENABLE_SET | \
@@ -136,124 +151,120 @@
CPU_CONTROL_EX_BEND_SET | \
CPU_CONTROL_UNAL_ENABLE)
+// bits to clear in the Control Register
+//
#define CPU_CONTROL_CLR \
(CPU_CONTROL_AFLT_ENABLE_CLR)
arm_cpuinit:
- /*
- * In theory, because the MMU is off, we shouldn't need all of this,
- * but let's not take any chances and do a typical sequence to set
- * the Translation Table Base.
- */
+ // Because the MMU may already be on do a typical sequence to set
+ // the Translation Table Base(s).
mov ip, lr
- mov r10, r0
+ mov r10, r0 // save TTBR
mov r1, #0
mcr p15, 0, r1, c7, c5, 0 // invalidate I cache
- mrc p15, 0, r2, c1, c0, 0 // read SCTRL
+ mrc p15, 0, r2, c1, c0, 0 // SCTRL read
movw r1, #(CPU_CONTROL_DC_ENABLE|CPU_CONTROL_IC_ENABLE)
bic r2, r2, r1 // clear I+D cache enable
#ifdef __ARMEB__
- /*
- * SCTRL.EE determines the endianness of translation table lookups.
- * So we need to make sure it's set before starting to use the new
- * translation tables (which are big endian).
- */
+ // SCTRL.EE determines the endianness of translation table lookups.
+ // So we need to make sure it's set before starting to use the new
+ // translation tables (which are big endian).
+ //
orr r2, r2, #CPU_CONTROL_EX_BEND
bic r2, r2, #CPU_CONTROL_MMU_ENABLE
- pli [pc, #32] /* preload the next few cachelines */
+ pli [pc, #32] // preload the next few cachelines
pli [pc, #64]
pli [pc, #96]
pli [pc, #128]
#endif
- mcr p15, 0, r2, c1, c0, 0 /* write SCTRL */
+ mcr p15, 0, r2, c1, c0, 0 // SCTRL write
XPUTC(#70)
- dsb /* Drain the write buffers. */
+ dsb // Drain the write buffers.
1:
XPUTC(#71)
- mrc p15, 0, r1, c0, c0, 5 /* get MPIDR */
+ mrc p15, 0, r1, c0, c0, 5 // MPIDR read
cmp r1, #0
- orrlt r10, r10, #0x5b /* MP, cachable (Normal WB) */
- orrge r10, r10, #0x1b /* Non-MP, cacheable, normal WB */
- mcr p15, 0, r10, c2, c0, 0 /* Set Translation Table Base */
+ orrlt r10, r10, #0x5b // MP, cachable (Normal WB)
+ orrge r10, r10, #0x1b // Non-MP, cacheable, normal WB
+ XPUTC(#48)
+ mcr p15, 0, r10, c2, c0, 0 // TTBR0 write
+#if defined(ARM_MMU_EXTENDED)
+ // When using split TTBRs, we need to set both since the physical
+ // addresses we were/are using might be in either.
+ XPUTC(#49)
+ mcr p15, 0, r10, c2, c0, 1 // TTBR1 write
+#endif
XPUTC(#72)
- mov r1, #0
- mcr p15, 0, r1, c2, c0, 2 /* Set Translation Table Control */
+#if defined(ARM_MMU_EXTENDED)
+ XPUTC(#49)
+ mov r1, #TTBCR_S_N_1 // make sure TTBCR_S_N is 1
+#else
+ XPUTC(#48)
+ mov r1, #0 // make sure TTBCR is 0
+#endif
+ mcr p15, 0, r1, c2, c0, 2 // TTBCR write
XPUTC(#73)
mov r1, #0
- mcr p15, 0, r1, c8, c7, 0 /* Invalidate TLBs */
+ mcr p15, 0, r1, c8, c7, 0 // TLBIALL (just this core)
- /* Set the Domain Access register. Very important! */
XPUTC(#74)
+ mov r1, #0 // get KERNEL_PID
+ mcr p15, 0, r1, c13, c0, 1 // CONTEXTIDR write
+
+ // Set the Domain Access register. Very important!
+ XPUTC(#75)
mov r1, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
- mcr p15, 0, r1, c3, c0, 0
+ mcr p15, 0, r1, c3, c0, 0 // DACR write
- /*
- * Enable the MMU, etc.
- */
- XPUTC(#75)
- mrc p15, 0, r0, c1, c0, 0
+ //
+ // Enable the MMU, etc.
+ //
+ XPUTC(#76)
+ mrc p15, 0, r1, c1, c0, 0 // SCTRL read
movw r3, #:lower16:CPU_CONTROL_SET
#if (CPU_CONTROL_SET & 0xffff0000)
movt r3, #:upper16:CPU_CONTROL_SET
#endif
- orr r0, r0, r3
+ orr r0, r1, r3
#if defined(CPU_CONTROL_CLR) && (CPU_CONTROL_CLR != 0)
bic r0, r0, #CPU_CONTROL_CLR
#endif
+ //cmp r0, r1 // any changes to SCTRL?
+ //bxeq ip // no, then return.
+
pli 1f
-
dsb
- @ turn mmu on!
- mov r0, r0 /* fetch instruction cacheline */
-1: mcr p15, 0, r0, c1, c0, 0
- /*
- * Ensure that the coprocessor has finished turning on the MMU.
- */
- mrc p15, 0, r0, c0, c0, 0 /* Read an arbitrary value. */
- mov r0, r0 /* Stall until read completes. */
-1: XPUTC(#76)
+ // turn mmu on!
+ //
+ mov r0, r0 // fetch instruction cacheline
+1: mcr p15, 0, r0, c1, c0, 0 // SCTRL write
- bx ip /* return */
+ // Ensure that the coprocessor has finished turning on the MMU.
+ //
+ mrc p15, 0, r0, c0, c0, 0 // Read an arbitrary value.
+ mov r0, r0 // Stall until read completes.
+ XPUTC(#77)
-/*
Home |
Main Index |
Thread Index |
Old Index