/*
 * Copyright (c) 2006-2023, RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Date           Author       Notes
 * 2020-01-15     bigmagic     the first version
 * 2020-08-10     SummerGift   support clang compiler
 * 2023-04-29     GuEe-GUI     support kernel's ARM64 boot header
 * 2024-01-18     Shell        fix implicit dependency of cpuid management
 */

#ifndef __ASSEMBLY__
#define __ASSEMBLY__
#endif

#include <mmu.h>
#include <rtconfig.h>

#define ARM64_IMAGE_FLAG_BE_SHIFT           0
#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT    (ARM64_IMAGE_FLAG_BE_SHIFT + 1)
#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT    (ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2)

#define ARM64_IMAGE_FLAG_LE                 0
#define ARM64_IMAGE_FLAG_BE                 1
#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K       1
#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K      2
#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K      3
#define ARM64_IMAGE_FLAG_PHYS_BASE          1

#define _HEAD_FLAG(field)                   (_HEAD_FLAG_##field << ARM64_IMAGE_FLAG_##field##_SHIFT)

#ifdef ARCH_CPU_BIG_ENDIAN
#define _HEAD_FLAG_BE                       ARM64_IMAGE_FLAG_BE
#else
#define _HEAD_FLAG_BE                       ARM64_IMAGE_FLAG_LE
#endif
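/*
 * Encode the page size field of the boot header flags:
 * (ARCH_PAGE_SHIFT - 10) / 2 maps 12 (4K) -> 1, 14 (16K) -> 2 and 16 (64K) -> 3,
 * matching the ARM64_IMAGE_FLAG_PAGE_SIZE_* values above.
 */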
#define _HEAD_FLAG_PAGE_SIZE                ((ARCH_PAGE_SHIFT - 10) / 2)
#define _HEAD_FLAG_PHYS_BASE                1

#define _HEAD_FLAGS                         (_HEAD_FLAG(BE) | _HEAD_FLAG(PAGE_SIZE) | _HEAD_FLAG(PHYS_BASE))

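/* Load the run-time physical address of \symbol (PC-relative, usable before the MMU is on) */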
.macro get_phy, reg, symbol
    adrp    \reg, \symbol
    add     \reg, \reg, #:lo12:\symbol
.endm

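/*
 * Compute the physical-virtual offset of the running image:
 * \out = physical address - link-time (virtual) address of .boot_cpu_stack_top.
 * \tmp is clobbered.
 */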
.macro get_pvoff, tmp, out
    ldr     \tmp, =.boot_cpu_stack_top
    get_phy \out, .boot_cpu_stack_top
    sub     \out, \out, \tmp
.endm

    .section ".text.entrypoint","ax"

#ifdef RT_USING_OFW
/*
 * Our goal is to boot RT-Thread with as little change to the bootloader's
 * config as possible, so we reuse the Linux kernel's ARM64 boot image header:
 *   https://www.kernel.org/doc/html/latest/arch/arm64/booting.html#call-the-kernel-image
 */
_head:
    b       _start          /* Executable code */
    .long   0               /* Executable code */
    .quad   _text_offset    /* Image load offset from start of RAM, little endian */
    .quad   _end - _head    /* Effective Image size, little endian (_end defined in link.lds) */
    .quad   _HEAD_FLAGS     /* Kernel flags, little endian */
    .quad   0               /* Reserved */
    .quad   0               /* Reserved */
    .quad   0               /* Reserved */
    .ascii  "ARM\x64"       /* Magic number */
    .long   0               /* Reserved (used for PE COFF offset) */
#endif /* RT_USING_OFW */

/* Variable registers: x21~x28 */
dtb_paddr .req x21
boot_arg0 .req x22
boot_arg1 .req x23
boot_arg2 .req x24
stack_top .req x25

    .global _start
_start:
/*
 * Boot CPU general-purpose register settings:
 *   x0 = physical address of device tree blob (dtb) in system RAM.
 *   x1 = 0 (reserved for future use)
 *   x2 = 0 (reserved for future use)
 *   x3 = 0 (reserved for future use)
 */
    mov     dtb_paddr, x0
    mov     boot_arg0, x1
    mov     boot_arg1, x2
    mov     boot_arg2, x3

    /* Save cpu stack */
    get_phy stack_top, .boot_cpu_stack_top
    /* Save cpu id temp */
#ifdef ARCH_USING_HW_THREAD_SELF
    msr     tpidrro_el0, xzr
    /* Save thread self */
#endif /* ARCH_USING_HW_THREAD_SELF */
    msr     tpidr_el1, xzr

    bl      init_cpu_el
    bl      init_kernel_bss
    bl      init_cpu_stack_early

#ifdef RT_USING_OFW
    /* Save devicetree info */
    mov     x0, dtb_paddr
    bl      rt_hw_fdt_install_early
#endif

    /* We are now at the end of the boot CPU's early setup; set up the MMU and enter the kernel */
    ldr     x8, =rtthread_startup
    b       init_mmu_early
    /* never come back */

kernel_start:
    /* jump to the PE's system entry */
    mov     x29, xzr
    mov     x30, x8
    br      x8

cpu_idle:
    wfe
    b       cpu_idle

#ifdef RT_USING_SMP
    .globl _secondary_cpu_entry
_secondary_cpu_entry:
#ifdef RT_USING_OFW
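    /*
     * Derive this cpu's logical id by scanning rt_cpu_mpidr_table (through its
     * physical address) for an entry whose affinity fields match MPIDR_EL1.
     * The mask 0xff00ffffff keeps Aff3..Aff0 and drops the non-affinity bits;
     * a zero entry terminates the search and parks the cpu in cpu_idle.
     */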
    /* Read cpu id */
    mrs     x5, mpidr_el1
    ldr     x1, =rt_cpu_mpidr_table
    get_pvoff x4 x2
    add     x1, x1, x2
    mov     x2, #0
    ldr     x4, =0xff00ffffff
    and     x0, x5, x4

.cpu_id_confirm:
    add     x2, x2, #1              /* Advance to the next cpu id */
    ldr     x3, [x1], #8
    cmp     x3, #0
    beq     cpu_idle
    and     x3, x3, x4
    cmp     x3, x0
    bne     .cpu_id_confirm

    /* Save this mpidr */
    str     x5, [x1, #-8]

    /* Cpu id found */
    sub     x0, x2, #1
#endif /* RT_USING_OFW */
    /* Save the cpu id globally */
    bl      rt_hw_cpu_id_set
    bl      rt_hw_cpu_id

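    /*
     * x0 now holds this cpu's id (>= 1 for a secondary cpu); its stack top is
     * .secondary_cpu_stack_top - (id - 1) * ARCH_SECONDARY_CPU_STACK_SIZE.
     */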
    /* Set current cpu's stack top */
    sub     x0, x0, #1
    mov     x1, #ARCH_SECONDARY_CPU_STACK_SIZE
    get_phy x2, .secondary_cpu_stack_top
    msub    stack_top, x0, x1, x2

    bl      init_cpu_el
    bl      init_cpu_stack_early

    /* The secondary cpu starts up */
    ldr     x8, =rt_hw_secondary_cpu_bsp_start
    b       enable_mmu_early
#endif /* RT_USING_SMP */

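/*
 * Bring the cpu from its current exception level (EL3 or EL2) down to EL1h
 * with FIQ/IRQ/SError/Debug masked, doing the minimum per-EL setup on the way,
 * and continue at .init_cpu_sys.
 */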
init_cpu_el:
    mrs     x0, CurrentEL           /* CurrentEL register: EL is in bits [3:2], others reserved */
    lsr     x0, x0, #2
    and     x0, x0, #3

    /* running at EL3? */
    cmp     x0, #3
    bne     .init_cpu_hyp_test

    /* normally never reached; handle EL3 just for completeness */
    mov     x1, #(1 << 0)           /* EL0 and EL1 are in Non-Secure state */
    orr     x1, x1, #(1 << 4)       /* RES1 */
    orr     x1, x1, #(1 << 5)       /* RES1 */
    orr     x1, x1, #(1 << 10)      /* The next lower level is AArch64 */
    msr     scr_el3, x1

    mov     x1, #9                  /* Next level is 0b1001->EL2h */
    orr     x1, x1, #(1 << 6)       /* Mask FIQ */
    orr     x1, x1, #(1 << 7)       /* Mask IRQ */
    orr     x1, x1, #(1 << 8)       /* Mask SError */
    orr     x1, x1, #(1 << 9)       /* Mask Debug Exception */
    msr     spsr_el3, x1

    get_phy x1, .init_cpu_hyp
    msr     elr_el3, x1
    eret

.init_cpu_hyp_test:
    /* running at EL2? */
    cmp     x0, #2                  /* EL2 = 0b10  */
    bne     .init_cpu_sys

.init_cpu_hyp:
    /* Enable CNTP for EL1 */
    mrs     x0, cnthctl_el2         /* Counter-timer Hypervisor Control register */
    orr     x0, x0, #(1 << 0)       /* Don't trap NS EL0/1 accesses to the physical counter */
    orr     x0, x0, #(1 << 1)       /* Don't trap NS EL0/1 accesses to the physical timer */
    msr     cnthctl_el2, x0
    msr     cntvoff_el2, xzr

    mov     x0, #(1 << 31)          /* Enable AArch64 in EL1 */
    orr     x0, x0, #(1 << 1)       /* SWIO hardwired */
    msr     hcr_el2, x0

    mov     x0, #5                  /* Next level is 0b0101->EL1h */
    orr     x0, x0, #(1 << 6)       /* Mask FIQ */
    orr     x0, x0, #(1 << 7)       /* Mask IRQ */
    orr     x0, x0, #(1 << 8)       /* Mask SError */
    orr     x0, x0, #(1 << 9)       /* Mask Debug Exception */
    msr     spsr_el2, x0

    get_phy x0, .init_cpu_sys
    msr     elr_el2, x0
    eret

.init_cpu_sys:
    mrs     x0, sctlr_el1
    bic     x0, x0, #(3 << 3)       /* Disable SP Alignment check */
    bic     x0, x0, #(1 << 1)       /* Disable Alignment check */
    msr     sctlr_el1, x0

    mrs     x0, cntkctl_el1
    orr     x0, x0, #(1 << 1)      /* Set EL0VCTEN: allow EL0 to read the virtual counter */
    msr     cntkctl_el1, x0

    /* Avoid traps from SIMD or floating point instructions */
    mov     x0, #0x00300000         /* Don't trap any SIMD/FP instructions in EL0 or EL1 */
    msr     cpacr_el1, x0

    /* Apply the context change */
    dsb     ish
    isb

    ret

init_kernel_bss:
    get_phy x1, __bss_start
    get_phy x2, __bss_end
    sub     x2, x2, x1              /* Get bss size */

    and     x3, x2, #7              /* x3 = remainder bytes (size % 8) */
    ldr     x4, =~0x7
    and     x2, x2, x4              /* Mask ~7 */

.clean_bss_loop_quad:
    cbz     x2, .clean_bss_loop_byte
    str     xzr, [x1], #8
    sub     x2, x2, #8
    b       .clean_bss_loop_quad

.clean_bss_loop_byte:
    cbz     x3, .clean_bss_end
    strb    wzr, [x1], #1
    sub     x3, x3, #1
    b       .clean_bss_loop_byte

.clean_bss_end:
    ret

init_cpu_stack_early:
    msr     spsel, #1
    mov     sp, stack_top

    ret

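/*
 * Set up the early page tables: register the .early_page_array pool with
 * set_free_page(), then call rt_hw_mem_setup_early() with the physical
 * addresses of .early_tbl0_page/.early_tbl1_page, the early map size and the
 * physical-virtual offset, before turning the MMU on.
 */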
init_mmu_early:
    get_phy x0, .early_page_array
    bl      set_free_page

    get_phy x0, .early_tbl0_page
    get_phy x1, .early_tbl1_page

    get_pvoff x2 x3
    ldr     x2, =ARCH_EARLY_MAP_SIZE    /* Map 1G memory for kernel space */
    bl      rt_hw_mem_setup_early

    b       enable_mmu_early

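/*
 * Enable the MMU and caches. On entry x8 holds the (virtual) entry point to
 * run in the kernel address space; LR is pointed at kernel_start, so the final
 * `ret` lands there once translation is on, and kernel_start then branches to x8.
 */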
enable_mmu_early:
    get_phy x0, .early_tbl0_page
    get_phy x1, .early_tbl1_page

    msr     ttbr0_el1, x0
    msr     ttbr1_el1, x1
    dsb     sy

    bl      mmu_tcr_init

    /*
     * We will not use sp again before jumping to the kernel, so switch sp to
     * the virtual address of the current cpu's stack top
     */
    get_pvoff x1 x0
    mov     x1, stack_top
    sub     x1, x1, x0
    mov     sp, x1

    ldr     x30, =kernel_start      /* Set LR to kernel_start (its virtual address) */

    /* Enable page table translation */
    mrs     x1, sctlr_el1
    orr     x1, x1, #(1 << 12)      /* Stage 1 instruction access Cacheability control */
    orr     x1, x1, #(1 << 2)       /* Cacheable Normal memory in stage1 */
    orr     x1, x1, #(1 << 0)       /* MMU Enable */
    msr     sctlr_el1, x1

    dsb     ish
    isb

    ic      ialluis     /* Invalidate all instruction caches in Inner Shareable domain to Point of Unification */
    dsb     ish
    isb

    tlbi    vmalle1     /* Invalidate all stage 1 translations used at EL1 with the current VMID */
    dsb     ish
    isb

    ret

/*
 * CPU stack builtin
 */
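/*
 * Stack layout (low to high): when SMP is enabled, (RT_CPUS_NR - 1) stacks for
 * the secondary cpus end at .secondary_cpu_stack_top (secondary cpu N's stack
 * top is that label minus (N - 1) * ARCH_SECONDARY_CPU_STACK_SIZE), followed
 * by the boot cpu's stack ending at .boot_cpu_stack_top.
 */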
    .section ".bss.noclean.cpus_stack"
    .align 12
.cpus_stack:
#if defined(RT_USING_SMP) && RT_CPUS_NR > 1
    .space (ARCH_SECONDARY_CPU_STACK_SIZE * (RT_CPUS_NR - 1))
#endif
.secondary_cpu_stack_top:
    .space ARCH_SECONDARY_CPU_STACK_SIZE
.boot_cpu_stack_top:

/*
 * Early page builtin
 */
    .section ".bss.noclean.early_page"
    .align 12
.early_tbl0_page:
    .space ARCH_PAGE_SIZE
.early_tbl1_page:
    /* Map 4G -> 2M * 512 entries */
    .space 4 * ARCH_PAGE_SIZE
.early_page_array:
    .space 24 * ARCH_PAGE_SIZE