/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <xtensa_asm2_s.h>
#include <zephyr/offsets.h>
#include <zephyr/zsr.h>

#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
#include <xtensa/simcall.h>
#endif

/*
 * xtensa_spill_reg_windows
 *
 * Spill all register windows.  Not a C function, enter this via CALL0
 * (so you have to save off A0, but no other registers need to be
 * spilled).  On return, all registers not part of the current
 * function will be spilled to memory.  The WINDOWSTART SR will have a
 * single 1 bit corresponding to the current frame at WINDOWBASE.
 */
.global xtensa_spill_reg_windows
.align 4
xtensa_spill_reg_windows:
	SPILL_ALL_WINDOWS
	ret

/*
 * xtensa_save_high_regs
 *
 * Call with CALL0, with A2/A3 available as scratch.  Pushes the high
 * A4-A15 GPRs to the stack if needed (i.e. if those registers are not
 * part of wrapped-around frames higher up the call stack), returning
 * to the caller with the stack pointer HAVING BEEN MODIFIED to
 * contain them.
 */
.global xtensa_save_high_regs
.align 4
xtensa_save_high_regs:
	/* Generate a rotated (modulo NREGS/4 bits!) WINDOWSTART in A2
	 * by duplicating the bits so they appear twice and shifting
	 * down by WINDOWBASE bits.  Now the LSB is the register quad
	 * at WINDOWBASE.
	 */
	rsr a2, WINDOWSTART
	slli a3, a2, (XCHAL_NUM_AREGS / 4)
	or a2, a2, a3
	rsr a3, WINDOWBASE
	ssr a3
	srl a2, a2

	mov a3, a1	/* Stash our original stack pointer */

	/* For the next three bits in WINDOWSTART (which correspond to
	 * the A4-A7, A8-A11 and A12-A15 quads), if we find a one,
	 * that means that the quad is owned by a wrapped-around call
	 * in the registers, so we don't need to spill it or any
	 * further registers from the GPRs and can skip to the end.
	 */
	bbsi a2, 1, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a4, a1, 0
	s32i a5, a1, 4
	s32i a6, a1, 8
	s32i a7, a1, 12

	bbsi a2, 2, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a8, a1, 0
	s32i a9, a1, 4
	s32i a10, a1, 8
	s32i a11, a1, 12

	bbsi a2, 3, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a12, a1, 0
	s32i a13, a1, 4
	s32i a14, a1, 8
	s32i a15, a1, 12

_high_gpr_spill_done:
	/* Push the original stack pointer so we know at restore
	 * time how many registers were spilled, then return, leaving the
	 * modified SP in A1.
	 */
	addi a1, a1, -4
	s32i a3, a1, 0

	ret
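/* Worked example of the rotation in xtensa_save_high_regs above (an
 * illustration only, assuming XCHAL_NUM_AREGS == 32, i.e. an 8-bit
 * WINDOWSTART with one bit per 4-register quad):
 *
 *   WINDOWSTART = 0b00100100, WINDOWBASE = 2
 *   duplicated  = 0b0010010000100100
 *   >> 2        = 0b100100001001, low 8 bits = 0b00001001
 *
 * Bit 0 (the quad at WINDOWBASE, i.e. the live A0-A3) is set, as it
 * always is for the current frame.  Bits 1 and 2 are clear, so the
 * A4-A7 and A8-A11 quads get spilled; bit 3 is set, so A12-A15 belong
 * to a wrapped-around caller and the spill sequence exits early.
 */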
/*
 * xtensa_restore_high_regs
 *
 * Does the inverse of xtensa_save_high_regs, taking the stack pointer
 * in A1 that it produced and restoring the A4-A15 state (and the
 * stack pointer) to the state they had at the earlier call.  Call
 * with CALL0, leaving A2/A3 available as scratch.
 */
.global xtensa_restore_high_regs
.align 4
xtensa_restore_high_regs:
	/* Pop our "original" stack pointer into A2, stash it in A3 also */
	l32i a2, a1, 0
	addi a1, a1, 4
	mov a3, a2

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a4, a2, 0
	l32i a5, a2, 4
	l32i a6, a2, 8
	l32i a7, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a8, a2, 0
	l32i a9, a2, 4
	l32i a10, a2, 8
	l32i a11, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a12, a2, 0
	l32i a13, a2, 4
	l32i a14, a2, 8
	l32i a15, a2, 12

_high_restore_done:
	mov a1, a3	/* Original stack */
	ret

/*
 * _restore_context
 *
 * Arrive here via a jump.  Enters into the restored context and does
 * not return.  A1 should have a context pointer in it as received
 * from switch or an interrupt exit.  Interrupts must be disabled,
 * and register windows should have been spilled.
 *
 * Note that exit from the restore is done with the RFI instruction,
 * using the EPCn/EPSn registers.  Those will have been saved already
 * by any interrupt entry, so they are safe to use.  Note that EPC1 and
 * RFE are NOT usable (they can't preserve PS).  Per the ISA spec, all
 * RFI levels do the same thing and differ only in the special
 * registers used to hold PC/PS, but Qemu has been observed to behave
 * strangely when RFI doesn't "return" to an INTLEVEL strictly lower
 * than it started from.  So we leverage the zsr.h framework to pick
 * the highest level available for our specific platform.
 */
.global _restore_context
_restore_context:
	call0 xtensa_restore_high_regs

	l32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET
	wsr a0, ZSR_EPC

#ifdef CONFIG_USERSPACE
	/* When restoring context via xtensa_switch and
	 * returning from non-nested interrupts, we use
	 * the stashed PS value in the thread struct
	 * instead of the one in the thread stack.
	 * Both scenarios will have a nested value of 0.
	 */
	rsr.ZSR_CPU a2
	l32i a0, a2, ___cpu_t_nested_OFFSET
	bnez a0, _restore_ps_from_stack

	l32i a0, a2, ___cpu_t_current_OFFSET
	l32i a0, a0, _thread_offset_to_return_ps
	wsr a0, ZSR_EPS

	j _restore_ps_after

_restore_ps_from_stack:
#endif

	l32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
	wsr a0, ZSR_EPS

#ifdef CONFIG_USERSPACE
_restore_ps_after:
#endif

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_RESTORE
#endif

#if defined(CONFIG_XTENSA_EAGER_HIFI_SHARING)
.extern _xtensa_hifi_load
	call0 _xtensa_hifi_load
#endif

	ODD_REG_RESTORE a0, a1

	rsync

	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	l32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	l32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	rfi ZSR_RFI_LEVEL

/*
 * void xtensa_arch_except(int reason_p);
 *
 * Implements a hardware exception for Xtensa ARCH_EXCEPT, saving the
 * interrupted stack frame and reason_p for use in the exception
 * handler and coredump.
 */
.global xtensa_arch_except
.global xtensa_arch_except_epc
.align 4
xtensa_arch_except:
	entry a1, 16
xtensa_arch_except_epc:
	ill
	retw
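/* Illustrative only (a sketch of the call path, not code in this file):
 * a C caller reaches the function above as, e.g.,
 *
 *     xtensa_arch_except(K_ERR_KERNEL_PANIC);
 *
 * With the windowed ABI the reason lands in the callee's A2, and the
 * 'ill' instruction raises an illegal-instruction exception whose PC
 * is exactly xtensa_arch_except_epc.  A fault handler can compare the
 * saved EPC against that label to recognize a deliberate ARCH_EXCEPT
 * and recover the reason from the saved A2.
 */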
/*
 * void xtensa_arch_kernel_oops(int reason_p, void *ssf);
 *
 * Simply raises a hardware exception for a kernel OOPS.
 */
.global xtensa_arch_kernel_oops
.global xtensa_arch_kernel_oops_epc
.align 4
xtensa_arch_kernel_oops:
	entry a1, 16
xtensa_arch_kernel_oops_epc:
	ill
	retw

/*
 * void xtensa_switch(void *new, void **old_return);
 *
 * Context switches into the previously-saved "new" handle, placing
 * the saved "old" handle into the address provided by old_return.
 */
.global xtensa_switch
.align 4
xtensa_switch:
#ifdef CONFIG_USERSPACE
	entry a1, 32

	s32i a4, a1, 0
	s32i a5, a1, 4
	s32i a6, a1, 8
	s32i a7, a1, 12

	rsr a6, ZSR_CPU
	l32i a6, a6, ___cpu_t_current_OFFSET
#ifdef CONFIG_XTENSA_MMU
#ifdef CONFIG_XTENSA_MMU_FLUSH_AUTOREFILL_DTLBS_ON_SWAP
	call4 xtensa_swap_update_page_tables
#else
	SWAP_PAGE_TABLE a6, a4, a7
#endif
#endif
#ifdef CONFIG_XTENSA_MPU
	call4 xtensa_mpu_map_write
#endif

	l32i a7, a1, 12
	l32i a6, a1, 8
	l32i a5, a1, 4
	l32i a4, a1, 0
#else
	entry a1, 16
#endif

	SPILL_ALL_WINDOWS
	addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF

	/* Stash our A0/A2/A3 and the shift/loop registers into the base
	 * save area so they get restored as they are now.  A2/A3
	 * don't actually get used post-restore, but they need to be
	 * stashed across the xtensa_save_high_regs call and this is a
	 * convenient place.
	 */
	s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	ODD_REG_SAVE a0, a1

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_SAVE
#endif

	/* Stash our PS register contents and a "restore" PC. */
	rsr a0, PS
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET

#ifdef CONFIG_USERSPACE
	/* Backtrack to the head of the thread struct and
	 * then store the PS value to be restored in
	 * the architecture-specific section.
	 * This will be used to restore PS instead of
	 * the one stashed inside the stack.
	 */
	addi a3, a3, -___thread_t_switch_handle_OFFSET
	s32i a0, a3, _thread_offset_to_return_ps
#endif

	movi a0, _switch_restore_pc
	s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET

#if defined(CONFIG_XTENSA_EAGER_HIFI_SHARING)
	call0 _xtensa_hifi_save
#elif defined(CONFIG_XTENSA_LAZY_HIFI_SHARING)
	/* Disable HiFi sharing */
	rsr a6, CPENABLE
	movi a7, ~(1 << XCHAL_CP_ID_AUDIOENGINELX)
	and a6, a6, a7
	wsr a6, CPENABLE
#endif

	/* Now the high registers */
	call0 xtensa_save_high_regs

#if defined(CONFIG_KERNEL_COHERENCE) && !defined(CONFIG_SCHED_CPU_MASK_PIN_ONLY)
	/* Flush the stack.  The top of stack was stored for us by
	 * arch_cohere_stacks().  It can be NULL for a dummy thread.
	 */
	rsync
	rsr a0, ZSR_FLUSH
	beqz a0, noflush
	mov a3, a1
flushloop:
	dhwb a3, 0
	addi a3, a3, XCHAL_DCACHE_LINESIZE
	blt a3, a0, flushloop
noflush:
#endif

	/* Restore the A3 argument we spilled earlier (via the base
	 * save pointer pushed at the bottom of the stack) and set the
	 * stack to the "new" context out of the A2 spill slot.
	 */
	l32i a2, a1, 0
	l32i a3, a2, ___xtensa_irq_bsa_t_a3_OFFSET
	s32i a1, a3, 0

	/* Switch stack pointer and restore.  The jump to
	 * _restore_context does not return as such, but we arrange
	 * for the restored "next" address to be immediately after for
	 * sanity.
	 */
	l32i a1, a2, ___xtensa_irq_bsa_t_a2_OFFSET

#ifdef CONFIG_INSTRUMENT_THREAD_SWITCHING
	call4 z_thread_mark_switched_in
#endif
	j _restore_context
_switch_restore_pc:
	retw
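/* Illustrative usage (a sketch of the caller, not code in this file):
 * the kernel's swap path invokes this roughly as
 *
 *     xtensa_switch(new_thread->switch_handle, &old_thread->switch_handle);
 *
 * Control "returns" through _switch_restore_pc/retw above only in the
 * context of the incoming thread; the outgoing thread resumes at the
 * same point later, when some future switch restores the handle just
 * written through old_return.
 */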
/* Define our entry handler to load the struct kernel_t from the
 * MISC0 special register, and to find the nest and irq_stack values
 * at the precomputed offsets.
 */
.align 4
_handle_excint:
	EXCINT_HANDLER ___cpu_t_nested_OFFSET, ___cpu_t_irq_stack_OFFSET

/* Define the actual vectors for the hardware-defined levels with
 * DEF_EXCINT.  These load a C handler address and jump to our handler
 * above.
 */

DEF_EXCINT 1, _handle_excint, xtensa_excint1_c

/* In the code below we use XCHAL_NMILEVEL as the maximum level when
 * the core supports NMI; not all Xtensa configurations do, and in
 * that case we fall back to XCHAL_NUM_INTLEVELS.
 */
#if XCHAL_HAVE_NMI
#define MAX_INTR_LEVEL XCHAL_NMILEVEL
#elif XCHAL_HAVE_INTERRUPTS
#define MAX_INTR_LEVEL XCHAL_NUM_INTLEVELS
#else
#error Xtensa core with no interrupt support is used
#define MAX_INTR_LEVEL 0
#endif

#if MAX_INTR_LEVEL >= 2
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 2))
DEF_EXCINT 2, _handle_excint, xtensa_int2_c
#endif
#endif

#if MAX_INTR_LEVEL >= 3
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 3))
DEF_EXCINT 3, _handle_excint, xtensa_int3_c
#endif
#endif

#if MAX_INTR_LEVEL >= 4
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 4))
DEF_EXCINT 4, _handle_excint, xtensa_int4_c
#endif
#endif

#if MAX_INTR_LEVEL >= 5
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 5))
DEF_EXCINT 5, _handle_excint, xtensa_int5_c
#endif
#endif

#if MAX_INTR_LEVEL >= 6
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 6))
DEF_EXCINT 6, _handle_excint, xtensa_int6_c
#endif
#endif

#if MAX_INTR_LEVEL >= 7
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 7))
DEF_EXCINT 7, _handle_excint, xtensa_int7_c
#endif
#endif

#if defined(CONFIG_GDBSTUB)
DEF_EXCINT XCHAL_DEBUGLEVEL, _handle_excint, xtensa_debugint_c
#endif

/* The user exception vector is defined here, as we need to handle
 * MOVSP exceptions in assembly (the result has to be to unspill the
 * caller function of the code that took the exception, and that can't
 * be done in C).  A prototype exists which mucks with the stack frame
 * from the C handler instead, but that would add a LARGE overhead to
 * some alloca() calls (those where the caller has been spilled) just
 * to save these five cycles during other exceptions and L1
 * interrupts.  Maybe revisit at some point, with better benchmarking.
 * Note that _xt_alloca_exc is Xtensa-authored code which expects A0
 * to have been saved to EXCSAVE1; we've modified it to use the zsr.h
 * API to get assigned a scratch register.
 */
.pushsection .UserExceptionVector.text, "ax"
.global _Level1RealVector
_Level1RealVector:
	wsr a0, ZSR_A0SAVE
	rsync
	rsr.exccause a0
#ifdef CONFIG_XTENSA_MMU
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_user
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
	beqi a0, EXCCAUSE_SYSCALL, _syscall
#endif /* CONFIG_USERSPACE */
#ifdef CONFIG_XTENSA_MMU
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_user
	rsr.exccause a0
#endif /* CONFIG_XTENSA_MMU */
	bnei a0, EXCCAUSE_ALLOCA, _not_alloca

	j _xt_alloca_exc
_not_alloca:
	rsr a0, ZSR_A0SAVE
	j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_user:
	/* Handle a TLB miss by loading the PTE page:
	 * When we try to access an address that is not mapped, we get
	 * a miss.  The HW then tries to fetch the corresponding entry
	 * from the page table.  As the page table itself is not mapped
	 * in memory, we get a second miss, which triggers this
	 * exception.  Here we exploit that hardware capability by
	 * simply loading from the page table (the unmapped address),
	 * which causes a miss, but this time the hardware maps it
	 * automatically from the page table.  That works because the
	 * pages needed to map the page table itself are wired mappings.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
_syscall:
	rsr a0, ZSR_A0SAVE
	j xtensa_do_syscall
#endif /* CONFIG_USERSPACE */
.popsection
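/* Note on the dispatch in _Level1RealVector above (an explanatory
 * aside, not normative): beqi/bnei can only compare against the
 * b4const immediate set (-1, 1..8, 10, 12, 16, 32, 64, 128, 256).
 * EXCCAUSE_ITLB_MISS (16), EXCCAUSE_SYSCALL (1) and EXCCAUSE_ALLOCA
 * (5) are all encodable, while EXCCAUSE_DTLB_MISS (24) is not, hence
 * the addi/beqz pair (and the EXCCAUSE re-read afterwards) for that
 * one case.
 */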
/* In theory you can have levels up to 15, but known hardware only uses 7. */
#if XCHAL_NMILEVEL > 7
#error More interrupts than expected.
#endif

/* We don't actually use "kernel mode" currently.  Populate the vector
 * out of simple caution in case app code clears the UM bit by mistake.
 */
.pushsection .KernelExceptionVector.text, "ax"
.global _KernelExceptionVector
_KernelExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_A0SAVE
	rsr.exccause a0
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_kernel
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_kernel
	rsr a0, ZSR_A0SAVE
#endif
	j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_kernel:
	/* This TLB miss handling is used only during xtensa_mmu_init(),
	 * where VECBASE is at a different address and the offset used
	 * in the jump ('j') instruction would not reach the correct
	 * target (remember, VECBASE has been moved).  So we handle TLB
	 * misses in a very simple way here until we move back to using
	 * the UserExceptionVector above.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif
.popsection
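/* A minimal sketch of the refill trick shared by the TLB-miss handlers
 * above and below (assuming the standard Xtensa MMU autorefill layout):
 *
 *     rsr.ptevaddr a0      <- A0 = address of the PTE for the miss
 *     l32i a0, a0, 0       <- touching it makes the HW walk the wired
 *                             page-table mapping and refill the DTLB
 *
 * The loaded value is thrown away; only the refill side effect matters,
 * after which A0 is restored and the faulting instruction is retried
 * via rfe/rfde.
 */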
#ifdef XCHAL_DOUBLEEXC_VECTOR_VADDR
.pushsection .DoubleExceptionVector.text, "ax"
.global _DoubleExceptionVector
_DoubleExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_DBLEXC
	rsync

	rsr.exccause a0
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_dblexc

	/* Need to stash the DEPC for use by the C handler.
	 * If we encounter any DTLB misses when PS.EXCM is set,
	 * this vector will be used and the DEPC register will
	 * have the new address instead of the one that resulted
	 * in the double exception.
	 */
	rsr.depc a0
	wsr a0, ZSR_DEPC_SAVE

	rsr a0, ZSR_DBLEXC

	j _Level1Vector

_TripleFault:
#endif /* CONFIG_XTENSA_MMU */

#if XCHAL_HAVE_DEBUG && defined(CONFIG_XTENSA_BREAK_ON_UNRECOVERABLE_EXCEPTIONS)
	/* Signals an unhandled double exception or other unrecoverable
	 * exception.  This definitely needs a debugger attached to the
	 * hardware or simulator to catch it.
	 */
	break 1, 4
#elif defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
	/* Tell the simulator to stop executing here instead of spinning
	 * in an infinite loop (see below).  This greatly helps with
	 * tracing in the simulator, since traces will not contain
	 * endless iterations of the same jump.
	 */
	movi a3, 1
	movi a2, SYS_exit
	simcall
#endif
1:
	j 1b

#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_dblexc:
	/* Handle all data TLB misses here.
	 * These are mostly caused by preloading page table entries
	 * in the level 1 exception handler.  Failure to load the PTE
	 * will result in another exception with a different cause
	 * (EXCCAUSE), which can be handled when the CPU re-enters
	 * the double exception handler.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0

	rsr a0, ZSR_DBLEXC
	rfde
#endif
.popsection

#endif
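/* Note on the simulator exit sequence after _TripleFault above (an
 * aside, per the usual Xtensa ISS simcall convention as we understand
 * it): A2 selects the service (SYS_exit) and A3 carries its argument,
 * so the simulation terminates with exit status 1 rather than spinning
 * in the "1: j 1b" loop the way real hardware would.
 */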