/*
 * Copyright (c) 2017, Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */
#include <xtensa_asm2_s.h>
#include <zephyr/offsets.h>
#include <zephyr/zsr.h>

#if defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
#include <xtensa/simcall.h>
#endif

/*
 * xtensa_spill_reg_windows
 *
 * Spill all register windows.  Not a C function, enter this via CALL0
 * (so you have to save off A0, but no other registers need to be
 * spilled).  On return, all registers not part of the current
 * function will be spilled to memory.  The WINDOWSTART SR will have a
 * single 1 bit corresponding to the current frame at WINDOWBASE.
 */
.global xtensa_spill_reg_windows
.align 4
xtensa_spill_reg_windows:
        SPILL_ALL_WINDOWS
        ret

/*
 * xtensa_save_high_regs
 *
 * Call with CALL0, with A2/A3 available as scratch.  Pushes the high
 * A4-A15 GPRs to the stack if needed (i.e. if those registers are not
 * part of wrapped-around frames higher up the call stack), returning
 * to the caller with the stack pointer HAVING BEEN MODIFIED to
 * contain them.
 */
.global xtensa_save_high_regs
.align 4
xtensa_save_high_regs:
	/* Generate a rotated (modulo NREGS/4 bits!) WINDOWSTART in A2
	 * by duplicating the bits twice and shifting down by WINDOWBASE
	 * bits.  Now the LSB is the register quad at WINDOWBASE.
	 */
	rsr a2, WINDOWSTART
	slli a3, a2, (XCHAL_NUM_AREGS / 4)
	or a2, a2, a3
	rsr a3, WINDOWBASE
	ssr a3
	srl a2, a2
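
	/* In C terms, the instructions above compute a rotation (a
	 * minimal sketch; "ws" and "nquads" are illustrative names,
	 * not symbols from this file):
	 *
	 *   uint32_t ws = WINDOWSTART;             // one bit per 4-register quad
	 *   uint32_t nquads = XCHAL_NUM_AREGS / 4;
	 *   ws |= ws << nquads;                    // duplicate the bit pattern
	 *   ws >>= WINDOWBASE;                     // now bit 0 == quad at WINDOWBASE
	 *
	 * The SSR/SRL pair funnel-shifts the doubled pattern, so the
	 * result is a true rotate of WINDOWSTART by WINDOWBASE bits.
	 */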

	mov a3, a1 /* Stash our original stack pointer */

	/* For the next three bits in WINDOWSTART (which correspond to
	 * the A4-A7, A8-A11 and A12-A15 quads), if we find a one,
	 * that means that the quad is owned by a wrapped-around call
	 * in the registers, so we don't need to spill it or any
	 * further registers from the GPRs and can skip to the end.
	 */
	bbsi a2, 1, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a4, a1, 0
	s32i a5, a1, 4
	s32i a6, a1, 8
	s32i a7, a1, 12

	bbsi a2, 2, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a8, a1, 0
	s32i a9, a1, 4
	s32i a10, a1, 8
	s32i a11, a1, 12

	bbsi a2, 3, _high_gpr_spill_done
	addi a1, a1, -16
	s32i a12, a1, 0
	s32i a13, a1, 4
	s32i a14, a1, 8
	s32i a15, a1, 12

_high_gpr_spill_done:
	/* Push the original stack pointer so we know at restore
	 * time how many registers were spilled, then return, leaving the
	 * modified SP in A1.
	 */
	addi a1, a1, -4
	s32i a3, a1, 0

	ret
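
/* On return, the memory below the caller's original stack pointer
 * holds (a sketch; only the quads actually spilled are present, and
 * spilling stops at the first quad owned by a wrapped-around frame):
 *
 *   orig SP -> +--------------------+
 *              | A4..A7   (maybe)   |
 *              +--------------------+
 *              | A8..A11  (maybe)   |
 *              +--------------------+
 *              | A12..A15 (maybe)   |
 *              +--------------------+
 *   new A1 ->  | original SP value  |
 *              +--------------------+
 */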

/*
 * xtensa_restore_high_regs
 *
 * Does the inverse of xtensa_save_high_regs: takes in A1 the stack
 * pointer that call produced, and restores the A4-A15 state (and the
 * stack pointer) to the state they had at the earlier call.  Call
 * with CALL0, leaving A2/A3 available as scratch.
 */
.global xtensa_restore_high_regs
.align 4
xtensa_restore_high_regs:
	/* pop our "original" stack pointer into a2, stash in a3 also */
	l32i a2, a1, 0
	addi a1, a1, 4
	mov a3, a2

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a4, a2, 0
	l32i a5, a2, 4
	l32i a6, a2, 8
	l32i a7, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a8, a2, 0
	l32i a9, a2, 4
	l32i a10, a2, 8
	l32i a11, a2, 12

	beq a1, a2, _high_restore_done
	addi a2, a2, -16
	l32i a12, a2, 0
	l32i a13, a2, 4
	l32i a14, a2, 8
	l32i a15, a2, 12

_high_restore_done:
	mov a1, a3 /* Original stack */
	ret

/*
 * _restore_context
 *
 * Arrive here via a jump.  Enters into the restored context and does
 * not return.  A1 should have a context pointer in it as received
 * from switch or an interrupt exit.  Interrupts must be disabled,
 * and register windows should have been spilled.
 *
 * Note that exit from the restore is done with the RFI instruction,
 * using the EPCn/EPSn registers.  Those will have been saved already
 * by any interrupt entry so they are safe to use.  Note that EPC1 and
 * RFE are NOT usable (they can't preserve PS).  Per the ISA spec, all
 * RFI levels do the same thing and differ only in the special
 * registers used to hold PC/PS, but QEMU has been observed to behave
 * strangely when RFI doesn't "return" to an INTLEVEL strictly lower
 * than it started from.  So we leverage the zsr.h framework to pick
 * the highest level available for our specific platform.
 */
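/* For orientation, the base save area fields used below can be
 * pictured as a C struct (an illustrative sketch only; the actual
 * layout, and any additional members, are defined by the generated
 * offsets in zephyr/offsets.h, e.g. ___xtensa_irq_bsa_t_pc_OFFSET):
 *
 *   struct xtensa_irq_bsa {
 *       ...
 *       uint32_t a0;  // saved A0
 *       uint32_t a2;  // saved scratch GPRs
 *       uint32_t a3;
 *       uint32_t ps;  // PS to restore via ZSR_EPS
 *       uint32_t pc;  // PC to restore via ZSR_EPC
 *   };                // total size: ___xtensa_irq_bsa_t_SIZEOF
 */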
.global _restore_context
_restore_context:
	call0 xtensa_restore_high_regs

	l32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET
	wsr a0, ZSR_EPC

#ifdef CONFIG_USERSPACE
	/* When restoring context via xtensa_switch and
	 * returning from non-nested interrupts, we use
	 * the stashed PS value in the thread struct
	 * instead of the one in the thread stack.
	 * Both scenarios will have a nested value of 0.
	 */
	rsr.ZSR_CPU a2
	l32i a0, a2, ___cpu_t_nested_OFFSET
	bnez a0, _restore_ps_from_stack

	l32i a0, a2, ___cpu_t_current_OFFSET
	l32i a0, a0, _thread_offset_to_return_ps
	wsr a0, ZSR_EPS

	j _restore_ps_after

_restore_ps_from_stack:
#endif

	l32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET
	wsr a0, ZSR_EPS

#ifdef CONFIG_USERSPACE
_restore_ps_after:
#endif

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_RESTORE
#endif

#if defined(CONFIG_XTENSA_EAGER_HIFI_SHARING)
.extern _xtensa_hifi_load
	call0 _xtensa_hifi_load
#endif

	ODD_REG_RESTORE a0, a1

	rsync

	l32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	l32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	l32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	addi a1, a1, ___xtensa_irq_bsa_t_SIZEOF

	rfi ZSR_RFI_LEVEL

/*
 * void xtensa_arch_except(int reason_p);
 *
 * Implements the hardware exception used by Xtensa's ARCH_EXCEPT(),
 * saving the interrupted stack frame and reason_p for use by the
 * exception handler and coredump.
 */
.global xtensa_arch_except
.global xtensa_arch_except_epc
.align 4
xtensa_arch_except:
	entry a1, 16
xtensa_arch_except_epc:
	ill
	retw
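
/* A sketch of how C-level fatal-error code can recognize this
 * deliberate trap (illustrative only; "bsa" and its fields are
 * assumed names for the saved frame, and reason_p arriving in the
 * saved A2 follows from the windowed call ABI):
 *
 *   if (bsa->pc == (uintptr_t)&xtensa_arch_except_epc) {
 *       reason = bsa->a2;
 *   }
 */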

/*
 * void xtensa_arch_kernel_oops(int reason_p, void *ssf);
 *
 * Raises a hardware exception to signal a kernel OOPS.
 */
.global xtensa_arch_kernel_oops
.global xtensa_arch_kernel_oops_epc
.align 4
xtensa_arch_kernel_oops:
	entry a1, 16
xtensa_arch_kernel_oops_epc:
	ill
	retw

/*
 * void xtensa_switch(void *new, void **old_return);
 *
 * Context switches into the previously-saved "new" handle, placing
 * the saved "old" handle into the address provided by old_return.
 */
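/* A minimal usage sketch from C (illustrative; in practice the
 * kernel's arch_switch() layer supplies these arguments from the
 * threads' switch handles):
 *
 *   void *old_handle;
 *   xtensa_switch(new_thread_handle, &old_handle);
 *   // Execution resumes here only when this context is switched
 *   // back in; old_handle was filled with this context's saved
 *   // state pointer during the switch out.
 */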
.global xtensa_switch
.align 4
xtensa_switch:
#ifdef CONFIG_USERSPACE
	entry a1, 32

	s32i a4, a1, 0
	s32i a5, a1, 4
	s32i a6, a1, 8
	s32i a7, a1, 12

	rsr a6, ZSR_CPU
	l32i a6, a6, ___cpu_t_current_OFFSET
#ifdef CONFIG_XTENSA_MMU
#ifdef CONFIG_XTENSA_MMU_FLUSH_AUTOREFILL_DTLBS_ON_SWAP
	call4 xtensa_swap_update_page_tables
#else
	SWAP_PAGE_TABLE a6, a4, a7
#endif
#endif
#ifdef CONFIG_XTENSA_MPU
	call4 xtensa_mpu_map_write
#endif

	l32i a7, a1, 12
	l32i a6, a1, 8
	l32i a5, a1, 4
	l32i a4, a1, 0
#else
	entry a1, 16
#endif

	SPILL_ALL_WINDOWS
	addi a1, a1, -___xtensa_irq_bsa_t_SIZEOF

	/* Stash our A0/2/3 and the shift/loop registers into the base
	 * save area so they get restored as they are now.  A2/A3
	 * don't actually get used post-restore, but they need to be
	 * stashed across the xtensa_save_high_regs call and this is a
	 * convenient place.
	 */
	s32i a0, a1, ___xtensa_irq_bsa_t_a0_OFFSET
	s32i a2, a1, ___xtensa_irq_bsa_t_a2_OFFSET
	s32i a3, a1, ___xtensa_irq_bsa_t_a3_OFFSET
	ODD_REG_SAVE a0, a1

#if XCHAL_HAVE_FP && defined(CONFIG_CPU_HAS_FPU) && defined(CONFIG_FPU_SHARING)
	FPU_REG_SAVE
#endif

	/* Stash our PS register contents and a "restore" PC. */
	rsr a0, PS
	s32i a0, a1, ___xtensa_irq_bsa_t_ps_OFFSET

#ifdef CONFIG_USERSPACE
	/* Backtrack to the head of the thread struct and store the PS
	 * value to be restored in the architecture-specific section.
	 * It will be used to restore PS instead of the copy stashed
	 * on the stack.
	 */
	addi a3, a3, -___thread_t_switch_handle_OFFSET
	s32i a0, a3, _thread_offset_to_return_ps
#endif

	movi a0, _switch_restore_pc
	s32i a0, a1, ___xtensa_irq_bsa_t_pc_OFFSET

#if defined(CONFIG_XTENSA_EAGER_HIFI_SHARING)
	call0 _xtensa_hifi_save
#elif defined(CONFIG_XTENSA_LAZY_HIFI_SHARING)
	/* Disable HiFi sharing */
	rsr a6, CPENABLE
	movi a7, ~(1 << XCHAL_CP_ID_AUDIOENGINELX)
	and a6, a6, a7
	wsr a6, CPENABLE
#endif

	/* Now the high registers */
	call0 xtensa_save_high_regs

#if defined(CONFIG_KERNEL_COHERENCE) && !defined(CONFIG_SCHED_CPU_MASK_PIN_ONLY)
	/* Flush the stack.  The top of stack was stored for us by
	 * arch_cohere_stacks().  It can be NULL for a dummy thread.
	 */
	rsync
	rsr a0, ZSR_FLUSH
	beqz a0, noflush
	mov a3, a1
flushloop:
	dhwb a3, 0
	addi a3, a3, XCHAL_DCACHE_LINESIZE
	blt a3, a0, flushloop
noflush:
#endif
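
	/* The flush loop above is, in C terms (a sketch: dhwb() stands
	 * for the "dhwb" data-cache writeback instruction, and "top"
	 * is the value arch_cohere_stacks() left in ZSR_FLUSH):
	 *
	 *   for (char *p = (char *)sp; p < top; p += XCHAL_DCACHE_LINESIZE) {
	 *       dhwb(p);   // write back one line, without invalidating
	 *   }
	 */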

	/* Restore the A3 argument we spilled earlier (via the base
	 * save pointer pushed at the bottom of the stack) and set the
	 * stack to the "new" context out of the A2 spill slot.
	 */
	l32i a2, a1, 0
	l32i a3, a2, ___xtensa_irq_bsa_t_a3_OFFSET
	s32i a1, a3, 0

	/* Switch stack pointer and restore.  The jump to
	 * _restore_context does not return as such, but we arrange
	 * for the restored "next" address to be immediately after for
	 * sanity.
	 */
	l32i a1, a2, ___xtensa_irq_bsa_t_a2_OFFSET

#ifdef CONFIG_INSTRUMENT_THREAD_SWITCHING
	call4 z_thread_mark_switched_in
#endif
	j _restore_context
_switch_restore_pc:
	retw

/* Define our entry handler to load the struct kernel_t from the
 * MISC0 special register, and to find the nest and irq_stack values
 * at the precomputed offsets.
 */
.align 4
_handle_excint:
	EXCINT_HANDLER ___cpu_t_nested_OFFSET, ___cpu_t_irq_stack_OFFSET

/* Define the actual vectors for the hardware-defined levels with
 * DEF_EXCINT.  These load a C handler address and jump to our handler
 * above.
 */

DEF_EXCINT 1, _handle_excint, xtensa_excint1_c

/* The code below uses the higher of XCHAL_NMILEVEL and
 * XCHAL_NUM_INTLEVELS as the maximum interrupt level.  Since not all
 * Xtensa configurations support NMI, we fall back to
 * XCHAL_NUM_INTLEVELS when there is none.
 */
#if XCHAL_HAVE_NMI
#define MAX_INTR_LEVEL XCHAL_NMILEVEL
#elif XCHAL_HAVE_INTERRUPTS
#define MAX_INTR_LEVEL XCHAL_NUM_INTLEVELS
#else
#error Xtensa core with no interrupt support is used
#define MAX_INTR_LEVEL 0
#endif

#if MAX_INTR_LEVEL >= 2
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 2))
DEF_EXCINT 2, _handle_excint, xtensa_int2_c
#endif
#endif

#if MAX_INTR_LEVEL >= 3
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 3))
DEF_EXCINT 3, _handle_excint, xtensa_int3_c
#endif
#endif

#if MAX_INTR_LEVEL >= 4
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 4))
DEF_EXCINT 4, _handle_excint, xtensa_int4_c
#endif
#endif

#if MAX_INTR_LEVEL >= 5
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 5))
DEF_EXCINT 5, _handle_excint, xtensa_int5_c
#endif
#endif

#if MAX_INTR_LEVEL >= 6
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 6))
DEF_EXCINT 6, _handle_excint, xtensa_int6_c
#endif
#endif

#if MAX_INTR_LEVEL >= 7
#if !(defined(CONFIG_GDBSTUB) && (XCHAL_DEBUGLEVEL == 7))
DEF_EXCINT 7, _handle_excint, xtensa_int7_c
#endif
#endif

#if defined(CONFIG_GDBSTUB)
DEF_EXCINT XCHAL_DEBUGLEVEL, _handle_excint, xtensa_debugint_c
#endif

/* The user exception vector is defined here, as we need to handle
 * MOVSP exceptions in assembly (the result has to be to unspill the
 * caller function of the code that took the exception, and that can't
 * be done in C).  A prototype exists which mucks with the stack frame
 * from the C handler instead, but that would add a LARGE overhead to
 * some alloca() calls (those where the caller has been spilled) just
 * to save these five cycles during other exceptions and L1
 * interrupts.  Maybe revisit at some point, with better benchmarking.
 * Note that _xt_alloca_exc is Xtensa-authored code which expects A0
 * to have been saved to EXCSAVE1; we've modified it to use the zsr.h
 * API to get assigned a scratch register.
 */
.pushsection .UserExceptionVector.text, "ax"
.global _Level1RealVector
_Level1RealVector:
	wsr a0, ZSR_A0SAVE
	rsync
	rsr.exccause a0
#ifdef CONFIG_XTENSA_MMU
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_user
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
	beqi a0, EXCCAUSE_SYSCALL, _syscall
#endif /* CONFIG_USERSPACE */
#ifdef CONFIG_XTENSA_MMU
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_user
	rsr.exccause a0
#endif /* CONFIG_XTENSA_MMU */
	bnei a0, EXCCAUSE_ALLOCA, _not_alloca

	j _xt_alloca_exc
_not_alloca:
	rsr a0, ZSR_A0SAVE
	j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_user:
	/**
	 * Handle a TLB miss by loading the PTE page:
	 * when we try to access an address that is not mapped, we get
	 * a miss, and the HW tries to fetch the corresponding entry
	 * from the page table.  As the page table itself is not mapped
	 * in memory, that causes a second miss, which triggers the
	 * exception handled here.  What we do is exploit this hardware
	 * capability: we simply load from the (unmapped) page table
	 * address, which causes a miss the hardware can resolve by
	 * itself, since the pages needed to map the page table itself
	 * have wired mappings.
	 */
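	/* Roughly, in C (a sketch; ptevaddr() stands for reading the
	 * PTEVADDR special register, which holds the address of the
	 * PTE for the faulting access):
	 *
	 *   volatile uint32_t *pte = (volatile uint32_t *)ptevaddr();
	 *   (void)*pte;  // this load faults; the HW autofill resolves it
	 *                // via the wired mapping of the page table page
	 */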
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif /* CONFIG_XTENSA_MMU */
#ifdef CONFIG_USERSPACE
_syscall:
	rsr a0, ZSR_A0SAVE
	j xtensa_do_syscall
#endif /* CONFIG_USERSPACE */
.popsection

/* In theory you can have levels up to 15, but known hardware only uses 7. */
#if XCHAL_NMILEVEL > 7
#error More interrupts than expected.
#endif

/* We don't actually use "kernel mode" currently.  Populate the vector
 * out of simple caution in case app code clears the UM bit by mistake.
 */
.pushsection .KernelExceptionVector.text, "ax"
.global _KernelExceptionVector
_KernelExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_A0SAVE
	rsr.exccause a0
	beqi a0, EXCCAUSE_ITLB_MISS, _handle_tlb_miss_kernel
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_kernel
	rsr a0, ZSR_A0SAVE
#endif
	j _Level1Vector
#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_kernel:
	/* This TLB miss handler is used only during xtensa_mmu_init(),
	 * while VECBASE still points at a different address: the
	 * offset encoded in the jump ('j') instruction would not reach
	 * the correct target once VECBASE has moved.  So we handle TLB
	 * misses in this very simple way here, until we switch over to
	 * using the UserExceptionVector above.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0
	rsr a0, ZSR_A0SAVE
	rfe
#endif
.popsection

#ifdef XCHAL_DOUBLEEXC_VECTOR_VADDR
.pushsection .DoubleExceptionVector.text, "ax"
.global _DoubleExceptionVector
_DoubleExceptionVector:
#ifdef CONFIG_XTENSA_MMU
	wsr a0, ZSR_DBLEXC
	rsync

	rsr.exccause a0
	addi a0, a0, -EXCCAUSE_DTLB_MISS
	beqz a0, _handle_tlb_miss_dblexc

	/* Stash DEPC for use by the C handler.  If we encounter any
	 * DTLB miss while PS.EXCM is set, this vector will be taken
	 * again and the DEPC register will then hold the new address
	 * instead of the one that caused the original double
	 * exception.
	 */
	rsr.depc a0
	wsr a0, ZSR_DEPC_SAVE

	rsr a0, ZSR_DBLEXC

	j _Level1Vector

_TripleFault:
#endif /* CONFIG_XTENSA_MMU */

#if XCHAL_HAVE_DEBUG && defined(CONFIG_XTENSA_BREAK_ON_UNRECOVERABLE_EXCEPTIONS)
	/* Signals an unhandled double exception or other unrecoverable
	 * exception.  A debugger definitely needs to be attached to the
	 * hardware or simulator to catch this.
	 */
	break	1, 4
#elif defined(CONFIG_SIMULATOR_XTENSA) || defined(XT_SIMULATOR)
/* Tell the simulator to stop executing here instead of spinning in
 * an infinite loop (see below).  This greatly helps when tracing in
 * the simulator, since traces will not contain endless iterations of
 * the jump.
 */
	movi a3, 1
	movi a2, SYS_exit
	simcall
#endif
1:
	j	1b

#ifdef CONFIG_XTENSA_MMU
_handle_tlb_miss_dblexc:
	/* Handle all data TLB misses here.  These are mostly caused by
	 * preloading page table entries in the level 1 exception
	 * handler.  Failure to load the PTE will result in another
	 * exception with a different cause (EXCCAUSE), which can be
	 * handled when the CPU re-enters the double exception handler.
	 */
	rsr.ptevaddr a0
	l32i a0, a0, 0

	rsr a0, ZSR_DBLEXC
	rfde
#endif
.popsection

#endif