/*
 * Copyright (c) 2015 Google Inc. All rights reserved
 *
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/MIT
 */

#include <arch/arm64.h>
#include <kernel/thread.h>
#include <lk/trace.h>

#define LOCAL_TRACE 0

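/* Tracks, per CPU, which thread's fpstate was last loaded into that CPU's
 * FP/SIMD registers, so redundant reloads can be skipped. */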
static struct fpstate *current_fpstate[SMP_MAX_CPUS];

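/*
 * Load thread t's saved FP/SIMD context (Q0-Q31, FPCR, FPSR) into the
 * hardware registers, unless it is already the live context on this CPU.
 */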
static void arm64_fpu_load_state(struct thread *t) {
    uint cpu = arch_curr_cpu_num();
    struct fpstate *fpstate = &t->arch.fpstate;

    /* Both checks are needed: the thread may have migrated to another CPU
     * (fpstate->current_cpu changed), or another thread may have loaded its
     * state on this CPU (current_fpstate[cpu] changed) since we last ran. */
    if (fpstate == current_fpstate[cpu] && fpstate->current_cpu == cpu) {
        LTRACEF("cpu %u, thread %s, fpstate already valid\n", cpu, t->name);
        return;
    }
    LTRACEF("cpu %u, thread %s, load fpstate %p, last cpu %u, last fpstate %p\n",
            cpu, t->name, fpstate, fpstate->current_cpu, current_fpstate[cpu]);
    fpstate->current_cpu = cpu;
    current_fpstate[cpu] = fpstate;

    STATIC_ASSERT(sizeof(fpstate->regs) == 16 * 32);
    /* These loads address the register file through the fpstate base pointer,
     * which relies on regs[] being the first member of struct fpstate. The
     * memory clobber tells the compiler the asm reads that memory. */
    __asm__ volatile(
        ".arch_extension fp\n"
        "ldp     q0, q1, [%0, #(0 * 32)]\n"
        "ldp     q2, q3, [%0, #(1 * 32)]\n"
        "ldp     q4, q5, [%0, #(2 * 32)]\n"
        "ldp     q6, q7, [%0, #(3 * 32)]\n"
        "ldp     q8, q9, [%0, #(4 * 32)]\n"
        "ldp     q10, q11, [%0, #(5 * 32)]\n"
        "ldp     q12, q13, [%0, #(6 * 32)]\n"
        "ldp     q14, q15, [%0, #(7 * 32)]\n"
        "ldp     q16, q17, [%0, #(8 * 32)]\n"
        "ldp     q18, q19, [%0, #(9 * 32)]\n"
        "ldp     q20, q21, [%0, #(10 * 32)]\n"
        "ldp     q22, q23, [%0, #(11 * 32)]\n"
        "ldp     q24, q25, [%0, #(12 * 32)]\n"
        "ldp     q26, q27, [%0, #(13 * 32)]\n"
        "ldp     q28, q29, [%0, #(14 * 32)]\n"
        "ldp     q30, q31, [%0, #(15 * 32)]\n"
        "msr     fpcr, %1\n"
        "msr     fpsr, %2\n"
        ".arch_extension nofp\n"
        :
        : "r"(fpstate), "r"((uint64_t)fpstate->fpcr), "r"((uint64_t)fpstate->fpsr)
        : "memory");
}

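/*
 * Save the live FP/SIMD context (Q0-Q31, FPCR, FPSR) into thread t's fpstate.
 * Intended to run on t's CPU before another thread's context is loaded.
 */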
void arm64_fpu_save_state(struct thread *t) {
    struct fpstate *fpstate = &t->arch.fpstate;
    uint64_t fpcr, fpsr;
    /* The stores write fpstate->regs through %2; the memory clobber keeps the
     * compiler from caching fpstate fields across the asm. */
    __asm__ volatile(
        ".arch_extension fp\n"
        "stp     q0, q1, [%2, #(0 * 32)]\n"
        "stp     q2, q3, [%2, #(1 * 32)]\n"
        "stp     q4, q5, [%2, #(2 * 32)]\n"
        "stp     q6, q7, [%2, #(3 * 32)]\n"
        "stp     q8, q9, [%2, #(4 * 32)]\n"
        "stp     q10, q11, [%2, #(5 * 32)]\n"
        "stp     q12, q13, [%2, #(6 * 32)]\n"
        "stp     q14, q15, [%2, #(7 * 32)]\n"
        "stp     q16, q17, [%2, #(8 * 32)]\n"
        "stp     q18, q19, [%2, #(9 * 32)]\n"
        "stp     q20, q21, [%2, #(10 * 32)]\n"
        "stp     q22, q23, [%2, #(11 * 32)]\n"
        "stp     q24, q25, [%2, #(12 * 32)]\n"
        "stp     q26, q27, [%2, #(13 * 32)]\n"
        "stp     q28, q29, [%2, #(14 * 32)]\n"
        "stp     q30, q31, [%2, #(15 * 32)]\n"
        "mrs     %0, fpcr\n"
        "mrs     %1, fpsr\n"
        ".arch_extension nofp\n"
        : "=r"(fpcr), "=r"(fpsr)
        : "r"(fpstate)
        : "memory");
    fpstate->fpcr = (uint32_t)fpcr;
    fpstate->fpsr = (uint32_t)fpsr;

    LTRACEF("thread %s, fpcr %x, fpsr %x\n", t->name, fpstate->fpcr, fpstate->fpsr);
}

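/*
 * FP/SIMD access trap handler. CPACR_EL1.FPEN (bits [21:20]) gates FP/SIMD
 * register access; 0b11 disables trapping. On a lazy-FPU trap, enable access,
 * load the current thread's context, and return to retry the instruction.
 */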
void arm64_fpu_exception(struct arm64_iframe_long *iframe) {
    uint32_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
    if (((cpacr >> 20) & 3) != 3) {
        /* Access was trapped: enable FP/SIMD for EL0 and EL1, then pull in
         * this thread's saved context. */
        cpacr |= 3 << 20;
        ARM64_WRITE_SYSREG(cpacr_el1, cpacr);
        thread_t *t = get_current_thread();
        if (likely(t))
            arm64_fpu_load_state(t);
        return;
    }
    /* FPEN was already 0b11, so this was not a lazy-FPU access trap. */
}
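
/*
 * Usage sketch (hypothetical; not part of this file): lazy FPU switching
 * works by clearing CPACR_EL1.FPEN on context switch so that the incoming
 * thread's first FP/SIMD instruction traps into arm64_fpu_exception(). A
 * context-switch hook might look like the following; the 'fpused' flag and
 * the hook name are illustrative assumptions, not identifiers defined here.
 *
 *   static void arch_fpu_context_switch(thread_t *oldthread) {
 *       // save outgoing state only if this thread ever touched the FPU
 *       if (oldthread->arch.fpused)
 *           arm64_fpu_save_state(oldthread);
 *       // clear FPEN so the next FP/SIMD instruction traps
 *       uint64_t cpacr = ARM64_READ_SYSREG(cpacr_el1);
 *       cpacr &= ~(3ULL << 20);
 *       ARM64_WRITE_SYSREG(cpacr_el1, cpacr);
 *   }
 */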