// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include "vmexit_priv.h"

#include <bits.h>
#include <platform.h>
#include <trace.h>

#include <arch/arm64/el2_state.h>
#include <arch/hypervisor.h>
#include <dev/psci.h>
#include <dev/timer/arm_generic.h>
#include <hypervisor/ktrace.h>
#include <vm/fault.h>
#include <vm/physmap.h>
#include <zircon/syscalls/hypervisor.h>
#include <zircon/syscalls/port.h>

#define LOCAL_TRACE 0

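// Writes the trapped register value into the saved guest system state, logs
// it, and advances the guest PC past the trapping instruction. The statement
// expression evaluates to ZX_OK so callers can return it directly.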
#define SET_SYSREG(sysreg)                                                      \
    ({                                                                          \
        guest_state->system_state.sysreg = reg;                                 \
        LTRACEF("guest " #sysreg ": %#lx\n", guest_state->system_state.sysreg); \
        next_pc(guest_state);                                                   \
        ZX_OK;                                                                  \
    })

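// A page-table descriptor is 8 bytes, so each level of translation resolves
// MMU_GUEST_PAGE_SIZE_SHIFT - kPageTableLevelShift bits of the address.
// PSCI calls are made through SMC with an immediate of 0.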
static constexpr size_t kPageTableLevelShift = 3;
static constexpr uint16_t kSmcPsci = 0;

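// Bits of CNTV_CTL_EL0, the virtual timer control register.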
enum TimerControl : uint32_t {
    ENABLE = 1u << 0,
    IMASK = 1u << 1,
    ISTATUS = 1u << 2,
};

ExceptionSyndrome::ExceptionSyndrome(uint32_t esr) {
    ec = static_cast<ExceptionClass>(BITS_SHIFT(esr, 31, 26));
    iss = BITS(esr, 24, 0);
}

WaitInstruction::WaitInstruction(uint32_t iss) {
    is_wfe = BIT(iss, 0);
}

SmcInstruction::SmcInstruction(uint32_t iss) {
    imm = static_cast<uint16_t>(BITS(iss, 15, 0));
}

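// Decodes the ISS for a trapped MSR/MRS access: bits [21:10] hold Op0, Op2,
// Op1, and CRn, bits [9:5] the transfer register Xt, bits [4:1] CRm, and bit
// 0 the direction (1 = read). The fields are packed to match the
// SystemRegister encoding declared in vmexit_priv.h.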
SystemInstruction::SystemInstruction(uint32_t iss) {
    sysreg = static_cast<SystemRegister>(BITS(iss, 21, 10) >> 6 | BITS_SHIFT(iss, 4, 1));
    xt = static_cast<uint8_t>(BITS_SHIFT(iss, 9, 5));
    read = BIT(iss, 0);
}

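// Decodes a write to ICC_SGI1R_EL1: Aff3/Aff2/Aff1 select the target affinity,
// RS is the range selector for TargetList, INTID is the SGI number, and the
// IRM bit (bit 40) requests delivery to all PEs other than the local one.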
SgiRegister::SgiRegister(uint64_t sgir) {
    aff3 = static_cast<uint8_t>(BITS_SHIFT(sgir, 55, 48));
    aff2 = static_cast<uint8_t>(BITS_SHIFT(sgir, 39, 32));
    aff1 = static_cast<uint8_t>(BITS_SHIFT(sgir, 23, 16));
    rs = static_cast<uint8_t>(BITS_SHIFT(sgir, 47, 44));
    target_list = static_cast<uint8_t>(BITS_SHIFT(sgir, 15, 0));
    int_id = static_cast<uint8_t>(BITS_SHIFT(sgir, 27, 24));
    all_but_local = BIT(sgir, 40);
}

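// Decodes the ISS for a data abort: ISV (bit 24) indicates whether the
// syndrome is valid, SAS (bits [23:22]) gives log2 of the access size in
// bytes, SSE (bit 21) requests sign extension, SRT (bits [20:16]) is the
// transfer register, and WnR (bit 6) is set for writes.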
DataAbort::DataAbort(uint32_t iss) {
    valid = BIT_SHIFT(iss, 24);
    access_size = static_cast<uint8_t>(1u << BITS_SHIFT(iss, 23, 22));
    sign_extend = BIT(iss, 21);
    xt = static_cast<uint8_t>(BITS_SHIFT(iss, 20, 16));
    read = !BIT(iss, 6);
}

static void next_pc(GuestState* guest_state) {
    guest_state->system_state.elr_el2 += 4;
}

static bool timer_enabled(GuestState* guest_state) {
    bool enabled = guest_state->cntv_ctl_el0 & TimerControl::ENABLE;
    bool masked = guest_state->cntv_ctl_el0 & TimerControl::IMASK;
    return enabled && !masked;
}

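// If the guest's virtual timer has expired and the timer vector is not
// already active, record it with the interrupt tracker so it can be injected
// into the guest.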
void timer_maybe_interrupt(GuestState* guest_state, GichState* gich_state) {
    if (timer_enabled(guest_state) && current_ticks() >= guest_state->cntv_cval_el0 &&
        !gich_state->active_interrupts.GetOne(kTimerVector)) {
        gich_state->interrupt_tracker.Track(kTimerVector, hypervisor::InterruptType::PHYSICAL);
    }
}

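// WFE simply yields the physical CPU. WFI blocks the VCPU until an interrupt
// arrives, or until the guest's virtual timer deadline if the timer is armed.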
static zx_status_t handle_wfi_wfe_instruction(uint32_t iss, GuestState* guest_state,
                                              GichState* gich_state) {
    next_pc(guest_state);
    const WaitInstruction wi(iss);
    if (wi.is_wfe) {
        ktrace_vcpu_exit(VCPU_WFE_INSTRUCTION, guest_state->system_state.elr_el2);
        thread_reschedule();
        return ZX_OK;
    }
    ktrace_vcpu_exit(VCPU_WFI_INSTRUCTION, guest_state->system_state.elr_el2);
    zx_time_t deadline = ZX_TIME_INFINITE;
    if (timer_enabled(guest_state)) {
        if (current_ticks() >= guest_state->cntv_cval_el0) {
            return ZX_OK;
        }
        deadline = cntpct_to_zx_time(guest_state->cntv_cval_el0);
    }
    return gich_state->interrupt_tracker.Wait(deadline, nullptr);
}

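// Handles SMC calls from the guest. Only the PSCI conduit (SMC #0) is
// supported; PSCI64_CPU_ON is forwarded to userspace as a VCPU startup
// packet, and any other function is reported to the guest as not supported.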
static zx_status_t handle_smc_instruction(uint32_t iss, GuestState* guest_state,
                                          zx_port_packet_t* packet) {
    const SmcInstruction si(iss);
    if (si.imm != kSmcPsci)
        return ZX_ERR_NOT_SUPPORTED;

    next_pc(guest_state);
    switch (guest_state->x[0]) {
    case PSCI64_CPU_ON:
        memset(packet, 0, sizeof(*packet));
        packet->type = ZX_PKT_TYPE_GUEST_VCPU;
        packet->guest_vcpu.type = ZX_PKT_GUEST_VCPU_STARTUP;
        packet->guest_vcpu.startup.id = guest_state->x[1];
        packet->guest_vcpu.startup.entry = guest_state->x[2];
        guest_state->x[0] = PSCI_SUCCESS;
        return ZX_ERR_NEXT;
    default:
        guest_state->x[0] = PSCI_NOT_SUPPORTED;
        return ZX_ERR_NOT_SUPPORTED;
    }
}

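// Walks the stage-2 page table rooted at |table| and performs a clean and
// invalidate of the data cache for every mapped page or block, descending
// recursively through table descriptors.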
static void clean_invalidate_cache(zx_paddr_t table, size_t index_shift) {
    // TODO(abdulla): Make this understand concatenated page tables.
    auto* pte = static_cast<pte_t*>(paddr_to_physmap(table));
    pte_t page = index_shift > MMU_GUEST_PAGE_SIZE_SHIFT ?
                 MMU_PTE_L012_DESCRIPTOR_BLOCK : MMU_PTE_L3_DESCRIPTOR_PAGE;
    for (size_t i = 0; i < PAGE_SIZE / sizeof(pte_t); i++) {
        pte_t desc = pte[i] & MMU_PTE_DESCRIPTOR_MASK;
        pte_t paddr = pte[i] & MMU_PTE_OUTPUT_ADDR_MASK;
        if (desc == page) {
            zx_vaddr_t vaddr = reinterpret_cast<zx_vaddr_t>(paddr_to_physmap(paddr));
            arch_clean_invalidate_cache_range(vaddr, 1lu << index_shift);
        } else if (desc != MMU_PTE_DESCRIPTOR_INVALID) {
            size_t adjust_shift = MMU_GUEST_PAGE_SIZE_SHIFT - kPageTableLevelShift;
            clean_invalidate_cache(paddr, index_shift - adjust_shift);
        }
    }
}

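// Handles trapped accesses to system registers, primarily writes to the
// virtual-memory control registers (trapped via HCR_EL2.TVM) and writes to
// ICC_SGI1R_EL1, which are turned into VCPU interrupt packets for userspace.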
static zx_status_t handle_system_instruction(uint32_t iss, uint64_t* hcr, GuestState* guest_state,
                                             hypervisor::GuestPhysicalAddressSpace* gpas,
                                             zx_port_packet_t* packet) {
    const SystemInstruction si(iss);
    const uint64_t reg = guest_state->x[si.xt];

    switch (si.sysreg) {
    case SystemRegister::MAIR_EL1:
        return SET_SYSREG(mair_el1);
    case SystemRegister::SCTLR_EL1: {
        if (si.read) {
            return ZX_ERR_NOT_SUPPORTED;
        }

        // From ARM DDI 0487B.b, Section D10.2.89: If the value of HCR_EL2.{DC,
        // TGE} is not {0, 0} then in Non-secure state the PE behaves as if the
        // value of the SCTLR_EL1.M field is 0 for all purposes other than
        // returning the value of a direct read of the field.
        //
        // Therefore if SCTLR_EL1.M is set to 1, we need to set HCR_EL2.DC to 0
        // and invalidate the guest physical address space.
        uint32_t sctlr_el1 = reg & UINT32_MAX;
        if (sctlr_el1 & SCTLR_ELX_M) {
            *hcr &= ~HCR_EL2_DC;
            // Additionally, if the guest has also set SCTLR_EL1.C to 1, we no
            // longer need to trap writes to virtual memory control registers,
            // so we can set HCR_EL2.TVM to 0 to improve performance.
            if (sctlr_el1 & SCTLR_ELX_C) {
                *hcr &= ~HCR_EL2_TVM;
            }
            clean_invalidate_cache(gpas->arch_aspace()->arch_table_phys(), MMU_GUEST_TOP_SHIFT);
        }
        guest_state->system_state.sctlr_el1 = sctlr_el1;

        LTRACEF("guest sctlr_el1: %#x\n", sctlr_el1);
        LTRACEF("guest hcr_el2: %#lx\n", *hcr);
        next_pc(guest_state);
        return ZX_OK;
    }
    case SystemRegister::TCR_EL1:
        return SET_SYSREG(tcr_el1);
    case SystemRegister::TTBR0_EL1:
        return SET_SYSREG(ttbr0_el1);
    case SystemRegister::TTBR1_EL1:
        return SET_SYSREG(ttbr1_el1);
    case SystemRegister::OSLAR_EL1:
    case SystemRegister::OSLSR_EL1:
    case SystemRegister::OSDLR_EL1:
    case SystemRegister::DBGPRCR_EL1:
        next_pc(guest_state);
        // These registers are RAZ/WI. Their state is dictated by the host.
        if (si.read) {
            guest_state->x[si.xt] = 0;
        }
        return ZX_OK;
    case SystemRegister::ICC_SGI1R_EL1: {
        if (si.read) {
            // ICC_SGI1R_EL1 is write-only.
            return ZX_ERR_INVALID_ARGS;
        }
        SgiRegister sgi(reg);
        if (sgi.aff3 != 0 || sgi.aff2 != 0 || sgi.aff1 != 0 || sgi.rs != 0) {
            return ZX_ERR_NOT_SUPPORTED;
        }

        memset(packet, 0, sizeof(*packet));
        packet->type = ZX_PKT_TYPE_GUEST_VCPU;
        packet->guest_vcpu.type = ZX_PKT_GUEST_VCPU_INTERRUPT;
        if (sgi.all_but_local) {
            auto vpid = BITS(guest_state->system_state.vmpidr_el2, 8, 0);
            packet->guest_vcpu.interrupt.mask = ~(static_cast<uint64_t>(1) << vpid);
        } else {
            packet->guest_vcpu.interrupt.mask = sgi.target_list;
        }
        packet->guest_vcpu.interrupt.vector = sgi.int_id;
        next_pc(guest_state);
        return ZX_ERR_NEXT;
    }
    }

    dprintf(CRITICAL, "Unhandled system register %#x\n", static_cast<uint16_t>(si.sysreg));
    return ZX_ERR_NOT_SUPPORTED;
}

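// A stage-2 instruction abort means the guest fetched from a guest-physical
// address that is not currently mapped; fault the page into the guest
// physical address space so the guest can resume.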
static zx_status_t handle_instruction_abort(GuestState* guest_state,
                                            hypervisor::GuestPhysicalAddressSpace* gpas) {
    const zx_vaddr_t guest_paddr = guest_state->hpfar_el2;
    zx_status_t status = gpas->PageFault(guest_paddr);
    if (status != ZX_OK) {
        dprintf(CRITICAL, "Unhandled instruction abort %#lx\n", guest_paddr);
    }
    return status;
}

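// A stage-2 data abort is either a normal page fault, an access to a bell
// trap (which queues or returns a ZX_PKT_TYPE_GUEST_BELL packet), or an
// access to a memory trap (which returns a ZX_PKT_TYPE_GUEST_MEM packet to
// userspace for emulation).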
static zx_status_t handle_data_abort(uint32_t iss, GuestState* guest_state,
                                     hypervisor::GuestPhysicalAddressSpace* gpas,
                                     hypervisor::TrapMap* traps,
                                     zx_port_packet_t* packet) {
    zx_vaddr_t guest_paddr = guest_state->hpfar_el2;
    hypervisor::Trap* trap;
    zx_status_t status = traps->FindTrap(ZX_GUEST_TRAP_BELL, guest_paddr, &trap);
    switch (status) {
    case ZX_ERR_NOT_FOUND:
        status = gpas->PageFault(guest_paddr);
        if (status != ZX_OK) {
            dprintf(CRITICAL, "Unhandled data abort %#lx\n", guest_paddr);
        }
        return status;
    case ZX_OK:
        break;
    default:
        return status;
    }
    next_pc(guest_state);

    // Combine the lower bits of FAR_EL2 with HPFAR_EL2 to get the exact IPA.
    guest_paddr |= guest_state->far_el2 & (PAGE_SIZE - 1);
    LTRACEF("guest far_el2: %#lx\n", guest_state->far_el2);

    const DataAbort data_abort(iss);
    switch (trap->kind()) {
    case ZX_GUEST_TRAP_BELL:
        if (data_abort.read)
            return ZX_ERR_NOT_SUPPORTED;
        *packet = {};
        packet->key = trap->key();
        packet->type = ZX_PKT_TYPE_GUEST_BELL;
        packet->guest_bell.addr = guest_paddr;
        if (!trap->HasPort())
            return ZX_ERR_BAD_STATE;
        return trap->Queue(*packet, nullptr);
    case ZX_GUEST_TRAP_MEM:
        if (!data_abort.valid)
            return ZX_ERR_IO_DATA_INTEGRITY;
        *packet = {};
        packet->key = trap->key();
        packet->type = ZX_PKT_TYPE_GUEST_MEM;
        packet->guest_mem.addr = guest_paddr;
        packet->guest_mem.access_size = data_abort.access_size;
        packet->guest_mem.sign_extend = data_abort.sign_extend;
        packet->guest_mem.xt = data_abort.xt;
        packet->guest_mem.read = data_abort.read;
        if (!data_abort.read)
            packet->guest_mem.data = guest_state->x[data_abort.xt];
        return ZX_ERR_NEXT;
    default:
        return ZX_ERR_BAD_STATE;
    }
}

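// Top-level VM exit handler: dispatches on the exception class in ESR_EL2.
// ZX_OK resumes the guest, ZX_ERR_NEXT hands |packet| back to userspace, and
// any other status is propagated to the caller.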
zx_status_t vmexit_handler(uint64_t* hcr, GuestState* guest_state, GichState* gich_state,
                           hypervisor::GuestPhysicalAddressSpace* gpas, hypervisor::TrapMap* traps,
                           zx_port_packet_t* packet) {
    LTRACEF("guest esr_el1: %#x\n", guest_state->system_state.esr_el1);
    LTRACEF("guest esr_el2: %#x\n", guest_state->esr_el2);
    LTRACEF("guest elr_el2: %#lx\n", guest_state->system_state.elr_el2);
    LTRACEF("guest spsr_el2: %#x\n", guest_state->system_state.spsr_el2);

    ExceptionSyndrome syndrome(guest_state->esr_el2);
    zx_status_t status;
    switch (syndrome.ec) {
    case ExceptionClass::WFI_WFE_INSTRUCTION:
        LTRACEF("handling wfi/wfe instruction, iss %#x\n", syndrome.iss);
        status = handle_wfi_wfe_instruction(syndrome.iss, guest_state, gich_state);
        break;
    case ExceptionClass::SMC_INSTRUCTION:
        LTRACEF("handling smc instruction, iss %#x func %#lx\n", syndrome.iss, guest_state->x[0]);
        ktrace_vcpu_exit(VCPU_SMC_INSTRUCTION, guest_state->system_state.elr_el2);
        status = handle_smc_instruction(syndrome.iss, guest_state, packet);
        break;
    case ExceptionClass::SYSTEM_INSTRUCTION:
        LTRACEF("handling system instruction\n");
        ktrace_vcpu_exit(VCPU_SYSTEM_INSTRUCTION, guest_state->system_state.elr_el2);
        status = handle_system_instruction(syndrome.iss, hcr, guest_state, gpas, packet);
        break;
    case ExceptionClass::INSTRUCTION_ABORT:
        LTRACEF("handling instruction abort at %#lx\n", guest_state->hpfar_el2);
        ktrace_vcpu_exit(VCPU_INSTRUCTION_ABORT, guest_state->system_state.elr_el2);
        status = handle_instruction_abort(guest_state, gpas);
        break;
    case ExceptionClass::DATA_ABORT:
        LTRACEF("handling data abort at %#lx\n", guest_state->hpfar_el2);
        ktrace_vcpu_exit(VCPU_DATA_ABORT, guest_state->system_state.elr_el2);
        status = handle_data_abort(syndrome.iss, guest_state, gpas, traps, packet);
        break;
    default:
        LTRACEF("unhandled exception syndrome, ec %#x iss %#x\n",
                static_cast<uint32_t>(syndrome.ec), syndrome.iss);
        ktrace_vcpu_exit(VCPU_UNKNOWN, guest_state->system_state.elr_el2);
        status = ZX_ERR_NOT_SUPPORTED;
        break;
    }
    if (status != ZX_OK && status != ZX_ERR_NEXT && status != ZX_ERR_CANCELED) {
        dprintf(CRITICAL, "VM exit handler for %u (%s) in EL%u at %#lx returned %d\n",
                static_cast<uint32_t>(syndrome.ec),
                exception_class_name(syndrome.ec),
                BITS_SHIFT(guest_state->system_state.spsr_el2, 3, 2),
                guest_state->system_state.elr_el2,
                status);
    }
    return status;
}