// Copyright 2017 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT

#include <arch/hypervisor.h>
#include <arch/ops.h>
#include <bits.h>
#include <dev/interrupt/arm_gic_common.h>
#include <dev/interrupt/arm_gic_hw_interface.h>
#include <fbl/auto_call.h>
#include <hypervisor/cpu.h>
#include <hypervisor/guest_physical_address_space.h>
#include <hypervisor/ktrace.h>
#include <kernel/event.h>
#include <kernel/mp.h>
#include <lib/ktrace.h>
#include <platform/timer.h>
#include <vm/physmap.h>
#include <vm/pmm.h>
#include <zircon/errors.h>
#include <zircon/syscalls/hypervisor.h>

#include "el2_cpu_state_priv.h"
#include "vmexit_priv.h"

static constexpr uint32_t kGichHcrEn = 1u << 0;
static constexpr uint32_t kGichHcrUie = 1u << 1;
static constexpr uint32_t kGichMisrU = 1u << 1;
static constexpr uint32_t kSpsrDaif = 0b1111 << 6;
static constexpr uint32_t kSpsrEl1h = 0b0101;
static constexpr uint32_t kSpsrNzcv = 0b1111 << 28;

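// Derive the guest's VMPIDR_EL2 from the host MPIDR_EL1: Aff0 is replaced with
// the zero-based VPID, the other affinity fields and the MT bit are cleared,
// and the non-affinity bits selected by the mask are preserved.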
static uint64_t vmpidr_of(uint8_t vpid, uint64_t mpidr) {
    return (vpid - 1) | (mpidr & 0xffffff00fe000000);
}

static void gich_maybe_interrupt(GichState* gich_state) {
    // From ARM GIC v3/v4, Section 4.8: If, on a particular CPU interface,
    // multiple pending interrupts have the same priority, and have sufficient
    // priority for the interface to signal them to the PE, it is IMPLEMENTATION
    // DEFINED how the interface selects which interrupt to signal.
    //
    // If interrupts are of the same priority, we can choose whatever ordering
    // we prefer when populating the LRs.
    for (uint64_t elrsr = gich_state->elrsr; elrsr != 0;) {
        uint32_t vector;
        hypervisor::InterruptType type = gich_state->interrupt_tracker.Pop(&vector);
        if (type == hypervisor::InterruptType::INACTIVE) {
            // There are no more pending interrupts.
            break;
        } else if (gich_state->active_interrupts.GetOne(vector)) {
            // Skip an interrupt if it was already active.
            continue;
        }
        uint32_t lr_index = __builtin_ctzl(elrsr);
        bool hw = type == hypervisor::InterruptType::PHYSICAL;
        // From ARM GIC v3/v4, Section 4.8: If the GIC implements fewer than 256
        // priority levels, the low-order bits of the priority fields are
        // RAZ/WI.
        // ...
        // In the GIC prioritization scheme, lower numbers have higher priority.
        //
        // We may have as few as 16 priority levels, so step by 16 to the next
        // lowest priority in order to prioritise SGIs and PPIs over SPIs.
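        //
        // For example, with only 16 priority levels, bits [3:0] of the priority
        // are RAZ/WI, so the usable values are 0x00, 0x10, ..., 0xf0: SGIs and
        // PPIs get the highest priority (0x00) and SPIs the next one (0x10).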
        uint8_t prio = vector < GIC_BASE_SPI ? 0 : 0x10;
        uint64_t lr = gic_get_lr_from_vector(hw, prio, vector);
        gich_state->lr[lr_index] = lr;
        elrsr &= ~(1u << lr_index);
    }
}

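// Record which interrupts still occupy a list register, so that
// gich_maybe_interrupt() does not inject a second copy of them.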
static void gich_active_interrupts(GichState* gich_state) {
    gich_state->active_interrupts.ClearAll();
    for (uint32_t i = 0; i < gich_state->num_lrs; i++) {
        if (BIT(gich_state->elrsr, i)) {
            continue;
        }
        uint32_t vector = gic_get_vector_from_lr(gich_state->lr[i]);
        gich_state->active_interrupts.SetOne(vector);
    }
}

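// Pick the ktrace exit reason for a physical interrupt, distinguishing the
// underflow maintenance interrupt from any other physical interrupt.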
static VcpuExit vmexit_interrupt_ktrace_meta() {
    if (gic_read_gich_misr() & kGichMisrU) {
        return VCPU_UNDERFLOW_MAINTENANCE_INTERRUPT;
    }
    return VCPU_PHYSICAL_INTERRUPT;
}

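// Loads the virtual GIC interface state onto the hardware for the duration of
// a guest run, and saves it back on destruction. Physical interrupts are
// disabled while the state is loaded.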
AutoGich::AutoGich(GichState* gich_state)
    : gich_state_(gich_state) {
    DEBUG_ASSERT(!arch_ints_disabled());
    arch_disable_ints();

    // Load
    gic_write_gich_vmcr(gich_state_->vmcr);
    for (uint32_t i = 0; i < gich_state_->num_aprs; i++) {
        gic_write_gich_apr(i, gich_state_->apr[i]);
    }
    for (uint32_t i = 0; i < gich_state_->num_lrs; i++) {
        gic_write_gich_lr(i, gich_state_->lr[i]);
    }

    // The underflow maintenance interrupt is signalled when at most one list
    // register holds a valid entry. We enable it when there are not enough free
    // LRs to inject all pending interrupts, so that once the guest has processed
    // most of them, the resulting VM exit gives us a chance to inject the
    // remaining interrupts. The point of this is to reduce interrupt latency.
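    //
    // For example, with four LRs and six pending interrupts, two interrupts
    // stay queued; once the guest has drained the LRs down to one valid entry,
    // the maintenance interrupt forces an exit and the rest can be injected.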
    uint32_t gich_hcr = kGichHcrEn;
    if (gich_state_->interrupt_tracker.Pending() && gich_state_->num_lrs > 1) {
        gich_hcr |= kGichHcrUie;
    }
    gic_write_gich_hcr(gich_hcr);
}

AutoGich::~AutoGich() {
    DEBUG_ASSERT(arch_ints_disabled());

    // Save
    gich_state_->vmcr = gic_read_gich_vmcr();
    gich_state_->elrsr = gic_read_gich_elrsr();
    for (uint32_t i = 0; i < gich_state_->num_aprs; i++) {
        gich_state_->apr[i] = gic_read_gich_apr(i);
    }
    for (uint32_t i = 0; i < gich_state_->num_lrs; i++) {
        gich_state_->lr[i] = !BIT(gich_state_->elrsr, i) ? gic_read_gich_lr(i) : 0;
    }

    arch_enable_ints();
}

zx_status_t El2StatePtr::Alloc() {
    zx_status_t status = page_.Alloc(0);
    if (status != ZX_OK) {
        return status;
    }
    state_ = page_.VirtualAddress<El2State>();
    return ZX_OK;
}

// Returns the number of active priorities registers, based on the number of
// preemption bits.
//
// From ARM GIC v2, Section 5.3.2: The number of preemption bits implemented,
// minus one. In GICv2, the only valid value is 5 bits.
//
// From ARM GIC v3/v4, Section 8.4.2: If 5 bits of preemption are implemented
// (bits [7:3] of priority), then there are 32 preemption levels... If 6 bits of
// preemption are implemented (bits [7:2] of priority), then there are 64
// preemption levels... If 7 bits of preemption are implemented (bits [7:1] of
// priority), then there are 128 preemption levels...
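//
// Each 32-bit active priorities register tracks 32 preemption levels, so for
// example 5 preemption bits (32 levels) need a single register, while 7 bits
// (128 levels) need four.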
static uint32_t num_aprs(uint32_t num_pres) {
    return 1u << (num_pres - 5u);
}

// static
zx_status_t Vcpu::Create(Guest* guest, zx_vaddr_t entry, ktl::unique_ptr<Vcpu>* out) {
    hypervisor::GuestPhysicalAddressSpace* gpas = guest->AddressSpace();
    if (entry >= gpas->size()) {
        return ZX_ERR_INVALID_ARGS;
    }

    uint8_t vpid;
    zx_status_t status = guest->AllocVpid(&vpid);
    if (status != ZX_OK) {
        return status;
    }
    auto auto_call = fbl::MakeAutoCall([guest, vpid]() { guest->FreeVpid(vpid); });

    // For efficiency, we pin the thread to the CPU.
    thread_t* thread = hypervisor::pin_thread(vpid);

    fbl::AllocChecker ac;
    ktl::unique_ptr<Vcpu> vcpu(new (&ac) Vcpu(guest, vpid, thread));
    if (!ac.check()) {
        return ZX_ERR_NO_MEMORY;
    }
    auto_call.cancel();

    status = vcpu->gich_state_.interrupt_tracker.Init();
    if (status != ZX_OK) {
        return status;
    }

    status = vcpu->el2_state_.Alloc();
    if (status != ZX_OK) {
        return status;
    }

    vcpu->gich_state_.active_interrupts.Reset(kNumInterrupts);
    vcpu->gich_state_.num_aprs = num_aprs(gic_get_num_pres());
    vcpu->gich_state_.num_lrs = gic_get_num_lrs();
    vcpu->gich_state_.vmcr = gic_default_gich_vmcr();
    vcpu->gich_state_.elrsr = (1ul << gic_get_num_lrs()) - 1;
    vcpu->el2_state_->guest_state.system_state.elr_el2 = entry;
    vcpu->el2_state_->guest_state.system_state.spsr_el2 = kSpsrDaif | kSpsrEl1h;
    uint64_t mpidr = __arm_rsr64("mpidr_el1");
    vcpu->el2_state_->guest_state.system_state.vmpidr_el2 = vmpidr_of(vpid, mpidr);
    vcpu->el2_state_->host_state.system_state.vmpidr_el2 = mpidr;
    vcpu->hcr_ = HCR_EL2_VM | HCR_EL2_PTW | HCR_EL2_FMO | HCR_EL2_IMO | HCR_EL2_DC | HCR_EL2_TWI |
                 HCR_EL2_TWE | HCR_EL2_TSC | HCR_EL2_TVM | HCR_EL2_RW;

    *out = ktl::move(vcpu);
    return ZX_OK;
}

Vcpu::Vcpu(Guest* guest, uint8_t vpid, const thread_t* thread)
    : guest_(guest), vpid_(vpid), thread_(thread), running_(false) {}

Vcpu::~Vcpu() {
    __UNUSED zx_status_t status = guest_->FreeVpid(vpid_);
    DEBUG_ASSERT(status == ZX_OK);
}

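// Repeatedly enters the guest and handles exits in the kernel where possible;
// returns once an exit needs to be reported to user space via |packet|, or on
// error.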
zx_status_t Vcpu::Resume(zx_port_packet_t* packet) {
    if (!hypervisor::check_pinned_cpu_invariant(vpid_, thread_)) {
        return ZX_ERR_BAD_STATE;
    }
    const ArchVmAspace& aspace = *guest_->AddressSpace()->arch_aspace();
    zx_paddr_t vttbr = arm64_vttbr(aspace.arch_asid(), aspace.arch_table_phys());
    GuestState* guest_state = &el2_state_->guest_state;
    zx_status_t status;
    do {
        timer_maybe_interrupt(guest_state, &gich_state_);
        gich_maybe_interrupt(&gich_state_);
        {
            AutoGich auto_gich(&gich_state_);

            ktrace(TAG_VCPU_ENTER, 0, 0, 0, 0);
            running_.store(true);
            status = arm64_el2_resume(vttbr, el2_state_.PhysicalAddress(), hcr_);
            running_.store(false);
        }
        gich_active_interrupts(&gich_state_);
        if (status == ZX_ERR_NEXT) {
            // We received a physical interrupt. If it was due to the thread
            // being killed, then we should exit with an error; otherwise, return
            // to the guest.
            ktrace_vcpu_exit(vmexit_interrupt_ktrace_meta(),
                             guest_state->system_state.elr_el2);
            status = thread_->signals & THREAD_SIGNAL_KILL ? ZX_ERR_CANCELED : ZX_OK;
        } else if (status == ZX_OK) {
            status = vmexit_handler(&hcr_, guest_state, &gich_state_, guest_->AddressSpace(),
                                    guest_->Traps(), packet);
        } else {
            ktrace_vcpu_exit(VCPU_FAILURE, guest_state->system_state.elr_el2);
            dprintf(INFO, "VCPU resume failed: %d\n", status);
        }
    } while (status == ZX_OK);
    return status == ZX_ERR_NEXT ? ZX_OK : status;
}

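// Queues an interrupt for this VCPU and returns the mask of CPUs that need an
// IPI for the VCPU to notice it; the mask is empty if the VCPU was already
// signalled or is not currently running.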
cpu_mask_t Vcpu::Interrupt(uint32_t vector, hypervisor::InterruptType type) {
    bool signaled = false;
    gich_state_.interrupt_tracker.Interrupt(vector, type, &signaled);
    if (signaled || !running_.load()) {
        return 0;
    }
    return cpu_num_to_mask(hypervisor::cpu_of(vpid_));
}

void Vcpu::VirtualInterrupt(uint32_t vector) {
    cpu_mask_t mask = Interrupt(vector, hypervisor::InterruptType::VIRTUAL);
    if (mask != 0) {
        mp_interrupt(MP_IPI_TARGET_MASK, mask);
    }
}

zx_status_t Vcpu::ReadState(uint32_t kind, void* buf, size_t len) const {
    if (!hypervisor::check_pinned_cpu_invariant(vpid_, thread_)) {
        return ZX_ERR_BAD_STATE;
    } else if (kind != ZX_VCPU_STATE || len != sizeof(zx_vcpu_state_t)) {
        return ZX_ERR_INVALID_ARGS;
    }

    auto state = static_cast<zx_vcpu_state_t*>(buf);
    memcpy(state->x, el2_state_->guest_state.x, sizeof(uint64_t) * GS_NUM_REGS);
    state->sp = el2_state_->guest_state.system_state.sp_el1;
    state->cpsr = el2_state_->guest_state.system_state.spsr_el2 & kSpsrNzcv;
    return ZX_OK;
}

zx_status_t Vcpu::WriteState(uint32_t kind, const void* buf, size_t len) {
    if (!hypervisor::check_pinned_cpu_invariant(vpid_, thread_)) {
        return ZX_ERR_BAD_STATE;
    } else if (kind != ZX_VCPU_STATE || len != sizeof(zx_vcpu_state_t)) {
        return ZX_ERR_INVALID_ARGS;
    }

    auto state = static_cast<const zx_vcpu_state_t*>(buf);
    memcpy(el2_state_->guest_state.x, state->x, sizeof(uint64_t) * GS_NUM_REGS);
    el2_state_->guest_state.system_state.sp_el1 = state->sp;
    el2_state_->guest_state.system_state.spsr_el2 |= state->cpsr & kSpsrNzcv;
    return ZX_OK;
}