/*-
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (c) 2018-2022 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#include <asm/guest/vm.h>
#include <errno.h>
#include <ptdev.h>
#include <asm/guest/assign.h>
#include <asm/vtd.h>
#include <asm/guest/ept.h>
#include <asm/mmu.h>
#include <asm/io.h>
#include <logmsg.h>
#include <config.h>
#include "vpci_priv.h"

/**
 * @pre vdev != NULL
 */
static inline struct msix_table_entry *get_msix_table_entry(const struct pci_vdev *vdev, uint32_t index)
{
	void *hva = hpa2hva(vdev->msix.mmio_hpa + vdev->msix.table_offset);

	return ((struct msix_table_entry *)hva + index);
}

/**
 * @brief Read the MSI-X Capability Structure
 *
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void read_pt_vmsix_cap_reg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val)
{
	if (vdev->msix.is_vmsix_on_msi) {
		*val = pci_vdev_read_vcfg(vdev, offset, bytes);
	} else {
		read_vmsix_cap_reg(vdev, offset, bytes, val);
	}
}

/**
 * @brief Write the MSI-X Capability Structure
 *
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void write_pt_vmsix_cap_reg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val)
{
	uint32_t msgctrl;

	if (write_vmsix_cap_reg(vdev, offset, bytes, val)) {
		msgctrl = pci_vdev_read_vcfg(vdev, vdev->msix.capoff + PCIR_MSIX_CTRL, 2U);
		/* If MSI-X Enable is being set, make sure the INTxDIS bit is set as well */
		if ((msgctrl & PCIM_MSIXCTRL_MSIX_ENABLE) != 0U) {
			enable_disable_pci_intx(vdev->pdev->bdf, false);
		}
		pci_pdev_write_cfg(vdev->pdev->bdf, vdev->msix.capoff + PCIR_MSIX_CTRL, 2U, msgctrl);
	}
}

/**
 * @pre vdev != NULL
 */
static void mask_one_msix_vector(const struct pci_vdev *vdev, uint32_t index)
{
	uint32_t vector_control;
	struct msix_table_entry *pentry = get_msix_table_entry(vdev, index);

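	/* stac()/clac() toggle RFLAGS.AC around the MMIO access so that SMAP does not block this write to the physical MSI-X table */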
	stac();
	vector_control = pentry->vector_control | PCIM_MSIX_VCTRL_MASK;
	mmio_write32(vector_control, (void *)&(pentry->vector_control));
	clac();
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 * @pre vdev->pdev != NULL
 */
static void remap_one_vmsix_entry(const struct pci_vdev *vdev, uint32_t index)
{
	const struct msix_table_entry *ventry;
	struct msix_table_entry *pentry;
	struct msi_info info = {};
	int32_t ret;

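	/* Mask the physical vector first so the device cannot raise an interrupt from a partially updated entry */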
	mask_one_msix_vector(vdev, index);
	ventry = &vdev->msix.table_entries[index];
	if ((ventry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0U) {
		info.addr.full = vdev->msix.table_entries[index].addr;
		info.data.full = vdev->msix.table_entries[index].data;

		ret = ptirq_prepare_msix_remap(vpci2vm(vdev->vpci), vdev->bdf.value, vdev->pdev->bdf.value,
				(uint16_t)index, &info, INVALID_IRTE_ID);
		if (ret == 0) {
			/* Write the table entry to the physical structure */
			pentry = get_msix_table_entry(vdev, index);

			/*
			 * The PCI 3.0 spec allows writing the Message Address and Message Upper Address
			 * fields with a single QWORD write, but some hardware only accepts 32-bit
			 * writes.
			 */
			stac();
			mmio_write32((uint32_t)(info.addr.full), (void *)&(pentry->addr));
			mmio_write32((uint32_t)(info.addr.full >> 32U), (void *)((char *)&(pentry->addr) + 4U));

			mmio_write32(info.data.full, (void *)&(pentry->data));
			mmio_write32(vdev->msix.table_entries[index].vector_control, (void *)&(pentry->vector_control));
			clac();
		}
	}
}

/**
 * @pre io_req != NULL
 * @pre priv_data != NULL
 */
static int32_t pt_vmsix_handle_table_mmio_access(struct io_request *io_req, void *priv_data)
{
	struct acrn_mmio_request *mmio = &io_req->reqs.mmio_request;
	struct pci_vdev *vdev;
	uint32_t index;
	int32_t ret = 0;

	vdev = (struct pci_vdev *)priv_data;
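	/* Only emulate the access when this vdev is still the current user of the physical device */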
	if (vdev->user == vdev) {
		index = rw_vmsix_table(vdev, io_req);

		if ((mmio->direction == ACRN_IOREQ_DIR_WRITE) && (index < vdev->msix.table_count)) {
			if (vdev->msix.is_vmsix_on_msi) {
				remap_one_vmsix_entry_on_msi(vdev, index);
			} else {
				remap_one_vmsix_entry(vdev, index);
			}
		}
	} else {
		ret = -EFAULT;
	}

	return ret;
}

/*
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_unmap_msix(struct pci_vdev *vdev)
{
	uint32_t i;
	uint64_t addr_hi, addr_lo;
	struct pci_msix *msix = &vdev->msix;

	/* Mask all table entries */
	for (i = 0U; i < msix->table_count; i++) {
		msix->table_entries[i].vector_control = PCIM_MSIX_VCTRL_MASK;
		msix->table_entries[i].addr = 0U;
		msix->table_entries[i].data = 0U;
	}

	if (msix->mmio_gpa != 0UL) {
		addr_lo = msix->mmio_gpa + msix->table_offset;
		addr_hi = addr_lo + (msix->table_count * MSIX_TABLE_ENTRY_SIZE);
		addr_lo = round_page_down(addr_lo);
		addr_hi = round_page_up(addr_hi);
		unregister_mmio_emulation_handler(vpci2vm(vdev->vpci), addr_lo, addr_hi);
		msix->mmio_gpa = 0UL;
	}
}

/*
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
void vdev_pt_map_msix(struct pci_vdev *vdev, bool hold_lock)
{
	struct pci_vbar *vbar;
	uint64_t addr_hi, addr_lo;
	struct pci_msix *msix = &vdev->msix;

	vbar = &vdev->vbars[msix->table_bar];
	if (vbar->base_gpa != 0UL) {
		struct acrn_vm *vm = vpci2vm(vdev->vpci);

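		/* Trap the page-aligned range containing the MSI-X table: register an MMIO handler and remove the range from the EPT so guest accesses are emulated */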
		addr_lo = vbar->base_gpa + msix->table_offset;
		addr_hi = addr_lo + (msix->table_count * MSIX_TABLE_ENTRY_SIZE);
		addr_lo = round_page_down(addr_lo);
		addr_hi = round_page_up(addr_hi);
		register_mmio_emulation_handler(vm, pt_vmsix_handle_table_mmio_access,
				addr_lo, addr_hi, vdev, hold_lock);
		ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, addr_lo, addr_hi - addr_lo);
		msix->mmio_gpa = vbar->base_gpa;
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_unmap_mem_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct pci_vbar *vbar = &vdev->vbars[idx];

	if (vbar->base_gpa != 0UL) {
		struct acrn_vm *vm = vpci2vm(vdev->vpci);

		ept_del_mr(vm, (uint64_t *)(vm->arch_vm.nworld_eptp),
				vbar->base_gpa, /* GPA (old vbar) */
				vbar->size);
	}

	if (has_msix_cap(vdev) && (idx == vdev->msix.table_bar)) {
		vdev_pt_unmap_msix(vdev);
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_map_mem_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct pci_vbar *vbar = &vdev->vbars[idx];

	if (vbar->base_gpa != 0UL) {
		struct acrn_vm *vm = vpci2vm(vdev->vpci);

		ept_add_mr(vm, (uint64_t *)(vm->arch_vm.nworld_eptp),
				vbar->base_hpa, /* HPA (pbar) */
				vbar->base_gpa, /* GPA (new vbar) */
				vbar->size,
				EPT_WR | EPT_RD | EPT_UNCACHED);
	}

	if (has_msix_cap(vdev) && (idx == vdev->msix.table_bar)) {
		vdev_pt_map_msix(vdev, true);
	}
}

/**
 * @brief Allow IO bar access
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_allow_io_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct acrn_vm *vm = vpci2vm(vdev->vpci);

	/* For Service VM, all port IO access is allowed by default, so skip Service VM here */
	if (!is_service_vm(vm)) {
		struct pci_vbar *vbar = &vdev->vbars[idx];
		if (vbar->base_gpa != 0UL) {
			allow_guest_pio_access(vm, (uint16_t)vbar->base_gpa, (uint32_t)(vbar->size));
		}
	}
}

/**
 * @brief Deny IO bar access
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_deny_io_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct acrn_vm *vm = vpci2vm(vdev->vpci);

	/* For Service VM, all port IO access is allowed by default, so skip Service VM here */
	if (!is_service_vm(vm)) {
		struct pci_vbar *vbar = &vdev->vbars[idx];
		if (vbar->base_gpa != 0UL) {
			deny_guest_pio_access(vm, (uint16_t)(vbar->base_gpa), (uint32_t)(vbar->size));
		}
	}
}

/**
 * @pre vdev != NULL
 */
void vdev_pt_write_vbar(struct pci_vdev *vdev, uint32_t idx, uint32_t val)
{
	struct pci_vbar *vbar = &vdev->vbars[idx];

	if (is_pci_io_bar(vbar)) {
		vpci_update_one_vbar(vdev, idx, val, vdev_pt_allow_io_vbar, vdev_pt_deny_io_vbar);
	} else {
		/* pci mem bar */
		vpci_update_one_vbar(vdev, idx, val, vdev_pt_map_mem_vbar, vdev_pt_unmap_mem_vbar);
	}
}

/*
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void vdev_bridge_pt_restore_space(struct pci_vdev *vdev)
{
	struct pci_pdev *pdev = vdev->pdev;
	uint32_t pre_val;
	uint32_t offset;

	/* I/O Base (0x1c) and I/O Limit (0x1d) */
	pre_val = pci_vdev_read_vcfg(vdev, PCIR_IO_BASE, 2U);
	if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_IO_BASE, 2U)) {
		pci_pdev_write_cfg(pdev->bdf, PCIR_IO_BASE, 2U, pre_val);
	}

	/* From Memory Base (0x20) up to, but not including, I/O Base Upper 16 Bits (0x30) */
	for (offset = PCIR_MEM_BASE; offset < PCIR_IO_BASE_UPPER_16; offset += 4U) {
		pre_val = pci_vdev_read_vcfg(vdev, offset, 4U);
		if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, offset, 4U)) {
			pci_pdev_write_cfg(pdev->bdf, offset, 4U, pre_val);
		}
	}
}

/*
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void vdev_bridge_pt_restore_bus(struct pci_vdev *vdev)
{
	struct pci_pdev *pdev = vdev->pdev;
	uint32_t pre_val;

	/* Primary Bus Number (0x18) and Secondary Bus Number (0x19) */
	pre_val = pci_vdev_read_vcfg(vdev, PCIR_PRIBUS_1, 2U);
	if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_PRIBUS_1, 2U)) {
		pci_pdev_write_cfg(pdev->bdf, PCIR_PRIBUS_1, 2U, pre_val);
	}

	/* Subordinate Bus Number (0x1a) */
	pre_val = pci_vdev_read_vcfg(vdev, PCIR_SUBBUS_1, 1U);
	if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_SUBBUS_1, 1U)) {
		pci_pdev_write_cfg(pdev->bdf, PCIR_SUBBUS_1, 1U, pre_val);
	}
}

/**
 * PCI base address register (BAR) virtualization:
 *
 * Virtualize the PCI BARs (up to 6 BARs at byte offsets 0x10~0x24 for a type 0 PCI device,
 * 2 BARs at byte offsets 0x10-0x14 for a type 1 PCI device) of the PCI configuration space
 * header.
 *
 * pbar: BAR of the physical PCI device (pci_pdev). The value of a pbar (hpa) is assigned
 * by platform firmware during boot. It is assumed that a valid hpa is always assigned to an
 * mmio pbar; the hypervisor shall not change the value of a pbar.
 *
 * vbar: each pci_pdev has a virtual PCI device (pci_vdev) counterpart. pci_vdev
 * virtualizes all the BARs (called vbars). A vbar can be initialized by the hypervisor by
 * assigning a gpa to it; if a vbar has a value of 0 (unassigned), the guest may assign
 * and program a gpa to it. The guest only sees the vbars; it will not see and can
 * never change the pbars.
 *
 * The hypervisor traps guest changes to an mmio vbar (gpa) to establish the EPT mapping
 * between the vbar (gpa) and the pbar (hpa). A pbar should always be aligned on a 4K boundary.
 *
 * @param vdev Pointer to a vdev structure
 * @param is_sriov_bar Only meaningful when vdev is an SR-IOV PF vdev: init_bars initializes
 *                     the normal PCIe BARs of the PF vdev when is_sriov_bar is false, and
 *                     initializes the SR-IOV VF BARs of the PF vdev when is_sriov_bar is true.
 *                     If vdev is not an SR-IOV PF vdev, is_sriov_bar should be false.
 *
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 * @pre vdev->pdev != NULL
 */
static void init_bars(struct pci_vdev *vdev, bool is_sriov_bar)
{
	uint32_t idx, bar_cnt;
	struct pci_vbar *vbar;
	uint32_t size32, offset, lo, hi = 0U;
	union pci_bdf pbdf;
	uint64_t mask;

	if (is_sriov_bar) {
		bar_cnt = PCI_BAR_COUNT;
	} else {
		vdev->nr_bars = vdev->pdev->nr_bars;
		bar_cnt = vdev->nr_bars;
	}
	pbdf.value = vdev->pdev->bdf.value;

	for (idx = 0U; idx < bar_cnt; idx++) {
		if (is_sriov_bar) {
			vbar = &vdev->sriov.vbars[idx];
			offset = sriov_bar_offset(vdev, idx);
		} else {
			vbar = &vdev->vbars[idx];
			offset = pci_bar_offset(idx);
		}
		lo = pci_pdev_read_cfg(pbdf, offset, 4U);
		vbar->bar_type.bits = lo;

		if (is_pci_reserved_bar(vbar)) {
			continue;
		}

		if (is_pci_io_bar(vbar)) {
			if (lo & ~IO_SPACE_BITMASK) {
				/*
				 * Some buggy x86 BIOSes may program an invalid I/O BAR whose upper 16 bits are not zero.
				 * Such an I/O BAR is not addressable on x86 platforms. Skip it when initializing the
				 * virtual PCI function, as I/O BAR reprogramming in a VM is currently unsupported.
				 */
				pr_warn("%s: %02x:%02x.%x: IO BAR%d value 0x%08x has invalid bits, IO_SPACE_BITMASK "
						"is 0x%08x, Ignore this BAR in vdev",
						__func__, vdev->bdf.bits.b, vdev->bdf.bits.d, vdev->bdf.bits.f, idx, lo,
						IO_SPACE_BITMASK);
				continue;
			}
			mask = PCI_BASE_ADDRESS_IO_MASK;
		} else {
			mask = PCI_BASE_ADDRESS_MEM_MASK;
		}
		vbar->base_hpa = (uint64_t)lo & mask;

		if (is_pci_mem64lo_bar(vbar)) {
			hi = pci_pdev_read_cfg(pbdf, offset + 4U, 4U);
			vbar->base_hpa |= ((uint64_t)hi << 32U);
		}

		if (vbar->base_hpa != 0UL) {
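			/* Standard PCI BAR sizing: write all 1s, read back the size mask, then restore the original value */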
			pci_pdev_write_cfg(pbdf, offset, 4U, ~0U);
			size32 = pci_pdev_read_cfg(pbdf, offset, 4U);
			pci_pdev_write_cfg(pbdf, offset, 4U, lo);

			vbar->mask = size32 & mask;
			vbar->bar_type.bits &= (uint32_t)(~mask);
			vbar->size = (uint64_t)size32 & mask;

			if (is_prelaunched_vm(vpci2vm(vdev->vpci))) {
				lo = (uint32_t)vdev->pci_dev_config->vbar_base[idx];
			}

			if (is_pci_mem64lo_bar(vbar)) {
				idx++;
				if (is_sriov_bar) {
					offset = sriov_bar_offset(vdev, idx);
				} else {
					offset = pci_bar_offset(idx);
				}
				pci_pdev_write_cfg(pbdf, offset, 4U, ~0U);
				size32 = pci_pdev_read_cfg(pbdf, offset, 4U);
				pci_pdev_write_cfg(pbdf, offset, 4U, hi);

				vbar->size |= ((uint64_t)size32 << 32U);
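				/* size & ~(size - 1) isolates the lowest set bit of the combined size mask, which is the BAR size */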
				vbar->size = vbar->size & ~(vbar->size - 1UL);
				vbar->size = round_page_up(vbar->size);

				if (is_sriov_bar) {
					vbar = &vdev->sriov.vbars[idx];
				} else {
					vbar = &vdev->vbars[idx];
				}

				vbar->mask = size32;
				vbar->is_mem64hi = true;

				if (is_prelaunched_vm(vpci2vm(vdev->vpci))) {
					hi = (uint32_t)(vdev->pci_dev_config->vbar_base[idx - 1U] >> 32U);
				}
				/* if it is parsing SR-IOV VF BARs, there is no need to write the vdev BARs */
				if (!is_sriov_bar) {
					pci_vdev_write_vbar(vdev, idx - 1U, lo);
					pci_vdev_write_vbar(vdev, idx, hi);
				}
			} else {
				vbar->size = vbar->size & ~(vbar->size - 1UL);
				if (is_pci_mem32_bar(vbar)) {
					vbar->size = round_page_up(vbar->size);
				}

				/* if it is parsing SR-IOV VF BARs, there is no need to write the vdev BAR */
				if (!is_sriov_bar) {
					pci_vdev_write_vbar(vdev, idx, lo);
				}
			}
		}
	}

	/* Initialize the MSI-X MMIO hpa and size after BAR initialization */
	if (has_msix_cap(vdev) && (!is_sriov_bar)) {
		vdev->msix.mmio_hpa = vdev->vbars[vdev->msix.table_bar].base_hpa;
		vdev->msix.mmio_size = vdev->vbars[vdev->msix.table_bar].size;
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void init_vmsix_pt(struct pci_vdev *vdev)
{
	struct pci_pdev *pdev = vdev->pdev;

	vdev->msix.capoff = pdev->msix.capoff;
	vdev->msix.caplen = pdev->msix.caplen;
	vdev->msix.table_bar = pdev->msix.table_bar;
	vdev->msix.table_offset = pdev->msix.table_offset;
	vdev->msix.table_count = pdev->msix.table_count;

	if (has_msix_cap(vdev)) {
		(void)memcpy_s((void *)&vdev->cfgdata.data_8[pdev->msix.capoff], pdev->msix.caplen,
				(void *)&pdev->msix.cap[0U], pdev->msix.caplen);
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
void deinit_vmsix_pt(struct pci_vdev *vdev)
{
	if (has_msix_cap(vdev)) {
		if (vdev->msix.table_count != 0U) {
			ptirq_remove_msix_remapping(vpci2vm(vdev->vpci), vdev->pdev->bdf.value, vdev->msix.table_count);
			(void)memset((void *)&vdev->msix.table_entries, 0U, sizeof(vdev->msix.table_entries));
			vdev->msix.is_vmsix_on_msi_programmed = false;
		}
	}
}

void vdev_pt_hide_sriov_cap(struct pci_vdev *vdev)
{
	uint32_t pre_pos = vdev->pdev->sriov.pre_pos;
	uint32_t pre_hdr, hdr, vhdr;

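	/* Unlink the SR-IOV extended capability from the virtual capability list: keep the previous
	 * header's capability ID/version (bits 19:0) but take over the SR-IOV header's next-capability
	 * pointer (bits 31:20), so the guest never sees the SR-IOV capability.
	 */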
	pre_hdr = pci_pdev_read_cfg(vdev->pdev->bdf, pre_pos, 4U);
	hdr = pci_pdev_read_cfg(vdev->pdev->bdf, vdev->pdev->sriov.capoff, 4U);

	vhdr = pre_hdr & 0xfffffU;
	vhdr |= hdr & 0xfff00000U;
	pci_vdev_write_vcfg(vdev, pre_pos, 4U, vhdr);
	vdev->pdev->sriov.hide_sriov = true;

	pr_acrnlog("Hide sriov cap for %02x:%02x.%x", vdev->pdev->bdf.bits.b, vdev->pdev->bdf.bits.d, vdev->pdev->bdf.bits.f);
}

/* TODO:
 * The OpRegion is not 4KB aligned, and on some platforms it can take up to 16KB.
 * In that case the OpRegion overlaps 5 pages, so GPU_OPREGION_SIZE is set to
 * 0x5000U (20KB) here.
 *
 * Passing the OpRegion through directly has a potential security issue.
 * A copy + emulation solution to expose the host OpRegion to the guest will be adopted later.
 */
void passthru_gpu_opregion(struct pci_vdev *vdev)
{
	uint32_t gpu_opregion_hpa, gpu_opregion_gpa, gpu_asls_phys;

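	/* The ASLS register in the IGD configuration space holds the physical address of the OpRegion;
	 * map that region read-only into the guest at GPU_OPREGION_GPA and patch the virtual ASLS
	 * register to point at the guest address.
	 */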
	gpu_opregion_gpa = GPU_OPREGION_GPA;
	gpu_asls_phys = pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U);
	gpu_opregion_hpa = gpu_asls_phys & PCIM_ASLS_OPREGION_MASK;
	ept_add_mr(vpci2vm(vdev->vpci), vpci2vm(vdev->vpci)->arch_vm.nworld_eptp,
			gpu_opregion_hpa, gpu_opregion_gpa,
			GPU_OPREGION_SIZE, EPT_RD | EPT_UNCACHED);
	pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, gpu_opregion_gpa | (gpu_asls_phys & ~PCIM_ASLS_OPREGION_MASK));
}

/**
 * @brief Initialize a specified passthrough vdev structure.
 *
 * The function init_vdev_pt is used to initialize a vdev structure. If the vdev represents an
 * SR-IOV physical function (PF), init_vdev_pt also initializes the PF vdev's SR-IOV capability
 * when the parameter is_pf_vdev is true.
 *
 * @param vdev pointer to the vdev data structure
 * @param is_pf_vdev indicates whether vdev is the data structure of a PF, which contains
 *                   the SR-IOV capability
 *
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 * @pre vdev->pdev != NULL
 */
void init_vdev_pt(struct pci_vdev *vdev, bool is_pf_vdev)
{
	uint16_t pci_command;
	uint32_t offset;

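	/* Mirror the physical configuration space header into the virtual configuration space */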
	for (offset = 0U; offset < PCI_CFG_HEADER_LENGTH; offset += 4U) {
		pci_vdev_write_vcfg(vdev, offset, 4U, pci_pdev_read_cfg(vdev->pdev->bdf, offset, 4U));
	}

	/* Initialize the vdev BARs except for SR-IOV VFs; VF BARs are initialized directly from the create_vf function */
	if (vdev->phyfun == NULL) {
		init_bars(vdev, is_pf_vdev);
		init_vmsix_on_msi(vdev);
		if (is_service_vm(vpci2vm(vdev->vpci)) && (vdev->pdev->bdf.value == CONFIG_IGD_SBDF)) {
			pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U));
		}
		if (is_prelaunched_vm(vpci2vm(vdev->vpci)) && (!is_pf_vdev)) {
			pci_command = (uint16_t)pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U);

			/* Disable INTx by setting the Interrupt Disable bit (bit 10) of the Command register */
			pci_command |= 0x400U;
			pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command);

			if (vdev->pdev->bdf.value == CONFIG_IGD_SBDF) {
				passthru_gpu_opregion(vdev);
			}
		}
	} else {
		if (vdev->phyfun->vpci != vdev->vpci) {
			/* The VF is assigned to a User VM */
			uint32_t vid, did;

			vdev->nr_bars = PCI_BAR_COUNT;
			/* SR-IOV VF Vendor ID and Device ID initialization */
			vid = pci_pdev_read_cfg(vdev->phyfun->bdf, PCIR_VENDOR, 2U);
			did = pci_pdev_read_cfg(vdev->phyfun->bdf,
					(vdev->phyfun->sriov.capoff + PCIR_SRIOV_VF_DEV_ID), 2U);
			pci_vdev_write_vcfg(vdev, PCIR_VENDOR, 2U, vid);
			pci_vdev_write_vcfg(vdev, PCIR_DEVICE, 2U, did);
		} else {
			/* The VF is unassigned: when the VF was first created, its BARs had not yet been assigned */
			uint32_t bar_idx;

			for (bar_idx = 0U; bar_idx < vdev->nr_bars; bar_idx++) {
				vdev_pt_map_mem_vbar(vdev, bar_idx);
			}
		}
	}

	if (!is_service_vm(vpci2vm(vdev->vpci)) && (has_sriov_cap(vdev))) {
		vdev_pt_hide_sriov_cap(vdev);
	}
}

/**
 * @brief Destruct a specified passthrough vdev structure.
 *
 * The function deinit_vdev_pt is the destructor corresponding to the function init_vdev_pt.
 *
 * @param vdev pointer to the vdev data structure
 *
 * @pre vdev != NULL
 */
void deinit_vdev_pt(struct pci_vdev *vdev)
{
	/* Check if the vdev is an unassigned SR-IOV VF device */
	if ((vdev->phyfun != NULL) && (vdev->phyfun->vpci == vdev->vpci)) {
		uint32_t bar_idx;

		/* Delete the VF MMIO from the EPT table since the VF physical device is gone */
		for (bar_idx = 0U; bar_idx < vdev->nr_bars; bar_idx++) {
			vdev_pt_unmap_mem_vbar(vdev, bar_idx);
		}
	}
}