/*-
 * Copyright (c) 2011 NetApp, Inc.
 * Copyright (c) 2018-2022 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#include <asm/guest/vm.h>
#include <errno.h>
#include <ptdev.h>
#include <asm/guest/assign.h>
#include <asm/vtd.h>
#include <asm/guest/ept.h>
#include <asm/mmu.h>
#include <asm/io.h>
#include <logmsg.h>
#include <config.h>
#include "vpci_priv.h"

/**
 * @pre vdev != NULL
 */
static inline struct msix_table_entry *get_msix_table_entry(const struct pci_vdev *vdev, uint32_t index)
{
	void *hva = hpa2hva(vdev->msix.mmio_hpa + vdev->msix.table_offset);

	return ((struct msix_table_entry *)hva + index);
}

/**
 * @brief Read the MSI-X Capability Structure
 *
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void read_pt_vmsix_cap_reg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val)
{
	if (vdev->msix.is_vmsix_on_msi) {
		*val = pci_vdev_read_vcfg(vdev, offset, bytes);
	} else {
		read_vmsix_cap_reg(vdev, offset, bytes, val);
	}
}

/**
 * @brief Write the MSI-X Capability Structure
 *
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void write_pt_vmsix_cap_reg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val)
{
	uint32_t msgctrl;

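	/* write_vmsix_cap_reg() updates the virtual capability and returns true when
	 * the change (e.g. MSI-X Enable) must also be propagated to the physical device.
	 */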
	if (write_vmsix_cap_reg(vdev, offset, bytes, val)) {
		msgctrl = pci_vdev_read_vcfg(vdev, vdev->msix.capoff + PCIR_MSIX_CTRL, 2U);
		/* If MSI-X Enable is being set, make sure the INTxDIS bit is set as well */
		if ((msgctrl & PCIM_MSIXCTRL_MSIX_ENABLE) != 0U) {
			enable_disable_pci_intx(vdev->pdev->bdf, false);
		}
		pci_pdev_write_cfg(vdev->pdev->bdf, vdev->msix.capoff + PCIR_MSIX_CTRL, 2U, msgctrl);
	}
}

/**
 * @pre vdev != NULL
 */
static void mask_one_msix_vector(const struct pci_vdev *vdev, uint32_t index)
{
	uint32_t vector_control;
	struct msix_table_entry *pentry = get_msix_table_entry(vdev, index);

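	/* stac()/clac() toggle RFLAGS.AC so the hypervisor can access this MMIO
	 * mapping while SMAP is enabled.
	 */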
	stac();
	vector_control = pentry->vector_control | PCIM_MSIX_VCTRL_MASK;
	mmio_write32(vector_control, (void *)&(pentry->vector_control));
	clac();
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 * @pre vdev->pdev != NULL
 */
static void remap_one_vmsix_entry(const struct pci_vdev *vdev, uint32_t index)
{
	const struct msix_table_entry *ventry;
	struct msix_table_entry *pentry;
	struct msi_info info = {};
	int32_t ret;

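	/* Mask the physical vector while its entry is being reprogrammed */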
	mask_one_msix_vector(vdev, index);
	ventry = &vdev->msix.table_entries[index];
	if ((ventry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0U) {
		info.addr.full = vdev->msix.table_entries[index].addr;
		info.data.full = vdev->msix.table_entries[index].data;

		ret = ptirq_prepare_msix_remap(vpci2vm(vdev->vpci), vdev->bdf.value, vdev->pdev->bdf.value,
					       (uint16_t)index, &info, INVALID_IRTE_ID);
		if (ret == 0) {
			/* Write the table entry to the physical structure */
			pentry = get_msix_table_entry(vdev, index);

			/*
			 * The PCI 3.0 Spec allows writing the Message Address and Message Upper Address
			 * fields with a single QWORD write, but some hardware only accepts 32-bit
			 * writes.
			 */
			stac();
			mmio_write32((uint32_t)(info.addr.full), (void *)&(pentry->addr));
			mmio_write32((uint32_t)(info.addr.full >> 32U), (void *)((char *)&(pentry->addr) + 4U));

			mmio_write32(info.data.full, (void *)&(pentry->data));
			mmio_write32(vdev->msix.table_entries[index].vector_control, (void *)&(pentry->vector_control));
			clac();
		}
	}
}

/**
 * @pre io_req != NULL
 * @pre priv_data != NULL
 */
static int32_t pt_vmsix_handle_table_mmio_access(struct io_request *io_req, void *priv_data)
{
	struct acrn_mmio_request *mmio = &io_req->reqs.mmio_request;
	struct pci_vdev *vdev;
	uint32_t index;
	int32_t ret = 0;

	vdev = (struct pci_vdev *)priv_data;
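	/* vdev->user points to the vdev that currently owns the device; refuse the
	 * access if the device has been assigned to another VM.
	 */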
	if (vdev->user == vdev) {
		index = rw_vmsix_table(vdev, io_req);

		if ((mmio->direction == ACRN_IOREQ_DIR_WRITE) && (index < vdev->msix.table_count)) {
			if (vdev->msix.is_vmsix_on_msi) {
				remap_one_vmsix_entry_on_msi(vdev, index);
			} else {
				remap_one_vmsix_entry(vdev, index);
			}
		}
	} else {
		ret = -EFAULT;
	}

	return ret;
}

/*
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_unmap_msix(struct pci_vdev *vdev)
{
	uint32_t i;
	uint64_t addr_hi, addr_lo;
	struct pci_msix *msix = &vdev->msix;

	/* Mask all table entries */
	for (i = 0U; i < msix->table_count; i++) {
		msix->table_entries[i].vector_control = PCIM_MSIX_VCTRL_MASK;
		msix->table_entries[i].addr = 0U;
		msix->table_entries[i].data = 0U;
	}

	if (msix->mmio_gpa != 0UL) {
		addr_lo = msix->mmio_gpa + msix->table_offset;
		addr_hi = addr_lo + (msix->table_count * MSIX_TABLE_ENTRY_SIZE);
		addr_lo = round_page_down(addr_lo);
		addr_hi = round_page_up(addr_hi);
		unregister_mmio_emulation_handler(vpci2vm(vdev->vpci), addr_lo, addr_hi);
		msix->mmio_gpa = 0UL;
	}
}

/*
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
void vdev_pt_map_msix(struct pci_vdev *vdev, bool hold_lock)
{
	struct pci_vbar *vbar;
	uint64_t addr_hi, addr_lo;
	struct pci_msix *msix = &vdev->msix;

	vbar = &vdev->vbars[msix->table_bar];
	if (vbar->base_gpa != 0UL) {
		struct acrn_vm *vm = vpci2vm(vdev->vpci);

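		/*
		 * Trap guest accesses to the MSI-X table: remove the EPT mapping of the
		 * page-aligned table range and register an MMIO emulation handler for it.
		 */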
		addr_lo = vbar->base_gpa + msix->table_offset;
		addr_hi = addr_lo + (msix->table_count * MSIX_TABLE_ENTRY_SIZE);
		addr_lo = round_page_down(addr_lo);
		addr_hi = round_page_up(addr_hi);
		register_mmio_emulation_handler(vm, pt_vmsix_handle_table_mmio_access,
				addr_lo, addr_hi, vdev, hold_lock);
		ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, addr_lo, addr_hi - addr_lo);
		msix->mmio_gpa = vbar->base_gpa;
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_unmap_mem_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct pci_vbar *vbar = &vdev->vbars[idx];

	if (vbar->base_gpa != 0UL) {
		struct acrn_vm *vm = vpci2vm(vdev->vpci);

		ept_del_mr(vm, (uint64_t *)(vm->arch_vm.nworld_eptp),
			vbar->base_gpa, /* GPA (old vbar) */
			vbar->size);
	}

	if (has_msix_cap(vdev) && (idx == vdev->msix.table_bar)) {
		vdev_pt_unmap_msix(vdev);
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_map_mem_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct pci_vbar *vbar = &vdev->vbars[idx];

	if (vbar->base_gpa != 0UL) {
		struct acrn_vm *vm = vpci2vm(vdev->vpci);

		ept_add_mr(vm, (uint64_t *)(vm->arch_vm.nworld_eptp),
			vbar->base_hpa, /* HPA (pbar) */
			vbar->base_gpa, /* GPA (new vbar) */
			vbar->size,
			EPT_WR | EPT_RD | EPT_UNCACHED);
	}

	if (has_msix_cap(vdev) && (idx == vdev->msix.table_bar)) {
		vdev_pt_map_msix(vdev, true);
	}
}

/**
 * @brief Allow IO bar access
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_allow_io_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct acrn_vm *vm = vpci2vm(vdev->vpci);

	/* For the Service VM, all port IO access is allowed by default, so skip it here */
	if (!is_service_vm(vm)) {
		struct pci_vbar *vbar = &vdev->vbars[idx];
		if (vbar->base_gpa != 0UL) {
			allow_guest_pio_access(vm, (uint16_t)vbar->base_gpa, (uint32_t)(vbar->size));
		}
	}
}

/**
 * @brief Deny IO bar access
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
static void vdev_pt_deny_io_vbar(struct pci_vdev *vdev, uint32_t idx)
{
	struct acrn_vm *vm = vpci2vm(vdev->vpci);

	/* For the Service VM, all port IO access is allowed by default, so skip it here */
	if (!is_service_vm(vm)) {
		struct pci_vbar *vbar = &vdev->vbars[idx];
		if (vbar->base_gpa != 0UL) {
			deny_guest_pio_access(vm, (uint16_t)(vbar->base_gpa), (uint32_t)(vbar->size));
		}
	}
}

/**
 * @pre vdev != NULL
 */
void vdev_pt_write_vbar(struct pci_vdev *vdev, uint32_t idx, uint32_t val)
{
	struct pci_vbar *vbar = &vdev->vbars[idx];

	if (is_pci_io_bar(vbar)) {
		vpci_update_one_vbar(vdev, idx, val, vdev_pt_allow_io_vbar, vdev_pt_deny_io_vbar);
	} else {
		/* pci mem bar */
		vpci_update_one_vbar(vdev, idx, val, vdev_pt_map_mem_vbar, vdev_pt_unmap_mem_vbar);
	}
}

/*
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void vdev_bridge_pt_restore_space(struct pci_vdev *vdev)
{
	struct pci_pdev *pdev = vdev->pdev;
	uint32_t pre_val;
	uint32_t offset;

	/* I/O Base (0x1c) and I/O Limit (0x1d) */
	pre_val = pci_vdev_read_vcfg(vdev, PCIR_IO_BASE, 2U);
	if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_IO_BASE, 2U)) {
		pci_pdev_write_cfg(pdev->bdf, PCIR_IO_BASE, 2U, pre_val);
	}

	/* From Memory Base (0x20) up to, but not including, I/O Base Upper 16 Bits (0x30) */
	for (offset = PCIR_MEM_BASE; offset < PCIR_IO_BASE_UPPER_16; offset += 4U) {
		pre_val = pci_vdev_read_vcfg(vdev, offset, 4U);
		if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, offset, 4U)) {
			pci_pdev_write_cfg(pdev->bdf, offset, 4U, pre_val);
		}
	}
}

/*
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void vdev_bridge_pt_restore_bus(struct pci_vdev *vdev)
{
	struct pci_pdev *pdev = vdev->pdev;
	uint32_t pre_val;

	/* Primary Bus Number (0x18) and Secondary Bus Number (0x19) */
	pre_val = pci_vdev_read_vcfg(vdev, PCIR_PRIBUS_1, 2U);
	if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_PRIBUS_1, 2U)) {
		pci_pdev_write_cfg(pdev->bdf, PCIR_PRIBUS_1, 2U, pre_val);
	}

	/* Subordinate Bus Number (0x1a) */
	pre_val = pci_vdev_read_vcfg(vdev, PCIR_SUBBUS_1, 1U);
	if (pre_val != pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_SUBBUS_1, 1U)) {
		pci_pdev_write_cfg(pdev->bdf, PCIR_SUBBUS_1, 1U, pre_val);
	}
}

/**
 * PCI base address register (BAR) virtualization:
 *
 * Virtualize the PCI BARs of the PCI configuration space header (up to 6 BARs at
 * byte offset 0x10~0x24 for a type 0 PCI device, 2 BARs at byte offset 0x10~0x14
 * for a type 1 PCI device).
 *
 * pbar: BAR of the physical PCI device (pci_pdev); the value of a pbar (hpa) is
 * assigned by platform firmware during boot. It is assumed that a valid hpa is
 * always assigned to an mmio pbar; the hypervisor shall not change the value of
 * a pbar.
 *
 * vbar: each pci_pdev has a virtual PCI device (pci_vdev) counterpart, which
 * virtualizes all the BARs (called vbars). A vbar can be initialized by the
 * hypervisor by assigning a gpa to it; if a vbar has a value of 0 (unassigned),
 * the guest may assign and program a gpa to it. The guest only sees the vbars;
 * it will not see and can never change the pbars.
 *
 * The hypervisor traps guest changes to an mmio vbar (gpa) to establish an ept
 * mapping between the vbar (gpa) and the pbar (hpa). A pbar should always be
 * aligned on a 4K boundary.
 *
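 * For example (hypothetical values): if a pbar reports a 64KB 32-bit memory
 * BAR at hpa 0xfe000000 and the guest programs the corresponding vbar to gpa
 * 0xc0000000, the hypervisor adds an ept mapping gpa 0xc0000000 -> hpa
 * 0xfe000000 of size 0x10000, and removes it again when the guest moves or
 * disables the vbar.
 *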
 * @param vdev         Pointer to a vdev structure
 * @param is_sriov_bar If the vdev is an SR-IOV PF vdev, init_bars initializes the
 *                     normal PCIe BARs of the PF vdev when is_sriov_bar is false,
 *                     and initializes the SR-IOV VF BARs of the PF vdev when
 *                     is_sriov_bar is true. If the vdev is not an SR-IOV PF vdev,
 *                     is_sriov_bar must be false.
 *
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 * @pre vdev->pdev != NULL
 */
static void init_bars(struct pci_vdev *vdev, bool is_sriov_bar)
{
	uint32_t idx, bar_cnt;
	struct pci_vbar *vbar;
	uint32_t size32, offset, lo, hi = 0U;
	union pci_bdf pbdf;
	uint64_t mask;

	if (is_sriov_bar) {
		bar_cnt = PCI_BAR_COUNT;
	} else {
		vdev->nr_bars = vdev->pdev->nr_bars;
		bar_cnt = vdev->nr_bars;
	}
	pbdf.value = vdev->pdev->bdf.value;

	for (idx = 0U; idx < bar_cnt; idx++) {
		if (is_sriov_bar) {
			vbar = &vdev->sriov.vbars[idx];
			offset = sriov_bar_offset(vdev, idx);
		} else {
			vbar = &vdev->vbars[idx];
			offset = pci_bar_offset(idx);
		}
		lo = pci_pdev_read_cfg(pbdf, offset, 4U);
		vbar->bar_type.bits = lo;

		if (is_pci_reserved_bar(vbar)) {
			continue;
		}

		if (is_pci_io_bar(vbar)) {
			if ((lo & ~IO_SPACE_BITMASK) != 0U) {
				/*
				 * Some buggy x86 BIOS may program an invalid I/O BAR whose upper 16 bits are not zero.
				 * Such an I/O BAR is not addressable on x86 platforms. Skip it when initializing the
				 * virtual PCI function, as I/O BAR reprogramming in a VM is currently unsupported.
				 */
				pr_warn("%s: %02x:%02x.%x: IO BAR%d value 0x%08x has invalid bits, IO_SPACE_BITMASK "
				        "is 0x%08x, ignore this BAR in vdev",
					__func__, vdev->bdf.bits.b, vdev->bdf.bits.d, vdev->bdf.bits.f, idx, lo,
					IO_SPACE_BITMASK);
				continue;
			}
			mask = PCI_BASE_ADDRESS_IO_MASK;
		} else {
			mask = PCI_BASE_ADDRESS_MEM_MASK;
		}
		vbar->base_hpa = (uint64_t)lo & mask;

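		/* A 64-bit memory BAR occupies two consecutive BAR registers; fold in the upper half */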
		if (is_pci_mem64lo_bar(vbar)) {
			hi = pci_pdev_read_cfg(pbdf, offset + 4U, 4U);
			vbar->base_hpa |= ((uint64_t)hi << 32U);
		}

		if (vbar->base_hpa != 0UL) {
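			/* Probe the BAR size: write all 1s, read back the size mask, then restore the original value */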
			pci_pdev_write_cfg(pbdf, offset, 4U, ~0U);
			size32 = pci_pdev_read_cfg(pbdf, offset, 4U);
			pci_pdev_write_cfg(pbdf, offset, 4U, lo);

			vbar->mask = size32 & mask;
			vbar->bar_type.bits &= (uint32_t)(~mask);
			vbar->size = (uint64_t)size32 & mask;

			if (is_prelaunched_vm(vpci2vm(vdev->vpci))) {
				lo = (uint32_t)vdev->pci_dev_config->vbar_base[idx];
			}

			if (is_pci_mem64lo_bar(vbar)) {
				idx++;
				if (is_sriov_bar) {
					offset = sriov_bar_offset(vdev, idx);
				} else {
					offset = pci_bar_offset(idx);
				}
				pci_pdev_write_cfg(pbdf, offset, 4U, ~0U);
				size32 = pci_pdev_read_cfg(pbdf, offset, 4U);
				pci_pdev_write_cfg(pbdf, offset, 4U, hi);

				vbar->size |= ((uint64_t)size32 << 32U);
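				/* The size mask has all low bits clear; isolating its lowest set bit yields the BAR size */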
				vbar->size = vbar->size & ~(vbar->size - 1UL);
				vbar->size = round_page_up(vbar->size);

				if (is_sriov_bar) {
					vbar = &vdev->sriov.vbars[idx];
				} else {
					vbar = &vdev->vbars[idx];
				}

				vbar->mask = size32;
				vbar->is_mem64hi = true;

				if (is_prelaunched_vm(vpci2vm(vdev->vpci))) {
					hi = (uint32_t)(vdev->pci_dev_config->vbar_base[idx - 1U] >> 32U);
				}
				/* if it is parsing SRIOV VF BARs, no need to write the vdev BARs */
				if (!is_sriov_bar) {
					pci_vdev_write_vbar(vdev, idx - 1U, lo);
					pci_vdev_write_vbar(vdev, idx, hi);
				}
			} else {
				vbar->size = vbar->size & ~(vbar->size - 1UL);
				if (is_pci_mem32_bar(vbar)) {
					vbar->size = round_page_up(vbar->size);
				}

				/* if it is parsing SRIOV VF BARs, no need to write the vdev BAR */
				if (!is_sriov_bar) {
					pci_vdev_write_vbar(vdev, idx, lo);
				}
			}
		}
	}

	/* Initialize the MSI-X MMIO hpa and size after BAR initialization */
	if (has_msix_cap(vdev) && (!is_sriov_bar)) {
		vdev->msix.mmio_hpa = vdev->vbars[vdev->msix.table_bar].base_hpa;
		vdev->msix.mmio_size = vdev->vbars[vdev->msix.table_bar].size;
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->pdev != NULL
 */
void init_vmsix_pt(struct pci_vdev *vdev)
{
	struct pci_pdev *pdev = vdev->pdev;

	vdev->msix.capoff = pdev->msix.capoff;
	vdev->msix.caplen = pdev->msix.caplen;
	vdev->msix.table_bar = pdev->msix.table_bar;
	vdev->msix.table_offset = pdev->msix.table_offset;
	vdev->msix.table_count = pdev->msix.table_count;

	if (has_msix_cap(vdev)) {
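		/* Copy the physical MSI-X capability bytes into the virtual configuration space */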
		(void)memcpy_s((void *)&vdev->cfgdata.data_8[pdev->msix.capoff], pdev->msix.caplen,
			(void *)&pdev->msix.cap[0U], pdev->msix.caplen);
	}
}

/**
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 */
void deinit_vmsix_pt(struct pci_vdev *vdev)
{
	if (has_msix_cap(vdev)) {
		if (vdev->msix.table_count != 0U) {
			ptirq_remove_msix_remapping(vpci2vm(vdev->vpci), vdev->pdev->bdf.value, vdev->msix.table_count);
			(void)memset((void *)&vdev->msix.table_entries, 0U, sizeof(vdev->msix.table_entries));
			vdev->msix.is_vmsix_on_msi_programmed = false;
		}
	}
}

void vdev_pt_hide_sriov_cap(struct pci_vdev *vdev)
{
	uint32_t pre_pos = vdev->pdev->sriov.pre_pos;
	uint32_t pre_hdr, hdr, vhdr;

	pre_hdr = pci_pdev_read_cfg(vdev->pdev->bdf, pre_pos, 4U);
	hdr = pci_pdev_read_cfg(vdev->pdev->bdf, vdev->pdev->sriov.capoff, 4U);

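	/*
	 * Build a virtual header for the capability preceding SR-IOV: keep its
	 * capability ID and version (bits 0-19), but take the "next capability"
	 * pointer (bits 20-31) from the SR-IOV header, so that the capability
	 * list skips over SR-IOV.
	 */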
	vhdr = pre_hdr & 0xfffffU;
	vhdr |= hdr & 0xfff00000U;
	pci_vdev_write_vcfg(vdev, pre_pos, 4U, vhdr);
	vdev->pdev->sriov.hide_sriov = true;

	pr_acrnlog("Hide sriov cap for %02x:%02x.%x", vdev->pdev->bdf.bits.b, vdev->pdev->bdf.bits.d, vdev->pdev->bdf.bits.f);
}

/* TODO:
 * The OpRegion is not 4KB aligned, and on some platforms it takes up to 16KB.
 * In that case the OpRegion spans 5 pages, so GPU_OPREGION_SIZE is set to
 * 0x5000U (20KB) here.
 *
 * Passing through the OpRegion has a potential security issue; a copy +
 * emulation solution will be adopted later to expose the host OpRegion to
 * the guest.
 */
void passthru_gpu_opregion(struct pci_vdev *vdev)
{
	uint32_t gpu_opregion_hpa, gpu_opregion_gpa, gpu_asls_phys;

	gpu_opregion_gpa = GPU_OPREGION_GPA;
	gpu_asls_phys = pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U);
	gpu_opregion_hpa = gpu_asls_phys & PCIM_ASLS_OPREGION_MASK;
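	/* Map the host OpRegion read-only and uncached into the guest at the fixed GPA,
	 * then patch the virtual ASLS register to point at it.
	 */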
	ept_add_mr(vpci2vm(vdev->vpci), vpci2vm(vdev->vpci)->arch_vm.nworld_eptp,
			gpu_opregion_hpa, gpu_opregion_gpa,
			GPU_OPREGION_SIZE, EPT_RD | EPT_UNCACHED);
	pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, gpu_opregion_gpa | (gpu_asls_phys & ~PCIM_ASLS_OPREGION_MASK));
}

/**
 * @brief Initialize a specified passthrough vdev structure.
 *
 * The function init_vdev_pt is used to initialize a vdev structure. If the vdev represents an
 * SR-IOV physical function (PF) virtual device, init_vdev_pt also initializes the PF vdev's
 * SR-IOV BARs when the parameter is_pf_vdev is true.
 *
 * @param vdev        pointer to the vdev data structure
 * @param is_pf_vdev  indicates whether the vdev is the data structure of a PF, which contains
 *                    the SR-IOV capability
 *
 * @pre vdev != NULL
 * @pre vdev->vpci != NULL
 * @pre vdev->pdev != NULL
 */
void init_vdev_pt(struct pci_vdev *vdev, bool is_pf_vdev)
{
	uint16_t pci_command;
	uint32_t offset;

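	/* Copy the physical configuration space header into the virtual configuration space */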
	for (offset = 0U; offset < PCI_CFG_HEADER_LENGTH; offset += 4U) {
		pci_vdev_write_vcfg(vdev, offset, 4U, pci_pdev_read_cfg(vdev->pdev->bdf, offset, 4U));
	}

	/* Initialize the vdev BARs except for SRIOV VFs; VF BARs are initialized directly from the create_vf function */
	if (vdev->phyfun == NULL) {
		init_bars(vdev, is_pf_vdev);
		init_vmsix_on_msi(vdev);
		if (is_service_vm(vpci2vm(vdev->vpci)) && (vdev->pdev->bdf.value == CONFIG_IGD_SBDF)) {
			pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U));
		}
		if (is_prelaunched_vm(vpci2vm(vdev->vpci)) && (!is_pf_vdev)) {
			pci_command = (uint16_t)pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U);

			/* Disable INTx by setting the Interrupt Disable bit (bit 10) in the Command register */
			pci_command |= 0x400U;
			pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command);

			if (vdev->pdev->bdf.value == CONFIG_IGD_SBDF) {
				passthru_gpu_opregion(vdev);
			}
		}
	} else {
		if (vdev->phyfun->vpci != vdev->vpci) {
			/* VF is assigned to a User VM */
			uint32_t vid, did;

			vdev->nr_bars = PCI_BAR_COUNT;
			/* SRIOV VF Vendor ID and Device ID initialization */
			vid = pci_pdev_read_cfg(vdev->phyfun->bdf, PCIR_VENDOR, 2U);
			did = pci_pdev_read_cfg(vdev->phyfun->bdf,
				(vdev->phyfun->sriov.capoff + PCIR_SRIOV_VF_DEV_ID), 2U);
			pci_vdev_write_vcfg(vdev, PCIR_VENDOR, 2U, vid);
			pci_vdev_write_vcfg(vdev, PCIR_DEVICE, 2U, did);
		} else {
			/* VF is unassigned: when the VF was first created, its BARs hadn't been assigned yet */
			uint32_t bar_idx;

			for (bar_idx = 0U; bar_idx < vdev->nr_bars; bar_idx++) {
				vdev_pt_map_mem_vbar(vdev, bar_idx);
			}
		}
	}

	if (!is_service_vm(vpci2vm(vdev->vpci)) && has_sriov_cap(vdev)) {
		vdev_pt_hide_sriov_cap(vdev);
	}
}

/**
 * @brief Deinitialize a specified passthrough vdev structure.
 *
 * The function deinit_vdev_pt is the destructor corresponding to the function init_vdev_pt.
 *
 * @param vdev  pointer to the vdev data structure
 *
 * @pre vdev != NULL
 */
void deinit_vdev_pt(struct pci_vdev *vdev)
{
	/* Check if the vdev is an unassigned SR-IOV VF device */
	if ((vdev->phyfun != NULL) && (vdev->phyfun->vpci == vdev->vpci)) {
		uint32_t bar_idx;

		/* Delete the VF MMIO from the EPT table since the VF physical device is gone */
		for (bar_idx = 0U; bar_idx < vdev->nr_bars; bar_idx++) {
			vdev_pt_unmap_mem_vbar(vdev, bar_idx);
		}
	}
}