1 /*
2  * Copyright (C) 2020-2024 Intel Corporation.
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 #ifdef CONFIG_IVSHMEM_ENABLED
8 #include <asm/guest/vm.h>
9 #include <asm/mmu.h>
10 #include <asm/guest/ept.h>
11 #include <logmsg.h>
12 #include <errno.h>
13 #include <ivshmem.h>
14 #include <ivshmem_cfg.h>
15 #include "vpci_priv.h"
16 
17 /**
18  * @addtogroup vp-dm_vperipheral
19  *
20  * @{
21  */
22 
23 /**
24  * @file
25  * @brief Implementation of Inter-VM shared memory device (ivshmem).
26  *
 * This file defines macros, data structures and functions to support ivshmem devices. It also implements necessary
 * functions to model an ivshmem device as a PCI device.
29  */
30 
31 /* config space of ivshmem device */
32 #define	IVSHMEM_CLASS		0x05U
33 #define	IVSHMEM_REV		0x01U
34 
35 /*
36  * ivshmem device supports bar0, bar1 and bar2,
37  * indexes of them shall begin with 0 and be continuous.
38  */
39 #define IVSHMEM_MMIO_BAR	0U
40 #define IVSHMEM_MSIX_BAR	1U
41 #define IVSHMEM_SHM_BAR	2U
42 
43 #define IVSHMEM_MMIO_BAR_SIZE 256UL
44 
45 /* The device-specific registers of ivshmem device */
46 #define	IVSHMEM_IRQ_MASK_REG	0x0U
47 #define	IVSHMEM_IRQ_STA_REG	0x4U
48 #define	IVSHMEM_IV_POS_REG	0x8U
49 #define	IVSHMEM_DOORBELL_REG	0xcU
50 
/*
 * Shared memory regions generated from the scenario configuration
 * (IVSHMEM_SHM_REGIONS). NOTE(review): the array is fixed at 8 entries and
 * loops iterate over all of them — entries beyond the configured regions are
 * presumably zero-initialized (size 0, empty name); confirm against the
 * generated ivshmem_cfg.h.
 */
static struct ivshmem_shm_region mem_regions[8] = {
	IVSHMEM_SHM_REGIONS
};
54 
/*
 * Layout of a Doorbell register write: the low 16 bits select the MSI-X
 * vector to raise and the high 16 bits select the destination peer ID
 * (see ivshmem_mmio_handler()/ivshmem_server_notify_peer()).
 */
union ivshmem_doorbell {
	uint32_t val;
	struct {
		uint16_t vector_index;
		uint16_t peer_id;
	} reg;
};
62 
/* Per-device emulation state for one ivshmem PCI function. */
struct ivshmem_device {
	/* Owning virtual PCI device; NULL marks this slot as free (see create_ivshmem_device()). */
	struct pci_vdev* pcidev;
	/* BAR0 register file, accessible as a flat word array or by register name. */
	union {
		uint32_t data[4];
		struct {
			uint32_t irq_mask;	/* R/W; unused for ivshmem Rev.1 */
			uint32_t irq_state;	/* R/W; unused for ivshmem Rev.1 */
			/*
			 * If the device is not configured for interrupts,
			 * this is zero. Else, ivpos is the device's ID.
			 */
			uint32_t ivpos;

			/* Writing doorbell register requests to interrupt a peer */
			union ivshmem_doorbell doorbell;
		} regs;
	} mmio;
	/* Shared memory region backing BAR2; set when the device is bound to the server. */
	struct ivshmem_shm_region *region;
};
82 
/* Static pool of ivshmem device slots; a slot with pcidev == NULL is free. */
static struct ivshmem_device ivshmem_dev[IVSHMEM_DEV_NUM];
/* Serializes slot allocation/release in ivshmem_dev[]. */
static spinlock_t ivshmem_dev_lock = { .head = 0U, .tail = 0U, };
85 
86 /**
87  * @brief Initialize the shared memory regions for all ivshmem devices.
88  *
89  * An ivshmem device is used to transfer data between VMs based on shared memory region. Basic ivshmem information is
90  * configured in scenario file. After compilation, every shared memory region is stored in struct ivshmem_shm_region.
91  * This function initializes all shared memory regions for ivshmem devices and it is usually called before all VMs are
92  * created.
93  *
94  * IVSHMEM_SHM_SIZE is the sum of all ivshmem shared memory regions in bytes. It rounds IVSHMEM_SHM_SIZE up to PDE_SIZE
95  * (1 GiB) and allocates a contiguous block of memory for these memory regions from host e820. For detailed allocation
96  * operations, refer to e820_alloc_memory(). The function then iterates over the memory regions and assigns the
97  * allocated physical addresses to each region.
98  *
99  * @return None
100  *
101  * @pre N/A
102  *
103  * @post N/A
104  */
init_ivshmem_shared_memory()105 void init_ivshmem_shared_memory()
106 {
107 	uint32_t i;
108 	uint64_t addr;
109 
110 	addr = e820_alloc_memory(roundup(IVSHMEM_SHM_SIZE, PDE_SIZE), MEM_SIZE_MAX);
111 	for (i = 0U; i < ARRAY_SIZE(mem_regions); i++) {
112 		mem_regions[i].hpa = addr;
113 		addr += mem_regions[i].size;
114 	}
115 }
116 
117 /*
118  * @pre name != NULL
119  */
find_shm_region(const char * name)120 static struct ivshmem_shm_region *find_shm_region(const char *name)
121 {
122 	uint32_t i, num = ARRAY_SIZE(mem_regions);
123 
124 	for (i = 0U; i < num; i++) {
125 		if (strncmp(name, mem_regions[i].name, sizeof(mem_regions[0].name)) == 0) {
126 			break;
127 		}
128 	}
129 	return ((i < num) ? &mem_regions[i] : NULL);
130 }
131 
132 /*
133  * @brief There are two ivshmem server implementation in HV-land and
134  *	  DM-land, they're used for briding the notification channel
135  *	  between ivshmem devices acrossed VMs.
136  *
137  * @pre vdev != NULL
138  * @pre region->doorbell_peers[vm_id] = NULL
139  */
ivshmem_server_bind_peer(struct pci_vdev * vdev)140 static void ivshmem_server_bind_peer(struct pci_vdev *vdev)
141 {
142 	uint16_t vm_id;
143 	struct acrn_vm_pci_dev_config *dev_config = vdev->pci_dev_config;
144 	struct ivshmem_device *ivs_dev = (struct ivshmem_device *)vdev->priv_data;
145 	struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
146 
147 	if (region != NULL) {
148 		vm_id = vpci2vm(vdev->vpci)->vm_id;
149 		/* Device ID equals to VM ID*/
150 		ivs_dev->mmio.regs.ivpos = vm_id;
151 		ivs_dev->region = region;
152 		region->doorbell_peers[vm_id] = ivs_dev;
153 	}
154 }
155 
156 /*
157  * @pre vdev != NULL
158  */
ivshmem_server_unbind_peer(struct pci_vdev * vdev)159 static void ivshmem_server_unbind_peer(struct pci_vdev *vdev)
160 {
161 	struct ivshmem_shm_region *region = ((struct ivshmem_device *)vdev->priv_data)->region;
162 
163 	region->doorbell_peers[vpci2vm(vdev->vpci)->vm_id] = NULL;
164 }
165 
166 /*
167  * @pre src_ivs_dev != NULL
168  */
/*
 * Deliver a doorbell notification to a peer device by injecting the MSI
 * programmed in the peer's MSI-X table entry.
 *
 * @pre src_ivs_dev != NULL
 */
static void ivshmem_server_notify_peer(struct ivshmem_device *src_ivs_dev, uint16_t dest_peer_id, uint16_t vector_index)
{
	struct ivshmem_shm_region *region = src_ivs_dev->region;

	/* Out-of-range peer IDs are silently dropped. */
	if (dest_peer_id < MAX_IVSHMEM_PEER_NUM) {
		struct ivshmem_device *dest_ivs_dev = region->doorbell_peers[dest_peer_id];
		bool deliverable = (dest_ivs_dev != NULL) && vpci_vmsix_enabled(dest_ivs_dev->pcidev)
			&& (vector_index < dest_ivs_dev->pcidev->msix.table_count);

		if (deliverable) {
			struct msix_table_entry *entry = &(dest_ivs_dev->pcidev->msix.table_entries[vector_index]);

			if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0U) {
				/* Entry is unmasked: inject the MSI into the peer VM. */
				struct acrn_vm *dest_vm = vpci2vm(dest_ivs_dev->pcidev->vpci);

				vlapic_inject_msi(dest_vm, entry->addr, entry->data);
			} else {
				pr_err("%s,target msix entry [%d] is masked.\n",
					__func__, vector_index);
			}
		} else {
			pr_err("%s,Invalid peer, ID = %d, vector index [%d] or MSI-X is disabled.\n",
				__func__, dest_peer_id, vector_index);
		}
	}
}
197 
198 /*
199  * @post vdev->priv_data != NULL
200  */
create_ivshmem_device(struct pci_vdev * vdev)201 static void create_ivshmem_device(struct pci_vdev *vdev)
202 {
203 	uint32_t i;
204 
205 	spinlock_obtain(&ivshmem_dev_lock);
206 	for (i = 0U; i < IVSHMEM_DEV_NUM; i++) {
207 		if (ivshmem_dev[i].pcidev == NULL) {
208 			ivshmem_dev[i].pcidev = vdev;
209 			vdev->priv_data = &ivshmem_dev[i];
210 			break;
211 		}
212 	}
213 	spinlock_release(&ivshmem_dev_lock);
214 	ASSERT((i < IVSHMEM_DEV_NUM), "failed to find and set ivshmem device");
215 	/*
216 	 * Clear ivshmem_device mmio to ensure the same initial
217 	 * states after VM reboot.
218 	 */
219 	memset(&ivshmem_dev[i].mmio, 0U, sizeof(uint32_t) * 4);
220 }
221 
222 /**
223  * @brief Handle MMIO (Memory-Mapped I/O) operations for the ivshmem device.
224  *
225  * BAR0 is used for device registers. This function handles MMIO read and write operations to the ivshmem device BAR0.
226  *
227  * Per the specification, the access offset within should be 4-byte aligned, the access size should be 4 bytes, and the
228  * access offset exceeds 16 bytes are reserved. So the request needs to meet these conditions, otherwise, it does
229  * nothing and directly returns 0.
230  * - For a read operation, the read value is stored in the input mmio request structure:
231  *   - Doorbell register is a write-only register, so it sets the read value to 0.
232  *   - Otherwise, it reads specified register value of the ivshmem device.
233  * - For a write operation:
234  *   - IVPosition register is a read-only register, so it does nothing if writing to IVPosition.
235  *   - Writing to the Doorbell register requests to interrupt a peer. It extracts the peer ID and vector index from the
236  *     input mmio value. If the peer is valid (peer ivshmem device exists, MSI-X is enabled, the MSI-X table entry
237  *     corresponding to the vector index exists and is not masked), it injects an MSI to the peer VM. For more details
238  *     about the MSI injection, refer to vlapic_inject_msi().
239  *   - Otherwise, it writes the value to the specified register of the ivshmem device.
240  * - Finally, it returns 0.
241  *
242  * @param[inout] io_req Pointer to the I/O request structure that contains the MMIO request information.
243  * @param[inout] data Pointer to the pci_vdev structure that is treated as an ivshmem device.
244  *
245  * @return Always return 0.
246  *
247  * @pre io_req != NULL
248  * @pre data != NULL
249  * @pre data->priv_data != NULL
250  *
251  * @post retval == 0
252  */
ivshmem_mmio_handler(struct io_request * io_req,void * data)253 static int32_t ivshmem_mmio_handler(struct io_request *io_req, void *data)
254 {
255 	union ivshmem_doorbell doorbell;
256 	struct acrn_mmio_request *mmio = &io_req->reqs.mmio_request;
257 	struct pci_vdev *vdev = (struct pci_vdev *) data;
258 	struct ivshmem_device *ivs_dev = (struct ivshmem_device *) vdev->priv_data;
259 	uint64_t offset = mmio->address - vdev->vbars[IVSHMEM_MMIO_BAR].base_gpa;
260 
261 	/* ivshmem spec define the BAR0 offset > 16 are reserved */
262 	if ((mmio->size == 4U) && ((offset & 0x3U) == 0U) &&
263 		(offset < sizeof(ivs_dev->mmio))) {
264 		/*
265 		 * IVSHMEM_IRQ_MASK_REG and IVSHMEM_IRQ_STA_REG are R/W registers
266 		 * they are useless for ivshmem Rev.1.
267 		 * IVSHMEM_IV_POS_REG is Read-Only register and IVSHMEM_DOORBELL_REG
268 		 * is Write-Only register, they are used for interrupt.
269 		 */
270 		if (mmio->direction == ACRN_IOREQ_DIR_READ) {
271 			if (offset != IVSHMEM_DOORBELL_REG) {
272 				mmio->value = ivs_dev->mmio.data[offset >> 2U];
273 			} else {
274 				mmio->value = 0UL;
275 			}
276 		} else {
277 			if (offset != IVSHMEM_IV_POS_REG) {
278 				if (offset == IVSHMEM_DOORBELL_REG) {
279 					doorbell.val = mmio->value;
280 					ivshmem_server_notify_peer(ivs_dev, doorbell.reg.peer_id,
281 						doorbell.reg.vector_index);
282 				} else {
283 					ivs_dev->mmio.data[offset >> 2U] = mmio->value;
284 				}
285 			}
286 		}
287 	}
288 	return 0;
289 }
290 
291 /**
292  * @brief Read the PCI configuration space of the ivshmem device.
293  *
294  * This function reads the configuration space of the specified virtual PCI device that is configured as a ivshmem
295  * device. It is used to retrieve the configuration data of the ivshmem device for further processing or validation.
296  *
297  * It directly reads the configuration space of the ivshmem device by calling pci_vdev_read_vcfg().
298  *
299  * @param[in] vdev Pointer to the virtual PCI device whose configuration is to be read.
300  * @param[in] offset Offset within the configuration space to start reading from.
301  * @param[in] bytes Number of bytes to read from the configuration space.
302  * @param[inout] val Pointer to the buffer where the read configuration data will be stored.
303  *
304  * @return Always return 0.
305  *
306  * @pre vdev != NULL
307  * @pre val != NULL
308  * @pre offset + bytes <= 0x1000
309  *
310  * @post retval == 0
311  */
read_ivshmem_vdev_cfg(struct pci_vdev * vdev,uint32_t offset,uint32_t bytes,uint32_t * val)312 static int32_t read_ivshmem_vdev_cfg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val)
313 {
314 	*val = pci_vdev_read_vcfg(vdev, offset, bytes);
315 
316 	return 0;
317 }
318 
319 /**
320  * @brief Unmap the specified BAR for the ivshmem device.
321  *
322  * This function unmaps the specified BAR for the ivshmem device. It is typically called during the destroy phase of the
323  * ivshmem device or when guest updates the BAR register.
324  *
325  * - BAR0 and BAR1 are used for device registers and MSI-X table and PBA, respectively. If the specified idx is 0 or 1
326  *   and the field base_gpa in the specified vBAR is not 0, it unregisters the mmio range handler for the BAR by calling
327  *   unregister_mmio_emulation_handler().
328  * - BAR2 maps the shared memory object. If the specified idx is 2 and the field base_gpa in vBAR2 is not 0, it releases
329  *   the ept memory mapping for the shared memory region by calling ept_del_mr().
330  * - Otherwise, it does nothing.
331  *
332  * @param[inout] vdev Pointer to the PCI device that is treated as an ivshmem device.
333  * @param[in] idx Index of the BAR to be unmapped.
334  *
335  * @return None
336  *
337  * @pre vdev != NULL
338  * @pre vdev->vpci != NULL
339  * @pre idx < PCI_BAR_COUNT
340  *
341  * @post N/A
342  */
static void ivshmem_vbar_unmap(struct pci_vdev *vdev, uint32_t idx)
{
	struct acrn_vm *vm = vpci2vm(vdev->vpci);
	struct pci_vbar *vbar = &vdev->vbars[idx];

	/* A zero base GPA means the BAR was never mapped: nothing to undo. */
	if (vbar->base_gpa != 0UL) {
		if (idx == IVSHMEM_SHM_BAR) {
			/* BAR2: tear down the guest mapping of the shared memory region. */
			ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_gpa, vbar->size);
		} else if ((idx == IVSHMEM_MMIO_BAR) || (idx == IVSHMEM_MSIX_BAR)) {
			/* BAR0/BAR1: drop the emulation handler covering the BAR window. */
			unregister_mmio_emulation_handler(vm, vbar->base_gpa, (vbar->base_gpa + vbar->size));
		} else {
			/* No other BARs are implemented. */
		}
	}
}
354 
355 /**
356  * @brief Map the virtual BAR for the ivshmem device.
357  *
358  * This function maps the specified virtual BAR for the ivshmem device. It is typically called when guest updates the
359  * BAR register.
360  *
361  * - BAR0 is used for device registers. If the specified idx is 0 and the field base_gpa in the specified vBAR is not 0,
362  *   it registers the mmio range handler (via the callback ivshmem_mmio_handler) for the BAR and deletes the 4KB ept
363  *   memory mapping for the BAR by calling ept_del_mr().
364  * - BAR1 is used for MSI-X table and PBA. If the specified idx is 1 and the field base_gpa in the specified vBAR is not
365  *   0, it registers the mmio range handler (via the callback vmsix_handle_table_mmio_access) for the BAR and deletes
366  *   the ept memory mapping for the BAR by calling ept_del_mr(). It also sets the mmio_gpa field in the vdev->msix to
367  *   the GPA of the BAR for MSI-X table access.
368  * - BAR2 maps the shared memory object. If the specified idx is 2, the field base_gpa in vBAR2 is not 0 and the field
369  *   base_hpa in vBAR2 is not INVALID_HPA, it adds the ept memory mapping as (EPT_RD|EPT_WR|EPT_WB|EPT_IGNORE_PAT) for
370  *   the BAR by calling ept_add_mr().
371  * - Otherwise, it does nothing.
372  *
373  * @param[inout] vdev Pointer to the PCI device that is treated as an ivshmem device.
374  * @param[in] idx Index of the BAR to be mapped.
375  *
376  * @return None
377  *
378  * @pre vdev != NULL
379  * @pre vdev->priv_data != NULL
380  * @pre msix->table_offset == 0U
381  * @pre bar_idx < PCI_BAR_COUNT
382  *
383  * @post N/A
384  */
static void ivshmem_vbar_map(struct pci_vdev *vdev, uint32_t idx)
{
	struct acrn_vm *vm = vpci2vm(vdev->vpci);
	struct pci_vbar *vbar = &vdev->vbars[idx];

	/* A zero base GPA means the guest has not programmed this BAR yet. */
	if (vbar->base_gpa != 0UL) {
		if (idx == IVSHMEM_MMIO_BAR) {
			/* BAR0: emulate the register file and remove any EPT mapping in its range. */
			register_mmio_emulation_handler(vm, ivshmem_mmio_handler, vbar->base_gpa,
					(vbar->base_gpa + vbar->size), vdev, false);
			ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_gpa, round_page_up(vbar->size));
		} else if (idx == IVSHMEM_MSIX_BAR) {
			/* BAR1: emulate MSI-X table/PBA accesses and record the table GPA. */
			register_mmio_emulation_handler(vm, vmsix_handle_table_mmio_access, vbar->base_gpa,
				(vbar->base_gpa + vbar->size), vdev, false);
			ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_gpa, vbar->size);
			vdev->msix.mmio_gpa = vbar->base_gpa;
		} else if ((idx == IVSHMEM_SHM_BAR) && (vbar->base_hpa != INVALID_HPA)) {
			/* BAR2: map the shared memory region write-back cacheable into the guest. */
			ept_add_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_hpa,
					vbar->base_gpa, vbar->size, EPT_RD | EPT_WR | EPT_WB | EPT_IGNORE_PAT);
		} else {
			/* Unimplemented BAR, or shared memory region not backed by host memory. */
		}
	}
}
404 
405 /**
406  * @brief Write to the virtual ivshmem device configuration space.
407  *
408  * This function handles writes to the configuration space of the specified virtual PCI device that is configured as an
409  * ivshmem device. It is typically called when the guest writes to the ivshmem device's configuration space.
410  *
411  * - If the write request is for a BAR register, it updates the BAR with the provided value. It also needs to update the
412  *   ept mapping and mmio emulation handler based on the bar information. For detailed operations, refer to
413  *   vpci_update_one_vbar(), ivshmem_vbar_map() and ivshmem_vbar_unmap().
414  * - If the write request is for the MSI-X capability register, it specially handles the write request. For detailed
415  *   operations, refer to write_vmsix_cap_reg().
416  * - Otherwise, the function writes the provided value to the specified configuration space register.
417  * - Finally, the function returns 0.
418  *
419  * @param[inout] vdev Pointer to the virtual PCI device whose configuration is to be written.
420  * @param[in] offset Offset within the configuration space to start writing to.
421  * @param[in] bytes Number of bytes to write.
422  * @param[in] val The value to be written to the register.
423  *
424  * @return Always return 0.
425  *
426  * @pre vdev != NULL
427  * @pre offset + bytes <= 0x1000
428  *
429  * @post retval == 0
430  */
write_ivshmem_vdev_cfg(struct pci_vdev * vdev,uint32_t offset,uint32_t bytes,uint32_t val)431 static int32_t write_ivshmem_vdev_cfg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val)
432 {
433 	if (vbar_access(vdev, offset)) {
434 		vpci_update_one_vbar(vdev, pci_bar_index(offset), val,
435 			ivshmem_vbar_map, ivshmem_vbar_unmap);
436 	} else if (msixcap_access(vdev, offset)) {
437 		write_vmsix_cap_reg(vdev, offset, bytes, val);
438 	} else {
439 		pci_vdev_write_vcfg(vdev, offset, bytes, val);
440 	}
441 
442 	return 0;
443 }
444 
445 /**
446  * @brief Initialize the specified BAR for the ivshmem device.
447  *
448  * The ivshmem PCI device has three BARs: BAR0, BAR1, and BAR2. BAR0/BAR1 is a 32-bit memory BAR and BAR2 is a 64-bit
449  * memory BAR. This function initializes a specified BAR for the ivshmem device. It is typically called during the
450  * initialization phase of the ivshmem device.
451  *
452  * - If bar_idx exceeds 2, the function does nothing.
453  * - For BAR2, it finds the shared memory region based on the shared memory region name. If the shared memory region is
454  *   not found, the function does nothing.
455  * - It updates corresponding fields in the pci_vbar structure of specified bar_idx.
456  * - It configures the Base Address Register in the device's configuration space.
457  * - For a 64-bit memory BAR (BAR2 for now), it also sets up the next Base Address Register as the high 32 bits.
458  *
459  * @param[inout] vdev Pointer to the PCI device that is treated as an ivshmem device.
460  * @param[in] bar_idx Index of the BAR to be initialized.
461  *
462  * @return None
463  *
464  * @pre vdev != NULL
465  * @pre vdev->pci_dev_config != NULL
466  * @pre bar_idx < PCI_BAR_COUNT
467  *
468  * @post N/A
469  */
static void init_ivshmem_bar(struct pci_vdev *vdev, uint32_t bar_idx)
{
	struct acrn_vm_pci_dev_config *dev_config = vdev->pci_dev_config;
	uint64_t addr = dev_config->vbar_base[bar_idx];
	struct pci_vbar *vbar = &vdev->vbars[bar_idx];
	uint64_t size = 0UL;
	uint64_t type_mask;

	/* Keep only the BAR type bits; the address bits are masked out. */
	vbar->bar_type.bits = addr;
	type_mask = is_pci_io_bar(vbar) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK;
	vbar->bar_type.bits &= (~type_mask);

	switch (bar_idx) {
	case IVSHMEM_SHM_BAR:
	{
		/* BAR2 exposes the configured shared memory region. */
		struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);

		if (region != NULL) {
			size = region->size;
			vbar->base_hpa = region->hpa;
		} else {
			pr_err("%s ivshmem device %x:%x.%x has no memory region\n",
				__func__, vdev->bdf.bits.b, vdev->bdf.bits.d, vdev->bdf.bits.f);
		}
		break;
	}
	case IVSHMEM_MSIX_BAR:
		size = VMSIX_ENTRY_TABLE_PBA_BAR_SIZE;
		break;
	case IVSHMEM_MMIO_BAR:
		size = IVSHMEM_MMIO_BAR_SIZE;
		break;
	default:
		/* No other BARs are implemented; size stays 0 and nothing is programmed. */
		break;
	}

	if (size != 0UL) {
		vbar->size = size;
		vbar->mask = (uint32_t) (~(size - 1UL));
		pci_vdev_write_vbar(vdev, bar_idx, (uint32_t)addr);
		if (is_pci_mem64lo_bar(vbar)) {
			/* 64-bit memory BAR: the next BAR slot holds the high 32 bits. */
			struct pci_vbar *hi_vbar = &vdev->vbars[bar_idx + 1U];

			hi_vbar->is_mem64hi = true;
			hi_vbar->mask = (uint32_t) ((~(size - 1UL)) >> 32U);
			pci_vdev_write_vbar(vdev, (bar_idx + 1U), ((uint32_t)(addr >> 32U)));
		}
	}
}
508 
509 /**
510  * @brief Initialize a virtual ivshmem device.
511  *
512  * This function initializes the specified virtual PCI device as an ivshmem device. It sets up the device to follow the
513  * specifications. Because the ivshmem is introduced by QEMU, the spec link is
514  * https://www.qemu.org/docs/master/specs/ivshmem-spec.html. This function is usually used in the initialization phase
515  * of VM.
516  *
517  * - It sets the pcidev field in ivshmem_device (all ivshmem devices emulated by hypervisor are static stored based on
518  *   the configuration) to the vdev and sets the priv_data field in the specified vdev to new ivshmem_device structure
519  *   data, indicating the association between the virtual PCI device and the ivshmem device.
520  * - Per the ivshmem specification and PCI Express Base Specification, it initializes the ivshmem device configuration
521  *   space with appropriate values:
522  *   - The device ID and Vendor ID is 0x11101af4.
523  *   - It sets subsystem vendor ID to 0x8086 (Intel) and subsystem ID to the region ID of the shared memory region.
524  *   - It sets up the MSI-X capability with 8 MSI-X table entries and maps the table and PBA into BAR1. For detailed
525  *     operations, refer to add_vmsix_capability().
526  *   - It initializes BAR0 for the device to hold device registers (256 Byte MMIO).
527  *   - It initializes BAR1 for the device to hold MSI-X table and PBA.
528  *   - It initializes BAR2 for the device to map the shared memory object. Because BAR2 is a 64-bit memory BAR, it also
529  *     sets up the next Base Address Register as the high 32 bits and the total number of bars is set to 4.
530  *   - It binds the device to the ivshmem server (hosts in hypervisor) for inter-VM communication.
531  * - Finally, it sets the user field to vdev, indicating that this ivshmem is used by a VM.
532  *
533  * @param[inout] vdev Pointer to the virtual PCI device to be initialized.
534  *
535  * @return None
536  *
537  * @pre vdev != NULL
538  * @pre vdev->pci_dev_config != NULL
539  * @pre vdev->pci != NULL
540  *
541  * @post N/A
542  */
init_ivshmem_vdev(struct pci_vdev * vdev)543 static void init_ivshmem_vdev(struct pci_vdev *vdev)
544 {
545 	struct acrn_vm_pci_dev_config *dev_config = vdev->pci_dev_config;
546 	struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
547 
548 	create_ivshmem_device(vdev);
549 
550 	/* initialize ivshmem config */
551 	pci_vdev_write_vcfg(vdev, PCIR_VENDOR, 2U, IVSHMEM_VENDOR_ID);
552 	pci_vdev_write_vcfg(vdev, PCIR_DEVICE, 2U, IVSHMEM_DEVICE_ID);
553 	pci_vdev_write_vcfg(vdev, PCIR_REVID, 1U, IVSHMEM_REV);
554 	pci_vdev_write_vcfg(vdev, PCIR_CLASS, 1U, IVSHMEM_CLASS);
555 	pci_vdev_write_vcfg(vdev, PCIR_HDRTYPE, 1U,
556 		PCIM_HDRTYPE_NORMAL | ((vdev->bdf.bits.f == 0U) ? PCIM_MFDEV : 0U));
557 
558 	pci_vdev_write_vcfg(vdev, PCIV_SUB_VENDOR_ID, 2U, IVSHMEM_INTEL_SUBVENDOR_ID);
559 	if (region != NULL) {
560 		pci_vdev_write_vcfg(vdev, PCIV_SUB_SYSTEM_ID, 2U, region->region_id);
561 	}
562 
563 	add_vmsix_capability(vdev, MAX_IVSHMEM_MSIX_TBL_ENTRY_NUM, IVSHMEM_MSIX_BAR);
564 
565 	/* initialize ivshmem bars */
566 	vdev->nr_bars = 4U;
567 	init_ivshmem_bar(vdev, IVSHMEM_MMIO_BAR);
568 	init_ivshmem_bar(vdev, IVSHMEM_MSIX_BAR);
569 	init_ivshmem_bar(vdev, IVSHMEM_SHM_BAR);
570 	ivshmem_server_bind_peer(vdev);
571 
572 	vdev->user = vdev;
573 }
574 
575 /**
576  * @brief Deinitialize a virtual ivshmem device.
577  *
578  * This function deinitializes the specified virtual PCI device that was previously initialized as an ivshmem device.
579  *
580  * - It unbinds the device from the ivshmem server (hosts in hypervisor).
581  * - It sets the priv_data field in the specified vdev to NULL and sets the pcidev field in ivshmem_device to NULL,
582  *   indicating the disassociation between the virtual PCI device and the ivshmem device.
583  * - It sets the user field to NULL, indicating that this virtual device is not owned by any VM.
584  *
585  * @param[inout] vdev Pointer to the virtual PCI device to be deinitialized.
586  *
587  * @return None
588  *
589  * @pre vdev != NULL
590  * @pre vdev->priv_data != NULL
591  * @pre vdev->pci != NULL
592  *
593  * @post N/A
594  */
deinit_ivshmem_vdev(struct pci_vdev * vdev)595 static void deinit_ivshmem_vdev(struct pci_vdev *vdev)
596 {
597 	struct ivshmem_device *ivs_dev = (struct ivshmem_device *) vdev->priv_data;
598 
599 	ivshmem_server_unbind_peer(vdev);
600 
601 	spinlock_obtain(&ivshmem_dev_lock);
602 	vdev->priv_data = NULL;
603 	vdev->user = NULL;
604 	ivs_dev->pcidev = NULL;
605 	spinlock_release(&ivshmem_dev_lock);
606 }
607 
608 /**
609  * @brief Create a virtual ivshmem device based on the specified device information.
610  *
611  * Basic ivshmem information is configured in the scenario file. After compilation, some device configurations of every
612  * ivshmem PCI device are stored in struct acrn_vm_pci_dev_config and every shared memory region is stored in struct
613  * ivshmem_shm_region. This function creates one virtual ivshmem device based on the input device information. The
614  * user-space tool(such as acrn-dm) may add an ivshmem device for a post-launch VM and this device is emulated in
615  * hypervisor. This function is used for the case for now and it is usually used in the initialization phase of a
616  * post-launch VM.
617  *
618  * - Per the ivshmem specification, BAR2 maps the shared memory object. For the ivshmem device to be created, the shared
619  *   memory region name is stored in dev->args and the size of the shared memory region is stored in
620  *   dev->io_size[IVSHMEM_SHM_BAR].
621  * - It traverses all configured PCI devices of the specified VM. Based on the input shared memory region name, it finds
622  *   corresponding acrn_vm_pci_dev_config and ivshmem_shm_region.
623  * - If the acrn_vm_pci_dev_config is not found or the ivshmem_shm_region is not found or the size of ivshmem_shm_region
624  *   is not equal to the size specified in dev->io_size[IVSHMEM_SHM_BAR], the function returns -EINVAL.
625  * - Otherwise, update the acrn_vm_pci_dev_config with input device information specified in dev and initializes a new
626  *   virtual PCI device as an ivshmem device. For detailed operations, refer to vpci_init_vdev(). The function returns
627  *   -EINVAL if the vpci_init_vdev() fails.
628  * - Finally, it returns 0 on success.
629  *
630  * @param[inout] vm Pointer to the VM that owns the ivshmem device.
631  * @param[in] dev Pointer to the device information to create an ivshmem device.
632  *
633  * @return A int32_t value to indicate the status of the ivshmem device creation.
634  *
635  * @retval 0 On success.
636  * @retval -EINVAL If the ivshmem device creation fails.
637  *
638  * @pre vm != NULL
639  * @pre dev != NULL
640  *
641  * @post retval <= 0
642  */
int32_t create_ivshmem_vdev(struct acrn_vm *vm, struct acrn_vdev *dev)
{
	uint32_t i;
	struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
	struct acrn_vm_pci_dev_config *dev_config = NULL;
	struct pci_vdev *vdev = NULL;
	int32_t ret = -EINVAL;

	/* dev->args carries the requested shared memory region name; match it
	 * against the VM's statically configured PCI devices. */
	for (i = 0U; i < vm_config->pci_dev_num; i++) {
		dev_config = &vm_config->pci_devs[i];
		if (strncmp(dev_config->shm_region_name, (char *)dev->args, sizeof(dev_config->shm_region_name)) == 0) {
			struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
			/* The region must exist and its size must match what the caller expects. */
			if ((region != NULL) && (region->size == dev->io_size[IVSHMEM_SHM_BAR])) {
				/* Mutate the device config and create the vdev under the vPCI lock. */
				spinlock_obtain(&vm->vpci.lock);
				dev_config->vbdf.value = (uint16_t) dev->slot;
				dev_config->vbar_base[IVSHMEM_MMIO_BAR] = (uint64_t) dev->io_addr[IVSHMEM_MMIO_BAR];
				dev_config->vbar_base[IVSHMEM_MSIX_BAR] = (uint64_t) dev->io_addr[IVSHMEM_MSIX_BAR];
				/* BAR2 is 64-bit: combine the low and high halves from io_addr[]. */
				dev_config->vbar_base[IVSHMEM_SHM_BAR] = (uint64_t) dev->io_addr[IVSHMEM_SHM_BAR];
				dev_config->vbar_base[IVSHMEM_SHM_BAR] |= ((uint64_t) dev->io_addr[IVSHMEM_SHM_BAR + 1U]) << 32U;
				vdev = vpci_init_vdev(&vm->vpci, dev_config, NULL);
				spinlock_release(&vm->vpci.lock);
				if (vdev != NULL) {
					ret = 0;
				}
			}
			/* Only the first config entry matching the name is considered. */
			break;
		}
	}

	if (ret != 0) {
		/* Decode BDF from dev->slot (bus:dev.func) for the warning message. */
		pr_warn("%s, failed to create ivshmem device %x:%x.%x\n", __func__,
			dev->slot >> 8U, (dev->slot >> 3U) & 0x1fU, dev->slot & 0x7U);
	}
	return ret;
}
678 
679 /**
680  * @brief Destroy the virtual ivshmem device.
681  *
682  * This function is the counterpart of create_ivshmem_vdev(). This function destroys the specified virtual PCI device
683  * that was previously initialized as an ivshmem device. It is usually used for a post-launch VM to destroy the ivshmem
684  * device.
685  *
686  * - It updates all BARs of the specified vdev. For detailed operations, refer to vpci_update_one_vbar() and the
687  *   function ivshmem_vbar_unmap().
688  * - It deinitializes the specified virtual PCI device. For detailed operations, refer to vpci_deinit_vdev().
689  * - Finally, it returns 0.
690  *
691  * @param[inout] vdev Pointer to the virtual PCI device to be destroyed.
692  *
693  * @return Always return 0.
694  *
695  * @pre vdev != NULL
696  * @pre vdev->vpci != NULL
697  *
698  * @post retval == 0
699  */
destroy_ivshmem_vdev(struct pci_vdev * vdev)700 int32_t destroy_ivshmem_vdev(struct pci_vdev *vdev)
701 {
702 	uint32_t i;
703 	struct acrn_vpci *vpci = vdev->vpci;
704 
705 	for (i = 0U; i < vdev->nr_bars; i++) {
706 		vpci_update_one_vbar(vdev, i, 0U, NULL, ivshmem_vbar_unmap);
707 	}
708 
709 	spinlock_obtain(&vpci->lock);
710 	vpci_deinit_vdev(vdev);
711 	spinlock_release(&vpci->lock);
712 
713 	return 0;
714 }
715 
716 /**
717  * @brief Data structure implementation for virtual Inter-VM shared memory device (ivshmem) operations.
718  *
719  * The ivshmem is actually first introduced by QEMU to share a memory region between multiple VMs and host. It is
720  * modeled as a PCI device exposing said memory to the VM as a PCI BAR. ACRN also introduces it to transfer data between
721  * VMs based on the shared memory region. Struct pci_vdev_ops is used to define the operations of virtual PCI device and
722  * definition here is used to support ivshmem device.
723  *
724  * @consistency N/A
725  * @alignment N/A
726  *
727  * @remark N/A
728  */
const struct pci_vdev_ops vpci_ivshmem_ops = {
	.init_vdev	= init_ivshmem_vdev,	/* set up config space, BARs and server binding */
	.deinit_vdev	= deinit_ivshmem_vdev,	/* unbind from server and free the device slot */
	.write_vdev_cfg	= write_ivshmem_vdev_cfg,	/* BAR/MSI-X aware config space writes */
	.read_vdev_cfg	= read_ivshmem_vdev_cfg,	/* plain config space reads */
};
735 /**
736  * @}
737  */
738 #endif
739