1 /*
2 * Copyright (C) 2020-2024 Intel Corporation.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7 #ifdef CONFIG_IVSHMEM_ENABLED
8 #include <asm/guest/vm.h>
9 #include <asm/mmu.h>
10 #include <asm/guest/ept.h>
11 #include <logmsg.h>
12 #include <errno.h>
13 #include <ivshmem.h>
14 #include <ivshmem_cfg.h>
15 #include "vpci_priv.h"
16
17 /**
18 * @addtogroup vp-dm_vperipheral
19 *
20 * @{
21 */
22
23 /**
24 * @file
25 * @brief Implementation of Inter-VM shared memory device (ivshmem).
26 *
 * This file defines macros, data structures and functions to support ivshmem devices. It also implements the
 * functions necessary to model an ivshmem device as a PCI device.
29 */
30
/* Values written to the class and revision fields of the ivshmem config space */
#define	IVSHMEM_CLASS		0x05U
#define	IVSHMEM_REV		0x01U

/*
 * The ivshmem device exposes bar0, bar1 and bar2.
 * Their indexes shall start at 0 and be contiguous.
 */
#define	IVSHMEM_MMIO_BAR	0U	/* device registers */
#define	IVSHMEM_MSIX_BAR	1U	/* MSI-X table and PBA */
#define	IVSHMEM_SHM_BAR		2U	/* shared memory region */

/* Size in bytes of the register BAR (bar0) */
#define	IVSHMEM_MMIO_BAR_SIZE	256UL

/* Byte offsets of the device-specific registers inside bar0 */
#define	IVSHMEM_IRQ_MASK_REG	0x0U
#define	IVSHMEM_IRQ_STA_REG	0x4U
#define	IVSHMEM_IV_POS_REG	0x8U
#define	IVSHMEM_DOORBELL_REG	0xcU
50
51 static struct ivshmem_shm_region mem_regions[8] = {
52 IVSHMEM_SHM_REGIONS
53 };
54
/*
 * Value written to the doorbell register, viewable either as the raw 32-bit
 * word or as its two 16-bit halves.
 */
union ivshmem_doorbell {
	uint32_t val;				/* raw register value */
	struct {
		uint16_t vector_index;		/* first 16-bit half: index of the MSI-X vector to trigger */
		uint16_t peer_id;		/* second 16-bit half: ID of the peer to interrupt */
	} reg;
};
62
63 struct ivshmem_device {
64 struct pci_vdev* pcidev;
65 union {
66 uint32_t data[4];
67 struct {
68 uint32_t irq_mask;
69 uint32_t irq_state;
70 /*
71 * If the device is not configured for interrupts,
72 * this is zero. Else, ivpos is the device's ID.
73 */
74 uint32_t ivpos;
75
76 /* Writing doorbell register requests to interrupt a peer */
77 union ivshmem_doorbell doorbell;
78 } regs;
79 } mmio;
80 struct ivshmem_shm_region *region;
81 };
82
83 static struct ivshmem_device ivshmem_dev[IVSHMEM_DEV_NUM];
84 static spinlock_t ivshmem_dev_lock = { .head = 0U, .tail = 0U, };
85
86 /**
87 * @brief Initialize the shared memory regions for all ivshmem devices.
88 *
89 * An ivshmem device is used to transfer data between VMs based on shared memory region. Basic ivshmem information is
90 * configured in scenario file. After compilation, every shared memory region is stored in struct ivshmem_shm_region.
91 * This function initializes all shared memory regions for ivshmem devices and it is usually called before all VMs are
92 * created.
93 *
94 * IVSHMEM_SHM_SIZE is the sum of all ivshmem shared memory regions in bytes. It rounds IVSHMEM_SHM_SIZE up to PDE_SIZE
95 * (1 GiB) and allocates a contiguous block of memory for these memory regions from host e820. For detailed allocation
96 * operations, refer to e820_alloc_memory(). The function then iterates over the memory regions and assigns the
97 * allocated physical addresses to each region.
98 *
99 * @return None
100 *
101 * @pre N/A
102 *
103 * @post N/A
104 */
init_ivshmem_shared_memory()105 void init_ivshmem_shared_memory()
106 {
107 uint32_t i;
108 uint64_t addr;
109
110 addr = e820_alloc_memory(roundup(IVSHMEM_SHM_SIZE, PDE_SIZE), MEM_SIZE_MAX);
111 for (i = 0U; i < ARRAY_SIZE(mem_regions); i++) {
112 mem_regions[i].hpa = addr;
113 addr += mem_regions[i].size;
114 }
115 }
116
117 /*
118 * @pre name != NULL
119 */
find_shm_region(const char * name)120 static struct ivshmem_shm_region *find_shm_region(const char *name)
121 {
122 uint32_t i, num = ARRAY_SIZE(mem_regions);
123
124 for (i = 0U; i < num; i++) {
125 if (strncmp(name, mem_regions[i].name, sizeof(mem_regions[0].name)) == 0) {
126 break;
127 }
128 }
129 return ((i < num) ? &mem_regions[i] : NULL);
130 }
131
132 /*
133 * @brief There are two ivshmem server implementation in HV-land and
134 * DM-land, they're used for briding the notification channel
135 * between ivshmem devices acrossed VMs.
136 *
137 * @pre vdev != NULL
138 * @pre region->doorbell_peers[vm_id] = NULL
139 */
ivshmem_server_bind_peer(struct pci_vdev * vdev)140 static void ivshmem_server_bind_peer(struct pci_vdev *vdev)
141 {
142 uint16_t vm_id;
143 struct acrn_vm_pci_dev_config *dev_config = vdev->pci_dev_config;
144 struct ivshmem_device *ivs_dev = (struct ivshmem_device *)vdev->priv_data;
145 struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
146
147 if (region != NULL) {
148 vm_id = vpci2vm(vdev->vpci)->vm_id;
149 /* Device ID equals to VM ID*/
150 ivs_dev->mmio.regs.ivpos = vm_id;
151 ivs_dev->region = region;
152 region->doorbell_peers[vm_id] = ivs_dev;
153 }
154 }
155
156 /*
157 * @pre vdev != NULL
158 */
ivshmem_server_unbind_peer(struct pci_vdev * vdev)159 static void ivshmem_server_unbind_peer(struct pci_vdev *vdev)
160 {
161 struct ivshmem_shm_region *region = ((struct ivshmem_device *)vdev->priv_data)->region;
162
163 region->doorbell_peers[vpci2vm(vdev->vpci)->vm_id] = NULL;
164 }
165
166 /*
167 * @pre src_ivs_dev != NULL
168 */
ivshmem_server_notify_peer(struct ivshmem_device * src_ivs_dev,uint16_t dest_peer_id,uint16_t vector_index)169 static void ivshmem_server_notify_peer(struct ivshmem_device *src_ivs_dev, uint16_t dest_peer_id, uint16_t vector_index)
170 {
171 struct acrn_vm *dest_vm;
172 struct ivshmem_device *dest_ivs_dev;
173 struct msix_table_entry *entry;
174 struct ivshmem_shm_region *region = src_ivs_dev->region;
175
176 if (dest_peer_id < MAX_IVSHMEM_PEER_NUM) {
177
178 dest_ivs_dev = region->doorbell_peers[dest_peer_id];
179 if ((dest_ivs_dev != NULL) && vpci_vmsix_enabled(dest_ivs_dev->pcidev)
180 && (vector_index < dest_ivs_dev->pcidev->msix.table_count)) {
181
182 entry = &(dest_ivs_dev->pcidev->msix.table_entries[vector_index]);
183 if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0U) {
184
185 dest_vm = vpci2vm(dest_ivs_dev->pcidev->vpci);
186 vlapic_inject_msi(dest_vm, entry->addr, entry->data);
187 } else {
188 pr_err("%s,target msix entry [%d] is masked.\n",
189 __func__, vector_index);
190 }
191 } else {
192 pr_err("%s,Invalid peer, ID = %d, vector index [%d] or MSI-X is disabled.\n",
193 __func__, dest_peer_id, vector_index);
194 }
195 }
196 }
197
198 /*
199 * @post vdev->priv_data != NULL
200 */
create_ivshmem_device(struct pci_vdev * vdev)201 static void create_ivshmem_device(struct pci_vdev *vdev)
202 {
203 uint32_t i;
204
205 spinlock_obtain(&ivshmem_dev_lock);
206 for (i = 0U; i < IVSHMEM_DEV_NUM; i++) {
207 if (ivshmem_dev[i].pcidev == NULL) {
208 ivshmem_dev[i].pcidev = vdev;
209 vdev->priv_data = &ivshmem_dev[i];
210 break;
211 }
212 }
213 spinlock_release(&ivshmem_dev_lock);
214 ASSERT((i < IVSHMEM_DEV_NUM), "failed to find and set ivshmem device");
215 /*
216 * Clear ivshmem_device mmio to ensure the same initial
217 * states after VM reboot.
218 */
219 memset(&ivshmem_dev[i].mmio, 0U, sizeof(uint32_t) * 4);
220 }
221
222 /**
223 * @brief Handle MMIO (Memory-Mapped I/O) operations for the ivshmem device.
224 *
225 * BAR0 is used for device registers. This function handles MMIO read and write operations to the ivshmem device BAR0.
226 *
227 * Per the specification, the access offset within should be 4-byte aligned, the access size should be 4 bytes, and the
228 * access offset exceeds 16 bytes are reserved. So the request needs to meet these conditions, otherwise, it does
229 * nothing and directly returns 0.
230 * - For a read operation, the read value is stored in the input mmio request structure:
231 * - Doorbell register is a write-only register, so it sets the read value to 0.
232 * - Otherwise, it reads specified register value of the ivshmem device.
233 * - For a write operation:
234 * - IVPosition register is a read-only register, so it does nothing if writing to IVPosition.
235 * - Writing to the Doorbell register requests to interrupt a peer. It extracts the peer ID and vector index from the
236 * input mmio value. If the peer is valid (peer ivshmem device exists, MSI-X is enabled, the MSI-X table entry
237 * corresponding to the vector index exists and is not masked), it injects an MSI to the peer VM. For more details
238 * about the MSI injection, refer to vlapic_inject_msi().
239 * - Otherwise, it writes the value to the specified register of the ivshmem device.
240 * - Finally, it returns 0.
241 *
242 * @param[inout] io_req Pointer to the I/O request structure that contains the MMIO request information.
243 * @param[inout] data Pointer to the pci_vdev structure that is treated as an ivshmem device.
244 *
245 * @return Always return 0.
246 *
247 * @pre io_req != NULL
248 * @pre data != NULL
249 * @pre data->priv_data != NULL
250 *
251 * @post retval == 0
252 */
ivshmem_mmio_handler(struct io_request * io_req,void * data)253 static int32_t ivshmem_mmio_handler(struct io_request *io_req, void *data)
254 {
255 union ivshmem_doorbell doorbell;
256 struct acrn_mmio_request *mmio = &io_req->reqs.mmio_request;
257 struct pci_vdev *vdev = (struct pci_vdev *) data;
258 struct ivshmem_device *ivs_dev = (struct ivshmem_device *) vdev->priv_data;
259 uint64_t offset = mmio->address - vdev->vbars[IVSHMEM_MMIO_BAR].base_gpa;
260
261 /* ivshmem spec define the BAR0 offset > 16 are reserved */
262 if ((mmio->size == 4U) && ((offset & 0x3U) == 0U) &&
263 (offset < sizeof(ivs_dev->mmio))) {
264 /*
265 * IVSHMEM_IRQ_MASK_REG and IVSHMEM_IRQ_STA_REG are R/W registers
266 * they are useless for ivshmem Rev.1.
267 * IVSHMEM_IV_POS_REG is Read-Only register and IVSHMEM_DOORBELL_REG
268 * is Write-Only register, they are used for interrupt.
269 */
270 if (mmio->direction == ACRN_IOREQ_DIR_READ) {
271 if (offset != IVSHMEM_DOORBELL_REG) {
272 mmio->value = ivs_dev->mmio.data[offset >> 2U];
273 } else {
274 mmio->value = 0UL;
275 }
276 } else {
277 if (offset != IVSHMEM_IV_POS_REG) {
278 if (offset == IVSHMEM_DOORBELL_REG) {
279 doorbell.val = mmio->value;
280 ivshmem_server_notify_peer(ivs_dev, doorbell.reg.peer_id,
281 doorbell.reg.vector_index);
282 } else {
283 ivs_dev->mmio.data[offset >> 2U] = mmio->value;
284 }
285 }
286 }
287 }
288 return 0;
289 }
290
/**
 * @brief Read the PCI configuration space of the ivshmem device.
 *
 * The read is served directly from the virtual configuration space of the device through pci_vdev_read_vcfg(); no
 * ivshmem-specific handling is applied.
 *
 * @param[in]    vdev   Pointer to the virtual PCI device whose configuration is to be read.
 * @param[in]    offset Offset within the configuration space to start reading from.
 * @param[in]    bytes  Number of bytes to read from the configuration space.
 * @param[inout] val    Pointer to the location that receives the value read.
 *
 * @return Always return 0.
 *
 * @pre vdev != NULL
 * @pre val != NULL
 * @pre offset + bytes <= 0x1000
 *
 * @post retval == 0
 */
static int32_t read_ivshmem_vdev_cfg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t *val)
{
	uint32_t cfg_val = pci_vdev_read_vcfg(vdev, offset, bytes);

	*val = cfg_val;

	return 0;
}
318
319 /**
320 * @brief Unmap the specified BAR for the ivshmem device.
321 *
322 * This function unmaps the specified BAR for the ivshmem device. It is typically called during the destroy phase of the
323 * ivshmem device or when guest updates the BAR register.
324 *
325 * - BAR0 and BAR1 are used for device registers and MSI-X table and PBA, respectively. If the specified idx is 0 or 1
326 * and the field base_gpa in the specified vBAR is not 0, it unregisters the mmio range handler for the BAR by calling
327 * unregister_mmio_emulation_handler().
328 * - BAR2 maps the shared memory object. If the specified idx is 2 and the field base_gpa in vBAR2 is not 0, it releases
329 * the ept memory mapping for the shared memory region by calling ept_del_mr().
330 * - Otherwise, it does nothing.
331 *
332 * @param[inout] vdev Pointer to the PCI device that is treated as an ivshmem device.
333 * @param[in] idx Index of the BAR to be unmapped.
334 *
335 * @return None
336 *
337 * @pre vdev != NULL
338 * @pre vdev->vpci != NULL
339 * @pre idx < PCI_BAR_COUNT
340 *
341 * @post N/A
342 */
ivshmem_vbar_unmap(struct pci_vdev * vdev,uint32_t idx)343 static void ivshmem_vbar_unmap(struct pci_vdev *vdev, uint32_t idx)
344 {
345 struct acrn_vm *vm = vpci2vm(vdev->vpci);
346 struct pci_vbar *vbar = &vdev->vbars[idx];
347
348 if ((idx == IVSHMEM_SHM_BAR) && (vbar->base_gpa != 0UL)) {
349 ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_gpa, vbar->size);
350 } else if (((idx == IVSHMEM_MMIO_BAR) || (idx == IVSHMEM_MSIX_BAR)) && (vbar->base_gpa != 0UL)) {
351 unregister_mmio_emulation_handler(vm, vbar->base_gpa, (vbar->base_gpa + vbar->size));
352 }
353 }
354
355 /**
356 * @brief Map the virtual BAR for the ivshmem device.
357 *
358 * This function maps the specified virtual BAR for the ivshmem device. It is typically called when guest updates the
359 * BAR register.
360 *
361 * - BAR0 is used for device registers. If the specified idx is 0 and the field base_gpa in the specified vBAR is not 0,
362 * it registers the mmio range handler (via the callback ivshmem_mmio_handler) for the BAR and deletes the 4KB ept
363 * memory mapping for the BAR by calling ept_del_mr().
364 * - BAR1 is used for MSI-X table and PBA. If the specified idx is 1 and the field base_gpa in the specified vBAR is not
365 * 0, it registers the mmio range handler (via the callback vmsix_handle_table_mmio_access) for the BAR and deletes
366 * the ept memory mapping for the BAR by calling ept_del_mr(). It also sets the mmio_gpa field in the vdev->msix to
367 * the GPA of the BAR for MSI-X table access.
368 * - BAR2 maps the shared memory object. If the specified idx is 2, the field base_gpa in vBAR2 is not 0 and the field
369 * base_hpa in vBAR2 is not INVALID_HPA, it adds the ept memory mapping as (EPT_RD|EPT_WR|EPT_WB|EPT_IGNORE_PAT) for
370 * the BAR by calling ept_add_mr().
371 * - Otherwise, it does nothing.
372 *
373 * @param[inout] vdev Pointer to the PCI device that is treated as an ivshmem device.
374 * @param[in] idx Index of the BAR to be mapped.
375 *
376 * @return None
377 *
378 * @pre vdev != NULL
379 * @pre vdev->priv_data != NULL
380 * @pre msix->table_offset == 0U
381 * @pre bar_idx < PCI_BAR_COUNT
382 *
383 * @post N/A
384 */
ivshmem_vbar_map(struct pci_vdev * vdev,uint32_t idx)385 static void ivshmem_vbar_map(struct pci_vdev *vdev, uint32_t idx)
386 {
387 struct acrn_vm *vm = vpci2vm(vdev->vpci);
388 struct pci_vbar *vbar = &vdev->vbars[idx];
389
390 if ((idx == IVSHMEM_SHM_BAR) && (vbar->base_hpa != INVALID_HPA) && (vbar->base_gpa != 0UL)) {
391 ept_add_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_hpa,
392 vbar->base_gpa, vbar->size, EPT_RD | EPT_WR | EPT_WB | EPT_IGNORE_PAT);
393 } else if ((idx == IVSHMEM_MMIO_BAR) && (vbar->base_gpa != 0UL)) {
394 register_mmio_emulation_handler(vm, ivshmem_mmio_handler, vbar->base_gpa,
395 (vbar->base_gpa + vbar->size), vdev, false);
396 ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_gpa, round_page_up(vbar->size));
397 } else if ((idx == IVSHMEM_MSIX_BAR) && (vbar->base_gpa != 0UL)) {
398 register_mmio_emulation_handler(vm, vmsix_handle_table_mmio_access, vbar->base_gpa,
399 (vbar->base_gpa + vbar->size), vdev, false);
400 ept_del_mr(vm, (uint64_t *)vm->arch_vm.nworld_eptp, vbar->base_gpa, vbar->size);
401 vdev->msix.mmio_gpa = vbar->base_gpa;
402 }
403 }
404
405 /**
406 * @brief Write to the virtual ivshmem device configuration space.
407 *
408 * This function handles writes to the configuration space of the specified virtual PCI device that is configured as an
409 * ivshmem device. It is typically called when the guest writes to the ivshmem device's configuration space.
410 *
411 * - If the write request is for a BAR register, it updates the BAR with the provided value. It also needs to update the
412 * ept mapping and mmio emulation handler based on the bar information. For detailed operations, refer to
413 * vpci_update_one_vbar(), ivshmem_vbar_map() and ivshmem_vbar_unmap().
414 * - If the write request is for the MSI-X capability register, it specially handles the write request. For detailed
415 * operations, refer to write_vmsix_cap_reg().
416 * - Otherwise, the function writes the provided value to the specified configuration space register.
417 * - Finally, the function returns 0.
418 *
419 * @param[inout] vdev Pointer to the virtual PCI device whose configuration is to be written.
420 * @param[in] offset Offset within the configuration space to start writing to.
421 * @param[in] bytes Number of bytes to write.
422 * @param[in] val The value to be written to the register.
423 *
424 * @return Always return 0.
425 *
426 * @pre vdev != NULL
427 * @pre offset + bytes <= 0x1000
428 *
429 * @post retval == 0
430 */
write_ivshmem_vdev_cfg(struct pci_vdev * vdev,uint32_t offset,uint32_t bytes,uint32_t val)431 static int32_t write_ivshmem_vdev_cfg(struct pci_vdev *vdev, uint32_t offset, uint32_t bytes, uint32_t val)
432 {
433 if (vbar_access(vdev, offset)) {
434 vpci_update_one_vbar(vdev, pci_bar_index(offset), val,
435 ivshmem_vbar_map, ivshmem_vbar_unmap);
436 } else if (msixcap_access(vdev, offset)) {
437 write_vmsix_cap_reg(vdev, offset, bytes, val);
438 } else {
439 pci_vdev_write_vcfg(vdev, offset, bytes, val);
440 }
441
442 return 0;
443 }
444
445 /**
446 * @brief Initialize the specified BAR for the ivshmem device.
447 *
448 * The ivshmem PCI device has three BARs: BAR0, BAR1, and BAR2. BAR0/BAR1 is a 32-bit memory BAR and BAR2 is a 64-bit
449 * memory BAR. This function initializes a specified BAR for the ivshmem device. It is typically called during the
450 * initialization phase of the ivshmem device.
451 *
452 * - If bar_idx exceeds 2, the function does nothing.
453 * - For BAR2, it finds the shared memory region based on the shared memory region name. If the shared memory region is
454 * not found, the function does nothing.
455 * - It updates corresponding fields in the pci_vbar structure of specified bar_idx.
456 * - It configures the Base Address Register in the device's configuration space.
457 * - For a 64-bit memory BAR (BAR2 for now), it also sets up the next Base Address Register as the high 32 bits.
458 *
459 * @param[inout] vdev Pointer to the PCI device that is treated as an ivshmem device.
460 * @param[in] bar_idx Index of the BAR to be initialized.
461 *
462 * @return None
463 *
464 * @pre vdev != NULL
465 * @pre vdev->pci_dev_config != NULL
466 * @pre bar_idx < PCI_BAR_COUNT
467 *
468 * @post N/A
469 */
init_ivshmem_bar(struct pci_vdev * vdev,uint32_t bar_idx)470 static void init_ivshmem_bar(struct pci_vdev *vdev, uint32_t bar_idx)
471 {
472 struct pci_vbar *vbar;
473 uint64_t addr, mask, size = 0UL;
474 struct acrn_vm_pci_dev_config *dev_config = vdev->pci_dev_config;
475
476 addr = dev_config->vbar_base[bar_idx];
477 vbar = &vdev->vbars[bar_idx];
478 vbar->bar_type.bits = addr;
479 mask = is_pci_io_bar(vbar) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK;
480 vbar->bar_type.bits &= (~mask);
481
482 if (bar_idx == IVSHMEM_SHM_BAR) {
483 struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
484 if (region != NULL) {
485 size = region->size;
486 vbar->base_hpa = region->hpa;
487 } else {
488 pr_err("%s ivshmem device %x:%x.%x has no memory region\n",
489 __func__, vdev->bdf.bits.b, vdev->bdf.bits.d, vdev->bdf.bits.f);
490 }
491 } else if (bar_idx == IVSHMEM_MSIX_BAR) {
492 size = VMSIX_ENTRY_TABLE_PBA_BAR_SIZE;
493 } else if (bar_idx == IVSHMEM_MMIO_BAR) {
494 size = IVSHMEM_MMIO_BAR_SIZE;
495 }
496 if (size != 0UL) {
497 vbar->size = size;
498 vbar->mask = (uint32_t) (~(size - 1UL));
499 pci_vdev_write_vbar(vdev, bar_idx, (uint32_t)addr);
500 if (is_pci_mem64lo_bar(vbar)) {
501 vbar = &vdev->vbars[bar_idx + 1U];
502 vbar->is_mem64hi = true;
503 vbar->mask = (uint32_t) ((~(size - 1UL)) >> 32U);
504 pci_vdev_write_vbar(vdev, (bar_idx + 1U), ((uint32_t)(addr >> 32U)));
505 }
506 }
507 }
508
509 /**
510 * @brief Initialize a virtual ivshmem device.
511 *
512 * This function initializes the specified virtual PCI device as an ivshmem device. It sets up the device to follow the
513 * specifications. Because the ivshmem is introduced by QEMU, the spec link is
514 * https://www.qemu.org/docs/master/specs/ivshmem-spec.html. This function is usually used in the initialization phase
515 * of VM.
516 *
517 * - It sets the pcidev field in ivshmem_device (all ivshmem devices emulated by hypervisor are static stored based on
518 * the configuration) to the vdev and sets the priv_data field in the specified vdev to new ivshmem_device structure
519 * data, indicating the association between the virtual PCI device and the ivshmem device.
520 * - Per the ivshmem specification and PCI Express Base Specification, it initializes the ivshmem device configuration
521 * space with appropriate values:
522 * - The device ID and Vendor ID is 0x11101af4.
523 * - It sets subsystem vendor ID to 0x8086 (Intel) and subsystem ID to the region ID of the shared memory region.
524 * - It sets up the MSI-X capability with 8 MSI-X table entries and maps the table and PBA into BAR1. For detailed
525 * operations, refer to add_vmsix_capability().
526 * - It initializes BAR0 for the device to hold device registers (256 Byte MMIO).
527 * - It initializes BAR1 for the device to hold MSI-X table and PBA.
528 * - It initializes BAR2 for the device to map the shared memory object. Because BAR2 is a 64-bit memory BAR, it also
529 * sets up the next Base Address Register as the high 32 bits and the total number of bars is set to 4.
530 * - It binds the device to the ivshmem server (hosts in hypervisor) for inter-VM communication.
531 * - Finally, it sets the user field to vdev, indicating that this ivshmem is used by a VM.
532 *
533 * @param[inout] vdev Pointer to the virtual PCI device to be initialized.
534 *
535 * @return None
536 *
537 * @pre vdev != NULL
538 * @pre vdev->pci_dev_config != NULL
539 * @pre vdev->pci != NULL
540 *
541 * @post N/A
542 */
init_ivshmem_vdev(struct pci_vdev * vdev)543 static void init_ivshmem_vdev(struct pci_vdev *vdev)
544 {
545 struct acrn_vm_pci_dev_config *dev_config = vdev->pci_dev_config;
546 struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
547
548 create_ivshmem_device(vdev);
549
550 /* initialize ivshmem config */
551 pci_vdev_write_vcfg(vdev, PCIR_VENDOR, 2U, IVSHMEM_VENDOR_ID);
552 pci_vdev_write_vcfg(vdev, PCIR_DEVICE, 2U, IVSHMEM_DEVICE_ID);
553 pci_vdev_write_vcfg(vdev, PCIR_REVID, 1U, IVSHMEM_REV);
554 pci_vdev_write_vcfg(vdev, PCIR_CLASS, 1U, IVSHMEM_CLASS);
555 pci_vdev_write_vcfg(vdev, PCIR_HDRTYPE, 1U,
556 PCIM_HDRTYPE_NORMAL | ((vdev->bdf.bits.f == 0U) ? PCIM_MFDEV : 0U));
557
558 pci_vdev_write_vcfg(vdev, PCIV_SUB_VENDOR_ID, 2U, IVSHMEM_INTEL_SUBVENDOR_ID);
559 if (region != NULL) {
560 pci_vdev_write_vcfg(vdev, PCIV_SUB_SYSTEM_ID, 2U, region->region_id);
561 }
562
563 add_vmsix_capability(vdev, MAX_IVSHMEM_MSIX_TBL_ENTRY_NUM, IVSHMEM_MSIX_BAR);
564
565 /* initialize ivshmem bars */
566 vdev->nr_bars = 4U;
567 init_ivshmem_bar(vdev, IVSHMEM_MMIO_BAR);
568 init_ivshmem_bar(vdev, IVSHMEM_MSIX_BAR);
569 init_ivshmem_bar(vdev, IVSHMEM_SHM_BAR);
570 ivshmem_server_bind_peer(vdev);
571
572 vdev->user = vdev;
573 }
574
575 /**
576 * @brief Deinitialize a virtual ivshmem device.
577 *
578 * This function deinitializes the specified virtual PCI device that was previously initialized as an ivshmem device.
579 *
580 * - It unbinds the device from the ivshmem server (hosts in hypervisor).
581 * - It sets the priv_data field in the specified vdev to NULL and sets the pcidev field in ivshmem_device to NULL,
582 * indicating the disassociation between the virtual PCI device and the ivshmem device.
583 * - It sets the user field to NULL, indicating that this virtual device is not owned by any VM.
584 *
585 * @param[inout] vdev Pointer to the virtual PCI device to be deinitialized.
586 *
587 * @return None
588 *
589 * @pre vdev != NULL
590 * @pre vdev->priv_data != NULL
591 * @pre vdev->pci != NULL
592 *
593 * @post N/A
594 */
deinit_ivshmem_vdev(struct pci_vdev * vdev)595 static void deinit_ivshmem_vdev(struct pci_vdev *vdev)
596 {
597 struct ivshmem_device *ivs_dev = (struct ivshmem_device *) vdev->priv_data;
598
599 ivshmem_server_unbind_peer(vdev);
600
601 spinlock_obtain(&ivshmem_dev_lock);
602 vdev->priv_data = NULL;
603 vdev->user = NULL;
604 ivs_dev->pcidev = NULL;
605 spinlock_release(&ivshmem_dev_lock);
606 }
607
608 /**
609 * @brief Create a virtual ivshmem device based on the specified device information.
610 *
611 * Basic ivshmem information is configured in the scenario file. After compilation, some device configurations of every
612 * ivshmem PCI device are stored in struct acrn_vm_pci_dev_config and every shared memory region is stored in struct
613 * ivshmem_shm_region. This function creates one virtual ivshmem device based on the input device information. The
614 * user-space tool(such as acrn-dm) may add an ivshmem device for a post-launch VM and this device is emulated in
615 * hypervisor. This function is used for the case for now and it is usually used in the initialization phase of a
616 * post-launch VM.
617 *
618 * - Per the ivshmem specification, BAR2 maps the shared memory object. For the ivshmem device to be created, the shared
619 * memory region name is stored in dev->args and the size of the shared memory region is stored in
620 * dev->io_size[IVSHMEM_SHM_BAR].
621 * - It traverses all configured PCI devices of the specified VM. Based on the input shared memory region name, it finds
622 * corresponding acrn_vm_pci_dev_config and ivshmem_shm_region.
623 * - If the acrn_vm_pci_dev_config is not found or the ivshmem_shm_region is not found or the size of ivshmem_shm_region
624 * is not equal to the size specified in dev->io_size[IVSHMEM_SHM_BAR], the function returns -EINVAL.
625 * - Otherwise, update the acrn_vm_pci_dev_config with input device information specified in dev and initializes a new
626 * virtual PCI device as an ivshmem device. For detailed operations, refer to vpci_init_vdev(). The function returns
627 * -EINVAL if the vpci_init_vdev() fails.
628 * - Finally, it returns 0 on success.
629 *
630 * @param[inout] vm Pointer to the VM that owns the ivshmem device.
631 * @param[in] dev Pointer to the device information to create an ivshmem device.
632 *
633 * @return A int32_t value to indicate the status of the ivshmem device creation.
634 *
635 * @retval 0 On success.
636 * @retval -EINVAL If the ivshmem device creation fails.
637 *
638 * @pre vm != NULL
639 * @pre dev != NULL
640 *
641 * @post retval <= 0
642 */
create_ivshmem_vdev(struct acrn_vm * vm,struct acrn_vdev * dev)643 int32_t create_ivshmem_vdev(struct acrn_vm *vm, struct acrn_vdev *dev)
644 {
645 uint32_t i;
646 struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
647 struct acrn_vm_pci_dev_config *dev_config = NULL;
648 struct pci_vdev *vdev = NULL;
649 int32_t ret = -EINVAL;
650
651 for (i = 0U; i < vm_config->pci_dev_num; i++) {
652 dev_config = &vm_config->pci_devs[i];
653 if (strncmp(dev_config->shm_region_name, (char *)dev->args, sizeof(dev_config->shm_region_name)) == 0) {
654 struct ivshmem_shm_region *region = find_shm_region(dev_config->shm_region_name);
655 if ((region != NULL) && (region->size == dev->io_size[IVSHMEM_SHM_BAR])) {
656 spinlock_obtain(&vm->vpci.lock);
657 dev_config->vbdf.value = (uint16_t) dev->slot;
658 dev_config->vbar_base[IVSHMEM_MMIO_BAR] = (uint64_t) dev->io_addr[IVSHMEM_MMIO_BAR];
659 dev_config->vbar_base[IVSHMEM_MSIX_BAR] = (uint64_t) dev->io_addr[IVSHMEM_MSIX_BAR];
660 dev_config->vbar_base[IVSHMEM_SHM_BAR] = (uint64_t) dev->io_addr[IVSHMEM_SHM_BAR];
661 dev_config->vbar_base[IVSHMEM_SHM_BAR] |= ((uint64_t) dev->io_addr[IVSHMEM_SHM_BAR + 1U]) << 32U;
662 vdev = vpci_init_vdev(&vm->vpci, dev_config, NULL);
663 spinlock_release(&vm->vpci.lock);
664 if (vdev != NULL) {
665 ret = 0;
666 }
667 }
668 break;
669 }
670 }
671
672 if (ret != 0) {
673 pr_warn("%s, failed to create ivshmem device %x:%x.%x\n", __func__,
674 dev->slot >> 8U, (dev->slot >> 3U) & 0x1fU, dev->slot & 0x7U);
675 }
676 return ret;
677 }
678
679 /**
680 * @brief Destroy the virtual ivshmem device.
681 *
682 * This function is the counterpart of create_ivshmem_vdev(). This function destroys the specified virtual PCI device
683 * that was previously initialized as an ivshmem device. It is usually used for a post-launch VM to destroy the ivshmem
684 * device.
685 *
686 * - It updates all BARs of the specified vdev. For detailed operations, refer to vpci_update_one_vbar() and the
687 * function ivshmem_vbar_unmap().
688 * - It deinitializes the specified virtual PCI device. For detailed operations, refer to vpci_deinit_vdev().
689 * - Finally, it returns 0.
690 *
691 * @param[inout] vdev Pointer to the virtual PCI device to be destroyed.
692 *
693 * @return Always return 0.
694 *
695 * @pre vdev != NULL
696 * @pre vdev->vpci != NULL
697 *
698 * @post retval == 0
699 */
destroy_ivshmem_vdev(struct pci_vdev * vdev)700 int32_t destroy_ivshmem_vdev(struct pci_vdev *vdev)
701 {
702 uint32_t i;
703 struct acrn_vpci *vpci = vdev->vpci;
704
705 for (i = 0U; i < vdev->nr_bars; i++) {
706 vpci_update_one_vbar(vdev, i, 0U, NULL, ivshmem_vbar_unmap);
707 }
708
709 spinlock_obtain(&vpci->lock);
710 vpci_deinit_vdev(vdev);
711 spinlock_release(&vpci->lock);
712
713 return 0;
714 }
715
716 /**
717 * @brief Data structure implementation for virtual Inter-VM shared memory device (ivshmem) operations.
718 *
719 * The ivshmem is actually first introduced by QEMU to share a memory region between multiple VMs and host. It is
720 * modeled as a PCI device exposing said memory to the VM as a PCI BAR. ACRN also introduces it to transfer data between
721 * VMs based on the shared memory region. Struct pci_vdev_ops is used to define the operations of virtual PCI device and
722 * definition here is used to support ivshmem device.
723 *
724 * @consistency N/A
725 * @alignment N/A
726 *
727 * @remark N/A
728 */
729 const struct pci_vdev_ops vpci_ivshmem_ops = {
730 .init_vdev = init_ivshmem_vdev,
731 .deinit_vdev = deinit_ivshmem_vdev,
732 .write_vdev_cfg = write_ivshmem_vdev_cfg,
733 .read_vdev_cfg = read_ivshmem_vdev_cfg,
734 };
735 /**
736 * @}
737 */
738 #endif
739