// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

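/*
 * Feature bits offered to the driver. VDPASIM_FEATURES (from vdpa_sim.h)
 * carries the transport-level bits common to all simulators; the rest
 * advertise the virtio-blk features this device actually implements
 * (flush, discard, write-zeroes, size/segment limits, topology, MQ).
 */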
#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH) | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ) | \
				 (1ULL << VIRTIO_BLK_F_DISCARD) | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

/* 0x40000 sectors of 512 bytes each: a 128 MiB simulated disk */
#define VDPASIM_BLK_CAPACITY	0x40000
#define VDPASIM_BLK_SIZE_MAX	0x1000
#define VDPASIM_BLK_SEG_MAX	32
/* max sectors allowed in a single discard/write-zeroes request */
#define VDPASIM_BLK_DWZ_MAX_SECTORS	UINT_MAX

/*
 * 1 virtqueue, 1 address space, 1 virtqueue group: with a single group
 * bound to a single address space, every ring shares one IOTLB domain.
 */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1

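/*
 * Device ID string returned for VIRTIO_BLK_T_GET_ID requests; the array
 * definition NUL-pads it to VIRTIO_BLK_ID_BYTES (20 bytes).
 */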
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}
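
/*
 * On-the-wire layout of a virtio-blk request, per the virtio spec: the
 * driver-readable ("out") descriptors carry a struct virtio_blk_outhdr
 * (le32 type, le32 ioprio/reserved, le64 sector) optionally followed by
 * write payload, and the driver-writable ("in") descriptors receive any
 * read payload followed by a single status byte.
 */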

/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

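	/*
	 * vringh_getdesc_iotlb() returns 1 when a descriptor chain was
	 * fetched, 0 when the ring is empty, and a negative errno on
	 * error; anything but 1 means there is no request to serve.
	 */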
	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}

	/* The last byte is the status; we checked above that the last iov
	 * has enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

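	/*
	 * Only IN/OUT requests address the disk through hdr.sector; for
	 * every other type the field must be 0 (discard/write-zeroes carry
	 * their own sector range in the request payload).
	 */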
	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim->buffer + offset,
					      to_push);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      vdpasim->buffer + offset,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do: the simulated disk lives in plain memory */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		/* discard is a no-op for memory-backed storage */
		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			memset(vdpasim->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If an operation failed or pushed less than expected, skip the
	 * remaining bytes so the status lands in the last byte of the
	 * 'in' iov.
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}

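/*
 * Per-device worker: drains each ready virtqueue, handling at most five
 * requests per queue per pass before rescheduling itself, so a busy guest
 * cannot monopolize the workqueue.
 */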
static void vdpasim_blk_work(struct work_struct *work)
{
	struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
	bool reschedule = false;
	int i;

	spin_lock(&vdpasim->lock);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	spin_unlock(&vdpasim->lock);

	if (reschedule)
		schedule_work(&vdpasim->work);
}

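/*
 * Fill the virtio-blk config space. The cpu_to_vdpasim{16,32,64}() helpers
 * (from vdpa_sim.h) convert to the byte order negotiated with the driver.
 */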
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

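/*
 * The management parent device is statically allocated, so there is
 * nothing to free here; the empty release callback only satisfies the
 * driver core, which warns if a device has no release method.
 */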
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};

static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	/* the created device is refcounted, so drop the reference */
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del,
};

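/* Device classes this management device can create, reported to user space */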
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};
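
/*
 * Once the module is loaded, instances can be created through the vdpa
 * netlink API; with the iproute2 'vdpa' tool that is something like
 * (exact syntax depends on the tool version):
 *
 *	vdpa mgmtdev show
 *	vdpa dev add name blk0 mgmtdev vdpasim_blk
 */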

static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		/* a failed device_register() is cleaned up with put_device() */
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	return 0;

parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);