// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

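/* Virtio-blk features advertised by the simulator, on top of the generic
 * simulator features (VDPASIM_FEATURES).
 */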
#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH)    | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ)       | \
				 (1ULL << VIRTIO_BLK_F_DISCARD)  | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

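/* The capacity is expressed in 512-byte sectors, so the backing buffer
 * allocated at device creation is VDPASIM_BLK_CAPACITY << SECTOR_SHIFT
 * bytes (128 MiB).
 */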
#define VDPASIM_BLK_CAPACITY	0x40000
#define VDPASIM_BLK_SIZE_MAX	0x1000
#define VDPASIM_BLK_SEG_MAX	32
#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1

static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

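/* Check that a request does not exceed the device capacity and does not
 * exceed the per-request limit 'max_sectors'. Returns 'true' if the range
 * [start_sector, start_sector + num_sectors) is valid, 'false' otherwise.
 */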
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}

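/* A virtio-blk request, as seen through the descriptor chain, consists of
 * a device-readable 'struct virtio_blk_outhdr' (type, sector), an optional
 * data payload, and a final device-writable status byte.
 */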
/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
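	/* vringh_getdesc_iotlb() returns 1 if a descriptor chain was
	 * fetched, 0 if the ring is empty and a negative value on error;
	 * anything but 1 means there is no request to handle.
	 */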
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}

	/* The last byte is the status, and we checked above that the last
	 * iov has enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim->buffer + offset,
					      to_push);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      vdpasim->buffer + offset,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do: the backing store is plain memory, so
		 * completed writes are already "durable"
		 */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

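		/* Discard is a no-op for the in-memory backing buffer, while
		 * write_zeroes must actually clear the requested range.
		 */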
		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			memset(vdpasim->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If the operation failed, or pushed fewer bytes than expected,
	 * skip the remaining bytes so the status still lands in the last
	 * byte of the 'in iov'.
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing the index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}

static void vdpasim_blk_work(struct work_struct *work)
{
	struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
	bool reschedule = false;
	int i;

	spin_lock(&vdpasim->lock);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

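			/* Handle only a small batch of requests per work
			 * item, so a busy ring cannot monopolize the
			 * workqueue, and reschedule to process the rest.
			 */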
			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	spin_unlock(&vdpasim->lock);

	if (reschedule)
		schedule_work(&vdpasim->work);
}

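/* Fill the virtio-blk config space; the cpu_to_vdpasim*() helpers convert
 * the values to the byte order negotiated with the driver.
 */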
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

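/* The management device is static and owns no resources, so there is
 * nothing to free in the release callback.
 */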
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};

static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}

static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};

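/* Once this module is loaded, an instance can be created from user space
 * with the iproute2 'vdpa' tool (hypothetical session, assuming the tool
 * is installed), e.g.:
 *
 *   # vdpa mgmtdev show
 *   # vdpa dev add mgmtdev vdpasim_blk name blk0
 */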
static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	return 0;

parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);