/*
 * Copyright (c) 2014-2015 Travis Geiselbrecht
 *
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/MIT
 */
#include <assert.h>
#include <dev/virtio/block.h>
#include <inttypes.h>
#include <kernel/event.h>
#include <kernel/spinlock.h>
#include <kernel/thread.h>
#include <lib/bio.h>
#include <lk/compiler.h>
#include <lk/debug.h>
#include <lk/err.h>
#include <lk/list.h>
#include <lk/trace.h>
#include <stdlib.h>

#if WITH_KERNEL_VM
#include <kernel/vm.h>
#endif

#define LOCAL_TRACE 0

struct virtio_blk_config {
    uint64_t capacity;
    uint32_t size_max;
    uint32_t seg_max;
    struct virtio_blk_geometry {
        uint16_t cylinders;
        uint8_t heads;
        uint8_t sectors;
    } geometry;
    uint32_t blk_size;
    struct virtio_blk_topology {
        uint8_t physical_block_exp;
        uint8_t alignment_offset;
        uint16_t min_io_size;
        uint32_t opt_io_size;
    } topology;
    uint8_t writeback;
    uint8_t unused[3];
    uint32_t max_discard_sectors;
    uint32_t max_discard_seg;
    uint32_t discard_sector_alignment;
    uint32_t max_write_zeroes_sectors;
    uint32_t max_write_zeroes_seg;
    uint8_t write_zeroes_may_unmap;
    uint8_t unused1[3];
    uint32_t max_secure_erase_sectors;
    uint32_t max_secure_erase_seg;
    uint32_t secure_erase_sector_alignment;
    struct virtio_blk_zoned_characteristics {
        uint32_t zone_sectors;
        uint32_t max_open_zones;
        uint32_t max_active_zones;
        uint32_t max_append_sectors;
        uint32_t write_granularity;
        uint8_t model;
        uint8_t unused2[3];
    } zoned;
};
STATIC_ASSERT(sizeof(struct virtio_blk_config) == 96);

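/* Request header placed at the front of every descriptor chain handed to the
 * device. It is followed by the data buffer descriptor(s) and a final one-byte
 * status that the device writes back (see virtio_block_do_txn() below).
 */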
struct virtio_blk_req {
    uint32_t type;
    uint32_t ioprio; // v1.3 says this is 'reserved'
    uint64_t sector;
};
STATIC_ASSERT(sizeof(struct virtio_blk_req) == 16);

struct virtio_blk_discard_write_zeroes {
    uint64_t sector;
    uint32_t num_sectors;
    struct {
        uint32_t unmap:1;
        uint32_t reserved:31;
    } flags;
};
STATIC_ASSERT(sizeof(struct virtio_blk_discard_write_zeroes) == 16);

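/* Per-request state. virtio_block_init() allocates one of these per ring
 * descriptor, and a request's entry is indexed by the head descriptor of its
 * chain, so the used-ring element id maps straight back to the transaction
 * that completed.
 */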
struct virtio_block_txn {
    /* bio callback cookie and transfer length, for async completion */
    void *cookie;
    size_t len;

    /* for async calls */
    void (*callback)(void *, struct bdev *, ssize_t);
    /* virtio request structure, must be DMA-able */
    struct virtio_blk_req req;

    /* response status, must be DMA-able */
    uint8_t status;
};

#define VIRTIO_BLK_F_BARRIER      (1<<0) // legacy
#define VIRTIO_BLK_F_SIZE_MAX     (1<<1)
#define VIRTIO_BLK_F_SEG_MAX      (1<<2)
#define VIRTIO_BLK_F_GEOMETRY     (1<<4)
#define VIRTIO_BLK_F_RO           (1<<5)
#define VIRTIO_BLK_F_BLK_SIZE     (1<<6)
#define VIRTIO_BLK_F_SCSI         (1<<7) // legacy
#define VIRTIO_BLK_F_FLUSH        (1<<9)
#define VIRTIO_BLK_F_TOPOLOGY     (1<<10)
#define VIRTIO_BLK_F_CONFIG_WCE   (1<<11)
#define VIRTIO_BLK_F_DISCARD      (1<<13)
#define VIRTIO_BLK_F_WRITE_ZEROES (1<<14)
#define VIRTIO_BLK_F_LIFETIME     (1<<15)
#define VIRTIO_BLK_F_SECURE_ERASE (1<<16)
#define VIRTIO_BLK_F_ZONED        (1<<17)

#define VIRTIO_BLK_T_IN           0
#define VIRTIO_BLK_T_OUT          1
#define VIRTIO_BLK_T_FLUSH        4
#define VIRTIO_BLK_T_GET_ID       8
#define VIRTIO_BLK_T_GET_LIFETIME 10
#define VIRTIO_BLK_T_DISCARD      11
#define VIRTIO_BLK_T_WRITE_ZEROES 13
#define VIRTIO_BLK_T_SECURE_ERASE 14

#define VIRTIO_BLK_S_OK     0
#define VIRTIO_BLK_S_IOERR  1
#define VIRTIO_BLK_S_UNSUPP 2

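/* number of descriptors allocated for virtqueue 0 (and matching txns entries) */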
#define VIRTIO_BLK_RING_LEN 256

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count);
static status_t virtio_bdev_read_async(
    struct bdev *bdev, void *buf, off_t offset, size_t len,
    void (*callback)(void *, struct bdev *, ssize_t), void *cookie);
static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count);

struct virtio_block_dev {
    struct virtio_device *dev;

    /* bio block device */
    bdev_t bdev;

    /* our negotiated guest features */
    uint32_t guest_features;
    struct virtio_block_txn *txns;
};

static void dump_feature_bits(const char *name, uint32_t feature) {
    printf("virtio-block %s features (%#x):", name, feature);
    if (feature & VIRTIO_BLK_F_BARRIER) printf(" BARRIER");
    if (feature & VIRTIO_BLK_F_SIZE_MAX) printf(" SIZE_MAX");
    if (feature & VIRTIO_BLK_F_SEG_MAX) printf(" SEG_MAX");
    if (feature & VIRTIO_BLK_F_GEOMETRY) printf(" GEOMETRY");
    if (feature & VIRTIO_BLK_F_RO) printf(" RO");
    if (feature & VIRTIO_BLK_F_BLK_SIZE) printf(" BLK_SIZE");
    if (feature & VIRTIO_BLK_F_SCSI) printf(" SCSI");
    if (feature & VIRTIO_BLK_F_FLUSH) printf(" FLUSH");
    if (feature & VIRTIO_BLK_F_TOPOLOGY) printf(" TOPOLOGY");
    if (feature & VIRTIO_BLK_F_CONFIG_WCE) printf(" CONFIG_WCE");
    if (feature & VIRTIO_BLK_F_DISCARD) printf(" DISCARD");
    if (feature & VIRTIO_BLK_F_WRITE_ZEROES) printf(" WRITE_ZEROES");
    if (feature & VIRTIO_BLK_F_LIFETIME) printf(" LIFETIME");
    if (feature & VIRTIO_BLK_F_SECURE_ERASE) printf(" SECURE_ERASE");
    if (feature & VIRTIO_BLK_F_ZONED) printf(" ZONED");
    printf("\n");
}

status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
    LTRACEF("dev %p, host_features %#x\n", dev, host_features);

    /* allocate a new block device */
    struct virtio_block_dev *bdev = malloc(sizeof(struct virtio_block_dev));
    if (!bdev)
        return ERR_NO_MEMORY;

    bdev->dev = dev;
    dev->priv = bdev;

    /* make sure the device is reset */
    virtio_reset_device(dev);

    volatile struct virtio_blk_config *config = (struct virtio_blk_config *)dev->config_ptr;

    LTRACEF("capacity %" PRIx64 "\n", config->capacity);
    LTRACEF("size_max %#x\n", config->size_max);
    LTRACEF("seg_max %#x\n", config->seg_max);
    LTRACEF("blk_size %#x\n", config->blk_size);

    /* ack and set the driver status bit */
    virtio_status_acknowledge_driver(dev);

    /* check features bits and ack/nak them */
    bdev->guest_features = host_features;

    /* keep the features we understand or can tolerate */
    bdev->guest_features &= (VIRTIO_BLK_F_SIZE_MAX |
                             VIRTIO_BLK_F_BLK_SIZE |
                             VIRTIO_BLK_F_GEOMETRY |
                             VIRTIO_BLK_F_TOPOLOGY |
                             VIRTIO_BLK_F_DISCARD |
                             VIRTIO_BLK_F_WRITE_ZEROES);
    virtio_set_guest_features(dev, 0, bdev->guest_features);

    /* TODO: handle a RO feature */

    /* allocate a virtio ring */
    virtio_alloc_ring(dev, 0, VIRTIO_BLK_RING_LEN);
    // The descriptor index is used to index into the txns array.
    // This is a simple way to keep track of which transaction entry is
    // free, and which transaction entry corresponds to which descriptor.
    // Hence, we allocate the txns array with the same size as the ring.
    bdev->txns = memalign(sizeof(struct virtio_block_txn), VIRTIO_BLK_RING_LEN * sizeof(struct virtio_block_txn));

    /* set our irq handler */
    dev->irq_driver_callback = &virtio_block_irq_driver_callback;

    /* set DRIVER_OK */
    virtio_status_driver_ok(dev);

    /* construct the block device */
    static uint8_t found_index = 0;
    char buf[16];
    snprintf(buf, sizeof(buf), "virtio%u", found_index++);
    bio_initialize_bdev(&bdev->bdev, buf,
                        config->blk_size, config->capacity,
                        0, NULL, BIO_FLAGS_NONE);

    /* override our block device hooks */
    bdev->bdev.read_block = &virtio_bdev_read_block;
    bdev->bdev.write_block = &virtio_bdev_write_block;
    bdev->bdev.read_async = &virtio_bdev_read_async;

    bio_register_device(&bdev->bdev);

    printf("virtio-block found device of size %" PRIu64 "\n", config->capacity * config->blk_size);

    /* dump feature bits */
    dump_feature_bits("host", host_features);
    dump_feature_bits("guest", bdev->guest_features);
    printf("\tsize_max %u seg_max %u\n", config->size_max, config->seg_max);
    if (host_features & VIRTIO_BLK_F_GEOMETRY) {
        printf("\tgeometry: cyl %u head %u sector %u\n", config->geometry.cylinders, config->geometry.heads, config->geometry.sectors);
    }
    if (host_features & VIRTIO_BLK_F_BLK_SIZE) {
        printf("\tblock_size: %u\n", config->blk_size);
    }
    if (host_features & VIRTIO_BLK_F_TOPOLOGY) {
        printf("\ttopology: block exp %u alignment_offset %u min_io_size %u opt_io_size %u\n",
               config->topology.physical_block_exp, config->topology.alignment_offset,
               config->topology.min_io_size, config->topology.opt_io_size);
    }
    if (host_features & VIRTIO_BLK_F_DISCARD) {
        printf("\tdiscard: max sectors %u max seg %u alignment %u\n",
               config->max_discard_sectors, config->max_discard_seg, config->discard_sector_alignment);
    }
    if (host_features & VIRTIO_BLK_F_WRITE_ZEROES) {
        printf("\twrite zeroes: max sectors %u max seg %u may unmap %u\n",
               config->max_write_zeroes_sectors, config->max_write_zeroes_seg, config->write_zeroes_may_unmap);
    }

    return NO_ERROR;
}

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e) {
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    struct virtio_block_txn *txn = &bdev->txns[e->id];
    LTRACEF("dev %p, ring %u, e %p, id %u, len %u, status %d\n", dev, ring, e, e->id, e->len, txn->status);

    /* parse our descriptor chain, add back to the free queue */
    uint16_t i = e->id;
    for (;;) {
        int next;
        struct vring_desc *desc = virtio_desc_index_to_desc(dev, ring, i);

        //virtio_dump_desc(desc);

        if (desc->flags & VRING_DESC_F_NEXT) {
            next = desc->next;
        } else {
            /* end of chain */
            next = -1;
        }

        virtio_free_desc(dev, ring, i);

        if (next < 0)
            break;
        i = next;
    }

    if (txn->callback) {
        // async
        ssize_t result =
            (txn->status == VIRTIO_BLK_S_OK) ? (ssize_t)txn->len : ERR_IO;
        LTRACEF("calling callback %p with cookie %p, result %zd\n", txn->callback,
                txn->cookie, result);
        txn->callback(txn->cookie, &bdev->bdev, result);
    }

    return INT_RESCHEDULE;
}

static status_t virtio_block_do_txn(struct virtio_device *dev, void *buf,
                                    off_t offset, size_t len, bool write,
                                    bio_async_callback_t callback, void *cookie,
                                    struct virtio_block_txn **txn_out) {
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    uint16_t i;
    struct vring_desc *desc;

    LTRACEF("dev %p, buf %p, offset 0x%llx, len %zu\n", dev, buf, offset, len);
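    /* A request is a chain of at least three descriptors: the device-readable
     * request header, one or more buffer descriptors (device-writable for
     * reads), and a final device-writable status byte.
     */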
    /* put together a transfer */
    desc = virtio_alloc_desc_chain(dev, 0, 3, &i);
    LTRACEF("after alloc chain desc %p, i %u\n", desc, i);
    if (desc == NULL) {
        return ERR_NO_RESOURCES;
    }
    struct virtio_block_txn *txn = &bdev->txns[i];
    /* set up the request */
    txn->req.type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
    txn->req.ioprio = 0;
    txn->req.sector = offset / 512;

    txn->callback = callback;
    txn->cookie = cookie;
    txn->len = len;
    LTRACEF("blk_req type %u ioprio %u sector %llu\n", txn->req.type,
            txn->req.ioprio, txn->req.sector);

    if (txn_out) {
        *txn_out = txn;
    }

    // XXX not cache safe.
    // At the moment only tested on arm qemu, which doesn't emulate cache.
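    // Making this cache safe would require cleaning the request header,
    // status byte and write buffers to the point of coherency before
    // submitting, and invalidating read buffers after completion, on
    // targets with non-coherent DMA.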

    /* set up the descriptor pointing to the head */
#if WITH_KERNEL_VM
    paddr_t req_phys = vaddr_to_paddr(&txn->req);
#else
    paddr_t req_phys = (uint64_t)(uintptr_t)&txn->req;
#endif
    desc->addr = req_phys;
    desc->len = sizeof(struct virtio_blk_req);
    desc->flags |= VRING_DESC_F_NEXT;

    /* set up the descriptor pointing to the buffer */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
#if WITH_KERNEL_VM
    /* translate the first buffer */
    vaddr_t va = (vaddr_t)buf;
    paddr_t pa = vaddr_to_paddr((void *)va);
    desc->addr = (uint64_t)pa;
    /* desc->len is filled in below */
#else
    /* non VM world simply queues a single buffer that transfers the whole thing */
    desc->addr = (uint64_t)(uintptr_t)buf;
    desc->len = len;
#endif
    desc->flags |= write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as write-only if it's a block read */
    desc->flags |= VRING_DESC_F_NEXT;

#if WITH_KERNEL_VM
    /* see if we need to add more descriptors due to scatter gather */
    paddr_t next_pa = PAGE_ALIGN(pa + 1);
    desc->len = MIN(next_pa - pa, len);
    LTRACEF("first descriptor va 0x%lx desc->addr 0x%llx desc->len %u\n", va, desc->addr, desc->len);

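    /* Walk the rest of the buffer a page at a time: extend the current
     * descriptor while the translated pages are physically contiguous,
     * otherwise splice a fresh descriptor into the chain ahead of the
     * status descriptor.
     */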
    size_t remaining_len = len;
    remaining_len -= desc->len;
    while (remaining_len > 0) {
        /* amount of source buffer handled by this iteration of the loop */
        size_t len_tohandle = MIN(remaining_len, PAGE_SIZE);

        /* translate the next page in the buffer */
        va = PAGE_ALIGN(va + 1);
        pa = vaddr_to_paddr((void *)va);
        LTRACEF("va now 0x%lx, pa 0x%lx, next_pa 0x%lx, remaining len %zu\n", va, pa, next_pa, remaining_len);

        /* is the new translated physical address contiguous to the last one? */
        if (next_pa == pa) {
            /* we can simply extend the previous descriptor by another page */
            LTRACEF("extending last one by %zu bytes\n", len_tohandle);
            desc->len += len_tohandle;
        } else {
            /* new physical page needed, allocate a new descriptor and start again */
            uint16_t next_i = virtio_alloc_desc(dev, 0);
            struct vring_desc *next_desc = virtio_desc_index_to_desc(dev, 0, next_i);
            DEBUG_ASSERT(next_desc);

            LTRACEF("doesn't extend, need new desc, allocated desc %i (%p)\n", next_i, next_desc);

            /* fill this descriptor in and put it after the last one but before the response descriptor */
            next_desc->addr = (uint64_t)pa;
            next_desc->len = len_tohandle;
            next_desc->flags = write ? 0 : VRING_DESC_F_WRITE; /* mark buffer as write-only if it's a block read */
            next_desc->flags |= VRING_DESC_F_NEXT;
            next_desc->next = desc->next;
            desc->next = next_i;

            desc = next_desc;
        }
        remaining_len -= len_tohandle;
        next_pa += PAGE_SIZE;
    }
#endif

    /* set up the descriptor pointing to the response */
#if WITH_KERNEL_VM
    paddr_t status_phys = vaddr_to_paddr(&txn->status);
#else
    paddr_t status_phys = (uint64_t)(uintptr_t)&txn->status;
#endif
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
    desc->addr = status_phys;
    desc->len = 1;
    desc->flags = VRING_DESC_F_WRITE;

    /* submit the transfer */
    virtio_submit_chain(dev, 0, i);

    /* kick it off */
    virtio_kick(dev, 0);

    return NO_ERROR;
}

static void sync_completion_cb(void *cookie, struct bdev *dev, ssize_t bytes) {
    DEBUG_ASSERT(cookie);
    event_t *event = (event_t *)cookie;
    event_signal(event, false);
}

ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf,
                                const off_t offset, const size_t len,
                                const bool write) {
    struct virtio_block_txn *txn;
    event_t event;
    event_init(&event, false, EVENT_FLAG_AUTOUNSIGNAL);

    status_t err = virtio_block_do_txn(dev, buf, offset, len, write,
                                       &sync_completion_cb, &event, &txn);
    if (err < 0) {
        return err;
    }

    /* wait for the transfer to complete */
    event_wait(&event);

    LTRACEF("status 0x%hhx\n", txn->status);

    ssize_t result = (txn->status == VIRTIO_BLK_S_OK) ? (ssize_t)len : ERR_IO;

    return result;
}

static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count) {
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    ssize_t result = virtio_block_read_write(dev->dev, buf, (off_t)block * dev->bdev.block_size,
                                             count * dev->bdev.block_size, false);
    return result;
}

static status_t virtio_bdev_read_async(struct bdev *bdev, void *buf,
                                       off_t offset, size_t len,
                                       bio_async_callback_t callback,
                                       void *cookie) {
    struct virtio_block_dev *dev =
        containerof(bdev, struct virtio_block_dev, bdev);

    return virtio_block_do_txn(dev->dev, buf, offset, len, false, callback,
                               cookie, NULL);
}

static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count) {
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    ssize_t result = virtio_block_read_write(dev->dev, (void *)buf, (off_t)block * dev->bdev.block_size,
                                             count * dev->bdev.block_size, true);
    return result;
}
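/*
 * Example use through lib/bio once the device has registered itself as
 * "virtio0", "virtio1", ... (a sketch; the exact name depends on probe order):
 *
 *   bdev_t *bd = bio_open("virtio0");
 *   if (bd) {
 *       uint8_t sector[512];
 *       ssize_t err = bio_read(bd, sector, 0, sizeof(sector));
 *       // err is the number of bytes read, or a negative error code
 *       bio_close(bd);
 *   }
 */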