/*
 * Copyright (c) 2014-2015 Travis Geiselbrecht
 *
 * Use of this source code is governed by a MIT-style
 * license that can be found in the LICENSE file or at
 * https://opensource.org/licenses/MIT
 */
#include <assert.h>
#include <dev/virtio/block.h>
#include <inttypes.h>
#include <kernel/event.h>
#include <kernel/spinlock.h>
#include <kernel/thread.h>
#include <lib/bio.h>
#include <lk/compiler.h>
#include <lk/debug.h>
#include <lk/err.h>
#include <lk/list.h>
#include <lk/trace.h>
#include <stdlib.h>

#if WITH_KERNEL_VM
#include <kernel/vm.h>
#endif

#define LOCAL_TRACE 0
struct virtio_blk_config {
    uint64_t capacity;
    uint32_t size_max;
    uint32_t seg_max;
    struct virtio_blk_geometry {
        uint16_t cylinders;
        uint8_t heads;
        uint8_t sectors;
    } geometry;
    uint32_t blk_size;
    struct virtio_blk_topology {
        uint8_t physical_block_exp;
        uint8_t alignment_offset;
        uint16_t min_io_size;
        uint32_t opt_io_size;
    } topology;
    uint8_t writeback;
    uint8_t unused[3];
    uint32_t max_discard_sectors;
    uint32_t max_discard_seg;
    uint32_t discard_sector_alignment;
    uint32_t max_write_zeroes_sectors;
    uint32_t max_write_zeroes_seg;
    uint8_t write_zeroes_may_unmap;
    uint8_t unused1[3];
    uint32_t max_secure_erase_sectors;
    uint32_t max_secure_erase_seg;
    uint32_t secure_erase_sector_alignment;
    struct virtio_blk_zoned_characteristics {
        uint32_t zone_sectors;
        uint32_t max_open_zones;
        uint32_t max_active_zones;
        uint32_t max_append_sectors;
        uint32_t write_granularity;
        uint8_t model;
        uint8_t unused2[3];
    } zoned;
};
STATIC_ASSERT(sizeof(struct virtio_blk_config) == 96);

struct virtio_blk_req {
    uint32_t type;
    uint32_t ioprio; // v1.3 says this is 'reserved'
    uint64_t sector;
};
STATIC_ASSERT(sizeof(struct virtio_blk_req) == 16);
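
/*
 * On the wire, every request is a descriptor chain with three parts: a
 * device-readable header (struct virtio_blk_req above), one or more data
 * descriptors covering the transfer buffer, and a trailing device-writable
 * status byte (one of VIRTIO_BLK_S_*) that the device fills in on
 * completion. virtio_block_do_txn() below assembles exactly this chain.
 */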

struct virtio_blk_discard_write_zeroes {
    uint64_t sector;
    uint32_t num_sectors;
    struct {
        uint32_t unmap:1;
        uint32_t reserved:31;
    } flags;
};
STATIC_ASSERT(sizeof(struct virtio_blk_discard_write_zeroes) == 16);

struct virtio_block_txn {
    /* cookie for the bio callback, for async calls */
    void *cookie;
    size_t len;

    /* for async calls */
    void (*callback)(void *, struct bdev *, ssize_t);
    /* virtio request structure, must be DMA-able */
    struct virtio_blk_req req;

    /* response status, must be DMA-able */
    uint8_t status;
};
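
/*
 * Note: req and status are handed to the device by physical address, so a
 * txn must stay resident and unmodified until its completion interrupt
 * arrives; the per-device txns array (allocated ring-length deep in
 * virtio_block_init()) guarantees that.
 */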

#define VIRTIO_BLK_F_BARRIER  (1<<0) // legacy
#define VIRTIO_BLK_F_SIZE_MAX (1<<1)
#define VIRTIO_BLK_F_SEG_MAX  (1<<2)
#define VIRTIO_BLK_F_GEOMETRY (1<<4)
#define VIRTIO_BLK_F_RO       (1<<5)
#define VIRTIO_BLK_F_BLK_SIZE (1<<6)
#define VIRTIO_BLK_F_SCSI     (1<<7) // legacy
#define VIRTIO_BLK_F_FLUSH    (1<<9)
#define VIRTIO_BLK_F_TOPOLOGY (1<<10)
#define VIRTIO_BLK_F_CONFIG_WCE (1<<11)
#define VIRTIO_BLK_F_DISCARD  (1<<13)
#define VIRTIO_BLK_F_WRITE_ZEROES (1<<14)
#define VIRTIO_BLK_F_LIFETIME (1<<15)
#define VIRTIO_BLK_F_SECURE_ERASE (1<<16)
#define VIRTIO_BLK_F_ZONED    (1<<17)

#define VIRTIO_BLK_T_IN           0
#define VIRTIO_BLK_T_OUT          1
#define VIRTIO_BLK_T_FLUSH        4
#define VIRTIO_BLK_T_GET_ID       8
#define VIRTIO_BLK_T_GET_LIFETIME 10
#define VIRTIO_BLK_T_DISCARD      11
#define VIRTIO_BLK_T_WRITE_ZEROES 13
#define VIRTIO_BLK_T_SECURE_ERASE 14

#define VIRTIO_BLK_S_OK         0
#define VIRTIO_BLK_S_IOERR      1
#define VIRTIO_BLK_S_UNSUPP     2

#define VIRTIO_BLK_RING_LEN 256

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e);
static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count);
static status_t virtio_bdev_read_async(
    struct bdev *bdev, void *buf, off_t offset, size_t len,
    void (*callback)(void *, struct bdev *, ssize_t), void *cookie);
static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count);

struct virtio_block_dev {
    struct virtio_device *dev;

    /* bio block device */
    bdev_t bdev;

    /* our negotiated guest features */
    uint32_t guest_features;
    struct virtio_block_txn *txns;
};

static void dump_feature_bits(const char *name, uint32_t feature) {
    printf("virtio-block %s features (%#x):", name, feature);
    if (feature & VIRTIO_BLK_F_BARRIER) printf(" BARRIER");
    if (feature & VIRTIO_BLK_F_SIZE_MAX) printf(" SIZE_MAX");
    if (feature & VIRTIO_BLK_F_SEG_MAX) printf(" SEG_MAX");
    if (feature & VIRTIO_BLK_F_GEOMETRY) printf(" GEOMETRY");
    if (feature & VIRTIO_BLK_F_RO) printf(" RO");
    if (feature & VIRTIO_BLK_F_BLK_SIZE) printf(" BLK_SIZE");
    if (feature & VIRTIO_BLK_F_SCSI) printf(" SCSI");
    if (feature & VIRTIO_BLK_F_FLUSH) printf(" FLUSH");
    if (feature & VIRTIO_BLK_F_TOPOLOGY) printf(" TOPOLOGY");
    if (feature & VIRTIO_BLK_F_CONFIG_WCE) printf(" CONFIG_WCE");
    if (feature & VIRTIO_BLK_F_DISCARD) printf(" DISCARD");
    if (feature & VIRTIO_BLK_F_WRITE_ZEROES) printf(" WRITE_ZEROES");
    if (feature & VIRTIO_BLK_F_LIFETIME) printf(" LIFETIME");
    if (feature & VIRTIO_BLK_F_SECURE_ERASE) printf(" SECURE_ERASE");
    if (feature & VIRTIO_BLK_F_ZONED) printf(" ZONED");
    printf("\n");
}

status_t virtio_block_init(struct virtio_device *dev, uint32_t host_features) {
    LTRACEF("dev %p, host_features %#x\n", dev, host_features);

    /* allocate a new block device */
    struct virtio_block_dev *bdev = malloc(sizeof(struct virtio_block_dev));
    if (!bdev)
        return ERR_NO_MEMORY;

    bdev->dev = dev;
    dev->priv = bdev;

    /* make sure the device is reset */
    virtio_reset_device(dev);

    volatile struct virtio_blk_config *config = (struct virtio_blk_config *)dev->config_ptr;

    LTRACEF("capacity %" PRIx64 "\n", config->capacity);
    LTRACEF("size_max %#x\n", config->size_max);
    LTRACEF("seg_max  %#x\n", config->seg_max);
    LTRACEF("blk_size %#x\n", config->blk_size);

    /* ack and set the driver status bit */
    virtio_status_acknowledge_driver(dev);

    /* check features bits and ack/nak them */
    bdev->guest_features = host_features;

    /* keep the features we understand or can tolerate */
    bdev->guest_features &= (VIRTIO_BLK_F_SIZE_MAX |
                             VIRTIO_BLK_F_BLK_SIZE |
                             VIRTIO_BLK_F_GEOMETRY |
                             VIRTIO_BLK_F_TOPOLOGY |
                             VIRTIO_BLK_F_DISCARD |
                             VIRTIO_BLK_F_WRITE_ZEROES);
    virtio_set_guest_features(dev, 0, bdev->guest_features);

    /* TODO: handle a RO feature */

    /* allocate a virtio ring */
    virtio_alloc_ring(dev, 0, VIRTIO_BLK_RING_LEN);
    // The descriptor index is used to index into the txns array. This is a
    // simple way to track which transaction entry is free and which entry
    // corresponds to which descriptor, so the txns array is allocated with
    // the same length as the ring.
    bdev->txns = memalign(sizeof(struct virtio_block_txn), VIRTIO_BLK_RING_LEN * sizeof(struct virtio_block_txn));
    if (!bdev->txns) {
        free(bdev);
        return ERR_NO_MEMORY;
    }

    /* set our irq handler */
    dev->irq_driver_callback = &virtio_block_irq_driver_callback;

    /* set DRIVER_OK */
    virtio_status_driver_ok(dev);

    /* construct the block device */
    static uint8_t found_index = 0;
    char buf[16];
    snprintf(buf, sizeof(buf), "virtio%u", found_index++);
    bio_initialize_bdev(&bdev->bdev, buf,
                        config->blk_size, config->capacity,
                        0, NULL, BIO_FLAGS_NONE);

    /* override our block device hooks */
    bdev->bdev.read_block = &virtio_bdev_read_block;
    bdev->bdev.write_block = &virtio_bdev_write_block;
    bdev->bdev.read_async = &virtio_bdev_read_async;

    bio_register_device(&bdev->bdev);

    printf("virtio-block found device of size %" PRIu64 "\n", config->capacity * config->blk_size);

    /* dump feature bits */
    dump_feature_bits("host", host_features);
    dump_feature_bits("guest", bdev->guest_features);
    printf("\tsize_max %u seg_max %u\n", config->size_max, config->seg_max);
    if (host_features & VIRTIO_BLK_F_GEOMETRY) {
        printf("\tgeometry: cyl %u head %u sector %u\n", config->geometry.cylinders, config->geometry.heads, config->geometry.sectors);
    }
    if (host_features & VIRTIO_BLK_F_BLK_SIZE) {
        printf("\tblock_size: %u\n", config->blk_size);
    }
    if (host_features & VIRTIO_BLK_F_TOPOLOGY) {
        printf("\ttopology: block exp %u alignment_offset %u min_io_size %u opt_io_size %u\n",
               config->topology.physical_block_exp, config->topology.alignment_offset,
               config->topology.min_io_size, config->topology.opt_io_size);
    }
    if (host_features & VIRTIO_BLK_F_DISCARD) {
        printf("\tdiscard: max sectors %u max segments %u alignment %u\n",
               config->max_discard_sectors, config->max_discard_seg, config->discard_sector_alignment);
    }
    if (host_features & VIRTIO_BLK_F_WRITE_ZEROES) {
        printf("\twrite zeroes: max sectors %u max segments %u may unmap %u\n",
               config->max_write_zeroes_sectors, config->max_write_zeroes_seg, config->write_zeroes_may_unmap);
    }

    return NO_ERROR;
}
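
/*
 * Usage sketch (illustrative only, not part of this driver): once
 * registered, the device is reachable through the bio layer under the
 * name printed above, e.g. "virtio0":
 *
 *   bdev_t *bd = bio_open("virtio0");
 *   if (bd) {
 *       uint8_t sector[512];
 *       ssize_t err = bio_read_block(bd, sector, 0, 1); // read block 0
 *       bio_close(bd);
 *   }
 */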

static enum handler_return virtio_block_irq_driver_callback(struct virtio_device *dev, uint ring, const struct vring_used_elem *e) {
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    struct virtio_block_txn *txn = &bdev->txns[e->id];
    LTRACEF("dev %p, ring %u, e %p, id %u, len %u, status %d\n", dev, ring, e, e->id, e->len, txn->status);

    /* parse our descriptor chain, add back to the free queue */
    uint16_t i = e->id;
    for (;;) {
        int next;
        struct vring_desc *desc = virtio_desc_index_to_desc(dev, ring, i);

        //virtio_dump_desc(desc);

        if (desc->flags & VRING_DESC_F_NEXT) {
            next = desc->next;
        } else {
            /* end of chain */
            next = -1;
        }

        virtio_free_desc(dev, ring, i);

        if (next < 0)
            break;
        i = next;
    }

    if (txn->callback) {
        /* async completion: report the transfer length on success, an I/O error otherwise */
        ssize_t result = (txn->status == VIRTIO_BLK_S_OK) ? (ssize_t)txn->len : ERR_IO;
        LTRACEF("calling callback %p with cookie %p, len %zd\n", txn->callback,
                txn->cookie, result);
        txn->callback(txn->cookie, &bdev->bdev, result);
    }

    return INT_RESCHEDULE;
}

static status_t virtio_block_do_txn(struct virtio_device *dev, void *buf,
                                    off_t offset, size_t len, bool write,
                                    bio_async_callback_t callback, void *cookie,
                                    struct virtio_block_txn **txn_out) {
    struct virtio_block_dev *bdev = (struct virtio_block_dev *)dev->priv;

    uint16_t i;
    struct vring_desc *desc;

    LTRACEF("dev %p, buf %p, offset 0x%llx, len %zu\n", dev, buf, offset, len);
    /* put together a transfer */
    desc = virtio_alloc_desc_chain(dev, 0, 3, &i);
    LTRACEF("after alloc chain desc %p, i %u\n", desc, i);
    if (desc == NULL) {
        return ERR_NO_RESOURCES;
    }
    struct virtio_block_txn *txn = &bdev->txns[i];
    /* set up the request */
    txn->req.type = write ? VIRTIO_BLK_T_OUT : VIRTIO_BLK_T_IN;
    txn->req.ioprio = 0;
    txn->req.sector = offset / 512; /* virtio-blk sectors are always 512 bytes, regardless of blk_size */

    txn->callback = callback;
    txn->cookie = cookie;
    txn->len = len;
    LTRACEF("blk_req type %u ioprio %u sector %llu\n", txn->req.type,
            txn->req.ioprio, txn->req.sector);

    if (txn_out) {
        *txn_out = txn;
    }

    // XXX not cache safe.
    // At the moment only tested on arm qemu, which doesn't emulate cache.
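    //
    // A coherence sketch (assumption, not wired up here): on a target with
    // non-coherent DMA, the request and write buffers would likely need a
    // cache clean before submission, and read buffers an invalidate after
    // completion, along the lines of:
    //   arch_clean_cache_range((addr_t)&txn->req, sizeof(txn->req));
    //   arch_clean_cache_range((addr_t)buf, len);       // block writes
    //   arch_invalidate_cache_range((addr_t)buf, len);  // block reads, at completion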

    /* set up the descriptor pointing to the head */
#if WITH_KERNEL_VM
    paddr_t req_phys = vaddr_to_paddr(&txn->req);
#else
    paddr_t req_phys = (uint64_t)(uintptr_t)&txn->req;
#endif
    desc->addr = req_phys;
    desc->len = sizeof(struct virtio_blk_req);
    desc->flags |= VRING_DESC_F_NEXT;

    /* set up the descriptor pointing to the buffer */
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
#if WITH_KERNEL_VM
    /* translate the first buffer */
    vaddr_t va = (vaddr_t)buf;
    paddr_t pa = vaddr_to_paddr((void *)va);
    desc->addr = (uint64_t)pa;
    /* desc->len is filled in below */
#else
    /* non VM world simply queues a single buffer that transfers the whole thing */
    desc->addr = (uint64_t)(uintptr_t)buf;
    desc->len = len;
#endif
    desc->flags |= write ? 0 : VRING_DESC_F_WRITE; /* mark the buffer as device-writable if it's a block read */
    desc->flags |= VRING_DESC_F_NEXT;

#if WITH_KERNEL_VM
    /* see if we need to add more descriptors due to scatter gather */
    paddr_t next_pa = PAGE_ALIGN(pa + 1);
    desc->len = MIN(next_pa - pa, len);
    LTRACEF("first descriptor va 0x%lx desc->addr 0x%llx desc->len %u\n", va, desc->addr, desc->len);

    size_t remaining_len = len;
    remaining_len -= desc->len;
    while (remaining_len > 0) {
        /* amount of the source buffer handled by this iteration of the loop */
        size_t len_tohandle = MIN(remaining_len, PAGE_SIZE);

        /* translate the next page in the buffer */
        va = PAGE_ALIGN(va + 1);
        pa = vaddr_to_paddr((void *)va);
        LTRACEF("va now 0x%lx, pa 0x%lx, next_pa 0x%lx, remaining len %zu\n", va, pa, next_pa, remaining_len);

        /* is the newly translated physical address contiguous with the last one? */
        if (next_pa == pa) {
            /* we can simply extend the previous descriptor by another page */
            LTRACEF("extending last one by %zu bytes\n", len_tohandle);
            desc->len += len_tohandle;
        } else {
            /* new physical page needed, allocate a new descriptor and start again */
            uint16_t next_i = virtio_alloc_desc(dev, 0);
            struct vring_desc *next_desc = virtio_desc_index_to_desc(dev, 0, next_i);
            DEBUG_ASSERT(next_desc);

            LTRACEF("doesn't extend, need new desc, allocated desc %u (%p)\n", next_i, next_desc);

            /* fill this descriptor in and put it after the last one but before the response descriptor */
            next_desc->addr = (uint64_t)pa;
            next_desc->len = len_tohandle;
            next_desc->flags = write ? 0 : VRING_DESC_F_WRITE; /* mark the buffer as device-writable if it's a block read */
            next_desc->flags |= VRING_DESC_F_NEXT;
            next_desc->next = desc->next;
            desc->next = next_i;

            desc = next_desc;
        }
        remaining_len -= len_tohandle;
        next_pa += PAGE_SIZE;
    }
#endif

    /* set up the descriptor pointing to the response */
#if WITH_KERNEL_VM
    paddr_t status_phys = vaddr_to_paddr(&txn->status);
#else
    paddr_t status_phys = (uint64_t)(uintptr_t)&txn->status;
#endif
    desc = virtio_desc_index_to_desc(dev, 0, desc->next);
    desc->addr = status_phys;
    desc->len = 1;
    desc->flags = VRING_DESC_F_WRITE;

    /* submit the transfer */
    virtio_submit_chain(dev, 0, i);

    /* kick it off */
    virtio_kick(dev, 0);

    return NO_ERROR;
}
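
/*
 * The chain assembled above, for a buffer spanning N physically
 * discontiguous runs of pages:
 *
 *   [req header, 16 bytes, device-readable]
 *     -> [data run 1] -> ... -> [data run N]  (device-writable for reads)
 *       -> [status byte, device-writable]
 */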

static void sync_completion_cb(void *cookie, struct bdev *dev, ssize_t bytes) {
    DEBUG_ASSERT(cookie);
    event_t *event = (event_t *)cookie;
    event_signal(event, false);
}

ssize_t virtio_block_read_write(struct virtio_device *dev, void *buf,
                                const off_t offset, const size_t len,
                                const bool write) {
    struct virtio_block_txn *txn;
    event_t event;
    event_init(&event, false, EVENT_FLAG_AUTOUNSIGNAL);

    status_t err = virtio_block_do_txn(dev, buf, offset, len, write,
                                       &sync_completion_cb, &event, &txn);
    if (err < 0) {
        return err;
    }

    /* wait for the transfer to complete */
    event_wait(&event);

    LTRACEF("status 0x%hhx\n", txn->status);

    ssize_t result = (txn->status == VIRTIO_BLK_S_OK) ? (ssize_t)len : ERR_IO;

    return result;
}
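
/*
 * Example (hypothetical sizes): synchronously read the first 4KB of the
 * device, blocking until the irq callback signals the completion event:
 *
 *   uint8_t buf[4096];
 *   ssize_t bytes = virtio_block_read_write(dev, buf, 0, sizeof(buf), false);
 *   // bytes == 4096 on success, ERR_IO on device failure
 */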

static ssize_t virtio_bdev_read_block(struct bdev *bdev, void *buf, bnum_t block, uint count) {
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    ssize_t result = virtio_block_read_write(dev->dev, buf, (off_t)block * dev->bdev.block_size,
                     count * dev->bdev.block_size, false);
    return result;
}

static status_t virtio_bdev_read_async(struct bdev *bdev, void *buf,
                                       off_t offset, size_t len,
                                       bio_async_callback_t callback,
                                       void *cookie) {
    struct virtio_block_dev *dev =
        containerof(bdev, struct virtio_block_dev, bdev);

    return virtio_block_do_txn(dev->dev, buf, offset, len, false, callback,
                               cookie, NULL);
}

static ssize_t virtio_bdev_write_block(struct bdev *bdev, const void *buf, bnum_t block, uint count) {
    struct virtio_block_dev *dev = containerof(bdev, struct virtio_block_dev, bdev);

    LTRACEF("dev %p, buf %p, block 0x%x, count %u\n", bdev, buf, block, count);

    ssize_t result = virtio_block_read_write(dev->dev, (void *)buf, (off_t)block * dev->bdev.block_size,
                     count * dev->bdev.block_size, true);
    return result;
}