1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 #include <stdio.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <regex.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <libgen.h>
35 #include <sys/mman.h>
36 #include <sys/ioctl.h>
37 #ifdef MEMSHR
38 #include <memshr.h>
39 #endif
40
41 #include "tapdisk-image.h"
42 #include "tapdisk-driver.h"
43 #include "tapdisk-server.h"
44 #include "tapdisk-interface.h"
45 #include "tapdisk-disktype.h"
46 #include "tapdisk-vbd.h"
47 #include "blktap2.h"
48
49 #define DBG(_level, _f, _a...) tlog_write(_level, _f, ##_a)
50 #define ERR(_err, _f, _a...) tlog_error(_err, _f, ##_a)
51
52 #if 1
53 #define ASSERT(p) \
54 do { \
55 if (!(p)) { \
56 DPRINTF("Assertion '%s' failed, line %d, " \
57 "file %s", #p, __LINE__, __FILE__); \
58 abort(); \
59 } \
60 } while (0)
61 #else
62 #define ASSERT(p) ((void)0)
63 #endif
64
65
66 #define TD_VBD_EIO_RETRIES 10
67 #define TD_VBD_EIO_SLEEP 1
68 #define TD_VBD_WATCHDOG_TIMEOUT 10
69
70 static void tapdisk_vbd_ring_event(event_id_t, char, void *);
71 static void tapdisk_vbd_callback(void *, blkif_response_t *);
72
73 /*
74 * initialization
75 */
76
77 static inline void
tapdisk_vbd_initialize_vreq(td_vbd_request_t * vreq)78 tapdisk_vbd_initialize_vreq(td_vbd_request_t *vreq)
79 {
80 memset(vreq, 0, sizeof(td_vbd_request_t));
81 INIT_LIST_HEAD(&vreq->next);
82 }
83
84 void
tapdisk_vbd_free(td_vbd_t * vbd)85 tapdisk_vbd_free(td_vbd_t *vbd)
86 {
87 if (vbd) {
88 tapdisk_vbd_free_stack(vbd);
89 list_del_init(&vbd->next);
90 free(vbd->name);
91 free(vbd);
92 }
93 }
94
95 td_vbd_t*
tapdisk_vbd_create(uint16_t uuid)96 tapdisk_vbd_create(uint16_t uuid)
97 {
98 td_vbd_t *vbd;
99 int i;
100
101 vbd = calloc(1, sizeof(td_vbd_t));
102 if (!vbd) {
103 EPRINTF("failed to allocate tapdisk state\n");
104 return NULL;
105 }
106
107 vbd->uuid = uuid;
108 vbd->minor = -1;
109 vbd->ring.fd = -1;
110
111 /* default blktap ring completion */
112 vbd->callback = tapdisk_vbd_callback;
113 vbd->argument = vbd;
114
115 #ifdef MEMSHR
116 memshr_vbd_initialize();
117 #endif
118
119 INIT_LIST_HEAD(&vbd->driver_stack);
120 INIT_LIST_HEAD(&vbd->images);
121 INIT_LIST_HEAD(&vbd->new_requests);
122 INIT_LIST_HEAD(&vbd->pending_requests);
123 INIT_LIST_HEAD(&vbd->failed_requests);
124 INIT_LIST_HEAD(&vbd->completed_requests);
125 INIT_LIST_HEAD(&vbd->next);
126 gettimeofday(&vbd->ts, NULL);
127
128 for (i = 0; i < MAX_REQUESTS; i++)
129 tapdisk_vbd_initialize_vreq(vbd->request_list + i);
130
131 return vbd;
132 }
133
134 int
tapdisk_vbd_initialize(uint16_t uuid)135 tapdisk_vbd_initialize(uint16_t uuid)
136 {
137 td_vbd_t *vbd;
138
139 vbd = tapdisk_server_get_vbd(uuid);
140 if (vbd) {
141 EPRINTF("duplicate vbds! %u\n", uuid);
142 return -EEXIST;
143 }
144
145 vbd = tapdisk_vbd_create(uuid);
146
147 tapdisk_server_add_vbd(vbd);
148
149 return 0;
150 }
151
152 void
tapdisk_vbd_set_callback(td_vbd_t * vbd,td_vbd_cb_t callback,void * argument)153 tapdisk_vbd_set_callback(td_vbd_t *vbd, td_vbd_cb_t callback, void *argument)
154 {
155 vbd->callback = callback;
156 vbd->argument = argument;
157 }
158
159 static int
tapdisk_vbd_validate_chain(td_vbd_t * vbd)160 tapdisk_vbd_validate_chain(td_vbd_t *vbd)
161 {
162 int err;
163 td_image_t *image, *parent, *tmp;
164
165 DPRINTF("VBD CHAIN:\n");
166
167 tapdisk_vbd_for_each_image(vbd, image, tmp) {
168 DPRINTF("%s: %d\n", image->name, image->type);
169
170 if (tapdisk_vbd_is_last_image(vbd, image))
171 break;
172
173 parent = tapdisk_vbd_next_image(image);
174 err = td_validate_parent(image, parent);
175 if (err)
176 return err;
177 }
178
179 return 0;
180 }
181
182 void
tapdisk_vbd_close_vdi(td_vbd_t * vbd)183 tapdisk_vbd_close_vdi(td_vbd_t *vbd)
184 {
185 td_image_t *image, *tmp;
186
187 tapdisk_vbd_for_each_image(vbd, image, tmp) {
188 td_close(image);
189 tapdisk_image_free(image);
190 }
191
192 INIT_LIST_HEAD(&vbd->images);
193 td_flag_set(vbd->state, TD_VBD_CLOSED);
194
195 tapdisk_vbd_free_stack(vbd);
196 }
197
198 static int
tapdisk_vbd_add_block_cache(td_vbd_t * vbd)199 tapdisk_vbd_add_block_cache(td_vbd_t *vbd)
200 {
201 int err;
202 td_driver_t *driver;
203 td_image_t *cache, *image, *target, *tmp;
204
205 target = NULL;
206
207 tapdisk_vbd_for_each_image(vbd, image, tmp)
208 if (td_flag_test(image->flags, TD_OPEN_RDONLY) &&
209 td_flag_test(image->flags, TD_OPEN_SHAREABLE)) {
210 target = image;
211 break;
212 }
213
214 if (!target)
215 return 0;
216
217 cache = tapdisk_image_allocate(target->name,
218 DISK_TYPE_BLOCK_CACHE,
219 target->storage,
220 target->flags,
221 target->private);
222 if (!cache)
223 return -ENOMEM;
224
225 /* try to load existing cache */
226 err = td_load(cache);
227 if (!err)
228 goto done;
229
230 /* hack driver to send open() correct image size */
231 if (!target->driver) {
232 err = -ENODEV;
233 goto fail;
234 }
235
236 cache->driver = tapdisk_driver_allocate(cache->type,
237 cache->name,
238 cache->flags,
239 cache->storage);
240 if (!cache->driver) {
241 err = -ENOMEM;
242 goto fail;
243 }
244
245 cache->driver->info = target->driver->info;
246
247 /* try to open new cache */
248 err = td_open(cache);
249 if (!err)
250 goto done;
251
252 fail:
253 /* give up */
254 tapdisk_image_free(target);
255 return err;
256
257 done:
258 /* insert cache before image */
259 list_add(&cache->next, target->next.prev);
260 return 0;
261 }
262
263 static int
tapdisk_vbd_add_dirty_log(td_vbd_t * vbd)264 tapdisk_vbd_add_dirty_log(td_vbd_t *vbd)
265 {
266 int err;
267 td_driver_t *driver;
268 td_image_t *log, *parent;
269
270 driver = NULL;
271 log = NULL;
272
273 parent = tapdisk_vbd_first_image(vbd);
274
275 log = tapdisk_image_allocate(parent->name,
276 DISK_TYPE_LOG,
277 parent->storage,
278 parent->flags,
279 vbd);
280 if (!log)
281 return -ENOMEM;
282
283 driver = tapdisk_driver_allocate(log->type,
284 log->name,
285 log->flags,
286 log->storage);
287 if (!driver) {
288 err = -ENOMEM;
289 goto fail;
290 }
291
292 driver->info = parent->driver->info;
293 log->driver = driver;
294
295 err = td_open(log);
296 if (err)
297 goto fail;
298
299 list_add(&log->next, &vbd->images);
300 return 0;
301
302 fail:
303 tapdisk_image_free(log);
304 return err;
305 }
306
307 static int
tapdisk_vbd_open_level(td_vbd_t * vbd,struct list_head * head,const char * params,int driver_type,td_disk_info_t * driver_info,td_flag_t flags)308 tapdisk_vbd_open_level(td_vbd_t *vbd, struct list_head *head,
309 const char *params, int driver_type,
310 td_disk_info_t *driver_info, td_flag_t flags)
311 {
312 const char *name;
313 int type, err;
314 td_image_t *image;
315 td_disk_id_t id;
316 td_driver_t *driver;
317
318 name = params;
319 id.name = NULL;
320 type = driver_type;
321 INIT_LIST_HEAD(head);
322
323 for (;;) {
324 err = -ENOMEM;
325 image = tapdisk_image_allocate(name, type,
326 vbd->storage, flags, vbd);
327
328 free(id.name);
329
330 if (!image)
331 goto out;
332
333
334 /* this breaks if a driver modifies its info within a layer */
335 err = __td_open(image, driver_info);
336 if (err)
337 goto out;
338
339 /* TODO: non-sink drivers that don't care about their child
340 * currently return EINVAL. Could return TD_PARENT_OK or
341 * TD_ANY_PARENT */
342
343 err = td_get_parent_id(image, &id);
344 if (err && (err != TD_NO_PARENT && err != -EINVAL)) {
345 td_close(image);
346 goto out;
347 }
348
349 /* add this image to the end of the list */
350 list_add_tail(&image->next, head);
351 image = NULL;
352
353 /* if the image does not have a parent we return the
354 * list of images generated by this level of the stack */
355 if (err == TD_NO_PARENT || err == -EINVAL) {
356 err = 0;
357 goto out;
358 }
359
360 name = id.name;
361 type = id.drivertype;
362
363 flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE);
364 }
365
366 out:
367 if (err) {
368 if (image) {
369 td_close(image);
370 tapdisk_image_free(image);
371 }
372 while (!list_empty(head)) {
373 image = list_entry(&head->next, td_image_t, next);
374 td_close(image);
375 tapdisk_image_free(image);
376 }
377 }
378
379 return err;
380 }
381
382 static int
__tapdisk_vbd_open_vdi(td_vbd_t * vbd,td_flag_t extra_flags)383 __tapdisk_vbd_open_vdi(td_vbd_t *vbd, td_flag_t extra_flags)
384 {
385 int err;
386 td_flag_t flags;
387 td_image_t *tmp;
388 td_vbd_driver_info_t *driver_info;
389 struct list_head *images;
390 td_disk_info_t *parent_info = NULL;
391
392 if (list_empty(&vbd->driver_stack))
393 return -ENOENT;
394
395 flags = (vbd->flags & ~TD_OPEN_SHAREABLE) | extra_flags;
396
397 /* loop on each user specified driver.
398 * NOTE: driver_info is in reverse order. That is, the first
399 * item is the 'parent' or 'sink' driver */
400 list_for_each_entry(driver_info, &vbd->driver_stack, next) {
401 LIST_HEAD(images);
402
403 err = tapdisk_vbd_open_level(vbd, &images,
404 driver_info->params,
405 driver_info->type,
406 parent_info, flags);
407 if (err)
408 goto fail;
409
410 /* after each loop,
411 * append the created stack to the result stack */
412 list_splice(&images, &vbd->images);
413
414 /* set the parent_info to the first diskinfo on the stack */
415 tmp = tapdisk_vbd_first_image(vbd);
416 parent_info = &tmp->info;
417 }
418
419 if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) {
420 err = tapdisk_vbd_add_dirty_log(vbd);
421 if (err)
422 goto fail;
423 }
424
425 if (td_flag_test(vbd->flags, TD_OPEN_ADD_CACHE)) {
426 err = tapdisk_vbd_add_block_cache(vbd);
427 if (err)
428 goto fail;
429 }
430
431 err = tapdisk_vbd_validate_chain(vbd);
432 if (err)
433 goto fail;
434
435 td_flag_clear(vbd->state, TD_VBD_CLOSED);
436
437 return 0;
438
439 fail:
440 tapdisk_vbd_close_vdi(vbd);
441 return err;
442 }
443
444 /* this populates a vbd type based on path */
445 int
tapdisk_vbd_parse_stack(td_vbd_t * vbd,const char * path)446 tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path)
447 {
448 int err;
449 char *params, *driver_str;
450 td_vbd_driver_info_t *driver;
451
452 err = tapdisk_namedup(¶ms, path);
453 if (err)
454 return err;
455
456 /* tokenize params based on pipe '|' */
457 driver_str = strtok(params, "|");
458 while (driver_str != NULL) {
459 const char *path;
460 int type;
461
462 /* parse driver info and add to vbd */
463 driver = calloc(1, sizeof(td_vbd_driver_info_t));
464 if (!driver) {
465 PERROR("malloc");
466 err = -errno;
467 goto out;
468 }
469 INIT_LIST_HEAD(&driver->next);
470
471 err = tapdisk_parse_disk_type(driver_str, &path, &type);
472 if (err) {
473 free(driver);
474 goto out;
475 }
476
477 driver->type = type;
478 driver->params = strdup(path);
479 if (!driver->params) {
480 err = -ENOMEM;
481 free(driver);
482 goto out;
483 }
484
485 /* build the list backwards as the last driver will be the
486 * first driver to open in the stack */
487 list_add(&driver->next, &vbd->driver_stack);
488
489 /* get next driver string */
490 driver_str = strtok(NULL, "|");
491 }
492
493 out:
494 free(params);
495 if (err)
496 tapdisk_vbd_free_stack(vbd);
497
498 return err;
499 }
500
501 void
tapdisk_vbd_free_stack(td_vbd_t * vbd)502 tapdisk_vbd_free_stack(td_vbd_t *vbd)
503 {
504 td_vbd_driver_info_t *driver;
505
506 while (!list_empty(&vbd->driver_stack)) {
507 driver = list_entry(vbd->driver_stack.next,
508 td_vbd_driver_info_t, next);
509 list_del(&driver->next);
510 free(driver->params);
511 free(driver);
512 }
513 }
514
515 /* NOTE: driver type, etc. must be set */
516 int
tapdisk_vbd_open_stack(td_vbd_t * vbd,uint16_t storage,td_flag_t flags)517 tapdisk_vbd_open_stack(td_vbd_t *vbd, uint16_t storage, td_flag_t flags)
518 {
519 int i, err = 0;
520
521 vbd->flags = flags;
522 vbd->storage = storage;
523
524 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
525 err = __tapdisk_vbd_open_vdi(vbd, 0);
526 if (err != -EIO)
527 break;
528
529 sleep(TD_VBD_EIO_SLEEP);
530 }
531 if (err)
532 goto fail;
533
534 return 0;
535
536 fail:
537 return err;
538 }
539
540 int
tapdisk_vbd_open_vdi(td_vbd_t * vbd,const char * path,uint16_t drivertype,uint16_t storage,td_flag_t flags)541 tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *path,
542 uint16_t drivertype, uint16_t storage, td_flag_t flags)
543 {
544 int i, err;
545 const struct tap_disk *ops;
546
547 ops = tapdisk_disk_drivers[drivertype];
548 if (!ops)
549 return -EINVAL;
550 DPRINTF("Loaded %s driver for vbd %u %s 0x%08x\n",
551 ops->disk_type, vbd->uuid, path, flags);
552
553 err = tapdisk_namedup(&vbd->name, path);
554 if (err)
555 return err;
556
557 vbd->flags = flags;
558 vbd->storage = storage;
559
560 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
561 err = __tapdisk_vbd_open_vdi(vbd, 0);
562 if (err != -EIO)
563 break;
564
565 sleep(TD_VBD_EIO_SLEEP);
566 }
567 if (err)
568 goto fail;
569
570 return 0;
571
572 fail:
573 free(vbd->name);
574 vbd->name = NULL;
575 return err;
576 }
577
578 static int
tapdisk_vbd_register_event_watches(td_vbd_t * vbd)579 tapdisk_vbd_register_event_watches(td_vbd_t *vbd)
580 {
581 event_id_t id;
582
583 id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
584 vbd->ring.fd, 0,
585 tapdisk_vbd_ring_event, vbd);
586 if (id < 0)
587 return id;
588
589 vbd->ring_event_id = id;
590
591 return 0;
592 }
593
594 static void
tapdisk_vbd_unregister_events(td_vbd_t * vbd)595 tapdisk_vbd_unregister_events(td_vbd_t *vbd)
596 {
597 if (vbd->ring_event_id)
598 tapdisk_server_unregister_event(vbd->ring_event_id);
599 }
600
601 static int
tapdisk_vbd_map_device(td_vbd_t * vbd,const char * devname)602 tapdisk_vbd_map_device(td_vbd_t *vbd, const char *devname)
603 {
604
605 int err, psize;
606 td_ring_t *ring;
607
608 ring = &vbd->ring;
609 psize = getpagesize();
610
611 ring->fd = open(devname, O_RDWR);
612 if (ring->fd == -1) {
613 err = -errno;
614 EPRINTF("failed to open %s: %d\n", devname, err);
615 goto fail;
616 }
617
618 ring->mem = mmap(0, psize * BLKTAP_MMAP_REGION_SIZE,
619 PROT_READ | PROT_WRITE, MAP_SHARED, ring->fd, 0);
620 if (ring->mem == MAP_FAILED) {
621 err = -errno;
622 EPRINTF("failed to mmap %s: %d\n", devname, err);
623 goto fail;
624 }
625
626 ring->sring = (blkif_sring_t *)((unsigned long)ring->mem);
627 BACK_RING_INIT(&ring->fe_ring, ring->sring, psize);
628
629 ring->vstart =
630 (unsigned long)ring->mem + (BLKTAP_RING_PAGES * psize);
631
632 ioctl(ring->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE);
633
634 return 0;
635
636 fail:
637 if (ring->mem && ring->mem != MAP_FAILED)
638 munmap(ring->mem, psize * BLKTAP_MMAP_REGION_SIZE);
639 if (ring->fd != -1)
640 close(ring->fd);
641 ring->fd = -1;
642 ring->mem = NULL;
643 return err;
644 }
645
646 static int
tapdisk_vbd_unmap_device(td_vbd_t * vbd)647 tapdisk_vbd_unmap_device(td_vbd_t *vbd)
648 {
649 int psize;
650
651 psize = getpagesize();
652
653 if (vbd->ring.fd != -1)
654 close(vbd->ring.fd);
655 if (vbd->ring.mem > 0)
656 munmap(vbd->ring.mem, psize * BLKTAP_MMAP_REGION_SIZE);
657
658 return 0;
659 }
660
661 void
tapdisk_vbd_detach(td_vbd_t * vbd)662 tapdisk_vbd_detach(td_vbd_t *vbd)
663 {
664 tapdisk_vbd_unregister_events(vbd);
665
666 tapdisk_vbd_unmap_device(vbd);
667 vbd->minor = -1;
668 }
669
670
671 int
tapdisk_vbd_attach(td_vbd_t * vbd,const char * devname,int minor)672 tapdisk_vbd_attach(td_vbd_t *vbd, const char *devname, int minor)
673 {
674 int err;
675
676 err = tapdisk_vbd_map_device(vbd, devname);
677 if (err)
678 goto fail;
679
680 err = tapdisk_vbd_register_event_watches(vbd);
681 if (err)
682 goto fail;
683
684 vbd->minor = minor;
685
686 return 0;
687
688 fail:
689 tapdisk_vbd_detach(vbd);
690
691 return err;
692 }
693
694 int
tapdisk_vbd_open(td_vbd_t * vbd,const char * name,uint16_t type,uint16_t storage,int minor,const char * ring,td_flag_t flags)695 tapdisk_vbd_open(td_vbd_t *vbd, const char *name, uint16_t type,
696 uint16_t storage, int minor, const char *ring, td_flag_t flags)
697 {
698 int err;
699
700 err = tapdisk_vbd_open_stack(vbd, storage, flags);
701 if (err)
702 goto out;
703
704 err = tapdisk_vbd_attach(vbd, ring, minor);
705 if (err)
706 goto out;
707
708 return 0;
709
710 out:
711 tapdisk_vbd_detach(vbd);
712 tapdisk_vbd_close_vdi(vbd);
713 free(vbd->name);
714 vbd->name = NULL;
715 return err;
716 }
717
718 static void
tapdisk_vbd_queue_count(td_vbd_t * vbd,int * new,int * pending,int * failed,int * completed)719 tapdisk_vbd_queue_count(td_vbd_t *vbd, int *new,
720 int *pending, int *failed, int *completed)
721 {
722 int n, p, f, c;
723 td_vbd_request_t *vreq, *tvreq;
724
725 n = 0;
726 p = 0;
727 f = 0;
728 c = 0;
729
730 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->new_requests)
731 n++;
732
733 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->pending_requests)
734 p++;
735
736 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->failed_requests)
737 f++;
738
739 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->completed_requests)
740 c++;
741
742 *new = n;
743 *pending = p;
744 *failed = f;
745 *completed = c;
746 }
747
748 static int
tapdisk_vbd_shutdown(td_vbd_t * vbd)749 tapdisk_vbd_shutdown(td_vbd_t *vbd)
750 {
751 int new, pending, failed, completed;
752
753 if (!list_empty(&vbd->pending_requests))
754 return -EAGAIN;
755
756 tapdisk_vbd_kick(vbd);
757 tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
758
759 DPRINTF("%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
760 "failed: 0x%02x, completed: 0x%02x\n",
761 vbd->name, vbd->state, new, pending, failed, completed);
762 DPRINTF("last activity: %010ld.%06lld, errors: 0x%04"PRIx64", "
763 "retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
764 "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
765 vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec,
766 vbd->errors, vbd->retries, vbd->received, vbd->returned,
767 vbd->kicked);
768
769 tapdisk_vbd_close_vdi(vbd);
770 tapdisk_vbd_detach(vbd);
771 tapdisk_server_remove_vbd(vbd);
772 tapdisk_vbd_free(vbd);
773
774 tlog_print_errors();
775
776 return 0;
777 }
778
779 int
tapdisk_vbd_close(td_vbd_t * vbd)780 tapdisk_vbd_close(td_vbd_t *vbd)
781 {
782 /*
783 * don't close if any requests are pending in the aio layer
784 */
785 if (!list_empty(&vbd->pending_requests))
786 goto fail;
787
788 /*
789 * if the queue is still active and we have more
790 * requests, try to complete them before closing.
791 */
792 if (tapdisk_vbd_queue_ready(vbd) &&
793 (!list_empty(&vbd->new_requests) ||
794 !list_empty(&vbd->failed_requests) ||
795 !list_empty(&vbd->completed_requests)))
796 goto fail;
797
798 return tapdisk_vbd_shutdown(vbd);
799
800 fail:
801 td_flag_set(vbd->state, TD_VBD_SHUTDOWN_REQUESTED);
802 DBG(TLOG_WARN, "%s: requests pending\n", vbd->name);
803 return -EAGAIN;
804 }
805
806 /*
807 * control operations
808 */
809
810 void
tapdisk_vbd_debug(td_vbd_t * vbd)811 tapdisk_vbd_debug(td_vbd_t *vbd)
812 {
813 td_image_t *image, *tmp;
814 int new, pending, failed, completed;
815
816 tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
817
818 DBG(TLOG_WARN, "%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
819 "failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06lld, "
820 "errors: 0x%04"PRIx64", retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
821 "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
822 vbd->name, vbd->state, new, pending, failed, completed,
823 vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec,
824 vbd->errors, vbd->retries,
825 vbd->received, vbd->returned, vbd->kicked);
826
827 tapdisk_vbd_for_each_image(vbd, image, tmp)
828 td_debug(image);
829 }
830
831 static void
tapdisk_vbd_drop_log(td_vbd_t * vbd)832 tapdisk_vbd_drop_log(td_vbd_t *vbd)
833 {
834 if (td_flag_test(vbd->state, TD_VBD_LOG_DROPPED))
835 return;
836
837 tapdisk_vbd_debug(vbd);
838 tlog_flush();
839 td_flag_set(vbd->state, TD_VBD_LOG_DROPPED);
840 }
841
842 int
tapdisk_vbd_get_image_info(td_vbd_t * vbd,image_t * img)843 tapdisk_vbd_get_image_info(td_vbd_t *vbd, image_t *img)
844 {
845 td_image_t *image;
846
847 memset(img, 0, sizeof(image_t));
848
849 if (list_empty(&vbd->images))
850 return -EINVAL;
851
852 image = tapdisk_vbd_first_image(vbd);
853 img->size = image->info.size;
854 img->secsize = image->info.sector_size;
855 img->info = image->info.info;
856
857 return 0;
858 }
859
860 int
tapdisk_vbd_queue_ready(td_vbd_t * vbd)861 tapdisk_vbd_queue_ready(td_vbd_t *vbd)
862 {
863 return (!td_flag_test(vbd->state, TD_VBD_DEAD) &&
864 !td_flag_test(vbd->state, TD_VBD_CLOSED) &&
865 !td_flag_test(vbd->state, TD_VBD_QUIESCED) &&
866 !td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED));
867 }
868
869 int
tapdisk_vbd_retry_needed(td_vbd_t * vbd)870 tapdisk_vbd_retry_needed(td_vbd_t *vbd)
871 {
872 return td_flag_test(vbd->state, TD_VBD_RETRY_NEEDED);
873 }
874
875 int
tapdisk_vbd_lock(td_vbd_t * vbd)876 tapdisk_vbd_lock(td_vbd_t *vbd)
877 {
878 return 0;
879 }
880
881 int
tapdisk_vbd_quiesce_queue(td_vbd_t * vbd)882 tapdisk_vbd_quiesce_queue(td_vbd_t *vbd)
883 {
884 if (!list_empty(&vbd->pending_requests)) {
885 td_flag_set(vbd->state, TD_VBD_QUIESCE_REQUESTED);
886 return -EAGAIN;
887 }
888
889 td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED);
890 td_flag_set(vbd->state, TD_VBD_QUIESCED);
891 return 0;
892 }
893
894 int
tapdisk_vbd_start_queue(td_vbd_t * vbd)895 tapdisk_vbd_start_queue(td_vbd_t *vbd)
896 {
897 td_flag_clear(vbd->state, TD_VBD_QUIESCED);
898 td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED);
899 return 0;
900 }
901
902 int
tapdisk_vbd_kill_queue(td_vbd_t * vbd)903 tapdisk_vbd_kill_queue(td_vbd_t *vbd)
904 {
905 tapdisk_vbd_quiesce_queue(vbd);
906 td_flag_set(vbd->state, TD_VBD_DEAD);
907 return 0;
908 }
909
910 static int
tapdisk_vbd_open_image(td_vbd_t * vbd,td_image_t * image)911 tapdisk_vbd_open_image(td_vbd_t *vbd, td_image_t *image)
912 {
913 int err;
914 td_image_t *parent;
915
916 err = td_open(image);
917 if (err)
918 return err;
919
920 if (!tapdisk_vbd_is_last_image(vbd, image)) {
921 parent = tapdisk_vbd_next_image(image);
922 err = td_validate_parent(image, parent);
923 if (err) {
924 td_close(image);
925 return err;
926 }
927 }
928
929 return 0;
930 }
931
932 static int
tapdisk_vbd_close_and_reopen_image(td_vbd_t * vbd,td_image_t * image)933 tapdisk_vbd_close_and_reopen_image(td_vbd_t *vbd, td_image_t *image)
934 {
935 int i, err = 0;
936
937 td_close(image);
938
939 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
940 err = tapdisk_vbd_open_image(vbd, image);
941 if (err != -EIO)
942 break;
943
944 sleep(TD_VBD_EIO_SLEEP);
945 }
946
947 if (err)
948 td_flag_set(vbd->state, TD_VBD_CLOSED);
949
950 return err;
951 }
952
953 int
tapdisk_vbd_pause(td_vbd_t * vbd)954 tapdisk_vbd_pause(td_vbd_t *vbd)
955 {
956 int err;
957
958 td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
959
960 err = tapdisk_vbd_quiesce_queue(vbd);
961 if (err)
962 return err;
963
964 tapdisk_vbd_close_vdi(vbd);
965
966 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
967 td_flag_set(vbd->state, TD_VBD_PAUSED);
968
969 return 0;
970 }
971
972 int
tapdisk_vbd_resume(td_vbd_t * vbd,const char * path,uint16_t drivertype)973 tapdisk_vbd_resume(td_vbd_t *vbd, const char *path, uint16_t drivertype)
974 {
975 int i, err = 0;
976
977 if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
978 EPRINTF("resume request for unpaused vbd %s\n", vbd->name);
979 return -EINVAL;
980 }
981
982 if (path) {
983 free(vbd->name);
984 vbd->name = strdup(path);
985 if (!vbd->name) {
986 EPRINTF("copying new vbd %s name failed\n", path);
987 return -EINVAL;
988 }
989 }
990
991 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
992 err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT);
993 if (err != -EIO)
994 break;
995
996 sleep(TD_VBD_EIO_SLEEP);
997 }
998
999 if (err)
1000 return err;
1001
1002 tapdisk_vbd_start_queue(vbd);
1003 td_flag_clear(vbd->state, TD_VBD_PAUSED);
1004 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1005 tapdisk_vbd_check_state(vbd);
1006
1007 return 0;
1008 }
1009
1010 int
tapdisk_vbd_kick(td_vbd_t * vbd)1011 tapdisk_vbd_kick(td_vbd_t *vbd)
1012 {
1013 int n;
1014 td_ring_t *ring;
1015
1016 tapdisk_vbd_check_state(vbd);
1017
1018 ring = &vbd->ring;
1019 if (!ring->sring)
1020 return 0;
1021
1022 n = (ring->fe_ring.rsp_prod_pvt - ring->fe_ring.sring->rsp_prod);
1023 if (!n)
1024 return 0;
1025
1026 vbd->kicked += n;
1027 RING_PUSH_RESPONSES(&ring->fe_ring);
1028 ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE, 0);
1029
1030 DBG(TLOG_INFO, "kicking %d: rec: 0x%08"PRIx64", ret: 0x%08"PRIx64", kicked: "
1031 "0x%08"PRIx64"\n", n, vbd->received, vbd->returned, vbd->kicked);
1032
1033 return n;
1034 }
1035
1036 static inline void
tapdisk_vbd_write_response_to_ring(td_vbd_t * vbd,blkif_response_t * rsp)1037 tapdisk_vbd_write_response_to_ring(td_vbd_t *vbd, blkif_response_t *rsp)
1038 {
1039 td_ring_t *ring;
1040 blkif_response_t *rspp;
1041
1042 ring = &vbd->ring;
1043 rspp = RING_GET_RESPONSE(&ring->fe_ring, ring->fe_ring.rsp_prod_pvt);
1044 memcpy(rspp, rsp, sizeof(blkif_response_t));
1045 ring->fe_ring.rsp_prod_pvt++;
1046 }
1047
1048 static void
tapdisk_vbd_callback(void * arg,blkif_response_t * rsp)1049 tapdisk_vbd_callback(void *arg, blkif_response_t *rsp)
1050 {
1051 td_vbd_t *vbd = (td_vbd_t *)arg;
1052 tapdisk_vbd_write_response_to_ring(vbd, rsp);
1053 }
1054
1055 static void
tapdisk_vbd_make_response(td_vbd_t * vbd,td_vbd_request_t * vreq)1056 tapdisk_vbd_make_response(td_vbd_t *vbd, td_vbd_request_t *vreq)
1057 {
1058 blkif_request_t tmp;
1059 blkif_response_t *rsp;
1060
1061 tmp = vreq->req;
1062 rsp = (blkif_response_t *)&vreq->req;
1063
1064 rsp->id = tmp.id;
1065 rsp->operation = tmp.operation;
1066 rsp->status = vreq->status;
1067
1068 DBG(TLOG_DBG, "writing req %d, sec 0x%08"PRIx64", res %d to ring\n",
1069 (int)tmp.id, tmp.sector_number, vreq->status);
1070
1071 if (rsp->status != BLKIF_RSP_OKAY)
1072 ERR(EIO, "returning BLKIF_RSP %d", rsp->status);
1073
1074 vbd->returned++;
1075 vbd->callback(vbd->argument, rsp);
1076 }
1077
1078 void
tapdisk_vbd_check_state(td_vbd_t * vbd)1079 tapdisk_vbd_check_state(td_vbd_t *vbd)
1080 {
1081 td_vbd_request_t *vreq, *tmp;
1082
1083 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests)
1084 if (vreq->num_retries >= TD_VBD_MAX_RETRIES)
1085 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1086
1087 if (!list_empty(&vbd->new_requests) ||
1088 !list_empty(&vbd->failed_requests))
1089 tapdisk_vbd_issue_requests(vbd);
1090
1091 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->completed_requests) {
1092 tapdisk_vbd_make_response(vbd, vreq);
1093 list_del(&vreq->next);
1094 tapdisk_vbd_initialize_vreq(vreq);
1095 }
1096
1097 if (td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED))
1098 tapdisk_vbd_quiesce_queue(vbd);
1099
1100 if (td_flag_test(vbd->state, TD_VBD_PAUSE_REQUESTED))
1101 tapdisk_vbd_pause(vbd);
1102
1103 if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1104 tapdisk_vbd_close(vbd);
1105 }
1106
1107 void
tapdisk_vbd_check_progress(td_vbd_t * vbd)1108 tapdisk_vbd_check_progress(td_vbd_t *vbd)
1109 {
1110 int diff;
1111 struct timeval now;
1112
1113 if (list_empty(&vbd->pending_requests))
1114 return;
1115
1116 gettimeofday(&now, NULL);
1117 diff = now.tv_sec - vbd->ts.tv_sec;
1118
1119 if (diff >= TD_VBD_WATCHDOG_TIMEOUT) {
1120 DBG(TLOG_WARN, "%s: watchdog timeout: pending requests "
1121 "idle for %d seconds\n", vbd->name, diff);
1122 tapdisk_vbd_drop_log(vbd);
1123 return;
1124 }
1125
1126 tapdisk_server_set_max_timeout(TD_VBD_WATCHDOG_TIMEOUT - diff);
1127 }
1128
1129 /*
1130 * request submission
1131 */
1132
1133 static int
tapdisk_vbd_check_queue(td_vbd_t * vbd)1134 tapdisk_vbd_check_queue(td_vbd_t *vbd)
1135 {
1136 int err;
1137 td_image_t *image;
1138
1139 if (list_empty(&vbd->images))
1140 return -ENOSYS;
1141
1142 if (!tapdisk_vbd_queue_ready(vbd))
1143 return -EAGAIN;
1144
1145 if (!vbd->reopened) {
1146 if (td_flag_test(vbd->state, TD_VBD_LOCKING)) {
1147 err = tapdisk_vbd_lock(vbd);
1148 if (err)
1149 return err;
1150 }
1151
1152 image = tapdisk_vbd_first_image(vbd);
1153 td_flag_set(image->flags, TD_OPEN_STRICT);
1154
1155 if (tapdisk_vbd_close_and_reopen_image(vbd, image))
1156 EPRINTF("reopening disks failed\n");
1157 else {
1158 DPRINTF("reopening disks succeeded\n");
1159 vbd->reopened = 1;
1160 }
1161 }
1162
1163 return 0;
1164 }
1165
1166 void
tapdisk_vbd_complete_vbd_request(td_vbd_t * vbd,td_vbd_request_t * vreq)1167 tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
1168 {
1169 if (!vreq->submitting && !vreq->secs_pending) {
1170 if (vreq->status == BLKIF_RSP_ERROR &&
1171 vreq->num_retries < TD_VBD_MAX_RETRIES &&
1172 !td_flag_test(vbd->state, TD_VBD_DEAD) &&
1173 !td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1174 tapdisk_vbd_move_request(vreq, &vbd->failed_requests);
1175 else
1176 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1177 }
1178 }
1179
1180 static uint64_t
tapdisk_vbd_breq_get_sector(blkif_request_t * breq,td_request_t treq)1181 tapdisk_vbd_breq_get_sector(blkif_request_t *breq, td_request_t treq)
1182 {
1183 int seg, nsects;
1184 uint64_t sector_nr = breq->sector_number;
1185
1186 for(seg=0; seg < treq.sidx; seg++) {
1187 nsects = breq->seg[seg].last_sect - breq->seg[seg].first_sect + 1;
1188 sector_nr += nsects;
1189 }
1190
1191 return sector_nr;
1192 }
1193
1194 static void
__tapdisk_vbd_complete_td_request(td_vbd_t * vbd,td_vbd_request_t * vreq,td_request_t treq,int res)1195 __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq,
1196 td_request_t treq, int res)
1197 {
1198 int err;
1199 td_image_t *image = treq.image;
1200
1201 err = (res <= 0 ? res : -res);
1202 vbd->secs_pending -= treq.secs;
1203 vreq->secs_pending -= treq.secs;
1204
1205 vreq->blocked = treq.blocked;
1206
1207 if (err) {
1208 vreq->status = BLKIF_RSP_ERROR;
1209 vreq->error = (vreq->error ? : err);
1210 if (err != -EBUSY) {
1211 vbd->errors++;
1212 ERR(err, "req %"PRIu64": %s 0x%04x secs to "
1213 "0x%08"PRIx64, vreq->req.id,
1214 (treq.op == TD_OP_WRITE ? "write" : "read"),
1215 treq.secs, treq.sec);
1216 }
1217 } else {
1218 #ifdef MEMSHR
1219 if (treq.op == TD_OP_READ
1220 && td_flag_test(image->flags, TD_OPEN_RDONLY)) {
1221 share_tuple_t hnd = treq.memshr_hnd;
1222 uint16_t uid = image->memshr_id;
1223 blkif_request_t *breq = &vreq->req;
1224 uint64_t sec = tapdisk_vbd_breq_get_sector(breq, treq);
1225 int secs = breq->seg[treq.sidx].last_sect -
1226 breq->seg[treq.sidx].first_sect + 1;
1227
1228 if (hnd.handle != 0)
1229 memshr_vbd_complete_ro_request(hnd, uid,
1230 sec, secs);
1231 }
1232 #endif
1233 }
1234
1235 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1236 }
1237
1238 static void
__tapdisk_vbd_reissue_td_request(td_vbd_t * vbd,td_image_t * image,td_request_t treq)1239 __tapdisk_vbd_reissue_td_request(td_vbd_t *vbd,
1240 td_image_t *image, td_request_t treq)
1241 {
1242 td_image_t *parent;
1243 td_vbd_request_t *vreq;
1244
1245 vreq = (td_vbd_request_t *)treq.private;
1246 gettimeofday(&vreq->last_try, NULL);
1247
1248 vreq->submitting++;
1249
1250 if (tapdisk_vbd_is_last_image(vbd, image)) {
1251 memset(treq.buf, 0, treq.secs << SECTOR_SHIFT);
1252 td_complete_request(treq, 0);
1253 goto done;
1254 }
1255
1256 parent = tapdisk_vbd_next_image(image);
1257 treq.image = parent;
1258
1259 /* return zeros for requests that extend beyond end of parent image */
1260 if (treq.sec + treq.secs > parent->info.size) {
1261 td_request_t clone = treq;
1262
1263 if (parent->info.size > treq.sec) {
1264 int secs = parent->info.size - treq.sec;
1265 clone.sec += secs;
1266 clone.secs -= secs;
1267 clone.buf += (secs << SECTOR_SHIFT);
1268 treq.secs = secs;
1269 } else
1270 treq.secs = 0;
1271
1272 memset(clone.buf, 0, clone.secs << SECTOR_SHIFT);
1273 td_complete_request(clone, 0);
1274
1275 if (!treq.secs)
1276 goto done;
1277 }
1278
1279 switch (treq.op) {
1280 case TD_OP_WRITE:
1281 td_queue_write(parent, treq);
1282 break;
1283
1284 case TD_OP_READ:
1285 #ifdef MEMSHR
1286 if(td_flag_test(parent->flags, TD_OPEN_RDONLY)) {
1287 int ret, seg = treq.sidx;
1288 blkif_request_t *breq = &vreq->req;
1289
1290 ret = memshr_vbd_issue_ro_request(treq.buf,
1291 breq->seg[seg].gref,
1292 parent->memshr_id,
1293 treq.sec,
1294 treq.secs,
1295 &treq.memshr_hnd);
1296 if(ret == 0) {
1297 /* Reset memshr handle. This'll prevent
1298 * memshr_vbd_complete_ro_request being called
1299 */
1300 treq.memshr_hnd.handle = 0;
1301 td_complete_request(treq, 0);
1302 } else
1303 td_queue_read(parent, treq);
1304 } else
1305 #endif
1306 td_queue_read(parent, treq);
1307 break;
1308 }
1309
1310 done:
1311 vreq->submitting--;
1312 if (!vreq->secs_pending)
1313 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1314 }
1315
1316 void
tapdisk_vbd_forward_request(td_request_t treq)1317 tapdisk_vbd_forward_request(td_request_t treq)
1318 {
1319 td_vbd_t *vbd;
1320 td_image_t *image;
1321 td_vbd_request_t *vreq;
1322
1323 image = treq.image;
1324 vbd = (td_vbd_t *)image->private;
1325 vreq = (td_vbd_request_t *)treq.private;
1326
1327 gettimeofday(&vbd->ts, NULL);
1328
1329 if (tapdisk_vbd_queue_ready(vbd))
1330 __tapdisk_vbd_reissue_td_request(vbd, image, treq);
1331 else
1332 __tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EIO);
1333 }
1334
1335 static void
tapdisk_vbd_complete_td_request(td_request_t treq,int res)1336 tapdisk_vbd_complete_td_request(td_request_t treq, int res)
1337 {
1338 td_vbd_t *vbd;
1339 td_image_t *image;
1340 td_vbd_request_t *vreq;
1341
1342 image = treq.image;
1343 vbd = (td_vbd_t *)image->private;
1344 vreq = (td_vbd_request_t *)treq.private;
1345
1346 gettimeofday(&vbd->ts, NULL);
1347 DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" "
1348 "secs 0x%04x buf %p op %d res %d\n", image->name,
1349 (int)treq.id, treq.sidx, treq.sec, treq.secs,
1350 treq.buf, (int)vreq->req.operation, res);
1351
1352 __tapdisk_vbd_complete_td_request(vbd, vreq, treq, res);
1353 }
1354
1355 static int
tapdisk_vbd_issue_request(td_vbd_t * vbd,td_vbd_request_t * vreq)1356 tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
1357 {
1358 char *page;
1359 td_ring_t *ring;
1360 td_image_t *image;
1361 td_request_t treq;
1362 uint64_t sector_nr;
1363 blkif_request_t *req;
1364 int i, err, id, nsects;
1365
1366 req = &vreq->req;
1367 id = req->id;
1368 ring = &vbd->ring;
1369 sector_nr = req->sector_number;
1370 image = tapdisk_vbd_first_image(vbd);
1371
1372 vreq->submitting = 1;
1373 gettimeofday(&vbd->ts, NULL);
1374 gettimeofday(&vreq->last_try, NULL);
1375 tapdisk_vbd_move_request(vreq, &vbd->pending_requests);
1376
1377 #if 0
1378 err = tapdisk_vbd_check_queue(vbd);
1379 if (err)
1380 goto fail;
1381 #endif
1382
1383 err = tapdisk_image_check_ring_request(image, req);
1384 if (err)
1385 goto fail;
1386
1387 for (i = 0; i < req->nr_segments; i++) {
1388 nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1;
1389 page = (char *)MMAP_VADDR(ring->vstart,
1390 (unsigned long)req->id, i);
1391 page += (req->seg[i].first_sect << SECTOR_SHIFT);
1392
1393 treq.id = id;
1394 treq.sidx = i;
1395 treq.blocked = 0;
1396 treq.buf = page;
1397 treq.sec = sector_nr;
1398 treq.secs = nsects;
1399 treq.image = image;
1400 treq.cb = tapdisk_vbd_complete_td_request;
1401 treq.cb_data = NULL;
1402 treq.private = vreq;
1403
1404 DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" secs 0x%04x "
1405 "buf %p op %d\n", image->name, id, i, treq.sec, treq.secs,
1406 treq.buf, (int)req->operation);
1407
1408 vreq->secs_pending += nsects;
1409 vbd->secs_pending += nsects;
1410
1411 switch (req->operation) {
1412 case BLKIF_OP_WRITE:
1413 treq.op = TD_OP_WRITE;
1414 td_queue_write(image, treq);
1415 break;
1416
1417 case BLKIF_OP_READ:
1418 treq.op = TD_OP_READ;
1419 td_queue_read(image, treq);
1420 break;
1421 }
1422
1423 sector_nr += nsects;
1424 }
1425
1426 err = 0;
1427
1428 out:
1429 vreq->submitting--;
1430 if (!vreq->secs_pending) {
1431 err = (err ? : vreq->error);
1432 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1433 }
1434
1435 return err;
1436
1437 fail:
1438 vreq->status = BLKIF_RSP_ERROR;
1439 goto out;
1440 }
1441
1442 static int
tapdisk_vbd_reissue_failed_requests(td_vbd_t * vbd)1443 tapdisk_vbd_reissue_failed_requests(td_vbd_t *vbd)
1444 {
1445 int err;
1446 struct timeval now;
1447 td_vbd_request_t *vreq, *tmp;
1448
1449 err = 0;
1450 gettimeofday(&now, NULL);
1451
1452 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
1453 if (vreq->secs_pending)
1454 continue;
1455
1456 if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1457 goto fail;
1458
1459 if (vreq->error != -EBUSY &&
1460 now.tv_sec - vreq->last_try.tv_sec < TD_VBD_RETRY_INTERVAL)
1461 continue;
1462
1463 if (vreq->num_retries >= TD_VBD_MAX_RETRIES) {
1464 fail:
1465 DBG(TLOG_INFO, "req %"PRIu64"retried %d times\n",
1466 vreq->req.id, vreq->num_retries);
1467 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1468 continue;
1469 }
1470
1471 /*
1472 * never fail due to too many retries if we are blocked on a
1473 * dependency
1474 */
1475 if (vreq->blocked) {
1476 vreq->blocked = 0;
1477 } else {
1478 vbd->retries++;
1479 vreq->num_retries++;
1480 }
1481 vreq->error = 0;
1482 vreq->status = BLKIF_RSP_OKAY;
1483 DBG(TLOG_DBG, "retry #%d of req %"PRIu64", "
1484 "sec 0x%08"PRIx64", nr_segs: %d\n", vreq->num_retries,
1485 vreq->req.id, vreq->req.sector_number,
1486 vreq->req.nr_segments);
1487
1488 err = tapdisk_vbd_issue_request(vbd, vreq);
1489 if (err)
1490 break;
1491 }
1492
1493 if (list_empty(&vbd->failed_requests))
1494 td_flag_clear(vbd->state, TD_VBD_RETRY_NEEDED);
1495 else
1496 td_flag_set(vbd->state, TD_VBD_RETRY_NEEDED);
1497
1498 return err;
1499 }
1500
1501 static int
tapdisk_vbd_issue_new_requests(td_vbd_t * vbd)1502 tapdisk_vbd_issue_new_requests(td_vbd_t *vbd)
1503 {
1504 int err;
1505 td_vbd_request_t *vreq, *tmp;
1506
1507 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
1508 err = tapdisk_vbd_issue_request(vbd, vreq);
1509 if (err)
1510 return err;
1511 }
1512
1513 return 0;
1514 }
1515
1516 static int
tapdisk_vbd_kill_requests(td_vbd_t * vbd)1517 tapdisk_vbd_kill_requests(td_vbd_t *vbd)
1518 {
1519 td_vbd_request_t *vreq, *tmp;
1520
1521 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
1522 vreq->status = BLKIF_RSP_ERROR;
1523 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1524 }
1525
1526 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
1527 vreq->status = BLKIF_RSP_ERROR;
1528 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1529 }
1530
1531 return 0;
1532 }
1533
1534 int
tapdisk_vbd_issue_requests(td_vbd_t * vbd)1535 tapdisk_vbd_issue_requests(td_vbd_t *vbd)
1536 {
1537 int err;
1538
1539 if (td_flag_test(vbd->state, TD_VBD_DEAD))
1540 return tapdisk_vbd_kill_requests(vbd);
1541
1542 if (!tapdisk_vbd_queue_ready(vbd))
1543 return -EAGAIN;
1544
1545 err = tapdisk_vbd_reissue_failed_requests(vbd);
1546 if (err)
1547 return err;
1548
1549 return tapdisk_vbd_issue_new_requests(vbd);
1550 }
1551
1552 static void
tapdisk_vbd_pull_ring_requests(td_vbd_t * vbd)1553 tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd)
1554 {
1555 int idx;
1556 RING_IDX rp, rc;
1557 td_ring_t *ring;
1558 blkif_request_t *req;
1559 td_vbd_request_t *vreq;
1560
1561 ring = &vbd->ring;
1562 if (!ring->sring)
1563 return;
1564
1565 rp = ring->fe_ring.sring->req_prod;
1566 xen_rmb();
1567
1568 for (rc = ring->fe_ring.req_cons; rc != rp; rc++) {
1569 req = RING_GET_REQUEST(&ring->fe_ring, rc);
1570 ++ring->fe_ring.req_cons;
1571
1572 idx = req->id;
1573 vreq = &vbd->request_list[idx];
1574
1575 ASSERT(list_empty(&vreq->next));
1576 ASSERT(vreq->secs_pending == 0);
1577
1578 memcpy(&vreq->req, req, sizeof(blkif_request_t));
1579 vbd->received++;
1580 vreq->vbd = vbd;
1581
1582 tapdisk_vbd_move_request(vreq, &vbd->new_requests);
1583
1584 DBG(TLOG_DBG, "%s: request %d \n", vbd->name, idx);
1585 }
1586 }
1587
1588 static int
tapdisk_vbd_pause_ring(td_vbd_t * vbd)1589 tapdisk_vbd_pause_ring(td_vbd_t *vbd)
1590 {
1591 int err;
1592
1593 if (td_flag_test(vbd->state, TD_VBD_PAUSED))
1594 return 0;
1595
1596 td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
1597
1598 err = tapdisk_vbd_quiesce_queue(vbd);
1599 if (err) {
1600 EPRINTF("%s: ring pause request on active queue\n", vbd->name);
1601 return err;
1602 }
1603
1604 tapdisk_vbd_close_vdi(vbd);
1605
1606 err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_PAUSE, 0);
1607 if (err)
1608 EPRINTF("%s: pause ioctl failed: %d\n", vbd->name, errno);
1609 else {
1610 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1611 td_flag_set(vbd->state, TD_VBD_PAUSED);
1612 }
1613
1614 return err;
1615 }
1616
1617 static int
tapdisk_vbd_resume_ring(td_vbd_t * vbd)1618 tapdisk_vbd_resume_ring(td_vbd_t *vbd)
1619 {
1620 int i, err, type;
1621 char message[BLKTAP2_MAX_MESSAGE_LEN];
1622 const char *path;
1623
1624 memset(message, 0, sizeof(message));
1625
1626 if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
1627 EPRINTF("%s: resume message for unpaused vbd\n", vbd->name);
1628 return -EINVAL;
1629 }
1630
1631 err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_REOPEN, &message);
1632 if (err) {
1633 EPRINTF("%s: resume ioctl failed: %d\n", vbd->name, errno);
1634 return err;
1635 }
1636
1637 err = tapdisk_parse_disk_type(message, &path, &type);
1638 if (err) {
1639 EPRINTF("%s: invalid resume string %s\n", vbd->name, message);
1640 goto out;
1641 }
1642
1643 free(vbd->name);
1644 vbd->name = strdup(path);
1645 if (!vbd->name) {
1646 EPRINTF("resume malloc failed\n");
1647 err = -ENOMEM;
1648 goto out;
1649 }
1650
1651 tapdisk_vbd_start_queue(vbd);
1652
1653 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
1654 err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT);
1655 if (err != -EIO)
1656 break;
1657
1658 sleep(TD_VBD_EIO_SLEEP);
1659 }
1660
1661 out:
1662 if (!err) {
1663 image_t image;
1664 struct blktap2_params params;
1665
1666 memset(¶ms, 0, sizeof(params));
1667 tapdisk_vbd_get_image_info(vbd, &image);
1668
1669 params.sector_size = image.secsize;
1670 params.capacity = image.size;
1671 snprintf(params.name, sizeof(params.name) - 1, "%s", message);
1672
1673 ioctl(vbd->ring.fd, BLKTAP2_IOCTL_SET_PARAMS, ¶ms);
1674 td_flag_clear(vbd->state, TD_VBD_PAUSED);
1675 }
1676
1677 ioctl(vbd->ring.fd, BLKTAP2_IOCTL_RESUME, err);
1678 return err;
1679 }
1680
1681 static int
tapdisk_vbd_check_ring_message(td_vbd_t * vbd)1682 tapdisk_vbd_check_ring_message(td_vbd_t *vbd)
1683 {
1684 if (!vbd->ring.sring)
1685 return -EINVAL;
1686
1687 switch (vbd->ring.sring->pvt.tapif_user.msg) {
1688 case 0:
1689 return 0;
1690
1691 case BLKTAP2_RING_MESSAGE_PAUSE:
1692 return tapdisk_vbd_pause_ring(vbd);
1693
1694 case BLKTAP2_RING_MESSAGE_RESUME:
1695 return tapdisk_vbd_resume_ring(vbd);
1696
1697 case BLKTAP2_RING_MESSAGE_CLOSE:
1698 return tapdisk_vbd_close(vbd);
1699
1700 default:
1701 return -EINVAL;
1702 }
1703 }
1704
1705 static void
tapdisk_vbd_ring_event(event_id_t id,char mode,void * private)1706 tapdisk_vbd_ring_event(event_id_t id, char mode, void *private)
1707 {
1708 td_vbd_t *vbd;
1709
1710 vbd = (td_vbd_t *)private;
1711
1712 tapdisk_vbd_pull_ring_requests(vbd);
1713 tapdisk_vbd_issue_requests(vbd);
1714
1715 /* vbd may be destroyed after this call */
1716 tapdisk_vbd_check_ring_message(vbd);
1717 }
1718
1719 td_image_t *
tapdisk_vbd_first_image(td_vbd_t * vbd)1720 tapdisk_vbd_first_image(td_vbd_t *vbd)
1721 {
1722 return list_entry(vbd->images.next, td_image_t, next);
1723 }
1724