1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <time.h>
30 #include <stdio.h>
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <inttypes.h>
35
36 #include "io-optimize.h"
37 #include "tapdisk-log.h"
38
39 #if (!defined(TEST) && defined(DEBUG))
40 #define DBG(ctx, f, a...) tlog_write(TLOG_DBG, f, ##a)
41 #elif defined(TEST)
42 #define DBG(ctx, f, a...) printf(f, ##a)
43 #else
44 #define DBG(ctx, f, a...) ((void)0)
45 #endif
46
47 static void print_merged_iocbs(struct opioctx *ctx,
48 struct iocb **iocbs, int num_iocbs);
49
50 void
opio_free(struct opioctx * ctx)51 opio_free(struct opioctx *ctx)
52 {
53 free(ctx->opios);
54 ctx->opios = NULL;
55
56 free(ctx->free_opios);
57 ctx->free_opios = NULL;
58
59 free(ctx->iocb_queue);
60 ctx->iocb_queue = NULL;
61
62 free(ctx->event_queue);
63 ctx->event_queue = NULL;
64 }
65
66 int
opio_init(struct opioctx * ctx,int num_iocbs)67 opio_init(struct opioctx *ctx, int num_iocbs)
68 {
69 int i;
70
71 memset(ctx, 0, sizeof(struct opioctx));
72
73 ctx->num_opios = num_iocbs;
74 ctx->free_opio_cnt = num_iocbs;
75 ctx->opios = calloc(1, sizeof(struct opio) * num_iocbs);
76 ctx->free_opios = calloc(1, sizeof(struct opio *) * num_iocbs);
77 ctx->iocb_queue = calloc(1, sizeof(struct iocb *) * num_iocbs);
78 ctx->event_queue = calloc(1, sizeof(struct io_event) * num_iocbs);
79
80 if (!ctx->opios || !ctx->free_opios ||
81 !ctx->iocb_queue || !ctx->event_queue)
82 goto fail;
83
84 for (i = 0; i < num_iocbs; i++)
85 ctx->free_opios[i] = &ctx->opios[i];
86
87 return 0;
88
89 fail:
90 opio_free(ctx);
91 return -ENOMEM;
92 }
93
94 static inline struct opio *
alloc_opio(struct opioctx * ctx)95 alloc_opio(struct opioctx *ctx)
96 {
97 if (ctx->free_opio_cnt <= 0)
98 return NULL;
99 return ctx->free_opios[--ctx->free_opio_cnt];
100 }
101
102 static inline void
free_opio(struct opioctx * ctx,struct opio * op)103 free_opio(struct opioctx *ctx, struct opio *op)
104 {
105 memset(op, 0, sizeof(struct opio));
106 ctx->free_opios[ctx->free_opio_cnt++] = op;
107 }
108
109 static inline void
restore_iocb(struct opio * op)110 restore_iocb(struct opio *op)
111 {
112 struct iocb *io = op->iocb;
113
114 io->data = op->data;
115 io->u.c.buf = op->buf;
116 io->u.c.nbytes = op->nbytes;
117 }
118
119 static inline int
iocb_optimized(struct opioctx * ctx,struct iocb * io)120 iocb_optimized(struct opioctx *ctx, struct iocb *io)
121 {
122 unsigned long iop = (unsigned long)io->data;
123 unsigned long start = (unsigned long)ctx->opios;
124 unsigned long end = start + (ctx->num_opios * sizeof(struct opio));
125
126 return (iop >= start && iop < end);
127 }
128
129 static inline int
contiguous_sectors(struct iocb * l,struct iocb * r)130 contiguous_sectors(struct iocb *l, struct iocb *r)
131 {
132 return (l->u.c.offset + l->u.c.nbytes == r->u.c.offset);
133 }
134
135 static inline int
contiguous_buffers(struct iocb * l,struct iocb * r)136 contiguous_buffers(struct iocb *l, struct iocb *r)
137 {
138 return (l->u.c.buf + l->u.c.nbytes == r->u.c.buf);
139 }
140
141 static inline int
contiguous_iocbs(struct iocb * l,struct iocb * r)142 contiguous_iocbs(struct iocb *l, struct iocb *r)
143 {
144 return ((l->aio_fildes == r->aio_fildes) &&
145 contiguous_sectors(l, r) &&
146 contiguous_buffers(l, r));
147 }
148
149 static inline void
init_opio_list(struct opio * op)150 init_opio_list(struct opio *op)
151 {
152 op->list.head = op->list.tail = op;
153 }
154
155 static struct opio *
opio_iocb_init(struct opioctx * ctx,struct iocb * io)156 opio_iocb_init(struct opioctx *ctx, struct iocb *io)
157 {
158 struct opio *op;
159
160 op = alloc_opio(ctx);
161 if (!op)
162 return NULL;
163
164 op->buf = io->u.c.buf;
165 op->nbytes = io->u.c.nbytes;
166 op->offset = io->u.c.offset;
167 op->data = io->data;
168 op->iocb = io;
169 io->data = op;
170
171 init_opio_list(op);
172
173 return op;
174 }
175
176 static inline struct opio *
opio_get(struct opioctx * ctx,struct iocb * io)177 opio_get(struct opioctx *ctx, struct iocb *io)
178 {
179 if (iocb_optimized(ctx, io))
180 return (struct opio *)io->data;
181 else
182 return opio_iocb_init(ctx, io);
183 }
184
185 static int
merge_tail(struct opioctx * ctx,struct iocb * head,struct iocb * io)186 merge_tail(struct opioctx *ctx, struct iocb *head, struct iocb *io)
187 {
188 struct opio *ophead, *opio;
189
190 ophead = opio_get(ctx, head);
191 if (!ophead)
192 return -ENOMEM;
193
194 opio = opio_get(ctx, io);
195 if (!opio)
196 return -ENOMEM;
197
198 opio->head = ophead;
199 head->u.c.nbytes += io->u.c.nbytes;
200 ophead->list.tail = ophead->list.tail->next = opio;
201
202 return 0;
203 }
204
205 static int
merge(struct opioctx * ctx,struct iocb * head,struct iocb * io)206 merge(struct opioctx *ctx, struct iocb *head, struct iocb *io)
207 {
208 if (head->aio_lio_opcode != io->aio_lio_opcode)
209 return -EINVAL;
210
211 if (!contiguous_iocbs(head, io))
212 return -EINVAL;
213
214 return merge_tail(ctx, head, io);
215 }
216
217 int
io_merge(struct opioctx * ctx,struct iocb ** queue,int num)218 io_merge(struct opioctx *ctx, struct iocb **queue, int num)
219 {
220 int i, on_queue;
221 struct iocb *io, **q;
222 struct opio *ophead;
223
224 if (!num)
225 return 0;
226
227 on_queue = 0;
228 q = ctx->iocb_queue;
229 memcpy(q, queue, num * sizeof(struct iocb *));
230
231 for (i = 1; i < num; i++) {
232 io = q[i];
233 if (merge(ctx, queue[on_queue], io) != 0)
234 queue[++on_queue] = io;
235 }
236
237 #if (defined(TEST) || defined(DEBUG))
238 print_merged_iocbs(ctx, queue, on_queue + 1);
239 #endif
240
241 return ++on_queue;
242 }
243
244 static int
expand_iocb(struct opioctx * ctx,struct iocb ** queue,struct iocb * io)245 expand_iocb(struct opioctx *ctx, struct iocb **queue, struct iocb *io)
246 {
247 int idx;
248 struct opio *op, *next;
249
250 idx = 0;
251 op = (struct opio *)io->data;
252 while (op) {
253 next = op->next;
254 restore_iocb(op);
255 queue[idx++] = op->iocb;
256 free_opio(ctx, op);
257 op = next;
258 }
259
260 return idx;
261 }
262
263 int
io_expand_iocbs(struct opioctx * ctx,struct iocb ** queue,int idx,int num)264 io_expand_iocbs(struct opioctx *ctx, struct iocb **queue, int idx, int num)
265 {
266 int i, on_queue;
267 struct iocb *io, **q;
268
269 if (!num)
270 return 0;
271
272 on_queue = 0;
273 q = ctx->iocb_queue;
274 memcpy(q, queue, num * sizeof(struct iocb *));
275
276 for (i = idx; i < num; i++) {
277 io = q[i];
278 if (!iocb_optimized(ctx, io))
279 queue[on_queue++] = io;
280 else
281 on_queue += expand_iocb(ctx, queue + on_queue, io);
282 }
283
284 return on_queue;
285 }
286
287 static int
expand_event(struct opioctx * ctx,struct io_event * event,struct io_event * queue,int idx)288 expand_event(struct opioctx *ctx,
289 struct io_event *event, struct io_event *queue, int idx)
290 {
291 int err;
292 struct iocb *io;
293 struct io_event *ep;
294 struct opio *ophead, *op, *next;
295
296 io = event->obj;
297 ophead = (struct opio *)io->data;
298 op = ophead;
299
300 if (event->res == io->u.c.nbytes)
301 err = 0;
302 else if ((int)event->res < 0)
303 err = (int)event->res;
304 else
305 err = -EIO;
306
307 while (op) {
308 next = op->next;
309 ep = &queue[idx++];
310 ep->obj = op->iocb;
311 ep->res = (err ? err : op->nbytes);
312 restore_iocb(op);
313 free_opio(ctx, op);
314 op = next;
315 }
316
317 return idx;
318 }
319
320 int
io_split(struct opioctx * ctx,struct io_event * events,int num)321 io_split(struct opioctx *ctx, struct io_event *events, int num)
322 {
323 int on_queue;
324 struct iocb *io;
325 struct io_event *ep, *q;
326
327 if (!num)
328 return 0;
329
330 on_queue = 0;
331 q = ctx->event_queue;
332 memcpy(q, events, num * sizeof(struct io_event));
333
334 for (ep = q; num-- > 0; ep++) {
335 io = ep->obj;
336 if (!iocb_optimized(ctx, io))
337 events[on_queue++] = *ep;
338 else
339 on_queue = expand_event(ctx, ep, events, on_queue);
340 }
341
342 return on_queue;
343 }
344
345 /******************************************************************************
346 debug print functions
347 ******************************************************************************/
348 static inline void
__print_iocb(struct opioctx * ctx,struct iocb * io,char * prefix)349 __print_iocb(struct opioctx *ctx, struct iocb *io, char *prefix)
350 {
351 char *type;
352
353 type = (io->aio_lio_opcode == IO_CMD_PREAD ? "read" : "write");
354
355 DBG(ctx, "%soff: %08llx, nbytes: %04lx, buf: %p, type: %s, data: %08lx,"
356 " optimized: %d\n", prefix, io->u.c.offset, io->u.c.nbytes,
357 io->u.c.buf, type, (unsigned long)io->data,
358 iocb_optimized(ctx, io));
359 }
360
361 static char *null_prefix = "";
362 #define print_iocb(ctx, io) __print_iocb(ctx, io, null_prefix)
363
364 static void
print_iocbs(struct opioctx * ctx,struct iocb ** iocbs,int num_iocbs)365 print_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs)
366 {
367 int i;
368 char pref[10];
369 struct iocb *io;
370
371 DBG(ctx, "iocbs:\n");
372 for (i = 0; i < num_iocbs; i++) {
373 io = iocbs[i];
374 snprintf(pref, 10, "%d: ", i);
375 __print_iocb(ctx, io, pref);
376 }
377 }
378
379 static void
print_optimized_iocbs(struct opioctx * ctx,struct opio * op,int * cnt)380 print_optimized_iocbs(struct opioctx *ctx, struct opio *op, int *cnt)
381 {
382 char pref[10];
383
384 while (op) {
385 snprintf(pref, 10, " %d: ", (*cnt)++);
386 __print_iocb(ctx, op->iocb, pref);
387 op = op->next;
388 }
389 }
390
391 static void
print_merged_iocbs(struct opioctx * ctx,struct iocb ** iocbs,int num_iocbs)392 print_merged_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs)
393 {
394 int i, cnt;
395 char pref[10];
396 struct iocb *io;
397 struct opio *op;
398
399 DBG(ctx, "merged iocbs:\n");
400 for (i = 0, cnt = 0; i < num_iocbs; i++) {
401 io = iocbs[i];
402 snprintf(pref, 10, "%d: ", cnt++);
403 __print_iocb(ctx, io, pref);
404
405 if (iocb_optimized(ctx, io)) {
406 op = (struct opio *)io->data;
407 print_optimized_iocbs(ctx, op->next, &cnt);
408 }
409 }
410 }
411
412 static void
print_events(struct opioctx * ctx,struct io_event * events,int num_events)413 print_events(struct opioctx *ctx, struct io_event *events, int num_events)
414 {
415 int i;
416 struct iocb *io;
417
418 for (i = 0; i < num_events; i++) {
419 io = events[i].obj;
420 print_iocb(ctx, io);
421 }
422 }
423 /******************************************************************************
424 end debug print functions
425 ******************************************************************************/
426
427 #if defined(TEST)
428
429 #define hmask 0x80000000UL
430 #define smask 0x40000000UL
431 #define make_data(idx, is_head, sparse) \
432 (void *)((idx) | ((is_head) ? hmask : 0) | ((sparse) ? smask : 0))
433 #define data_idx(data) (int)((unsigned long)(data) & (0x0fffffff))
434 #define data_is_head(data) (((unsigned long)(data) & hmask) ? 1 : 0)
435 #define data_is_sparse(data) (((unsigned long)(data) & smask) ? 1 : 0)
436
437 static void
usage(void)438 usage(void)
439 {
440 fprintf(stderr, "usage: io_optimize [-n num_runs] "
441 "[-i num_iocbs] [-s num_secs] [-r random_seed]\n");
442 exit(-1);
443 }
444
445 static int xalloc_cnt, xfree_cnt;
446 static inline char *
xalloc(int size)447 xalloc(int size)
448 {
449 char *buf = malloc(size);
450 if (!buf) {
451 fprintf(stderr, "xalloc failed\n");
452 exit(ENOMEM);
453 }
454 xalloc_cnt++;
455 return buf;
456 }
457
458 static inline void
xfree(void * buf)459 xfree(void *buf)
460 {
461 free(buf);
462 xfree_cnt++;
463 }
464
465 static void
randomize_iocbs(struct iocb ** iocbs,int num_iocbs,int num_secs)466 randomize_iocbs(struct iocb **iocbs, int num_iocbs, int num_secs)
467 {
468 int i, j;
469
470 i = 0;
471 while (i < num_iocbs) {
472 char *buf;
473 short type;
474 int segs, sparse_mem;
475 uint64_t offset, nbytes;
476
477 type = (random() % 10 < 5 ? IO_CMD_PREAD : IO_CMD_PWRITE);
478 offset = ((random() % num_secs) << 9);
479
480 if (random() % 10 < 4) {
481 segs = 1;
482 nbytes = (((random() % 7) + 1) << 9);
483 } else {
484 segs = (random() % 10) + 1;
485 nbytes = 4096;
486 }
487
488 if (i + segs > num_iocbs)
489 segs = (num_iocbs - i);
490
491 sparse_mem = (random() % 10 < 2 ? 1 : 0);
492
493 if (sparse_mem)
494 buf = xalloc(nbytes);
495 else
496 buf = xalloc(segs * nbytes);
497
498 for (j = 0; j < segs; j++) {
499 struct iocb *io = iocbs[i + j];
500 io->aio_lio_opcode = type;
501 io->u.c.nbytes = nbytes;
502 io->u.c.offset = offset;
503 io->u.c.buf = buf;
504 offset += nbytes;
505
506 io->data = make_data(i + j, (j == 0), sparse_mem);
507
508 if (j + 1 < segs && sparse_mem)
509 buf = xalloc(nbytes);
510 else
511 buf += nbytes;
512 }
513
514 i += segs;
515 }
516 }
517
518 static int
simulate_io(struct iocb ** iocbs,struct io_event * events,int num_iocbs)519 simulate_io(struct iocb **iocbs, struct io_event *events, int num_iocbs)
520 {
521 int i, done;
522 struct iocb *io;
523 struct io_event *ep;
524
525 if (num_iocbs > 1)
526 done = (random() % (num_iocbs - 1)) + 1;
527 else
528 done = num_iocbs;
529
530 for (i = 0; i < done; i++) {
531 io = iocbs[i];
532 ep = &events[i];
533 ep->obj = io;
534 ep->res = (random() % 10 < 8 ? io->u.c.nbytes : 0);
535 }
536
537 return done;
538 }
539
540 static inline void
process_events(struct opioctx * ctx,struct iocb * iocb_list,struct io_event * events,int num)541 process_events(struct opioctx *ctx,
542 struct iocb *iocb_list, struct io_event *events, int num)
543 {
544 int i;
545 struct iocb *io;
546
547 for (i = 0; i < num; i++) {
548 io = events[i].obj;
549 print_iocb(ctx, io);
550 if (data_idx(io->data) != (io - iocb_list)) {
551 printf("corrupt data! data_idx = %d, io = %d\n",
552 data_idx(io->data), (io - iocb_list));
553 exit(-1);
554 }
555 if (data_is_head(io->data) || data_is_sparse(io->data))
556 xfree(io->u.c.buf);
557 memset(io, 0, sizeof(struct iocb));
558 }
559 }
560
561 static inline void
init_optest(struct iocb * iocb_list,struct iocb ** iocbs,struct io_event * events,int num)562 init_optest(struct iocb *iocb_list,
563 struct iocb **iocbs, struct io_event *events, int num)
564 {
565 int i;
566
567 memset(iocb_list, 0, num * sizeof(struct iocb));
568 memset(events, 0, num * sizeof(struct io_event));
569
570 for (i = 0; i < num; i++)
571 iocbs[i] = &iocb_list[i];
572 }
573
574 int
main(int argc,char ** argv)575 main(int argc, char **argv)
576 {
577 uint64_t num_secs;
578 struct opioctx ctx;
579 struct io_event *events;
580 int i, c, num_runs, num_iocbs, seed;
581 struct iocb *iocb_list, **iocbs, **ioqueue;
582
583 num_runs = 1;
584 num_iocbs = 300;
585 seed = time(NULL);
586 num_secs = ((4ULL << 20) >> 9); /* 4GB disk */
587
588 while ((c = getopt(argc, argv, "n:i:s:r:h")) != -1) {
589 switch (c) {
590 case 'n':
591 num_runs = atoi(optarg);
592 break;
593 case 'i':
594 num_iocbs = atoi(optarg);
595 break;
596 case 's':
597 num_secs = strtoull(optarg, NULL, 10);
598 break;
599 case 'r':
600 seed = atoi(optarg);
601 break;
602 case 'h':
603 usage();
604 case '?':
605 fprintf(stderr, "Unrecognized option: -%c\n", optopt);
606 usage();
607 }
608 }
609
610 printf("Running %d tests with %d iocbs on %llu sectors, seed = %d\n",
611 num_runs, num_iocbs, num_secs, seed);
612
613 srand(seed);
614
615 iocb_list = malloc(num_iocbs * sizeof(struct iocb));
616 iocbs = malloc(num_iocbs * sizeof(struct iocb *));
617 events = malloc(num_iocbs * sizeof(struct io_event));
618
619 if (!iocb_list || !iocbs || !events || opio_init(&ctx, num_iocbs)) {
620 fprintf(stderr, "initialization failed\n");
621 exit(ENOMEM);
622 }
623
624 for (i = 0; i < num_runs; i++) {
625 int op_rem, op_done, num_split, num_events, num_done;
626
627 ioqueue = iocbs;
628 init_optest(iocb_list, ioqueue, events, num_iocbs);
629 randomize_iocbs(ioqueue, num_iocbs, num_secs);
630 print_iocbs(&ctx, ioqueue, num_iocbs);
631
632 op_done = 0;
633 num_done = 0;
634 op_rem = io_merge(&ctx, ioqueue, num_iocbs);
635 print_iocbs(&ctx, ioqueue, op_rem);
636 print_merged_iocbs(&ctx, ioqueue, op_rem);
637
638 while (num_done < num_iocbs) {
639 DBG(&ctx, "optimized remaining: %d\n", op_rem);
640
641 DBG(&ctx, "simulating\n");
642 num_events = simulate_io(ioqueue + op_done, events, op_rem);
643 print_events(&ctx, events, num_events);
644
645 DBG(&ctx, "splitting %d\n", num_events);
646 num_split = io_split(&ctx, events, num_events);
647 print_events(&ctx, events, num_split);
648
649 DBG(&ctx, "processing %d\n", num_split);
650 process_events(&ctx, iocb_list, events, num_split);
651
652 op_rem -= num_events;
653 op_done += num_events;
654 num_done += num_split;
655 }
656
657 DBG(&ctx, "run %d: processed: %d, xallocs: %d, xfrees: %d\n",
658 i, num_done, xalloc_cnt, xfree_cnt);
659 if (xalloc_cnt != xfree_cnt)
660 exit(-1);
661 xalloc_cnt = xfree_cnt = 0;
662 }
663
664 free(iocbs);
665 free(events);
666 free(iocb_list);
667 opio_free(&ctx);
668
669 return 0;
670 }
671 #endif
672