1 /*
2  * Copyright (c) 2008, XenSource Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of XenSource Inc. nor the names of its contributors
13  *       may be used to endorse or promote products derived from this software
14  *       without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 
29 #include <time.h>
30 #include <stdio.h>
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <inttypes.h>
35 
36 #include "io-optimize.h"
37 #include "tapdisk-log.h"
38 
39 #if (!defined(TEST) && defined(DEBUG))
40 #define DBG(ctx, f, a...) tlog_write(TLOG_DBG, f, ##a)
41 #elif defined(TEST)
42 #define DBG(ctx, f, a...) printf(f, ##a)
43 #else
44 #define DBG(ctx, f, a...) ((void)0)
45 #endif
46 
47 static void print_merged_iocbs(struct opioctx *ctx,
48 			       struct iocb **iocbs, int num_iocbs);
49 
50 void
opio_free(struct opioctx * ctx)51 opio_free(struct opioctx *ctx)
52 {
53 	free(ctx->opios);
54 	ctx->opios = NULL;
55 
56 	free(ctx->free_opios);
57 	ctx->free_opios = NULL;
58 
59 	free(ctx->iocb_queue);
60 	ctx->iocb_queue = NULL;
61 
62 	free(ctx->event_queue);
63 	ctx->event_queue = NULL;
64 }
65 
66 int
opio_init(struct opioctx * ctx,int num_iocbs)67 opio_init(struct opioctx *ctx, int num_iocbs)
68 {
69 	int i;
70 
71 	memset(ctx, 0, sizeof(struct opioctx));
72 
73 	ctx->num_opios     = num_iocbs;
74 	ctx->free_opio_cnt = num_iocbs;
75 	ctx->opios         = calloc(1, sizeof(struct opio) * num_iocbs);
76 	ctx->free_opios    = calloc(1, sizeof(struct opio *) * num_iocbs);
77 	ctx->iocb_queue    = calloc(1, sizeof(struct iocb *) * num_iocbs);
78 	ctx->event_queue   = calloc(1, sizeof(struct io_event) * num_iocbs);
79 
80 	if (!ctx->opios || !ctx->free_opios ||
81 	    !ctx->iocb_queue || !ctx->event_queue)
82 		goto fail;
83 
84 	for (i = 0; i < num_iocbs; i++)
85 		ctx->free_opios[i] = &ctx->opios[i];
86 
87 	return 0;
88 
89  fail:
90 	opio_free(ctx);
91 	return -ENOMEM;
92 }
93 
94 static inline struct opio *
alloc_opio(struct opioctx * ctx)95 alloc_opio(struct opioctx *ctx)
96 {
97 	if (ctx->free_opio_cnt <= 0)
98 		return NULL;
99 	return ctx->free_opios[--ctx->free_opio_cnt];
100 }
101 
102 static inline void
free_opio(struct opioctx * ctx,struct opio * op)103 free_opio(struct opioctx *ctx, struct opio *op)
104 {
105 	memset(op, 0, sizeof(struct opio));
106 	ctx->free_opios[ctx->free_opio_cnt++] = op;
107 }
108 
109 static inline void
restore_iocb(struct opio * op)110 restore_iocb(struct opio *op)
111 {
112 	struct iocb *io = op->iocb;
113 
114 	io->data        = op->data;
115 	io->u.c.buf     = op->buf;
116 	io->u.c.nbytes  = op->nbytes;
117 }
118 
119 static inline int
iocb_optimized(struct opioctx * ctx,struct iocb * io)120 iocb_optimized(struct opioctx *ctx, struct iocb *io)
121 {
122 	unsigned long iop   = (unsigned long)io->data;
123 	unsigned long start = (unsigned long)ctx->opios;
124 	unsigned long end   = start + (ctx->num_opios * sizeof(struct opio));
125 
126 	return (iop >= start && iop < end);
127 }
128 
129 static inline int
contiguous_sectors(struct iocb * l,struct iocb * r)130 contiguous_sectors(struct iocb *l, struct iocb *r)
131 {
132 	return (l->u.c.offset + l->u.c.nbytes == r->u.c.offset);
133 }
134 
135 static inline int
contiguous_buffers(struct iocb * l,struct iocb * r)136 contiguous_buffers(struct iocb *l, struct iocb *r)
137 {
138 	return (l->u.c.buf + l->u.c.nbytes == r->u.c.buf);
139 }
140 
141 static inline int
contiguous_iocbs(struct iocb * l,struct iocb * r)142 contiguous_iocbs(struct iocb *l, struct iocb *r)
143 {
144 	return ((l->aio_fildes == r->aio_fildes) &&
145 		contiguous_sectors(l, r) &&
146 		contiguous_buffers(l, r));
147 }
148 
149 static inline void
init_opio_list(struct opio * op)150 init_opio_list(struct opio *op)
151 {
152 	op->list.head = op->list.tail = op;
153 }
154 
155 static struct opio *
opio_iocb_init(struct opioctx * ctx,struct iocb * io)156 opio_iocb_init(struct opioctx *ctx, struct iocb *io)
157 {
158 	struct opio *op;
159 
160 	op = alloc_opio(ctx);
161 	if (!op)
162 		return NULL;
163 
164 	op->buf    = io->u.c.buf;
165 	op->nbytes = io->u.c.nbytes;
166 	op->offset = io->u.c.offset;
167 	op->data   = io->data;
168 	op->iocb   = io;
169 	io->data   = op;
170 
171 	init_opio_list(op);
172 
173 	return op;
174 }
175 
176 static inline struct opio *
opio_get(struct opioctx * ctx,struct iocb * io)177 opio_get(struct opioctx *ctx, struct iocb *io)
178 {
179 	if (iocb_optimized(ctx, io))
180 		return (struct opio *)io->data;
181 	else
182 	        return opio_iocb_init(ctx, io);
183 }
184 
185 static int
merge_tail(struct opioctx * ctx,struct iocb * head,struct iocb * io)186 merge_tail(struct opioctx *ctx, struct iocb *head, struct iocb *io)
187 {
188 	struct opio *ophead, *opio;
189 
190 	ophead = opio_get(ctx, head);
191 	if (!ophead)
192 		return -ENOMEM;
193 
194 	opio = opio_get(ctx, io);
195 	if (!opio)
196 		return -ENOMEM;
197 
198 	opio->head        = ophead;
199 	head->u.c.nbytes += io->u.c.nbytes;
200 	ophead->list.tail = ophead->list.tail->next = opio;
201 
202 	return 0;
203 }
204 
205 static int
merge(struct opioctx * ctx,struct iocb * head,struct iocb * io)206 merge(struct opioctx *ctx, struct iocb *head, struct iocb *io)
207 {
208 	if (head->aio_lio_opcode != io->aio_lio_opcode)
209 		return -EINVAL;
210 
211 	if (!contiguous_iocbs(head, io))
212 		return -EINVAL;
213 
214 	return merge_tail(ctx, head, io);
215 }
216 
217 int
io_merge(struct opioctx * ctx,struct iocb ** queue,int num)218 io_merge(struct opioctx *ctx, struct iocb **queue, int num)
219 {
220 	int i, on_queue;
221 	struct iocb *io, **q;
222 	struct opio *ophead;
223 
224 	if (!num)
225 		return 0;
226 
227 	on_queue = 0;
228 	q = ctx->iocb_queue;
229 	memcpy(q, queue, num * sizeof(struct iocb *));
230 
231 	for (i = 1; i < num; i++) {
232 		io = q[i];
233 		if (merge(ctx, queue[on_queue], io) != 0)
234 			queue[++on_queue] = io;
235 	}
236 
237 #if (defined(TEST) || defined(DEBUG))
238 	print_merged_iocbs(ctx, queue, on_queue + 1);
239 #endif
240 
241 	return ++on_queue;
242 }
243 
244 static int
expand_iocb(struct opioctx * ctx,struct iocb ** queue,struct iocb * io)245 expand_iocb(struct opioctx *ctx, struct iocb **queue, struct iocb *io)
246 {
247 	int idx;
248 	struct opio *op, *next;
249 
250 	idx = 0;
251 	op  = (struct opio *)io->data;
252 	while (op) {
253 		next = op->next;
254 		restore_iocb(op);
255 		queue[idx++] = op->iocb;
256 		free_opio(ctx, op);
257 		op   = next;
258 	}
259 
260 	return idx;
261 }
262 
263 int
io_expand_iocbs(struct opioctx * ctx,struct iocb ** queue,int idx,int num)264 io_expand_iocbs(struct opioctx *ctx, struct iocb **queue, int idx, int num)
265 {
266 	int i, on_queue;
267 	struct iocb *io, **q;
268 
269 	if (!num)
270 		return 0;
271 
272 	on_queue = 0;
273 	q = ctx->iocb_queue;
274 	memcpy(q, queue, num * sizeof(struct iocb *));
275 
276 	for (i = idx; i < num; i++) {
277 		io = q[i];
278 		if (!iocb_optimized(ctx, io))
279 			queue[on_queue++] = io;
280 		else
281 			on_queue += expand_iocb(ctx, queue + on_queue, io);
282 	}
283 
284 	return on_queue;
285 }
286 
287 static int
expand_event(struct opioctx * ctx,struct io_event * event,struct io_event * queue,int idx)288 expand_event(struct opioctx *ctx,
289 	     struct io_event *event, struct io_event *queue, int idx)
290 {
291 	int err;
292 	struct iocb *io;
293 	struct io_event *ep;
294 	struct opio *ophead, *op, *next;
295 
296 	io     = event->obj;
297 	ophead = (struct opio *)io->data;
298 	op     = ophead;
299 
300 	if (event->res == io->u.c.nbytes)
301 		err = 0;
302 	else if ((int)event->res < 0)
303 		err = (int)event->res;
304 	else
305 		err = -EIO;
306 
307 	while (op) {
308 		next    = op->next;
309 		ep      = &queue[idx++];
310 		ep->obj = op->iocb;
311 		ep->res = (err ? err : op->nbytes);
312 		restore_iocb(op);
313 		free_opio(ctx, op);
314 		op      = next;
315 	}
316 
317 	return idx;
318 }
319 
320 int
io_split(struct opioctx * ctx,struct io_event * events,int num)321 io_split(struct opioctx *ctx, struct io_event *events, int num)
322 {
323 	int on_queue;
324 	struct iocb *io;
325 	struct io_event *ep, *q;
326 
327 	if (!num)
328 		return 0;
329 
330 	on_queue = 0;
331 	q = ctx->event_queue;
332 	memcpy(q, events, num * sizeof(struct io_event));
333 
334 	for (ep = q; num-- > 0; ep++) {
335 		io = ep->obj;
336 		if (!iocb_optimized(ctx, io))
337 			events[on_queue++] = *ep;
338 		else
339 			on_queue = expand_event(ctx, ep, events, on_queue);
340 	}
341 
342 	return on_queue;
343 }
344 
345 /******************************************************************************
346 debug print functions
347 ******************************************************************************/
348 static inline void
__print_iocb(struct opioctx * ctx,struct iocb * io,char * prefix)349 __print_iocb(struct opioctx *ctx, struct iocb *io, char *prefix)
350 {
351 	char *type;
352 
353 	type = (io->aio_lio_opcode == IO_CMD_PREAD ? "read" : "write");
354 
355 	DBG(ctx, "%soff: %08llx, nbytes: %04lx, buf: %p, type: %s, data: %08lx,"
356 	    " optimized: %d\n", prefix, io->u.c.offset, io->u.c.nbytes,
357 	    io->u.c.buf, type, (unsigned long)io->data,
358 	    iocb_optimized(ctx, io));
359 }
360 
361 static char *null_prefix = "";
362 #define print_iocb(ctx, io) __print_iocb(ctx, io, null_prefix)
363 
364 static void
print_iocbs(struct opioctx * ctx,struct iocb ** iocbs,int num_iocbs)365 print_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs)
366 {
367 	int i;
368 	char pref[10];
369 	struct iocb *io;
370 
371 	DBG(ctx, "iocbs:\n");
372 	for (i = 0; i < num_iocbs; i++) {
373 		io = iocbs[i];
374 		snprintf(pref, 10, "%d: ", i);
375 		__print_iocb(ctx, io, pref);
376 	}
377 }
378 
379 static void
print_optimized_iocbs(struct opioctx * ctx,struct opio * op,int * cnt)380 print_optimized_iocbs(struct opioctx *ctx, struct opio *op, int *cnt)
381 {
382 	char pref[10];
383 
384 	while (op) {
385 		snprintf(pref, 10, "  %d: ", (*cnt)++);
386 		__print_iocb(ctx, op->iocb, pref);
387 		op = op->next;
388 	}
389 }
390 
391 static void
print_merged_iocbs(struct opioctx * ctx,struct iocb ** iocbs,int num_iocbs)392 print_merged_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs)
393 {
394 	int i, cnt;
395 	char pref[10];
396 	struct iocb *io;
397 	struct opio *op;
398 
399 	DBG(ctx, "merged iocbs:\n");
400 	for (i = 0, cnt = 0; i < num_iocbs; i++) {
401 		io = iocbs[i];
402 		snprintf(pref, 10, "%d: ", cnt++);
403 		__print_iocb(ctx, io, pref);
404 
405 		if (iocb_optimized(ctx, io)) {
406 			op = (struct opio *)io->data;
407 			print_optimized_iocbs(ctx, op->next, &cnt);
408 		}
409 	}
410 }
411 
412 static void
print_events(struct opioctx * ctx,struct io_event * events,int num_events)413 print_events(struct opioctx *ctx, struct io_event *events, int num_events)
414 {
415 	int i;
416 	struct iocb *io;
417 
418 	for (i = 0; i < num_events; i++) {
419 		io = events[i].obj;
420 		print_iocb(ctx, io);
421 	}
422 }
423 /******************************************************************************
424 end debug print functions
425 ******************************************************************************/
426 
427 #if defined(TEST)
428 
429 #define hmask 0x80000000UL
430 #define smask 0x40000000UL
431 #define make_data(idx, is_head, sparse) \
432          (void *)((idx) | ((is_head) ? hmask : 0) | ((sparse) ? smask : 0))
433 #define data_idx(data)          (int)((unsigned long)(data) & (0x0fffffff))
434 #define data_is_head(data)      (((unsigned long)(data) & hmask) ? 1 : 0)
435 #define data_is_sparse(data)    (((unsigned long)(data) & smask) ? 1 : 0)
436 
437 static void
usage(void)438 usage(void)
439 {
440 	fprintf(stderr, "usage: io_optimize [-n num_runs] "
441 		"[-i num_iocbs] [-s num_secs] [-r random_seed]\n");
442 	exit(-1);
443 }
444 
445 static int xalloc_cnt, xfree_cnt;
446 static inline char *
xalloc(int size)447 xalloc(int size)
448 {
449 	char *buf = malloc(size);
450 	if (!buf) {
451 		fprintf(stderr, "xalloc failed\n");
452 		exit(ENOMEM);
453 	}
454 	xalloc_cnt++;
455 	return buf;
456 }
457 
458 static inline void
xfree(void * buf)459 xfree(void *buf)
460 {
461 	free(buf);
462 	xfree_cnt++;
463 }
464 
465 static void
randomize_iocbs(struct iocb ** iocbs,int num_iocbs,int num_secs)466 randomize_iocbs(struct iocb **iocbs, int num_iocbs, int num_secs)
467 {
468 	int i, j;
469 
470 	i = 0;
471 	while (i < num_iocbs) {
472 		char *buf;
473 		short type;
474 		int segs, sparse_mem;
475 		uint64_t offset, nbytes;
476 
477 		type   = (random() % 10 < 5 ? IO_CMD_PREAD : IO_CMD_PWRITE);
478 		offset = ((random() % num_secs) << 9);
479 
480 		if (random() % 10 < 4) {
481 			segs   = 1;
482 			nbytes = (((random() % 7) + 1) << 9);
483 		} else {
484 			segs   = (random() % 10) + 1;
485 			nbytes = 4096;
486 		}
487 
488 		if (i + segs > num_iocbs)
489 			segs = (num_iocbs - i);
490 
491 		sparse_mem = (random() % 10 < 2 ? 1 : 0);
492 
493 		if (sparse_mem)
494 			buf = xalloc(nbytes);
495 		else
496 			buf = xalloc(segs * nbytes);
497 
498 		for (j = 0; j < segs; j++) {
499 			struct iocb *io    = iocbs[i + j];
500 			io->aio_lio_opcode = type;
501 			io->u.c.nbytes     = nbytes;
502 			io->u.c.offset     = offset;
503 			io->u.c.buf        = buf;
504 			offset            += nbytes;
505 
506 			io->data = make_data(i + j, (j == 0), sparse_mem);
507 
508 			if (j + 1 < segs && sparse_mem)
509 				buf  = xalloc(nbytes);
510 			else
511 				buf += nbytes;
512 		}
513 
514 		i += segs;
515 	}
516 }
517 
518 static int
simulate_io(struct iocb ** iocbs,struct io_event * events,int num_iocbs)519 simulate_io(struct iocb **iocbs, struct io_event *events, int num_iocbs)
520 {
521 	int i, done;
522 	struct iocb *io;
523 	struct io_event *ep;
524 
525 	if (num_iocbs > 1)
526 		done = (random() % (num_iocbs - 1)) + 1;
527 	else
528 		done = num_iocbs;
529 
530 	for (i = 0; i < done; i++) {
531 		io      = iocbs[i];
532 		ep      = &events[i];
533 		ep->obj = io;
534 		ep->res = (random() % 10 < 8 ? io->u.c.nbytes : 0);
535 	}
536 
537 	return done;
538 }
539 
540 static inline void
process_events(struct opioctx * ctx,struct iocb * iocb_list,struct io_event * events,int num)541 process_events(struct opioctx *ctx,
542 	       struct iocb *iocb_list, struct io_event *events, int num)
543 {
544 	int i;
545 	struct iocb *io;
546 
547 	for (i = 0; i < num; i++) {
548 		io = events[i].obj;
549 		print_iocb(ctx, io);
550 		if (data_idx(io->data) != (io - iocb_list)) {
551 			printf("corrupt data! data_idx = %d, io = %d\n",
552 			       data_idx(io->data), (io - iocb_list));
553 			exit(-1);
554 		}
555 		if (data_is_head(io->data) || data_is_sparse(io->data))
556 			xfree(io->u.c.buf);
557 		memset(io, 0, sizeof(struct iocb));
558 	}
559 }
560 
561 static inline void
init_optest(struct iocb * iocb_list,struct iocb ** iocbs,struct io_event * events,int num)562 init_optest(struct iocb *iocb_list,
563 	    struct iocb **iocbs, struct io_event *events, int num)
564 {
565 	int i;
566 
567 	memset(iocb_list, 0, num * sizeof(struct iocb));
568 	memset(events, 0, num * sizeof(struct io_event));
569 
570 	for (i = 0; i < num; i++)
571 		iocbs[i]  = &iocb_list[i];
572 }
573 
574 int
main(int argc,char ** argv)575 main(int argc, char **argv)
576 {
577 	uint64_t num_secs;
578 	struct opioctx ctx;
579 	struct io_event *events;
580 	int i, c, num_runs, num_iocbs, seed;
581 	struct iocb *iocb_list, **iocbs, **ioqueue;
582 
583 	num_runs  = 1;
584 	num_iocbs = 300;
585 	seed      = time(NULL);
586 	num_secs  = ((4ULL << 20) >> 9); /* 4GB disk */
587 
588 	while ((c = getopt(argc, argv, "n:i:s:r:h")) != -1) {
589 		switch (c) {
590 		case 'n':
591 			num_runs  = atoi(optarg);
592 			break;
593 		case 'i':
594 			num_iocbs = atoi(optarg);
595 			break;
596 		case 's':
597 			num_secs  = strtoull(optarg, NULL, 10);
598 			break;
599 		case 'r':
600 			seed      = atoi(optarg);
601 			break;
602 		case 'h':
603 			usage();
604 		case '?':
605 			fprintf(stderr, "Unrecognized option: -%c\n", optopt);
606 			usage();
607 		}
608 	}
609 
610 	printf("Running %d tests with %d iocbs on %llu sectors, seed = %d\n",
611 	       num_runs, num_iocbs, num_secs, seed);
612 
613 	srand(seed);
614 
615 	iocb_list = malloc(num_iocbs * sizeof(struct iocb));
616 	iocbs     = malloc(num_iocbs * sizeof(struct iocb *));
617 	events    = malloc(num_iocbs * sizeof(struct io_event));
618 
619 	if (!iocb_list || !iocbs || !events || opio_init(&ctx, num_iocbs)) {
620 		fprintf(stderr, "initialization failed\n");
621 		exit(ENOMEM);
622 	}
623 
624 	for (i = 0; i < num_runs; i++) {
625 		int op_rem, op_done, num_split, num_events, num_done;
626 
627 		ioqueue = iocbs;
628 		init_optest(iocb_list, ioqueue, events, num_iocbs);
629 		randomize_iocbs(ioqueue, num_iocbs, num_secs);
630 		print_iocbs(&ctx, ioqueue, num_iocbs);
631 
632 		op_done  = 0;
633 		num_done = 0;
634 		op_rem   = io_merge(&ctx, ioqueue, num_iocbs);
635 		print_iocbs(&ctx, ioqueue, op_rem);
636 		print_merged_iocbs(&ctx, ioqueue, op_rem);
637 
638 		while (num_done < num_iocbs) {
639 			DBG(&ctx, "optimized remaining: %d\n", op_rem);
640 
641 			DBG(&ctx, "simulating\n");
642 			num_events = simulate_io(ioqueue + op_done, events, op_rem);
643 			print_events(&ctx, events, num_events);
644 
645 			DBG(&ctx, "splitting %d\n", num_events);
646 			num_split = io_split(&ctx, events, num_events);
647 			print_events(&ctx, events, num_split);
648 
649 			DBG(&ctx, "processing %d\n", num_split);
650 			process_events(&ctx, iocb_list, events, num_split);
651 
652 			op_rem   -= num_events;
653 			op_done  += num_events;
654 			num_done += num_split;
655 		}
656 
657 		DBG(&ctx, "run %d: processed: %d, xallocs: %d, xfrees: %d\n",
658 		    i, num_done, xalloc_cnt, xfree_cnt);
659 		if (xalloc_cnt != xfree_cnt)
660 			exit(-1);
661 		xalloc_cnt = xfree_cnt = 0;
662 	}
663 
664 	free(iocbs);
665 	free(events);
666 	free(iocb_list);
667 	opio_free(&ctx);
668 
669 	return 0;
670 }
671 #endif
672