1 /* block-qcow.c
2 *
3 * Asynchronous Qemu copy-on-write disk implementation.
4 * Code based on the Qemu implementation
5 * (see copyright notice below)
6 *
7 * (c) 2006 Andrew Warfield and Julian Chesterfield
8 *
9 */
10
11 /*
12 * Block driver for the QCOW format
13 *
14 * Copyright (c) 2004 Fabrice Bellard
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this software and associated documentation files(the "Software"), to deal
18 * in the Software without restriction, including without limitation the rights
19 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20 * copies of the Software, and to permit persons to whom the Software is
21 * furnished to do so, subject to the following conditions:
22 */
23
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <sys/statvfs.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <string.h>
33 #include <zlib.h>
34 #include <inttypes.h>
35 #include <libaio.h>
36 #include <limits.h>
37 #include "bswap.h"
38 #include "aes.h"
39 #include "md5.h"
40
41 #include "tapdisk.h"
42 #include "tapdisk-driver.h"
43 #include "tapdisk-interface.h"
44 #include "tapdisk-disktype.h"
45 #include "qcow.h"
46 #include "blk.h"
47 #include "atomicio.h"
48
49 /* *BSD has no O_LARGEFILE */
50 #ifndef O_LARGEFILE
51 #define O_LARGEFILE 0
52 #endif
53
54 #if 1
55 #define ASSERT(_p) \
56 if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
57 __LINE__, __FILE__); *(int*)0=0; }
58 #else
59 #define ASSERT(_p) ((void)0)
60 #endif
61
/*
 * Bookkeeping for one in-flight asynchronous request.
 * NOTE(review): not referenced anywhere in this file -- possibly a
 * leftover from an older AIO implementation; confirm before removal.
 */
struct pending_aio {
        td_callback_t cb;        /* completion callback */
        int id;                  /* request identifier */
        void *private;           /* opaque argument passed to cb */
        int nb_sectors;          /* request length in sectors */
        char *buf;               /* data buffer */
        uint64_t sector;         /* starting virtual sector */
};
70
71 #undef IOCB_IDX
72 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
73
74 #define ZERO_TEST(_b) (_b | 0x00)
75
/*
 * One slot in the driver's AIO pool: the tapdisk request being
 * serviced, its libaio control block, and a back-pointer to the
 * owning driver state (used by tdqcow_complete to return the slot
 * to the free list).
 */
struct qcow_request {
        td_request_t treq;          /* original tapdisk request */
        struct tiocb tiocb;         /* AIO control block */
        struct tdqcow_state *state; /* owning driver state */
};
81
82 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
83
/*
 * Compute a 32-bit checksum over a buffer.
 *
 * Runs MD5 over @len bytes at @ptr and returns the first 32 bits of
 * the digest; used to validate the L1 table against the extended
 * header's stored checksum.
 */
uint32_t gen_cksum(char *ptr, int len)
{
	uint32_t digest[4];

	md5_sum((const uint8_t *)ptr, len, (uint8_t *)digest);

	return digest[0];
}
94
free_aio_state(struct tdqcow_state * s)95 static void free_aio_state(struct tdqcow_state* s)
96 {
97 free(s->aio_requests);
98 free(s->aio_free_list);
99 }
100
/*
 * Allocate the pool of qcow_request structures used to track
 * in-flight AIO, plus the free list of pointers into that pool.
 *
 * Returns 0 on success, -1 on allocation failure; partial
 * allocations are released and the pointers NULLed so a later
 * free_aio_state() is safe.
 */
static int init_aio_state(td_driver_t *driver)
{
	int i;
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;

	/* A segment (i.e. a page) can span multiple clusters. */
	s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
		MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;

	s->aio_free_count = s->max_aio_reqs;

	s->aio_requests = calloc(s->max_aio_reqs,
				 sizeof(struct qcow_request));
	/* The free list holds pointers into the pool, not whole
	 * structs -- allocate pointer-sized slots. */
	s->aio_free_list = calloc(s->max_aio_reqs,
				  sizeof(struct qcow_request *));
	if (!s->aio_requests || !s->aio_free_list) {
		DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
			s->max_aio_reqs);
		goto fail;
	}

	for (i = 0; i < s->max_aio_reqs; i++)
		s->aio_free_list[i] = &s->aio_requests[i];

	DPRINTF("AIO state initialised\n");

	return 0;

fail:
	/* Release whichever allocation succeeded; NULL so that a
	 * subsequent free_aio_state() does not double-free. */
	free(s->aio_requests);
	free(s->aio_free_list);
	s->aio_requests = NULL;
	s->aio_free_list = NULL;
	return -1;
}
129
get_filesize(char * filename,uint64_t * size,struct stat * st)130 int get_filesize(char *filename, uint64_t *size, struct stat *st)
131 {
132 int fd;
133 QCowHeader header;
134
135 /*Set to the backing file size*/
136 fd = open(filename, O_RDONLY);
137 if (fd < 0)
138 return -1;
139 if (read(fd, &header, sizeof(header)) < sizeof(header)) {
140 close(fd);
141 return -1;
142 }
143 close(fd);
144
145 be32_to_cpus(&header.magic);
146 be64_to_cpus(&header.size);
147 if (header.magic == QCOW_MAGIC) {
148 *size = header.size >> SECTOR_SHIFT;
149 return 0;
150 }
151
152 if(S_ISBLK(st->st_mode)) {
153 fd = open(filename, O_RDONLY);
154 if (fd < 0)
155 return -1;
156 if (blk_getimagesize(fd, size) != 0) {
157 printf("Unable to get Block device size\n");
158 close(fd);
159 return -1;
160 }
161 close(fd);
162 } else *size = (st->st_size >> SECTOR_SHIFT);
163 return 0;
164 }
165
qcow_set_key(struct tdqcow_state * s,const char * key)166 static int qcow_set_key(struct tdqcow_state *s, const char *key)
167 {
168 uint8_t keybuf[16];
169 int len, i;
170
171 memset(keybuf, 0, 16);
172 len = strlen(key);
173 if (len > 16)
174 len = 16;
175 /* XXX: we could compress the chars to 7 bits to increase
176 entropy */
177 for (i = 0; i < len; i++) {
178 keybuf[i] = key[i];
179 }
180 s->crypt_method = s->crypt_method_header;
181
182 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
183 return -1;
184 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
185 return -1;
186 #if 0
187 /* test */
188 {
189 uint8_t in[16];
190 uint8_t out[16];
191 uint8_t tmp[16];
192 for (i=0; i<16; i++)
193 in[i] = i;
194 AES_encrypt(in, tmp, &s->aes_encrypt_key);
195 AES_decrypt(tmp, out, &s->aes_decrypt_key);
196 for (i = 0; i < 16; i++)
197 DPRINTF(" %02x", tmp[i]);
198 DPRINTF("\n");
199 for (i = 0; i < 16; i++)
200 DPRINTF(" %02x", out[i]);
201 DPRINTF("\n");
202 }
203 #endif
204 return 0;
205 }
206
tdqcow_complete(void * arg,struct tiocb * tiocb,int err)207 void tdqcow_complete(void *arg, struct tiocb *tiocb, int err)
208 {
209 struct qcow_request *aio = (struct qcow_request *)arg;
210 struct tdqcow_state *s = aio->state;
211
212 td_complete_request(aio->treq, err);
213
214 s->aio_free_list[s->aio_free_count++] = aio;
215 }
216
/*
 * Queue an asynchronous read of treq.secs sectors starting at
 * sector treq.sec.  Completes the request with -EBUSY when no AIO
 * slots are free.
 */
static void async_read(td_driver_t *driver, td_request_t treq)
{
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
	struct qcow_request *aio;
	uint64_t offset = treq.sec * (uint64_t)driver->info.sector_size;
	int bytes = treq.secs * driver->info.sector_size;

	if (!s->aio_free_count) {
		td_complete_request(treq, -EBUSY);
		return;
	}

	/* Grab a free slot and remember the request for completion. */
	aio = s->aio_free_list[--s->aio_free_count];
	aio->treq = treq;
	aio->state = s;

	td_prep_read(&aio->tiocb, s->fd, treq.buf, bytes, offset,
		     tdqcow_complete, aio);
	td_queue_tiocb(driver, &aio->tiocb);
}
244
/*
 * Queue an asynchronous write of treq.secs sectors starting at
 * sector treq.sec.  Completes the request with -EBUSY when no AIO
 * slots are free.
 */
static void async_write(td_driver_t *driver, td_request_t treq)
{
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
	struct qcow_request *aio;
	uint64_t offset = treq.sec * (uint64_t)driver->info.sector_size;
	int bytes = treq.secs * driver->info.sector_size;

	if (!s->aio_free_count) {
		td_complete_request(treq, -EBUSY);
		return;
	}

	/* Grab a free slot and remember the request for completion. */
	aio = s->aio_free_list[--s->aio_free_count];
	aio->treq = treq;
	aio->state = s;

	td_prep_write(&aio->tiocb, s->fd, treq.buf, bytes, offset,
		      tdqcow_complete, aio);
	td_queue_tiocb(driver, &aio->tiocb);
}
272
273 /*
274 * The crypt function is compatible with the linux cryptoloop
275 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
276 * supported .
277 */
/*
 * AES-CBC encrypt or decrypt @nb_sectors 512-byte sectors.
 *
 * The IV for each sector is its little-endian sector number,
 * zero-extended to 16 bytes -- compatible with the Linux cryptoloop
 * scheme for images < 4 GB.  out_buf may alias in_buf.
 */
static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
                            uint8_t *out_buf, const uint8_t *in_buf,
                            int nb_sectors, int enc,
                            const AES_KEY *key)
{
	union {
		uint64_t ll[2];
		uint8_t b[16];
	} iv;
	int n;

	for (n = 0; n < nb_sectors; n++, sector_num++) {
		iv.ll[0] = cpu_to_le64(sector_num);
		iv.ll[1] = 0;
		AES_cbc_encrypt(in_buf + n * 512, out_buf + n * 512,
				512, key, iv.b, enc);
	}
}
299
qtruncate(int fd,off_t length,int sparse)300 int qtruncate(int fd, off_t length, int sparse)
301 {
302 int ret, i;
303 int current = 0, rem = 0;
304 uint64_t sectors;
305 struct stat st;
306 char *buf;
307
308 /* If length is greater than the current file len
309 * we synchronously write zeroes to the end of the
310 * file, otherwise we truncate the length down
311 */
312 ret = fstat(fd, &st);
313 if (ret == -1)
314 return -1;
315 if (S_ISBLK(st.st_mode))
316 return 0;
317
318 sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
319 current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
320 rem = st.st_size % DEFAULT_SECTOR_SIZE;
321
322 /* If we are extending this file, we write zeros to the end --
323 * this tries to ensure that the extents allocated wind up being
324 * contiguous on disk.
325 */
326 if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
327 /*We are extending the file*/
328 if ((ret = posix_memalign((void **)&buf,
329 512, DEFAULT_SECTOR_SIZE))) {
330 DPRINTF("posix_memalign failed: %d\n", ret);
331 return -1;
332 }
333 memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
334 if (lseek(fd, 0, SEEK_END)==-1) {
335 DPRINTF("Lseek EOF failed (%d), internal error\n",
336 errno);
337 free(buf);
338 return -1;
339 }
340 if (rem) {
341 ret = write(fd, buf, rem);
342 if (ret != rem) {
343 DPRINTF("write failed: ret = %d, err = %s\n",
344 ret, strerror(errno));
345 free(buf);
346 return -1;
347 }
348 }
349 for (i = current; i < sectors; i++ ) {
350 ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
351 if (ret != DEFAULT_SECTOR_SIZE) {
352 DPRINTF("write failed: ret = %d, err = %s\n",
353 ret, strerror(errno));
354 free(buf);
355 return -1;
356 }
357 }
358 free(buf);
359 } else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
360 if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
361 DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
362 return -1;
363 }
364 return 0;
365 }
366
367 /* 'allocate' is:
368 *
369 * 0 to not allocate.
370 *
371 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
372 * 'n_end')
373 *
374 * 2 to allocate a compressed cluster of size
375 * 'compressed_size'. 'compressed_size' must be > 0 and <
376 * cluster_size
377 *
378 * return 0 if not allocated.
379 */
get_cluster_offset(struct tdqcow_state * s,uint64_t offset,int allocate,int compressed_size,int n_start,int n_end)380 static uint64_t get_cluster_offset(struct tdqcow_state *s,
381 uint64_t offset, int allocate,
382 int compressed_size,
383 int n_start, int n_end)
384 {
385 int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
386 char *tmp_ptr2, *l2_ptr, *l1_ptr;
387 uint64_t *tmp_ptr;
388 uint64_t l2_offset, *l2_table, cluster_offset, tmp;
389 uint32_t min_count;
390 int new_l2_table;
391
392 /*Check L1 table for the extent offset*/
393 l1_index = offset >> (s->l2_bits + s->cluster_bits);
394 l2_offset = s->l1_table[l1_index];
395 new_l2_table = 0;
396 if (!l2_offset) {
397 if (!allocate)
398 return 0;
399 /*
400 * allocating a new l2 entry + extent
401 * at the end of the file, we must also
402 * update the L1 entry safely.
403 */
404 l2_offset = s->fd_end;
405
406 /* round to cluster size */
407 l2_offset = (l2_offset + s->cluster_size - 1)
408 & ~(s->cluster_size - 1);
409
410 /* update the L1 entry */
411 s->l1_table[l1_index] = l2_offset;
412
413 /*Truncate file for L2 table
414 *(initialised to zero in case we crash)*/
415 if (qtruncate(s->fd,
416 l2_offset + (s->l2_size * sizeof(uint64_t)),
417 s->sparse) != 0) {
418 DPRINTF("ERROR truncating file\n");
419 return 0;
420 }
421 s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));
422
423 /*Update the L1 table entry on disk
424 * (for O_DIRECT we write 4KByte blocks)*/
425 l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
426 l1_ptr = (char *)s->l1_table + (l1_sector << 12);
427
428 if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
429 DPRINTF("ERROR allocating memory for L1 table\n");
430 return 0;
431 }
432 memcpy(tmp_ptr, l1_ptr, 4096);
433
434 /* Convert block to write to big endian */
435 for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
436 cpu_to_be64s(&tmp_ptr[i]);
437 }
438
439 /*
440 * Issue non-asynchronous L1 write.
441 * For safety, we must ensure that
442 * entry is written before blocks.
443 */
444 lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
445 if (write(s->fd, tmp_ptr, 4096) != 4096) {
446 free(tmp_ptr);
447 return 0;
448 }
449 free(tmp_ptr);
450
451 new_l2_table = 1;
452 goto cache_miss;
453 } else if (s->min_cluster_alloc == s->l2_size) {
454 /*Fast-track the request*/
455 cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
456 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
457 return cluster_offset + (l2_index * s->cluster_size);
458 }
459
460 /*Check to see if L2 entry is already cached*/
461 for (i = 0; i < L2_CACHE_SIZE; i++) {
462 if (l2_offset == s->l2_cache_offsets[i]) {
463 /* increment the hit count */
464 if (++s->l2_cache_counts[i] == 0xffffffff) {
465 for (j = 0; j < L2_CACHE_SIZE; j++) {
466 s->l2_cache_counts[j] >>= 1;
467 }
468 }
469 l2_table = s->l2_cache + (i << s->l2_bits);
470 goto found;
471 }
472 }
473
474 cache_miss:
475 /* not found: load a new entry in the least used one */
476 min_index = 0;
477 min_count = 0xffffffff;
478 for (i = 0; i < L2_CACHE_SIZE; i++) {
479 if (s->l2_cache_counts[i] < min_count) {
480 min_count = s->l2_cache_counts[i];
481 min_index = i;
482 }
483 }
484 l2_table = s->l2_cache + (min_index << s->l2_bits);
485
486 /*If extent pre-allocated, read table from disk,
487 *otherwise write new table to disk*/
488 if (new_l2_table) {
489 /*Should we allocate the whole extent? Adjustable parameter.*/
490 if (s->cluster_alloc == s->l2_size) {
491 cluster_offset = l2_offset +
492 (s->l2_size * sizeof(uint64_t));
493 cluster_offset = (cluster_offset + s->cluster_size - 1)
494 & ~(s->cluster_size - 1);
495 if (qtruncate(s->fd, cluster_offset +
496 (s->cluster_size * s->l2_size),
497 s->sparse) != 0) {
498 DPRINTF("ERROR truncating file\n");
499 return 0;
500 }
501 s->fd_end = cluster_offset +
502 (s->cluster_size * s->l2_size);
503 for (i = 0; i < s->l2_size; i++) {
504 l2_table[i] = cpu_to_be64(cluster_offset +
505 (i*s->cluster_size));
506 }
507 } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
508
509 lseek(s->fd, l2_offset, SEEK_SET);
510 if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
511 s->l2_size * sizeof(uint64_t))
512 return 0;
513 } else {
514 lseek(s->fd, l2_offset, SEEK_SET);
515 if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
516 s->l2_size * sizeof(uint64_t))
517 return 0;
518 }
519
520 /*Update the cache entries*/
521 s->l2_cache_offsets[min_index] = l2_offset;
522 s->l2_cache_counts[min_index] = 1;
523
524 found:
525 /*The extent is split into 's->l2_size' blocks of
526 *size 's->cluster_size'*/
527 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
528 cluster_offset = be64_to_cpu(l2_table[l2_index]);
529
530 if (!cluster_offset ||
531 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
532 if (!allocate)
533 return 0;
534
535 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
536 (n_end - n_start) < s->cluster_sectors) {
537 /* cluster is already allocated but compressed, we must
538 decompress it in the case it is not completely
539 overwritten */
540 if (decompress_cluster(s, cluster_offset) < 0)
541 return 0;
542 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
543 cluster_offset = (cluster_offset + s->cluster_size - 1)
544 & ~(s->cluster_size - 1);
545 /* write the cluster content - not asynchronous */
546 lseek(s->fd, cluster_offset, SEEK_SET);
547 if (write(s->fd, s->cluster_cache, s->cluster_size) !=
548 s->cluster_size)
549 return -1;
550 } else {
551 /* allocate a new cluster */
552 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
553 if (allocate == 1) {
554 /* round to cluster size */
555 cluster_offset =
556 (cluster_offset + s->cluster_size - 1)
557 & ~(s->cluster_size - 1);
558 if (qtruncate(s->fd, cluster_offset +
559 s->cluster_size, s->sparse)!=0) {
560 DPRINTF("ERROR truncating file\n");
561 return 0;
562 }
563 s->fd_end = (cluster_offset + s->cluster_size);
564 /* if encrypted, we must initialize the cluster
565 content which won't be written */
566 if (s->crypt_method &&
567 (n_end - n_start) < s->cluster_sectors) {
568 uint64_t start_sect;
569 start_sect = (offset &
570 ~(s->cluster_size - 1))
571 >> 9;
572 memset(s->cluster_data + 512,
573 0xaa, 512);
574 for (i = 0; i < s->cluster_sectors;i++)
575 {
576 if (i < n_start || i >= n_end)
577 {
578 encrypt_sectors(s, start_sect + i,
579 s->cluster_data,
580 s->cluster_data + 512, 1, 1,
581 &s->aes_encrypt_key);
582 lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
583 if (write(s->fd, s->cluster_data, 512) != 512)
584 return -1;
585 }
586 }
587 }
588 } else {
589 cluster_offset |= QCOW_OFLAG_COMPRESSED |
590 (uint64_t)compressed_size
591 << (63 - s->cluster_bits);
592 }
593 }
594 /* update L2 table */
595 tmp = cpu_to_be64(cluster_offset);
596 l2_table[l2_index] = tmp;
597
598 /*For IO_DIRECT we write 4KByte blocks*/
599 l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
600 l2_ptr = (char *)l2_table + (l2_sector << 12);
601
602 if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
603 DPRINTF("ERROR allocating memory for L1 table\n");
604 return 0;
605 }
606 memcpy(tmp_ptr2, l2_ptr, 4096);
607 lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
608 if (write(s->fd, tmp_ptr2, 4096) != 4096) {
609 free(tmp_ptr2);
610 return -1;
611 }
612 free(tmp_ptr2);
613 }
614 return cluster_offset;
615 }
616
qcow_is_allocated(struct tdqcow_state * s,int64_t sector_num,int nb_sectors,int * pnum)617 static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
618 int nb_sectors, int *pnum)
619 {
620 int index_in_cluster, n;
621 uint64_t cluster_offset;
622
623 cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
624 index_in_cluster = sector_num & (s->cluster_sectors - 1);
625 n = s->cluster_sectors - index_in_cluster;
626 if (n > nb_sectors)
627 n = nb_sectors;
628 *pnum = n;
629 return (cluster_offset != 0);
630 }
631
/*
 * Inflate a raw-deflate (headerless) compressed buffer.
 *
 * Decompresses @buf_size bytes from @buf into @out_buf, which must
 * receive exactly @out_buf_size bytes of output.
 *
 * Returns 0 on success, -1 on error or short output.
 */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
	z_stream zs;
	int status, produced;

	memset(&zs, 0, sizeof(zs));
	zs.next_in = (uint8_t *)buf;
	zs.avail_in = buf_size;
	zs.next_out = out_buf;
	zs.avail_out = out_buf_size;

	/* Negative window bits => raw deflate stream, no zlib header. */
	if (inflateInit2(&zs, -12) != Z_OK)
		return -1;

	status = inflate(&zs, Z_FINISH);
	produced = zs.next_out - out_buf;
	inflateEnd(&zs);

	if (status != Z_STREAM_END && status != Z_BUF_ERROR)
		return -1;
	if (produced != out_buf_size)
		return -1;
	return 0;
}
658
decompress_cluster(struct tdqcow_state * s,uint64_t cluster_offset)659 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
660 {
661 int ret, csize;
662 uint64_t coffset;
663
664 coffset = cluster_offset & s->cluster_offset_mask;
665 if (s->cluster_cache_offset != coffset) {
666 csize = cluster_offset >> (63 - s->cluster_bits);
667 csize &= (s->cluster_size - 1);
668 lseek(s->fd, coffset, SEEK_SET);
669 ret = read(s->fd, s->cluster_data, csize);
670 if (ret != csize)
671 return -1;
672 if (decompress_buffer(s->cluster_cache, s->cluster_size,
673 s->cluster_data, csize) < 0) {
674 return -1;
675 }
676 s->cluster_cache_offset = coffset;
677 }
678 return 0;
679 }
680
681 static int
tdqcow_read_header(int fd,QCowHeader * header)682 tdqcow_read_header(int fd, QCowHeader *header)
683 {
684 int err;
685 char *buf;
686 struct stat st;
687 size_t size, expected;
688
689 memset(header, 0, sizeof(*header));
690
691 err = fstat(fd, &st);
692 if (err)
693 return -errno;
694
695 err = lseek(fd, 0, SEEK_SET);
696 if (err == (off_t)-1)
697 return -errno;
698
699 size = (sizeof(*header) + 511) & ~511;
700 err = posix_memalign((void **)&buf, 512, size);
701 if (err)
702 return err;
703
704 expected = size;
705 if (st.st_size < size)
706 expected = st.st_size;
707
708 errno = 0;
709 err = read(fd, buf, size);
710 if (err != expected) {
711 err = (errno ? -errno : -EIO);
712 goto out;
713 }
714
715 memcpy(header, buf, sizeof(*header));
716 be32_to_cpus(&header->magic);
717 be32_to_cpus(&header->version);
718 be64_to_cpus(&header->backing_file_offset);
719 be32_to_cpus(&header->backing_file_size);
720 be32_to_cpus(&header->mtime);
721 be64_to_cpus(&header->size);
722 be32_to_cpus(&header->crypt_method);
723 be64_to_cpus(&header->l1_table_offset);
724
725 err = 0;
726
727 out:
728 free(buf);
729 return err;
730 }
731
732 static int
tdqcow_load_l1_table(struct tdqcow_state * s,QCowHeader * header)733 tdqcow_load_l1_table(struct tdqcow_state *s, QCowHeader *header)
734 {
735 char *buf;
736 struct stat st;
737 size_t expected;
738 int i, err, shift;
739 QCowHeader_ext *exthdr;
740 uint32_t l1_table_bytes, l1_table_block, l1_table_size;
741
742 buf = NULL;
743 s->l1_table = NULL;
744
745 shift = s->cluster_bits + s->l2_bits;
746
747 s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
748 s->l1_table_offset = header->l1_table_offset;
749
750 s->min_cluster_alloc = 1; /* default */
751
752 l1_table_bytes = s->l1_size * sizeof(uint64_t);
753 l1_table_size = (l1_table_bytes + 4095) & ~4095;
754 l1_table_block = (l1_table_bytes + s->l1_table_offset + 4095) & ~4095;
755
756 DPRINTF("L1 Table offset detected: %"PRIu64", size %d (%d)\n",
757 (uint64_t)s->l1_table_offset,
758 (int) (s->l1_size * sizeof(uint64_t)),
759 l1_table_size);
760
761 err = fstat(s->fd, &st);
762 if (err) {
763 err = -errno;
764 goto out;
765 }
766
767 err = lseek(s->fd, 0, SEEK_SET);
768 if (err == (off_t)-1) {
769 err = -errno;
770 goto out;
771 }
772
773 err = posix_memalign((void **)&buf, 512, l1_table_block);
774 if (err) {
775 buf = NULL;
776 goto out;
777 }
778
779 err = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
780 if (err) {
781 s->l1_table = NULL;
782 goto out;
783 }
784
785 memset(buf, 0, l1_table_block);
786 memset(s->l1_table, 0, l1_table_size);
787
788 expected = l1_table_block;
789 if (st.st_size < l1_table_block)
790 expected = st.st_size;
791
792 errno = 0;
793 err = read(s->fd, buf, l1_table_block);
794 if (err != expected) {
795 err = (errno ? -errno : -EIO);
796 goto out;
797 }
798
799 memcpy(s->l1_table, buf + s->l1_table_offset, l1_table_size);
800 exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
801
802 /* check for xen extended header */
803 if (s->l1_table_offset % 4096 == 0 &&
804 be32_to_cpu(exthdr->xmagic) == XEN_MAGIC) {
805 uint32_t flags = be32_to_cpu(exthdr->flags);
806 uint32_t cksum = be32_to_cpu(exthdr->cksum);
807
808 /*
809 * Try to detect old tapdisk images. They have to be fixed
810 * because they use big endian rather than native endian for
811 * the L1 table. After this block, the l1 table will
812 * definitely be in BIG endian.
813 */
814 if (!(flags & EXTHDR_L1_BIG_ENDIAN)) {
815 DPRINTF("qcow: converting to big endian L1 table\n");
816
817 /* convert to big endian */
818 for (i = 0; i < s->l1_size; i++)
819 cpu_to_be64s(&s->l1_table[i]);
820
821 flags |= EXTHDR_L1_BIG_ENDIAN;
822 exthdr->flags = cpu_to_be32(flags);
823
824 memcpy(buf + s->l1_table_offset,
825 s->l1_table, l1_table_size);
826
827 err = lseek(s->fd, 0, SEEK_SET);
828 if (err == (off_t)-1) {
829 err = -errno;
830 goto out;
831 }
832
833 err = atomicio(vwrite, s->fd, buf, l1_table_block);
834 if (err != l1_table_block) {
835 err = -errno;
836 goto out;
837 }
838 }
839
840 /* check the L1 table checksum */
841 if (cksum != gen_cksum((char *)s->l1_table,
842 s->l1_size * sizeof(uint64_t)))
843 DPRINTF("qcow: bad L1 checksum\n");
844 else {
845 s->extended = 1;
846 s->sparse = (be32_to_cpu(exthdr->flags) & SPARSE_FILE);
847 s->min_cluster_alloc =
848 be32_to_cpu(exthdr->min_cluster_alloc);
849 }
850 }
851
852 /* convert L1 table to native endian for operation */
853 for (i = 0; i < s->l1_size; i++)
854 be64_to_cpus(&s->l1_table[i]);
855
856 err = 0;
857
858 out:
859 if (err) {
860 free(buf);
861 free(s->l1_table);
862 s->l1_table = NULL;
863 }
864 return err;
865 }
866
867 /* Open the disk file and initialize qcow state. */
/* Open the disk file and initialize qcow state.
 *
 * Validates the QCOW header, loads the L1 table, allocates the L2
 * cache and cluster buffers, and initialises the AIO request pool.
 * Returns 0 on success, -1 on failure.
 */
int tdqcow_open (td_driver_t *driver, const char *name, td_flag_t flags)
{
	/* NOTE(review): 'len' is unused. */
	int fd, len, i, ret, size, o_flags;
	td_disk_info_t *bs = &(driver->info);
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
	QCowHeader header;
	uint64_t final_cluster = 0;

	DPRINTF("QCOW: Opening %s\n", name);

	/* NOTE(review): equality test on flags -- if td_flag_t is a
	 * bitmask this should likely be (flags & TD_OPEN_RDONLY);
	 * confirm against the tapdisk flag definitions. */
	o_flags = O_DIRECT | O_LARGEFILE |
		((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
	fd = open(name, o_flags);
	if (fd < 0) {
		DPRINTF("Unable to open %s (%d)\n", name, -errno);
		return -1;
	}

	s->fd = fd;
	s->name = strdup(name);
	if (!s->name)
		goto fail;

	if (tdqcow_read_header(fd, &header))
		goto fail;

	if (header.magic != QCOW_MAGIC)
		goto fail;

	/* Only QCOW version 1 is supported here; v2 was handled by a
	 * separate driver before the blktap rework (see TODO). */
	switch (header.version) {
	case QCOW_VERSION:
		break;
	case 2:
		//TODO: Port qcow2 to new blktap framework.
		//		close(fd);
		//		dd->drv = &tapdisk_qcow2;
		//		return dd->drv->td_open(dd, name, flags);
		goto fail;
	default:
		goto fail;
	}

	/* Sanity-check geometry: nonzero size, clusters >= 512 bytes. */
	if (header.size <= 1 || header.cluster_bits < 9)
		goto fail;
	if (header.crypt_method > QCOW_CRYPT_AES)
		goto fail;
	s->crypt_method_header = header.crypt_method;
	if (s->crypt_method_header)
		s->encrypted = 1;
	s->cluster_bits = header.cluster_bits;
	s->cluster_size = 1 << s->cluster_bits;
	s->cluster_sectors = 1 << (s->cluster_bits - 9);
	s->l2_bits = header.l2_bits;
	s->l2_size = 1 << s->l2_bits;
	s->cluster_alloc = s->l2_size;
	bs->size = header.size / 512;
	s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
	s->backing_file_offset = header.backing_file_offset;
	s->backing_file_size = header.backing_file_size;

	/* allocate and load l1 table */
	if (tdqcow_load_l1_table(s, &header))
		goto fail;

	/* alloc L2 cache */
	size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
	ret = posix_memalign((void **)&s->l2_cache, 4096, size);
	if(ret != 0) goto fail;

	size = s->cluster_size;
	ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
	if(ret != 0) goto fail;

	ret = posix_memalign((void **)&s->cluster_data, 4096, size);
	if(ret != 0) goto fail;
	/* -1 marks the compressed-cluster cache as empty. */
	s->cluster_cache_offset = -1;

	if (s->backing_file_offset != 0)
		s->cluster_alloc = 1; /*Cannot use pre-alloc*/

	bs->sector_size = 512;
	bs->info = 0;

	/* Track the highest allocated L2 table offset, to decide
	 * where the data end of the file is. */
	for(i = 0; i < s->l1_size; i++)
		if (s->l1_table[i] > final_cluster)
			final_cluster = s->l1_table[i];

	if (init_aio_state(driver)!=0) {
		DPRINTF("Unable to initialise AIO state\n");
		free_aio_state(s);
		goto fail;
	}

	if (!final_cluster)
		/* Empty image: data begins right after the L1 table. */
		s->fd_end = s->l1_table_offset +
			((s->l1_size * sizeof(uint64_t) + 4095) & ~4095);
	else {
		s->fd_end = lseek(fd, 0, SEEK_END);
		if (s->fd_end == (off_t)-1)
			goto fail;
	}

	return 0;

fail:
	DPRINTF("QCOW Open failed\n");

	/* NOTE(review): assumes driver->data arrives zeroed so these
	 * free()s are safe on fields that were never allocated --
	 * confirm the tapdisk framework guarantees this.  s->name is
	 * not freed here (leaked on failure). */
	free_aio_state(s);
	free(s->l1_table);
	free(s->l2_cache);
	free(s->cluster_cache);
	free(s->cluster_data);
	close(fd);
	return -1;
}
983
/*
 * Service a tapdisk read request, cluster by cluster.
 *
 * Unallocated ranges are forwarded to the parent (backing) image --
 * coalesced into a single forward when the whole remainder is
 * unallocated.  Compressed clusters are decompressed and copied
 * synchronously; normal clusters are read asynchronously from their
 * physical offsets.
 */
void tdqcow_queue_read(td_driver_t *driver, td_request_t treq)
{
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
	/* NOTE(review): 'ret', 'i' (outer) and 'prv' are unused. */
	int ret = 0, index_in_cluster, n, i;
	uint64_t cluster_offset, sector, nb_sectors;
	struct qcow_prv* prv;
	td_request_t clone = treq;
	char* buf = treq.buf;

	sector = treq.sec;
	nb_sectors = treq.secs;

	/*We store a local record of the request*/
	while (nb_sectors > 0) {
		cluster_offset =
			get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
		/* n = sectors remaining in this cluster, capped at the
		 * remainder of the request */
		index_in_cluster = sector & (s->cluster_sectors - 1);
		n = s->cluster_sectors - index_in_cluster;
		if (n > nb_sectors)
			n = nb_sectors;

		if (s->aio_free_count == 0) {
			td_complete_request(treq, -EBUSY);
			return;
		}

		if(!cluster_offset) {
			int i;
			/* Forward entire request if possible. */
			for(i=0; i<nb_sectors; i++)
				if(get_cluster_offset(s, (sector+i) << 9, 0, 0, 0, 0))
					goto coalesce_failed;
			treq.buf = buf;
			treq.sec = sector;
			treq.secs = nb_sectors;
			td_forward_request(treq);
			return;
coalesce_failed:
			/* Some later sector is allocated: forward only
			 * this cluster's span and keep walking. */
			treq.buf = buf;
			treq.sec = sector;
			treq.secs = n;
			td_forward_request(treq);

		} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
			/* Compressed cluster: decompress into the cache
			 * and copy out synchronously. */
			if (decompress_cluster(s, cluster_offset) < 0) {
				td_complete_request(treq, -EIO);
				goto done;
			}
			memcpy(buf, s->cluster_cache + index_in_cluster * 512,
			       512 * n);

			treq.buf = buf;
			treq.sec = sector;
			treq.secs = n;
			td_complete_request(treq, 0);
		} else {
			/* Plain allocated cluster: async read from the
			 * physical sector. */
			clone.buf = buf;
			clone.sec = (cluster_offset>>9)+index_in_cluster;
			clone.secs = n;
			async_read(driver, clone);
		}
		nb_sectors -= n;
		sector += n;
		buf += n * 512;
	}
done:
	return;
}
1052
/*
 * Service a tapdisk write request, cluster by cluster.
 *
 * Each cluster touched is allocated on demand through
 * get_cluster_offset() and the data is written asynchronously to
 * its physical location.
 */
void tdqcow_queue_write(td_driver_t *driver, td_request_t treq)
{
	struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
	/* NOTE(review): 'ret', 'i', 'cb' and 'prv' are unused. */
	int ret = 0, index_in_cluster, n, i;
	uint64_t cluster_offset, sector, nb_sectors;
	td_callback_t cb;
	struct qcow_prv* prv;
	char* buf = treq.buf;
	td_request_t clone=treq;

	sector = treq.sec;
	nb_sectors = treq.secs;

	/*We store a local record of the request*/
	while (nb_sectors > 0) {
		/* n = sectors remaining in this cluster, capped at the
		 * remainder of the request */
		index_in_cluster = sector & (s->cluster_sectors - 1);
		n = s->cluster_sectors - index_in_cluster;
		if (n > nb_sectors)
			n = nb_sectors;

		if (s->aio_free_count == 0) {
			td_complete_request(treq, -EBUSY);
			return;
		}

		/* Look up -- or allocate -- the backing cluster;
		 * 0 signals allocation failure. */
		cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
						    index_in_cluster,
						    index_in_cluster+n);
		if (!cluster_offset) {
			DPRINTF("Ooops, no write cluster offset!\n");
			td_complete_request(treq, -EIO);
			return;
		}

		if (s->crypt_method) {
			/* NOTE(review): the ciphertext is produced into
			 * s->cluster_data, yet the async write below still
			 * queues the plaintext 'buf' -- the encrypted bytes
			 * appear to be discarded.  Confirm intended
			 * behavior before relying on encryption here. */
			encrypt_sectors(s, sector, s->cluster_data,
					(unsigned char *)buf, n, 1,
					&s->aes_encrypt_key);

			clone.buf = buf;
			clone.sec = (cluster_offset>>9) + index_in_cluster;
			clone.secs = n;
			async_write(driver, clone);
		} else {
			clone.buf = buf;
			clone.sec = (cluster_offset>>9) + index_in_cluster;
			clone.secs = n;

			async_write(driver, clone);
		}

		nb_sectors -= n;
		sector += n;
		buf += n * 512;
	}
	s->cluster_cache_offset = -1; /* disable compressed cache */

	return;
}
1112
1113 static int
tdqcow_update_checksum(struct tdqcow_state * s)1114 tdqcow_update_checksum(struct tdqcow_state *s)
1115 {
1116 int i, fd, err;
1117 uint32_t offset, cksum, out;
1118
1119 if (!s->extended)
1120 return 0;
1121
1122 fd = open(s->name, O_WRONLY | O_LARGEFILE); /* open without O_DIRECT */
1123 if (fd == -1) {
1124 err = errno;
1125 goto out;
1126 }
1127
1128 offset = sizeof(QCowHeader) + offsetof(QCowHeader_ext, cksum);
1129 if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
1130 err = errno;
1131 goto out;
1132 }
1133
1134 /* convert to big endian for checksum */
1135 for (i = 0; i < s->l1_size; i++)
1136 cpu_to_be64s(&s->l1_table[i]);
1137
1138 cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
1139
1140 /* and back again... */
1141 for (i = 0; i < s->l1_size; i++)
1142 be64_to_cpus(&s->l1_table[i]);
1143
1144 DPRINTF("Writing cksum: %d", cksum);
1145
1146 out = cpu_to_be32(cksum);
1147 if (write(fd, &out, sizeof(out)) != sizeof(out)) {
1148 err = errno;
1149 goto out;
1150 }
1151
1152 err = 0;
1153
1154 out:
1155 if (err)
1156 DPRINTF("failed to update checksum: %d\n", err);
1157 if (fd != -1)
1158 close(fd);
1159 return err;
1160 }
1161
/*
 * Close a qcow image: persist the extended-header checksum, then
 * release every per-image resource held by the driver state.
 */
int tdqcow_close(td_driver_t *driver)
{
	struct tdqcow_state *state = (struct tdqcow_state *)driver->data;

	/* Write the up-to-date L1-table checksum back to the file first. */
	tdqcow_update_checksum(state);

	free_aio_state(state);
	close(state->fd);

	free(state->cluster_data);
	free(state->cluster_cache);
	free(state->l2_cache);
	free(state->l1_table);
	free(state->name);

	return 0;
}
1178
qcow_create(const char * filename,uint64_t total_size,const char * backing_file,int sparse)1179 int qcow_create(const char *filename, uint64_t total_size,
1180 const char *backing_file, int sparse)
1181 {
1182 int fd, header_size, backing_filename_len, l1_size, i;
1183 int shift, length, adjust, flags = 0, ret = 0;
1184 QCowHeader header;
1185 QCowHeader_ext exthdr;
1186 char backing_filename[PATH_MAX], *ptr;
1187 uint64_t tmp, size, total_length;
1188 struct stat st;
1189
1190 DPRINTF("Qcow_create: size %"PRIu64"\n",total_size);
1191
1192 fd = open(filename,
1193 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1194 0644);
1195 if (fd < 0)
1196 return -1;
1197
1198 memset(&header, 0, sizeof(header));
1199 header.magic = cpu_to_be32(QCOW_MAGIC);
1200 header.version = cpu_to_be32(QCOW_VERSION);
1201
1202 /*Create extended header fields*/
1203 exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
1204
1205 header_size = sizeof(header) + sizeof(QCowHeader_ext);
1206 backing_filename_len = 0;
1207 size = (total_size >> SECTOR_SHIFT);
1208 if (backing_file) {
1209 if (strcmp(backing_file, "fat:")) {
1210 const char *p;
1211 /* XXX: this is a hack: we do not attempt to
1212 *check for URL like syntax */
1213 p = strchr(backing_file, ':');
1214 if (p && (p - backing_file) >= 2) {
1215 /* URL like but exclude "c:" like filenames */
1216 strncpy(backing_filename, backing_file,
1217 sizeof(backing_filename));
1218 } else {
1219 if (realpath(backing_file, backing_filename) == NULL ||
1220 stat(backing_filename, &st) != 0) {
1221 return -1;
1222 }
1223 }
1224 header.backing_file_offset = cpu_to_be64(header_size);
1225 backing_filename_len = strlen(backing_filename);
1226 header.backing_file_size = cpu_to_be32(
1227 backing_filename_len);
1228 header_size += backing_filename_len;
1229
1230 /*Set to the backing file size*/
1231 if(get_filesize(backing_filename, &size, &st)) {
1232 return -1;
1233 }
1234 DPRINTF("Backing file size detected: %"PRId64" sectors"
1235 "(total %"PRId64" [%"PRId64" MB])\n",
1236 size,
1237 (uint64_t)(size << SECTOR_SHIFT),
1238 (uint64_t)(size >> 11));
1239 } else {
1240 backing_file = NULL;
1241 DPRINTF("Setting file size: %"PRId64" (total %"PRId64")\n",
1242 total_size,
1243 (uint64_t) (total_size << SECTOR_SHIFT));
1244 }
1245 header.mtime = cpu_to_be32(st.st_mtime);
1246 header.cluster_bits = 9; /* 512 byte cluster to avoid copying
1247 unmodifyed sectors */
1248 header.l2_bits = 12; /* 32 KB L2 tables */
1249 exthdr.min_cluster_alloc = cpu_to_be32(1);
1250 } else {
1251 DPRINTF("Setting file size: %"PRId64" sectors"
1252 "(total %"PRId64" [%"PRId64" MB])\n",
1253 size,
1254 (uint64_t) (size << SECTOR_SHIFT),
1255 (uint64_t) (size >> 11));
1256 header.cluster_bits = 12; /* 4 KB clusters */
1257 header.l2_bits = 9; /* 4 KB L2 tables */
1258 exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
1259 }
1260 /*Set the header size value*/
1261 header.size = cpu_to_be64(size * 512);
1262
1263 header_size = (header_size + 7) & ~7;
1264 if (header_size % 4096 > 0) {
1265 header_size = ((header_size >> 12) + 1) << 12;
1266 }
1267
1268 shift = header.cluster_bits + header.l2_bits;
1269 l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
1270
1271 header.l1_table_offset = cpu_to_be64(header_size);
1272 DPRINTF("L1 Table offset: %d, size %d\n",
1273 header_size,
1274 (int)(l1_size * sizeof(uint64_t)));
1275 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1276
1277 ptr = calloc(1, l1_size * sizeof(uint64_t));
1278 exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t)));
1279 printf("Created cksum: %d\n",exthdr.cksum);
1280 free(ptr);
1281
1282 /*adjust file length to system page size boundary*/
1283 length = ROUNDUP(header_size + (l1_size * sizeof(uint64_t)),
1284 getpagesize());
1285 if (qtruncate(fd, length, 0)!=0) {
1286 DPRINTF("ERROR truncating file\n");
1287 return -1;
1288 }
1289
1290 if (sparse == 0) {
1291 /*Filesize is length+l1_size*(1 << s->l2_bits)+(size*512)*/
1292 total_length = length + (l1_size * (1 << 9)) + (size * 512);
1293 if (qtruncate(fd, total_length, 0)!=0) {
1294 DPRINTF("ERROR truncating file\n");
1295 return -1;
1296 }
1297 printf("File truncated to length %"PRIu64"\n",total_length);
1298 } else
1299 flags = SPARSE_FILE;
1300
1301 flags |= EXTHDR_L1_BIG_ENDIAN;
1302 exthdr.flags = cpu_to_be32(flags);
1303
1304 /* write all the data */
1305 lseek(fd, 0, SEEK_SET);
1306 ret += write(fd, &header, sizeof(header));
1307 ret += write(fd, &exthdr, sizeof(exthdr));
1308 if (backing_file)
1309 ret += write(fd, backing_filename, backing_filename_len);
1310
1311 lseek(fd, header_size, SEEK_SET);
1312 tmp = 0;
1313 for (i = 0;i < l1_size; i++) {
1314 ret += write(fd, &tmp, sizeof(tmp));
1315 }
1316
1317 close(fd);
1318
1319 return 0;
1320 }
1321
qcow_make_empty(struct tdqcow_state * s)1322 static int qcow_make_empty(struct tdqcow_state *s)
1323 {
1324 uint32_t l1_length = s->l1_size * sizeof(uint64_t);
1325
1326 memset(s->l1_table, 0, l1_length);
1327 lseek(s->fd, s->l1_table_offset, SEEK_SET);
1328 if (write(s->fd, s->l1_table, l1_length) < 0)
1329 return -1;
1330 if (qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse)!=0) {
1331 DPRINTF("ERROR truncating file\n");
1332 return -1;
1333 }
1334
1335 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
1336 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
1337 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
1338
1339 return 0;
1340 }
1341
qcow_get_cluster_size(struct tdqcow_state * s)1342 static int qcow_get_cluster_size(struct tdqcow_state *s)
1343 {
1344 return s->cluster_size;
1345 }
1346
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
/*
 * Deflate-compress one cluster of data and store it in the image.
 * If the data does not shrink below one cluster it is left for the
 * normal (uncompressed) write path.  Returns 0 on success, -1 on
 * failure.
 */
static int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num,
                                 const uint8_t *buf)
{
	z_stream strm;
	int ret, out_len;
	uint8_t *out_buf;
	uint64_t cluster_offset;

	/* worst-case deflate expansion bound for an incompressible input */
	out_buf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
	if (!out_buf)
		return -1;

	/* best compression, small window, no zlib header */
	memset(&strm, 0, sizeof(strm));
	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
			   Z_DEFLATED, -12,
			   9, Z_DEFAULT_STRATEGY);
	if (ret != Z_OK) {
		free(out_buf);
		return -1;
	}

	strm.avail_in = s->cluster_size;
	strm.next_in = (uint8_t *)buf;
	/* cap output at one cluster: larger means "not worth compressing" */
	strm.avail_out = s->cluster_size;
	strm.next_out = out_buf;

	ret = deflate(&strm, Z_FINISH);
	if (ret != Z_STREAM_END && ret != Z_OK) {
		deflateEnd(&strm);
		free(out_buf);
		return -1;
	}
	out_len = strm.next_out - out_buf;

	deflateEnd(&strm);

	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
		/* could not compress: write normal cluster */
		//tdqcow_queue_write(bs, sector_num, buf, s->cluster_sectors);
	} else {
		cluster_offset = get_cluster_offset(s, sector_num << 9, 2,
						    out_len, 0, 0);
		if (!cluster_offset) {
			/* allocation failed; without this check we would
			 * seek to offset 0 and clobber the image header */
			free(out_buf);
			return -1;
		}
		cluster_offset &= s->cluster_offset_mask;
		if (lseek(s->fd, cluster_offset, SEEK_SET) == (off_t)-1) {
			free(out_buf);
			return -1;
		}
		if (write(s->fd, out_buf, out_len) != out_len) {
			free(out_buf);
			return -1;
		}
	}

	free(out_buf);
	return 0;
}
1403
1404 static int
tdqcow_get_image_type(const char * file,int * type)1405 tdqcow_get_image_type(const char *file, int *type)
1406 {
1407 int fd;
1408 size_t size;
1409 QCowHeader header;
1410
1411 fd = open(file, O_RDONLY);
1412 if (fd == -1)
1413 return -errno;
1414
1415 size = read(fd, &header, sizeof(header));
1416 close(fd);
1417 if (size != sizeof(header))
1418 return (errno ? -errno : -EIO);
1419
1420 be32_to_cpus(&header.magic);
1421 if (header.magic == QCOW_MAGIC)
1422 *type = DISK_TYPE_QCOW;
1423 else
1424 *type = DISK_TYPE_AIO;
1425
1426 return 0;
1427 }
1428
/*
 * Look up this image's parent (backing file), classify it, and fill
 * @id with its name and driver type.
 *
 * Returns 0 on success, TD_NO_PARENT when the image has no backing
 * file, -1 on allocation failure, -EINVAL on I/O or probe failure.
 * On success id->name is heap-allocated; the caller owns it.
 */
int tdqcow_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
{
	off_t off;
	char *buf, *filename;
	int len, secs, type = 0, err = -EINVAL;
	struct tdqcow_state *child = (struct tdqcow_state *)driver->data;

	if (!child->backing_file_offset)
		return TD_NO_PARENT;

	/* read the backing file name, sector-aligned for O_DIRECT fds */
	len = child->backing_file_size;
	off = child->backing_file_offset - (child->backing_file_offset % 512);
	secs = (len + (child->backing_file_offset - off) + 511) >> 9;

	/*
	 * +1: filename[len] below writes a NUL at index
	 * (backing_file_offset - off) + len, which is one byte past the
	 * sector-rounded size whenever the name ends exactly on a
	 * sector boundary.
	 */
	if (posix_memalign((void **)&buf, 512, (secs << 9) + 1))
		return -1;

	if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
		goto out;

	if (read(child->fd, buf, secs << 9) != secs << 9)
		goto out;
	filename = buf + (child->backing_file_offset - off);
	filename[len] = '\0';

	if (tdqcow_get_image_type(filename, &type))
		goto out;

	id->name = strdup(filename);
	if (!id->name)
		goto out; /* err is still -EINVAL */
	id->drivertype = type;
	err = 0;
out:
	free(buf);
	return err;
}
1465
/*
 * Check that @pdriver is a plausible parent for @driver: both image
 * files must exist and report identical sizes.  Returns 0 when the
 * pair is consistent, -EINVAL otherwise.  @flags is currently unused.
 */
int tdqcow_validate_parent(td_driver_t *driver,
			   td_driver_t *pdriver, td_flag_t flags)
{
	struct stat st;
	uint64_t parent_size, child_size;
	struct tdqcow_state *child  = (struct tdqcow_state *)driver->data;
	struct tdqcow_state *parent = (struct tdqcow_state *)pdriver->data;

	/* parent image must exist and have a measurable size */
	if (stat(parent->name, &st) ||
	    get_filesize(parent->name, &parent_size, &st))
		return -EINVAL;

	/* same for the child image */
	if (stat(child->name, &st) ||
	    get_filesize(child->name, &child_size, &st))
		return -EINVAL;

	return (child_size == parent_size) ? 0 : -EINVAL;
}
1489
/*
 * Driver operations table exported to the tapdisk core.  Compressed
 * writes (qcow_compress_cluster) and image creation (qcow_create) are
 * reached through other paths, not through this vtable.
 */
struct tap_disk tapdisk_qcow = {
	.disk_type           = "tapdisk_qcow",
	.flags               = 0,
	.private_data_size   = sizeof(struct tdqcow_state),
	.td_open             = tdqcow_open,
	.td_close            = tdqcow_close,
	.td_queue_read       = tdqcow_queue_read,
	.td_queue_write      = tdqcow_queue_write,
	.td_get_parent_id    = tdqcow_get_parent_id,
	.td_validate_parent  = tdqcow_validate_parent,
	.td_debug            = NULL,
};
1502