1 /******************************************************************************
2 * xc_compression.c
3 *
4 * Checkpoint Compression using Page Delta Algorithm.
5 * - A LRU cache of recently dirtied guest pages is maintained.
6 * - For each dirty guest page in the checkpoint, if a previous version of the
7 * page exists in the cache, XOR both pages and send the non-zero sections
8 * to the receiver. The cache is then updated with the newer copy of guest page.
9 * - The receiver will XOR the non-zero sections against its copy of the guest
10 * page, thereby bringing the guest page up-to-date with the sender side.
11 *
12 * Copyright (c) 2011 Shriram Rajagopalan (rshriram@cs.ubc.ca).
13 *
14 * This library is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU Lesser General Public
16 * License as published by the Free Software Foundation;
17 * version 2.1 of the License.
18 *
19 * This library is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * Lesser General Public License for more details.
23 *
24 * You should have received a copy of the GNU Lesser General Public
25 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
26 *
27 */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <sys/types.h>
33 #include <inttypes.h>
34 #include "xc_private.h"
35 #include "xenctrl.h"
36 #include "xg_save_restore.h"
37 #include "xg_private.h"
38 #include "xc_dom.h"
39
40 /* Page Cache for Delta Compression*/
41 #define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192)
42
43 /* Internal page buffer to hold dirty pages of a checkpoint,
44 * to be compressed after the domain is resumed for execution.
45 */
46 #define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192)
47
/*
 * One slot of the delta cache: a node in a doubly linked LRU list
 * owning a page-sized buffer that holds the last-sent contents of a
 * guest pfn.
 */
struct cache_page
{
    char *page;              /* page-sized buffer (points into cache_base) */
    xen_pfn_t pfn;           /* guest pfn cached here, or INVALID_PFN if free */
    struct cache_page *next; /* towards the LRU (tail) end of the list */
    struct cache_page *prev; /* towards the MRU (head) end of the list */
};
55
/*
 * Per-domain compression state, created by xc_compression_create_context().
 * Pages are first queued (inputbuf/sendbuf_pfns) and later compressed in
 * batches against the LRU delta cache.
 */
struct compression_ctx
{
    /* compression buffer - holds compressed data; owned by the caller of
     * xc_compression_compress_pages(), borrowed for the duration of a batch */
    char *compbuf;
    unsigned long compbuf_size;
    unsigned long compbuf_pos;  /* write offset into compbuf */

    /* Page buffer to hold pages to be compressed (PAGE_BUFFER_SIZE bytes) */
    char *inputbuf;
    /* pfns of pages to be compressed; INVALID_PFN marks a raw
     * (non-delta-compressible, e.g. pagetable) page */
    xen_pfn_t *sendbuf_pfns;
    unsigned int pfns_len;    /* number of queued pages */
    unsigned int pfns_index;  /* resume point for a partially flushed batch */

    /* Compression Cache (LRU): pfn2cache maps pfn -> cache slot (or NULL),
     * cache is the slot array, page_list_head/tail bound the LRU list
     * (head = most recently used). */
    char *cache_base;
    struct cache_page **pfn2cache;
    struct cache_page *cache;
    struct cache_page *page_list_head;
    struct cache_page *page_list_tail;
    unsigned long dom_pfnlist_size;  /* p2m size; bounds valid pfns */
};
78
/*
 * Run header encoding: bit 7 selects the run type (0 = literal data
 * follows, 1 = skip unchanged words), bits 0-6 hold the run length
 * counted in 4-byte words.
 */
#define RUNFLAG 0
#define SKIPFLAG ((char)128)
#define FLAGMASK SKIPFLAG
#define LENMASK ((char)127)
83
84 /*
85 * see xg_save_restore.h for details on the compressed stream format.
86 * delta size = 4 bytes.
87 * run header = 1 byte (1 bit for runtype, 7bits for run length).
88 * i.e maximum size of a run = 127 * 4 = 508 bytes.
89 * Worst case compression: Entire page has changed.
90 * In the worst case, the size of the compressed page is
91 * 8 runs of 508 bytes + 1 run of 32 bytes + 9 run headers
92 * = 4105 bytes.
93 * We could detect this worst case and send the entire page with a
94 * FULL_PAGE marker, reducing the total size to 4097 bytes. The cost
95 * of this size reduction is an additional memcpy, on top of two previous
96 * memcpy (to the compressed stream and the cache page in the for loop).
97 *
98 * We might as well sacrifice an extra 8 bytes instead of a memcpy.
99 */
100 #define WORST_COMP_PAGE_SIZE (XC_PAGE_SIZE + 9)
101
102 /*
103 * A zero length skip indicates full page.
104 */
105 #define EMPTY_PAGE 0
106 #define FULL_PAGE SKIPFLAG
107 #define FULL_PAGE_SIZE (XC_PAGE_SIZE + 1)
108 #define MAX_DELTAS (XC_PAGE_SIZE/sizeof(uint32_t))
109
110 /*
111 * Add a pagetable page or a new page (uncached)
112 * if srcpage is a pagetable page, cache_page is null.
113 * if srcpage is a page that was not previously in the cache,
114 * cache_page points to a free page slot in the cache where
115 * this new page can be copied to.
116 */
/*
 * Append an uncompressed page to the compression stream.
 * If cache_page is non-NULL (a newly assigned cache slot), the source
 * page is also copied there so later checkpoints can delta against it;
 * pagetable pages pass cache_page == NULL and are never cached.
 * Returns FULL_PAGE_SIZE on success, or -1 if compbuf has no room.
 */
static int add_full_page(comp_ctx *ctx, char *srcpage, char *cache_page)
{
    char *out = ctx->compbuf + ctx->compbuf_pos;

    if ((ctx->compbuf_pos + FULL_PAGE_SIZE) > ctx->compbuf_size)
        return -1;

    if (cache_page)
        memcpy(cache_page, srcpage, XC_PAGE_SIZE);

    out[0] = FULL_PAGE;
    memcpy(out + 1, srcpage, XC_PAGE_SIZE);
    ctx->compbuf_pos += FULL_PAGE_SIZE;

    return FULL_PAGE_SIZE;
}
132
/*
 * Run-length encode the difference between srcpage (current contents)
 * and cache_page (contents at the previous checkpoint), appending the
 * encoding to ctx->compbuf.  Changed runs carry their literal data and
 * are also copied into cache_page, so the cache ends up matching
 * srcpage.  A page with no changes is emitted as a single EMPTY_PAGE
 * byte.  Returns the number of bytes appended, or -1 if compbuf cannot
 * hold a worst-case page (WORST_COMP_PAGE_SIZE).
 */
static int compress_page(comp_ctx *ctx, char *srcpage, char *cache_page)
{
    char *dest = (ctx->compbuf + ctx->compbuf_pos);
    uint32_t *new, *old;

    int off, runptr = 0;
    int wascopying = 0, copying = 0, bytes_skipped = 0;
    int complen = 0, pageoff = 0, runbytes = 0;

    char runlen = 0;

    /* Reserve worst-case space up front so no per-run bounds checks
     * are needed inside the loop. */
    if ( (ctx->compbuf_pos + WORST_COMP_PAGE_SIZE) > ctx->compbuf_size)
        return -1;

    /*
     * There are no alignment issues here since srcpage is
     * domU's page passed from xc_domain_save and cache_page is
     * a ptr to cache page (cache is page aligned).
     */
    new = (uint32_t*)srcpage;
    old = (uint32_t*)cache_page;

    /* One extra iteration (off == MAX_DELTAS) flushes the final run. */
    for (off = 0; off <= MAX_DELTAS; off++)
    {
        /*
         * At (off == MAX_DELTAS), we are processing the last run
         * in the page. Since there is no XORing, make wascopying != copying
         * to satisfy the if-block below.
         */
        copying = ((off < MAX_DELTAS) ? (old[off] != new[off]) : !wascopying);

        if (runlen)
        {
            /* switching between run types or current run is full */
            if ( (wascopying != copying) || (runlen == LENMASK) )
            {
                runbytes = runlen * sizeof(uint32_t);
                /* Tag the 7-bit length with the run-type flag bit. */
                runlen |= (wascopying ? RUNFLAG : SKIPFLAG);
                dest[complen++] = runlen;

                if (wascopying) /* RUNFLAG */
                {
                    pageoff = runptr * sizeof(uint32_t);
                    /* Emit literal data and refresh the cache copy. */
                    memcpy(dest + complen, srcpage + pageoff, runbytes);
                    memcpy(cache_page + pageoff, srcpage + pageoff, runbytes);
                    complen += runbytes;
                }
                else /* SKIPFLAG */
                {
                    bytes_skipped += runbytes;
                }

                runlen = 0;
                runptr = off;
            }
        }
        runlen++;
        wascopying = copying;
    }

    /*
     * Check for empty page: everything was skipped, so collapse the
     * encoding to the single-byte EMPTY_PAGE marker.
     */
    if (bytes_skipped == XC_PAGE_SIZE)
    {
        complen = 1;
        dest[0] = EMPTY_PAGE;
    }
    ctx->compbuf_pos += complen;

    return complen;
}
205
206 static
get_cache_page(comp_ctx * ctx,xen_pfn_t pfn,int * israw)207 char *get_cache_page(comp_ctx *ctx, xen_pfn_t pfn,
208 int *israw)
209 {
210 struct cache_page *item = NULL;
211
212 item = ctx->pfn2cache[pfn];
213
214 if (!item)
215 {
216 *israw = 1;
217
218 /* If the list is full, evict a page from the tail end. */
219 item = ctx->page_list_tail;
220 if (item->pfn != INVALID_PFN)
221 ctx->pfn2cache[item->pfn] = NULL;
222
223 item->pfn = pfn;
224 ctx->pfn2cache[pfn] = item;
225 }
226
227 /* if requested item is in cache move to head of list */
228 if (item != ctx->page_list_head)
229 {
230 if (item == ctx->page_list_tail)
231 {
232 /* item at tail of list. */
233 ctx->page_list_tail = item->prev;
234 (ctx->page_list_tail)->next = NULL;
235 }
236 else
237 {
238 /* item in middle of list */
239 item->prev->next = item->next;
240 item->next->prev = item->prev;
241 }
242
243 item->prev = NULL;
244 item->next = ctx->page_list_head;
245 (ctx->page_list_head)->prev = item;
246 ctx->page_list_head = item;
247 }
248
249 return (ctx->page_list_head)->page;
250 }
251
252 /* Remove pagetable pages from cache and move to tail, as free pages */
253 static
invalidate_cache_page(comp_ctx * ctx,xen_pfn_t pfn)254 void invalidate_cache_page(comp_ctx *ctx, xen_pfn_t pfn)
255 {
256 struct cache_page *item = NULL;
257
258 item = ctx->pfn2cache[pfn];
259 if (item)
260 {
261 if (item != ctx->page_list_tail)
262 {
263 /* item at head of list */
264 if (item == ctx->page_list_head)
265 {
266 ctx->page_list_head = (ctx->page_list_head)->next;
267 (ctx->page_list_head)->prev = NULL;
268 }
269 else /* item in middle of list */
270 {
271 item->prev->next = item->next;
272 item->next->prev = item->prev;
273 }
274
275 item->next = NULL;
276 item->prev = ctx->page_list_tail;
277 (ctx->page_list_tail)->next = item;
278 ctx->page_list_tail = item;
279 }
280 ctx->pfn2cache[pfn] = NULL;
281 (ctx->page_list_tail)->pfn = INVALID_PFN;
282 }
283 }
284
/*
 * Queue one dirty guest page for later compression.
 * israw marks pages (e.g. pagetable pages) that must be sent in full;
 * their cache entry, if any, is invalidated and the queue slot is
 * tagged INVALID_PFN.
 * Returns 0 on success, -1 when the internal buffer is now full (the
 * caller must flush via xc_compression_compress_pages before adding
 * more), or -2 for an out-of-range pfn.
 */
int xc_compression_add_page(xc_interface *xch, comp_ctx *ctx,
                            char *page, xen_pfn_t pfn, int israw)
{
    /*
     * pfn indexes ctx->pfn2cache[], which has dom_pfnlist_size entries
     * (see xc_compression_create_context), so pfn == dom_pfnlist_size
     * is already out of bounds.  The previous '>' test was off by one.
     */
    if (pfn >= ctx->dom_pfnlist_size)
    {
        ERROR("Invalid pfn passed into "
              "xc_compression_add_page %" PRIpfn "\n", pfn);
        return -2;
    }

    /* pagetable page: never delta-compressed, so drop any stale cache
     * entry and mark the queue slot as raw. */
    if (israw)
        invalidate_cache_page(ctx, pfn);
    ctx->sendbuf_pfns[ctx->pfns_len] = israw ? INVALID_PFN : pfn;
    memcpy(ctx->inputbuf + ctx->pfns_len * XC_PAGE_SIZE, page, XC_PAGE_SIZE);
    ctx->pfns_len++;

    /* check if we have run out of space. If so,
     * we need to synchronously compress the pages and flush them out
     */
    if (ctx->pfns_len == NRPAGES(PAGE_BUFFER_SIZE))
        return -1;
    return 0;
}
309
/*
 * Compress all queued pages into compbuf.  Pages queued as raw
 * (INVALID_PFN) are emitted in full; others are delta-compressed
 * against their cached previous version.  If compbuf fills up the
 * function returns -1 with *compbuf_len set to the bytes produced so
 * far; the caller flushes compbuf and calls again to resume from the
 * first unsent page.  Returns 1 when the whole queue was compressed,
 * 0 when there was nothing to do (the queue is then reset).
 */
int xc_compression_compress_pages(xc_interface *xch, comp_ctx *ctx,
                                  char *compbuf, unsigned long compbuf_size,
                                  unsigned long *compbuf_len)
{
    int rc = 1;

    /* Empty queue or fully drained batch: reset and report "done". */
    if (!ctx->pfns_len || (ctx->pfns_index == ctx->pfns_len)) {
        ctx->pfns_len = ctx->pfns_index = 0;
        return 0;
    }

    ctx->compbuf = compbuf;
    ctx->compbuf_size = compbuf_size;
    ctx->compbuf_pos = 0;

    while (ctx->pfns_index < ctx->pfns_len)
    {
        xen_pfn_t pfn = ctx->sendbuf_pfns[ctx->pfns_index];
        char *page = ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE;
        char *cached = NULL;
        int israw = 0;

        if (pfn == INVALID_PFN)
            israw = 1;
        else
            cached = get_cache_page(ctx, pfn, &israw);

        rc = israw ? (add_full_page(ctx, page, cached) >= 0)
                   : (compress_page(ctx, page, cached) >= 0);

        if (!rc)
        {
            /* Out of space in outbuf!  Leave pfns_index on this page
             * so the next call resumes here after a flush. */
            rc = -1;
            break;
        }

        ctx->pfns_index++;
    }

    if (compbuf_len)
        *compbuf_len = ctx->compbuf_pos;

    return rc;
}
356
357 inline
xc_compression_reset_pagebuf(xc_interface * xch,comp_ctx * ctx)358 void xc_compression_reset_pagebuf(xc_interface *xch, comp_ctx *ctx)
359 {
360 ctx->pfns_index = ctx->pfns_len = 0;
361 }
362
/*
 * Receiver side: decode one page from the compressed stream in
 * compbuf, starting at *compbuf_pos, applying it to destpage.
 * EMPTY_PAGE leaves destpage untouched; FULL_PAGE overwrites it
 * wholesale; otherwise runs of literal data (RUNFLAG) overwrite the
 * corresponding region while skip runs (SKIPFLAG) leave the receiver's
 * existing bytes in place.  On success *compbuf_pos is advanced past
 * the page's encoding and 0 is returned; on any malformed or truncated
 * input -1 is returned and *compbuf_pos is unchanged.
 */
int xc_compression_uncompress_page(xc_interface *xch, char *compbuf,
                                   unsigned long compbuf_size,
                                   unsigned long *compbuf_pos, char *destpage)
{
    unsigned long pos;
    unsigned int len = 0, pagepos = 0;
    char flag;

    pos = *compbuf_pos;
    if (pos >= compbuf_size)
    {
        ERROR("Out of bounds exception in compression buffer (a):"
              "read ptr:%lu, bufsize = %lu\n",
              *compbuf_pos, compbuf_size);
        return -1;
    }

    switch (compbuf[pos])
    {
    case EMPTY_PAGE:
        /* Page unchanged since last checkpoint: consume the marker only. */
        pos++;
        break;

    case FULL_PAGE:
        {
            /* Check if the input buffer has 4KB of data */
            if ((pos + FULL_PAGE_SIZE) > compbuf_size)
            {
                ERROR("Out of bounds exception in compression buffer (b):"
                      "read ptr = %lu, bufsize = %lu\n",
                      *compbuf_pos, compbuf_size);
                return -1;
            }
            memcpy(destpage, &compbuf[pos + 1], XC_PAGE_SIZE);
            pos += FULL_PAGE_SIZE;
        }
        break;

    default: /* Normal page with one or more runs */
        {
            do
            {
                /* Split the run header into type flag and byte length. */
                flag = compbuf[pos] & FLAGMASK;
                len = (compbuf[pos] & LENMASK) * sizeof(uint32_t);
                /* Sanity Check: Zero-length runs are allowed only for
                 * FULL_PAGE and EMPTY_PAGE.
                 */
                if (!len)
                {
                    ERROR("Zero length run encountered for normal page: "
                          "buffer (d):read ptr = %lu, flag = %u, "
                          "bufsize = %lu, pagepos = %u\n",
                          pos, (unsigned int)flag, compbuf_size, pagepos);
                    return -1;
                }

                pos++;
                if (flag == RUNFLAG)
                {
                    /* Check if the input buffer has len bytes of data
                     * and whether it would fit in the destination page.
                     */
                    if (((pos + len) > compbuf_size)
                        || ((pagepos + len) > XC_PAGE_SIZE))
                    {
                        ERROR("Out of bounds exception in compression "
                              "buffer (c):read ptr = %lu, runlen = %u, "
                              "bufsize = %lu, pagepos = %u\n",
                              pos, len, compbuf_size, pagepos);
                        return -1;
                    }
                    memcpy(&destpage[pagepos], &compbuf[pos], len);
                    pos += len;
                }
                /* Skip runs only advance the page cursor; the receiver's
                 * existing bytes already match the sender's. */
                pagepos += len;
            } while ((pagepos < XC_PAGE_SIZE) && (pos < compbuf_size));

            /* Make sure we have copied/skipped 4KB worth of data */
            if (pagepos != XC_PAGE_SIZE)
            {
                ERROR("Invalid data in compression buffer:"
                      "read ptr = %lu, bufsize = %lu, pagepos = %u\n",
                      pos, compbuf_size, pagepos);
                return -1;
            }
        }
    }
    *compbuf_pos = pos;
    return 0;
}
453
/*
 * Release every allocation owned by a compression context, then the
 * context itself.  Safe on a partially constructed (zero-filled)
 * context; a NULL ctx is a no-op.
 */
void xc_compression_free_context(xc_interface *xch, comp_ctx *ctx)
{
    if (ctx == NULL)
        return;

    free(ctx->cache);
    free(ctx->pfn2cache);
    free(ctx->cache_base);
    free(ctx->sendbuf_pfns);
    free(ctx->inputbuf);
    free(ctx);
}
465
xc_compression_create_context(xc_interface * xch,unsigned long p2m_size)466 comp_ctx *xc_compression_create_context(xc_interface *xch,
467 unsigned long p2m_size)
468 {
469 unsigned long i;
470 comp_ctx *ctx = NULL;
471 unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE;
472
473 ctx = (comp_ctx *)malloc(sizeof(comp_ctx));
474 if (!ctx)
475 {
476 ERROR("Failed to allocate compression_ctx\n");
477 goto error;
478 }
479 memset(ctx, 0, sizeof(comp_ctx));
480
481 ctx->inputbuf = xc_memalign(xch, XC_PAGE_SIZE, PAGE_BUFFER_SIZE);
482 if (!ctx->inputbuf)
483 {
484 ERROR("Failed to allocate page buffer\n");
485 goto error;
486 }
487
488 ctx->cache_base = xc_memalign(xch, XC_PAGE_SIZE, DELTA_CACHE_SIZE);
489 if (!ctx->cache_base)
490 {
491 ERROR("Failed to allocate delta cache\n");
492 goto error;
493 }
494
495 ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) *
496 sizeof(xen_pfn_t));
497 if (!ctx->sendbuf_pfns)
498 {
499 ERROR("Could not alloc sendbuf_pfns\n");
500 goto error;
501 }
502 memset(ctx->sendbuf_pfns, -1,
503 NRPAGES(PAGE_BUFFER_SIZE) * sizeof(xen_pfn_t));
504
505 ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *));
506 if (!ctx->pfn2cache)
507 {
508 ERROR("Could not alloc pfn2cache map\n");
509 goto error;
510 }
511
512 ctx->cache = malloc(num_cache_pages * sizeof(struct cache_page));
513 if (!ctx->cache)
514 {
515 ERROR("Could not alloc compression cache\n");
516 goto error;
517 }
518
519 for (i = 0; i < num_cache_pages; i++)
520 {
521 ctx->cache[i].pfn = INVALID_PFN;
522 ctx->cache[i].page = ctx->cache_base + i * XC_PAGE_SIZE;
523 ctx->cache[i].prev = (i == 0) ? NULL : &(ctx->cache[i - 1]);
524 ctx->cache[i].next = ((i+1) == num_cache_pages)? NULL :
525 &(ctx->cache[i + 1]);
526 }
527 ctx->page_list_head = &(ctx->cache[0]);
528 ctx->page_list_tail = &(ctx->cache[num_cache_pages -1]);
529 ctx->dom_pfnlist_size = p2m_size;
530
531 return ctx;
532 error:
533 xc_compression_free_context(xch, ctx);
534 return NULL;
535 }
536
537 /*
538 * Local variables:
539 * mode: C
540 * c-file-style: "BSD"
541 * c-basic-offset: 4
542 * tab-width: 4
543 * indent-tabs-mode: nil
544 * End:
545 */
546