1 /******************************************************************************
2  * xc_compression.c
3  *
4  * Checkpoint Compression using Page Delta Algorithm.
5  * - A LRU cache of recently dirtied guest pages is maintained.
6  * - For each dirty guest page in the checkpoint, if a previous version of the
7  * page exists in the cache, XOR both pages and send the non-zero sections
8  * to the receiver. The cache is then updated with the newer copy of guest page.
9  * - The receiver will XOR the non-zero sections against its copy of the guest
10  * page, thereby bringing the guest page up-to-date with the sender side.
11  *
12  * Copyright (c) 2011 Shriram Rajagopalan (rshriram@cs.ubc.ca).
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation;
17  * version 2.1 of the License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <sys/types.h>
33 #include <inttypes.h>
34 #include "xc_private.h"
35 #include "xenctrl.h"
36 #include "xg_save_restore.h"
37 #include "xg_private.h"
38 #include "xc_dom.h"
39 
40 /* Page Cache for Delta Compression*/
41 #define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192)
42 
43 /* Internal page buffer to hold dirty pages of a checkpoint,
44  * to be compressed after the domain is resumed for execution.
45  */
46 #define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192)
47 
48 struct cache_page
49 {
50     char *page;
51     xen_pfn_t pfn;
52     struct cache_page *next;
53     struct cache_page *prev;
54 };
55 
56 struct compression_ctx
57 {
58     /* compression buffer - holds compressed data */
59     char *compbuf;
60     unsigned long compbuf_size;
61     unsigned long compbuf_pos;
62 
63     /* Page buffer to hold pages to be compressed */
64     char *inputbuf;
65     /* pfns of pages to be compressed */
66     xen_pfn_t *sendbuf_pfns;
67     unsigned int pfns_len;
68     unsigned int pfns_index;
69 
70     /* Compression Cache (LRU) */
71     char *cache_base;
72     struct cache_page **pfn2cache;
73     struct cache_page *cache;
74     struct cache_page *page_list_head;
75     struct cache_page *page_list_tail;
76     unsigned long dom_pfnlist_size;
77 };
78 
79 #define RUNFLAG 0
80 #define SKIPFLAG ((char)128)
81 #define FLAGMASK SKIPFLAG
82 #define LENMASK ((char)127)
83 
84 /*
85  * see xg_save_restore.h for details on the compressed stream format.
86  * delta size = 4 bytes.
87  * run header = 1 byte (1 bit for runtype, 7bits for run length).
88  *  i.e maximum size of a run = 127 * 4 = 508 bytes.
89  * Worst case compression: Entire page has changed.
90  * In the worst case, the size of the compressed page is
91  *  8 runs of 508 bytes + 1 run of 32 bytes + 9 run headers
92  *  = 4105 bytes.
93  * We could detect this worst case and send the entire page with a
94  * FULL_PAGE marker, reducing the total size to 4097 bytes. The cost
95  * of this size reduction is an additional memcpy, on top of two previous
96  * memcpy (to the compressed stream and the cache page in the for loop).
97  *
98  * We might as well sacrifice an extra 8 bytes instead of a memcpy.
99  */
100 #define WORST_COMP_PAGE_SIZE (XC_PAGE_SIZE + 9)
101 
102 /*
103  * A zero length skip indicates full page.
104  */
105 #define EMPTY_PAGE 0
106 #define FULL_PAGE SKIPFLAG
107 #define FULL_PAGE_SIZE (XC_PAGE_SIZE + 1)
108 #define MAX_DELTAS (XC_PAGE_SIZE/sizeof(uint32_t))
109 
110 /*
111  * Add a pagetable page or a new page (uncached)
112  * if srcpage is a pagetable page, cache_page is null.
113  * if srcpage is a page that was not previously in the cache,
114  *  cache_page points to a free page slot in the cache where
115  *  this new page can be copied to.
116  */
add_full_page(comp_ctx * ctx,char * srcpage,char * cache_page)117 static int add_full_page(comp_ctx *ctx, char *srcpage, char *cache_page)
118 {
119     char *dest = (ctx->compbuf + ctx->compbuf_pos);
120 
121     if ( (ctx->compbuf_pos + FULL_PAGE_SIZE) > ctx->compbuf_size)
122         return -1;
123 
124     if (cache_page)
125         memcpy(cache_page, srcpage, XC_PAGE_SIZE);
126     dest[0] = FULL_PAGE;
127     memcpy(&dest[1], srcpage, XC_PAGE_SIZE);
128     ctx->compbuf_pos += FULL_PAGE_SIZE;
129 
130     return FULL_PAGE_SIZE;
131 }
132 
compress_page(comp_ctx * ctx,char * srcpage,char * cache_page)133 static int compress_page(comp_ctx *ctx, char *srcpage, char *cache_page)
134 {
135     char *dest = (ctx->compbuf + ctx->compbuf_pos);
136     uint32_t *new, *old;
137 
138     int off, runptr = 0;
139     int wascopying = 0, copying = 0, bytes_skipped = 0;
140     int complen = 0, pageoff = 0, runbytes = 0;
141 
142     char runlen = 0;
143 
144     if ( (ctx->compbuf_pos + WORST_COMP_PAGE_SIZE) > ctx->compbuf_size)
145         return -1;
146 
147     /*
148      * There are no alignment issues here since srcpage is
149      * domU's page passed from xc_domain_save and cache_page is
150      * a ptr to cache page (cache is page aligned).
151      */
152     new = (uint32_t*)srcpage;
153     old = (uint32_t*)cache_page;
154 
155     for (off = 0; off <= MAX_DELTAS; off++)
156     {
157         /*
158          * At (off == MAX_DELTAS), we are processing the last run
159          * in the page. Since there is no XORing, make wascopying != copying
160          * to satisfy the if-block below.
161          */
162         copying = ((off < MAX_DELTAS) ? (old[off] != new[off]) : !wascopying);
163 
164         if (runlen)
165         {
166             /* switching between run types or current run is full */
167             if ( (wascopying != copying) || (runlen == LENMASK) )
168             {
169                 runbytes = runlen * sizeof(uint32_t);
170                 runlen |= (wascopying ? RUNFLAG : SKIPFLAG);
171                 dest[complen++] = runlen;
172 
173                 if (wascopying) /* RUNFLAG */
174                 {
175                     pageoff = runptr * sizeof(uint32_t);
176                     memcpy(dest + complen, srcpage + pageoff, runbytes);
177                     memcpy(cache_page + pageoff, srcpage + pageoff, runbytes);
178                     complen += runbytes;
179                 }
180                 else /* SKIPFLAG */
181                 {
182                     bytes_skipped += runbytes;
183                 }
184 
185                 runlen = 0;
186                 runptr = off;
187             }
188         }
189         runlen++;
190         wascopying = copying;
191     }
192 
193     /*
194      * Check for empty page.
195      */
196     if (bytes_skipped == XC_PAGE_SIZE)
197     {
198         complen = 1;
199         dest[0] = EMPTY_PAGE;
200     }
201     ctx->compbuf_pos += complen;
202 
203     return complen;
204 }
205 
206 static
get_cache_page(comp_ctx * ctx,xen_pfn_t pfn,int * israw)207 char *get_cache_page(comp_ctx *ctx, xen_pfn_t pfn,
208                      int *israw)
209 {
210     struct cache_page *item = NULL;
211 
212     item = ctx->pfn2cache[pfn];
213 
214     if (!item)
215     {
216         *israw = 1;
217 
218         /* If the list is full, evict a page from the tail end. */
219         item = ctx->page_list_tail;
220         if (item->pfn != INVALID_PFN)
221             ctx->pfn2cache[item->pfn] = NULL;
222 
223         item->pfn = pfn;
224         ctx->pfn2cache[pfn] = item;
225     }
226 
227     /* 	if requested item is in cache move to head of list */
228     if (item != ctx->page_list_head)
229     {
230         if (item == ctx->page_list_tail)
231         {
232             /* item at tail of list. */
233             ctx->page_list_tail = item->prev;
234             (ctx->page_list_tail)->next = NULL;
235         }
236         else
237         {
238             /* item in middle of list */
239             item->prev->next = item->next;
240             item->next->prev = item->prev;
241         }
242 
243         item->prev = NULL;
244         item->next = ctx->page_list_head;
245         (ctx->page_list_head)->prev = item;
246         ctx->page_list_head = item;
247     }
248 
249     return (ctx->page_list_head)->page;
250 }
251 
252 /* Remove pagetable pages from cache and move to tail, as free pages */
253 static
invalidate_cache_page(comp_ctx * ctx,xen_pfn_t pfn)254 void invalidate_cache_page(comp_ctx *ctx, xen_pfn_t pfn)
255 {
256     struct cache_page *item = NULL;
257 
258     item = ctx->pfn2cache[pfn];
259     if (item)
260     {
261         if (item != ctx->page_list_tail)
262         {
263             /* item at head of list */
264             if (item == ctx->page_list_head)
265             {
266                 ctx->page_list_head = (ctx->page_list_head)->next;
267                 (ctx->page_list_head)->prev = NULL;
268             }
269             else /* item in middle of list */
270             {
271                 item->prev->next = item->next;
272                 item->next->prev = item->prev;
273             }
274 
275             item->next = NULL;
276             item->prev = ctx->page_list_tail;
277             (ctx->page_list_tail)->next = item;
278             ctx->page_list_tail = item;
279         }
280         ctx->pfn2cache[pfn] = NULL;
281         (ctx->page_list_tail)->pfn = INVALID_PFN;
282     }
283 }
284 
xc_compression_add_page(xc_interface * xch,comp_ctx * ctx,char * page,xen_pfn_t pfn,int israw)285 int xc_compression_add_page(xc_interface *xch, comp_ctx *ctx,
286                             char *page, xen_pfn_t pfn, int israw)
287 {
288     if (pfn > ctx->dom_pfnlist_size)
289     {
290         ERROR("Invalid pfn passed into "
291               "xc_compression_add_page %" PRIpfn "\n", pfn);
292         return -2;
293     }
294 
295     /* pagetable page */
296     if (israw)
297         invalidate_cache_page(ctx, pfn);
298     ctx->sendbuf_pfns[ctx->pfns_len] = israw ? INVALID_PFN : pfn;
299     memcpy(ctx->inputbuf + ctx->pfns_len * XC_PAGE_SIZE, page, XC_PAGE_SIZE);
300     ctx->pfns_len++;
301 
302     /* check if we have run out of space. If so,
303      * we need to synchronously compress the pages and flush them out
304      */
305     if (ctx->pfns_len == NRPAGES(PAGE_BUFFER_SIZE))
306         return -1;
307     return 0;
308 }
309 
xc_compression_compress_pages(xc_interface * xch,comp_ctx * ctx,char * compbuf,unsigned long compbuf_size,unsigned long * compbuf_len)310 int xc_compression_compress_pages(xc_interface *xch, comp_ctx *ctx,
311                                   char *compbuf, unsigned long compbuf_size,
312                                   unsigned long *compbuf_len)
313 {
314     char *cache_copy = NULL, *current_page = NULL;
315     int israw, rc = 1;
316 
317     if (!ctx->pfns_len || (ctx->pfns_index == ctx->pfns_len)) {
318         ctx->pfns_len = ctx->pfns_index = 0;
319         return 0;
320     }
321 
322     ctx->compbuf_pos = 0;
323     ctx->compbuf = compbuf;
324     ctx->compbuf_size = compbuf_size;
325 
326     for (; ctx->pfns_index < ctx->pfns_len; ctx->pfns_index++)
327     {
328         israw = 0;
329         cache_copy = NULL;
330         current_page = ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE;
331 
332         if (ctx->sendbuf_pfns[ctx->pfns_index] == INVALID_PFN)
333             israw = 1;
334         else
335             cache_copy = get_cache_page(ctx,
336                                         ctx->sendbuf_pfns[ctx->pfns_index],
337                                         &israw);
338 
339         if (israw)
340             rc = (add_full_page(ctx, current_page, cache_copy) >= 0);
341         else
342             rc = (compress_page(ctx, current_page, cache_copy) >= 0);
343 
344         if ( !rc )
345         {
346             /* Out of space in outbuf! flush and come back */
347             rc = -1;
348             break;
349         }
350     }
351     if (compbuf_len)
352         *compbuf_len = ctx->compbuf_pos;
353 
354     return rc;
355 }
356 
357 inline
xc_compression_reset_pagebuf(xc_interface * xch,comp_ctx * ctx)358 void xc_compression_reset_pagebuf(xc_interface *xch, comp_ctx *ctx)
359 {
360     ctx->pfns_index = ctx->pfns_len = 0;
361 }
362 
xc_compression_uncompress_page(xc_interface * xch,char * compbuf,unsigned long compbuf_size,unsigned long * compbuf_pos,char * destpage)363 int xc_compression_uncompress_page(xc_interface *xch, char *compbuf,
364                                    unsigned long compbuf_size,
365                                    unsigned long *compbuf_pos, char *destpage)
366 {
367     unsigned long pos;
368     unsigned int len = 0, pagepos = 0;
369     char flag;
370 
371     pos = *compbuf_pos;
372     if (pos >= compbuf_size)
373     {
374         ERROR("Out of bounds exception in compression buffer (a):"
375               "read ptr:%lu, bufsize = %lu\n",
376               *compbuf_pos, compbuf_size);
377         return -1;
378     }
379 
380     switch (compbuf[pos])
381     {
382     case EMPTY_PAGE:
383         pos++;
384         break;
385 
386     case FULL_PAGE:
387         {
388             /* Check if the input buffer has 4KB of data */
389             if ((pos + FULL_PAGE_SIZE) > compbuf_size)
390             {
391                 ERROR("Out of bounds exception in compression buffer (b):"
392                       "read ptr = %lu, bufsize = %lu\n",
393                       *compbuf_pos, compbuf_size);
394                 return -1;
395             }
396             memcpy(destpage, &compbuf[pos + 1], XC_PAGE_SIZE);
397             pos += FULL_PAGE_SIZE;
398         }
399         break;
400 
401     default: /* Normal page with one or more runs */
402         {
403             do
404             {
405                 flag = compbuf[pos] & FLAGMASK;
406                 len = (compbuf[pos] & LENMASK) * sizeof(uint32_t);
407                 /* Sanity Check: Zero-length runs are allowed only for
408                  * FULL_PAGE and EMPTY_PAGE.
409                  */
410                 if (!len)
411                 {
412                     ERROR("Zero length run encountered for normal page: "
413                           "buffer (d):read ptr = %lu, flag = %u, "
414                           "bufsize = %lu, pagepos = %u\n",
415                           pos, (unsigned int)flag, compbuf_size, pagepos);
416                     return -1;
417                 }
418 
419                 pos++;
420                 if (flag == RUNFLAG)
421                 {
422                     /* Check if the input buffer has len bytes of data
423                      * and whether it would fit in the destination page.
424                      */
425                     if (((pos + len) > compbuf_size)
426                         || ((pagepos + len) > XC_PAGE_SIZE))
427                     {
428                         ERROR("Out of bounds exception in compression "
429                               "buffer (c):read ptr = %lu, runlen = %u, "
430                               "bufsize = %lu, pagepos = %u\n",
431                               pos, len, compbuf_size, pagepos);
432                         return -1;
433                     }
434                     memcpy(&destpage[pagepos], &compbuf[pos], len);
435                     pos += len;
436                 }
437                 pagepos += len;
438             } while ((pagepos < XC_PAGE_SIZE) && (pos < compbuf_size));
439 
440             /* Make sure we have copied/skipped 4KB worth of data */
441             if (pagepos != XC_PAGE_SIZE)
442             {
443                 ERROR("Invalid data in compression buffer:"
444                       "read ptr = %lu, bufsize = %lu, pagepos = %u\n",
445                       pos, compbuf_size, pagepos);
446                 return -1;
447             }
448         }
449     }
450     *compbuf_pos = pos;
451     return 0;
452 }
453 
xc_compression_free_context(xc_interface * xch,comp_ctx * ctx)454 void xc_compression_free_context(xc_interface *xch, comp_ctx *ctx)
455 {
456     if (!ctx) return;
457 
458     free(ctx->inputbuf);
459     free(ctx->sendbuf_pfns);
460     free(ctx->cache_base);
461     free(ctx->pfn2cache);
462     free(ctx->cache);
463     free(ctx);
464 }
465 
xc_compression_create_context(xc_interface * xch,unsigned long p2m_size)466 comp_ctx *xc_compression_create_context(xc_interface *xch,
467                                         unsigned long p2m_size)
468 {
469     unsigned long i;
470     comp_ctx *ctx = NULL;
471     unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE;
472 
473     ctx = (comp_ctx *)malloc(sizeof(comp_ctx));
474     if (!ctx)
475     {
476         ERROR("Failed to allocate compression_ctx\n");
477         goto error;
478     }
479     memset(ctx, 0, sizeof(comp_ctx));
480 
481     ctx->inputbuf = xc_memalign(xch, XC_PAGE_SIZE, PAGE_BUFFER_SIZE);
482     if (!ctx->inputbuf)
483     {
484         ERROR("Failed to allocate page buffer\n");
485         goto error;
486     }
487 
488     ctx->cache_base = xc_memalign(xch, XC_PAGE_SIZE, DELTA_CACHE_SIZE);
489     if (!ctx->cache_base)
490     {
491         ERROR("Failed to allocate delta cache\n");
492         goto error;
493     }
494 
495     ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) *
496                                sizeof(xen_pfn_t));
497     if (!ctx->sendbuf_pfns)
498     {
499         ERROR("Could not alloc sendbuf_pfns\n");
500         goto error;
501     }
502     memset(ctx->sendbuf_pfns, -1,
503            NRPAGES(PAGE_BUFFER_SIZE) * sizeof(xen_pfn_t));
504 
505     ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *));
506     if (!ctx->pfn2cache)
507     {
508         ERROR("Could not alloc pfn2cache map\n");
509         goto error;
510     }
511 
512     ctx->cache = malloc(num_cache_pages * sizeof(struct cache_page));
513     if (!ctx->cache)
514     {
515         ERROR("Could not alloc compression cache\n");
516         goto error;
517     }
518 
519     for (i = 0; i < num_cache_pages; i++)
520     {
521         ctx->cache[i].pfn = INVALID_PFN;
522         ctx->cache[i].page = ctx->cache_base + i * XC_PAGE_SIZE;
523         ctx->cache[i].prev = (i == 0) ? NULL : &(ctx->cache[i - 1]);
524         ctx->cache[i].next = ((i+1) == num_cache_pages)? NULL :
525             &(ctx->cache[i + 1]);
526     }
527     ctx->page_list_head = &(ctx->cache[0]);
528     ctx->page_list_tail = &(ctx->cache[num_cache_pages -1]);
529     ctx->dom_pfnlist_size = p2m_size;
530 
531     return ctx;
532 error:
533     xc_compression_free_context(xch, ctx);
534     return NULL;
535 }
536 
537 /*
538  * Local variables:
539  * mode: C
540  * c-file-style: "BSD"
541  * c-basic-offset: 4
542  * tab-width: 4
543  * indent-tabs-mode: nil
544  * End:
545  */
546