1 /******************************************************************************
2  * xc_tmem.c
3  *
4  * Copyright (C) 2008 Oracle Corp.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation;
9  * version 2.1 of the License.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "xc_private.h"
21 #include <inttypes.h>
22 #include <assert.h>
23 #include <xen/tmem.h>
24 
/*
 * Issue a tmem control sysctl to the hypervisor.
 *
 * @pool_id: pool the command operates on (ignored by some commands).
 * @cmd:     a XEN_SYSCTL_TMEM_OP_* control command.
 * @cli_id:  client (domain) id the command applies to.
 * @len:     size in bytes of @buf; zero for commands that carry no buffer.
 * @arg:     command-specific scalar argument.
 * @buf:     optional data buffer; direction depends on @cmd (see below).
 *
 * Returns the sysctl's result, or -1 with errno set on error.
 */
int xc_tmem_control(xc_interface *xch,
                    int32_t pool_id,
                    uint32_t cmd,
                    uint32_t cli_id,
                    uint32_t len,
                    uint32_t arg,
                    void *buf)
{
    DECLARE_SYSCTL;
    /* Most commands copy data out of the hypervisor. */
    DECLARE_HYPERCALL_BOUNCE(buf, len, XC_HYPERCALL_BUFFER_BOUNCE_OUT);
    int rc;

    sysctl.cmd = XEN_SYSCTL_tmem_op;
    sysctl.u.tmem_op.cmd = cmd;
    sysctl.u.tmem_op.pool_id = pool_id;
    sysctl.u.tmem_op.cli_id = cli_id;
    sysctl.u.tmem_op.arg = arg;
    sysctl.u.tmem_op.len = len;
    sysctl.u.tmem_op.pad = 0;
    sysctl.u.tmem_op.oid.oid[0] = 0;
    sysctl.u.tmem_op.oid.oid[1] = 0;
    sysctl.u.tmem_op.oid.oid[2] = 0;

    /* These two commands pass data into the hypervisor instead. */
    switch ( cmd )
    {
    case XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO:
    case XEN_SYSCTL_TMEM_OP_SET_AUTH:
        HYPERCALL_BOUNCE_SET_DIR(buf, XC_HYPERCALL_BUFFER_BOUNCE_IN);
        break;
    default:
        break;
    }

    if ( len != 0 )
    {
        if ( !buf )
        {
            errno = EINVAL;
            return -1;
        }
        if ( xc_hypercall_bounce_pre(xch, buf) )
        {
            PERROR("Could not bounce buffer for tmem control hypercall");
            return -1;
        }
    }

    set_xen_guest_handle(sysctl.u.tmem_op.u.buf, buf);

    rc = do_sysctl(xch, &sysctl);

    if ( len != 0 )
        xc_hypercall_bounce_post(xch, buf);

    return rc;
}
74 
/*
 * Like xc_tmem_control(), but targets a specific object via @oid
 * (e.g. XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE / RESTORE_FLUSH_PAGE).
 *
 * Returns the sysctl's result, or -1 with errno set on error.
 */
int xc_tmem_control_oid(xc_interface *xch,
                        int32_t pool_id,
                        uint32_t cmd,
                        uint32_t cli_id,
                        uint32_t len,
                        uint32_t arg,
                        struct xen_tmem_oid oid,
                        void *buf)
{
    DECLARE_SYSCTL;
    /* Data always flows out of the hypervisor for OID-based commands. */
    DECLARE_HYPERCALL_BOUNCE(buf, len, XC_HYPERCALL_BUFFER_BOUNCE_OUT);
    int rc;

    /* A buffer is mandatory whenever a non-zero length is given. */
    if ( len != 0 && buf == NULL )
    {
        errno = EINVAL;
        return -1;
    }

    sysctl.cmd = XEN_SYSCTL_tmem_op;
    sysctl.u.tmem_op.cmd = cmd;
    sysctl.u.tmem_op.pool_id = pool_id;
    sysctl.u.tmem_op.cli_id = cli_id;
    sysctl.u.tmem_op.arg = arg;
    sysctl.u.tmem_op.len = len;
    sysctl.u.tmem_op.pad = 0;
    sysctl.u.tmem_op.oid = oid;

    if ( len != 0 && xc_hypercall_bounce_pre(xch, buf) )
    {
        PERROR("Could not bounce buffer for tmem control (OID) hypercall");
        return -1;
    }

    set_xen_guest_handle(sysctl.u.tmem_op.u.buf, buf);

    rc = do_sysctl(xch, &sysctl);

    if ( len != 0 )
        xc_hypercall_bounce_post(xch, buf);

    return rc;
}
120 
/*
 * Parse a UUID of the form xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx into two
 * 64-bit halves: the first 16 hex digits go into *uuid_hi, the last 16
 * into *uuid_lo.  Hex digits may be upper or lower case.  As a quirk
 * preserved from the original, a single hex digit is also accepted
 * (it accumulates into *uuid_hi).
 *
 * Returns 0 on success, -1 on malformed input.
 */
static int xc_tmem_uuid_parse(char *uuid_str, uint64_t *uuid_lo, uint64_t *uuid_hi)
{
    uint64_t *dest = uuid_hi;
    const char *s = uuid_str;
    int pos = 0;

    *uuid_lo = 0;
    *uuid_hi = 0;

    while ( pos != 36 && *s != '\0' )
    {
        int c = *s;

        /* Dash separators at the canonical UUID positions. */
        if ( pos == 8 || pos == 13 || pos == 18 || pos == 23 )
        {
            if ( c != '-' )
                return -1;
            if ( pos == 18 )
                dest = uuid_lo;  /* second half starts after the third dash */
        }
        else
        {
            int val;

            if ( c >= '0' && c <= '9' )
                val = c - '0';
            else if ( c >= 'A' && c <= 'F' )
                val = c - 'A' + 10;
            else if ( c >= 'a' && c <= 'f' )
                val = c - 'a' + 10;
            else
                return -1;
            *dest = (*dest << 4) | val;
        }
        s++;
        pos++;
    }

    /* Accept exactly 36 chars (or the 1-digit quirk), with no trailing text. */
    return ((pos == 1 || pos == 36) && *s == '\0') ? 0 : -1;
}
152 
/*
 * Grant (enable != 0) or revoke a client's authorization to access the
 * shared pool identified by @uuid_str.
 *
 * @cli_id:   client (domain) id to (de)authorize.
 * @uuid_str: UUID in xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx form.
 * @enable:   non-zero to authorize, zero to deauthorize.
 *
 * Returns the result of the SET_AUTH sysctl, or -1 with errno set.
 */
int xc_tmem_auth(xc_interface *xch,
                 int cli_id,
                 char *uuid_str,
                 int enable)
{
    xen_tmem_pool_info_t pool = {
        .flags.u.auth = enable,
        .id = 0,
        .n_pages = 0,
        .uuid[0] = 0,
        .uuid[1] = 0,
    };
    if ( xc_tmem_uuid_parse(uuid_str, &pool.uuid[0],
                                      &pool.uuid[1]) < 0 )
    {
        /*
         * The parser does not set errno; without this, PERROR would report
         * a stale errno and callers would see -1 with a bogus error code.
         */
        errno = EINVAL;
        PERROR("Can't parse uuid, use xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
        return -1;
    }
    return xc_tmem_control(xch, 0 /* pool_id */,
                           XEN_SYSCTL_TMEM_OP_SET_AUTH,
                           cli_id, sizeof(pool),
                           0 /* arg */, &pool);
}
176 
177 /* Save/restore/live migrate */
178 
179 /*
180    Note that live migration complicates the save/restore format in
181    multiple ways: Though saving/migration can only occur when all
182    tmem pools belonging to the domain-being-saved are frozen and
183    this ensures that new pools can't be created or existing pools
184    grown (in number of pages), it is possible during a live migration
185    that pools may be destroyed and pages invalidated while the migration
186    is in process.  As a result, (1) it is not safe to pre-specify counts
   for these values precisely, but only as a "max", and (2) an "invalidation"
188    list (of pools, objects, pages) must be appended when the domain is truly
189    suspended.
190  */
191 
192 /* returns 0 if nothing to save, -1 if error saving, 1 if saved successfully */
/*
 * Stream the domain's tmem state to io_fd: a field marker, the client
 * info, a -1 separator, one descriptor per pool (plus page records for
 * persistent pools), and a final -1 pool-list terminator.
 *
 * Returns 0 if nothing to save, -1 on error, 1 if saved successfully.
 */
int xc_tmem_save(xc_interface *xch,
                 uint32_t domid, int io_fd, int live, int field_marker)
{
    int marker = field_marker;
    int i, j, rc;
    uint32_t minusone = -1;
    struct tmem_handle *h;
    xen_tmem_client_t info;
    xen_tmem_pool_info_t *pools;
    char *buf = NULL;   /* scratch page buffer, grown as pools require */

    rc = xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_SAVE_BEGIN,
                         domid, 0 /* len*/ , live, NULL);
    if ( rc )
    {
        /* Nothing to save - no tmem enabled. */
        if ( errno == ENOENT )
            return 0;

        return rc;
    }

    /* Fetch the client-wide info; nr_pools bounds the GET_POOLS call below. */
    if ( xc_tmem_control(xch, 0 /* pool_id */,
                         XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO,
                         domid /* cli_id */, sizeof(info), 0 /* arg */,
                         &info) < 0 )
        return -1;

    /* Nothing to do. */
    if ( !info.nr_pools )
        return 0;

    pools = calloc(info.nr_pools, sizeof(*pools));
    if ( !pools )
        return -1;

    rc = xc_tmem_control(xch, 0 /* pool_id is ignored. */,
                         XEN_SYSCTL_TMEM_OP_GET_POOLS,
                         domid /* cli_id */, sizeof(*pools) * info.nr_pools,
                         0 /* arg */, pools);

    /* rc appears to be the number of pools returned; anything else is bad. */
    if ( rc < 0 || (uint32_t)rc > info.nr_pools )
        goto out_memory;

    /* Update it - as we may have fewer pools between the two hypercalls. */
    info.nr_pools = (uint32_t)rc;

    /* Stream header: field marker, client info, -1 separator. */
    if ( write_exact(io_fd, &marker, sizeof(marker)) )
        goto out_memory;

    if ( write_exact(io_fd, &info, sizeof(info)) )
        goto out_memory;

    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
        goto out_memory;

    for ( i = 0; i < info.nr_pools; i++ )
    {
        uint32_t pagesize;
        int bufsize = 0;
        int checksum = 0;
        xen_tmem_pool_info_t *pool = &pools[i];

        /* raw == -1 presumably marks an invalid/destroyed slot — skip it. */
        if ( pool->flags.raw != -1 )
        {
            /* Ephemeral pools: write the descriptor only, with n_pages = 0. */
            if ( !pool->flags.u.persist )
                pool->n_pages = 0;

            if ( write_exact(io_fd, pool, sizeof(*pool)) )
                goto out_memory;

            if ( !pool->flags.u.persist )
                continue;

            /* Page size is 4KiB scaled by the pool's pagebits. */
            pagesize = 1 << (pool->flags.u.pagebits + 12);
            if ( pagesize > bufsize )
            {
                char *tmp;

                /* Room for the tmem_handle header preceding the page data. */
                bufsize = pagesize + sizeof(struct tmem_handle);
                /* Use a temporary so the old buffer isn't leaked on failure. */
                if ( (tmp = realloc(buf, bufsize)) == NULL )
                    goto out_memory;
                buf = tmp;
            }
            for ( j = pool->n_pages; j > 0; j-- )
            {
                int ret;
                if ( (ret = xc_tmem_control(
                          xch, pool->id, XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE,
                          domid, bufsize, 0, buf)) > 0 )
                {
                    /* Record: oid, index, then the raw page payload. */
                    h = (struct tmem_handle *)buf;
                    if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
                        goto out_memory;

                    if ( write_exact(io_fd, &h->index, sizeof(h->index)) )
                        goto out_memory;
                    h++;  /* advance past the header to the page data */
                    checksum += *(char *)h;  /* folds in only the first byte */
                    if ( write_exact(io_fd, h, pagesize) )
                        goto out_memory;
                } else if ( ret == 0 ) {
                    /* No page produced this iteration; keep going. */
                    continue;
                } else {
                    /* page list terminator */
                    h = (struct tmem_handle *)buf;
                    h->oid.oid[0] = h->oid.oid[1] = h->oid.oid[2] = -1L;
                    if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
                    {
 out_memory:
                        free(pools);
                        free(buf);
                        return -1;
                    }
                    break;
                }
            }
            /* j > 0 here means the page walk ended early via the terminator. */
            DPRINTF("saved %"PRId64" tmem pages for dom=%d pool=%d, checksum=%x\n",
                    pool->n_pages - j, domid, pool->id, checksum);
        }
    }
    free(pools);
    free(buf);

    /* pool list terminator */
    minusone = -1;
    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
        return -1;

    return 1;
}
321 
322 /* only called for live migration */
/*
 * Live migration only: drain the hypervisor's list of invalidations
 * (pages flushed while migration was in flight) and append them to the
 * stream, terminated by a -1 sentinel.  Returns 0 on success, -1 on error.
 */
int xc_tmem_save_extra(xc_interface *xch, uint32_t domid, int io_fd, int field_marker)
{
    struct tmem_handle inv;
    int marker = field_marker;
    uint32_t terminator;
    int num = 0, check = 0;

    if ( write_exact(io_fd, &marker, sizeof(marker)) )
        return -1;

    for ( ;; )
    {
        if ( xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV,
                             domid, sizeof(inv), 0, &inv) <= 0 )
            break;

        /* One record per invalidation: pool id, object id, page index. */
        if ( write_exact(io_fd, &inv.pool_id, sizeof(inv.pool_id)) ||
             write_exact(io_fd, &inv.oid, sizeof(inv.oid)) ||
             write_exact(io_fd, &inv.index, sizeof(inv.index)) )
            return -1;

        num++;
        check += inv.pool_id + inv.oid.oid[0] + inv.oid.oid[1] +
                 inv.oid.oid[2] + inv.index;
    }

    if ( num )
            DPRINTF("needed %d tmem invalidates, check=%d\n", num, check);

    terminator = -1;
    if ( write_exact(io_fd, &terminator, sizeof(terminator)) )
        return -1;

    return 0;
}
351 
352 /* only called for live migration */
void xc_tmem_save_done(xc_interface *xch, uint32_t domid)
{
    /*
     * Signal SAVE_END to the hypervisor.  The return value is deliberately
     * ignored: this is best-effort teardown after the save has completed.
     */
    xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_SAVE_END, domid, 0, 0, NULL);
}
357 
358 /* restore routines */
359 
/*
 * Recreate one pool for @cli_id on the restore side, preserving the
 * pool id, flags, and UUID recorded in the save stream.  Returns the
 * SET_POOLS sysctl result (negative on failure).
 */
static int xc_tmem_restore_new_pool(
                    xc_interface *xch,
                    int cli_id,
                    uint32_t pool_id,
                    uint32_t flags,
                    uint64_t uuid_lo,
                    uint64_t uuid_hi)
{
    /* n_pages starts at zero; pages are replayed individually afterwards. */
    xen_tmem_pool_info_t pool = {
        .id = pool_id,
        .flags.raw = flags,
        .n_pages = 0,
        .uuid = { uuid_lo, uuid_hi },
    };

    return xc_tmem_control(xch, pool_id, XEN_SYSCTL_TMEM_OP_SET_POOLS,
                           cli_id, sizeof(pool), 0 /* arg */, &pool);
}
381 
/*
 * Rebuild the domain's tmem state from io_fd: read the client info,
 * replay it into the hypervisor, then for each pool descriptor recreate
 * the pool and put back its persistent pages (each record is oid, index,
 * page data; an all -1 oid terminates a pool's page list early).
 *
 * Returns 0 on success, -1 on error.
 */
int xc_tmem_restore(xc_interface *xch, uint32_t domid, int io_fd)
{
    uint32_t minusone;
    xen_tmem_client_t info;
    int checksum = 0;
    unsigned int i;
    char *buf = NULL;   /* scratch page buffer, grown per-pool as needed */

    if ( read_exact(io_fd, &info, sizeof(info)) )
        return -1;

    /* We would never save if there weren't any pools! */
    if ( !info.nr_pools )
        return -1;

    if ( xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN, domid, 0, 0, NULL) < 0 )
        return -1;

    /* Push the saved client-wide info back into the hypervisor. */
    if ( xc_tmem_control(xch, 0 /* pool_id */,
                         XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO,
                         domid /* cli_id */, sizeof(info), 0 /* arg */,
                         &info) < 0 )
        return -1;

    /* Consume the -1 separator written after the info on save. */
    if ( read_exact(io_fd, &minusone, sizeof(minusone)) )
        return -1;

    for ( i = 0; i < info.nr_pools; i++ )
    {
        int bufsize = 0, pagesize;
        int j;
        xen_tmem_pool_info_t pool;

        if ( read_exact(io_fd, &pool, sizeof(pool)) )
            goto out_memory;

        if ( xc_tmem_restore_new_pool(xch, domid, pool.id, pool.flags.raw,
                                      pool.uuid[0], pool.uuid[1]) < 0 )
            goto out_memory;

        /* Ephemeral pools were saved with n_pages == 0; nothing to replay. */
        if ( pool.n_pages <= 0 )
            continue;

        /* Page size is 4KiB scaled by the pool's pagebits. */
        pagesize = 1 << (pool.flags.u.pagebits + 12);
        if ( pagesize > bufsize )
        {
            char *tmp;

            bufsize = pagesize;
            /* Use a temporary so the old buffer isn't leaked on failure. */
            if ( (tmp = realloc(buf, bufsize)) == NULL )
                goto out_memory;
            buf = tmp;
        }
        for ( j = pool.n_pages; j > 0; j-- )
        {
            struct xen_tmem_oid oid;
            uint32_t index;
            int rc;

            if ( read_exact(io_fd, &oid, sizeof(oid)) )
                goto out_memory;

            /* All -1 oid: early terminator for this pool's page list. */
            if ( oid.oid[0] == -1L && oid.oid[1] == -1L && oid.oid[2] == -1L )
                break;
            if ( read_exact(io_fd, &index, sizeof(index)) )
                goto out_memory;

            if ( read_exact(io_fd, buf, pagesize) )
                goto out_memory;

            checksum += *buf;  /* folds in only the first byte of each page */
            if ( (rc = xc_tmem_control_oid(
                      xch, pool.id, XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE,
                      domid, bufsize, index, oid, buf)) <= 0 )
            {
                DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n",rc);
 out_memory:
                free(buf);
                return -1;
            }
        }
        /* j > 0 here means the page list ended early via the terminator. */
        if ( pool.n_pages )
            DPRINTF("restored %"PRId64" tmem pages for dom=%d pool=%d, check=%x\n",
                    pool.n_pages - j, domid, pool.id, checksum);
    }
    free(buf);

    return 0;
}
468 
469 /* only called for live migration, must be called after suspend */
/*
 * Live migration only, called after suspend: replay the invalidation
 * records appended by xc_tmem_save_extra(), flushing each page from the
 * restored pools.  The list is terminated by a pool_id of -1.
 *
 * Returns 0 on success, -1 on error.
 */
int xc_tmem_restore_extra(xc_interface *xch, uint32_t domid, int io_fd)
{
    /*
     * Initialized so that a failed first read_exact() leaves a value that
     * is != -1 and is reported as an error below, instead of reading an
     * uninitialized variable (undefined behavior).
     */
    uint32_t pool_id = 0;
    struct xen_tmem_oid oid;
    uint32_t index;
    int count = 0;
    int checksum = 0;

    /* -1 widens to UINT32_MAX here, matching the sentinel written on save. */
    while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 )
    {
        if ( read_exact(io_fd, &oid, sizeof(oid)) )
            return -1;
        if ( read_exact(io_fd, &index, sizeof(index)) )
            return -1;
        if ( xc_tmem_control_oid(
                 xch, pool_id, XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE,
                 domid, 0, index, oid, NULL) <= 0 )
            return -1;
        count++;
        checksum += pool_id + oid.oid[0] + oid.oid[1] + oid.oid[2] + index;
    }
    /* Loop exited for a reason other than the terminator => stream error. */
    if ( pool_id != -1 )
        return -1;
    if ( count )
            DPRINTF("invalidated %d tmem pages, check=%d\n",count,checksum);

    return 0;
}
498 
499 /*
500  * Local variables:
501  * mode: C
502  * c-file-style: "BSD"
503  * c-basic-offset: 4
504  * tab-width: 4
505  * indent-tabs-mode: nil
506  * End:
507  */
508