/******************************************************************************
 * xc_tmem.c
 *
 * Copyright (C) 2008 Oracle Corp.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
 */

#include "xc_private.h"
#include <inttypes.h>
#include <assert.h>
#include <xen/tmem.h>

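/*
 * Issue a tmem control operation via the XEN_SYSCTL_tmem_op sysctl.
 * When 'len' is non-zero, 'buf' is bounced out of the hypervisor, or
 * into it for the two "set" commands handled below.  The OID fields
 * are zeroed; callers that need a specific OID should use
 * xc_tmem_control_oid().  Returns the sysctl's result, or -1 with
 * errno set on error.
 */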
int xc_tmem_control(xc_interface *xch,
                    int32_t pool_id,
                    uint32_t cmd,
                    uint32_t cli_id,
                    uint32_t len,
                    uint32_t arg,
                    void *buf)
{
    DECLARE_SYSCTL;
    DECLARE_HYPERCALL_BOUNCE(buf, len, XC_HYPERCALL_BUFFER_BOUNCE_OUT);
    int rc;

    sysctl.cmd = XEN_SYSCTL_tmem_op;
    sysctl.u.tmem_op.pool_id = pool_id;
    sysctl.u.tmem_op.cmd = cmd;
    sysctl.u.tmem_op.cli_id = cli_id;
    sysctl.u.tmem_op.len = len;
    sysctl.u.tmem_op.arg = arg;
    sysctl.u.tmem_op.pad = 0;
    sysctl.u.tmem_op.oid.oid[0] = 0;
    sysctl.u.tmem_op.oid.oid[1] = 0;
    sysctl.u.tmem_op.oid.oid[2] = 0;

    if ( cmd == XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO ||
         cmd == XEN_SYSCTL_TMEM_OP_SET_AUTH )
        HYPERCALL_BOUNCE_SET_DIR(buf, XC_HYPERCALL_BUFFER_BOUNCE_IN);
    if ( len )
    {
        if ( buf == NULL )
        {
            errno = EINVAL;
            return -1;
        }
        if ( xc_hypercall_bounce_pre(xch, buf) )
        {
            PERROR("Could not bounce buffer for tmem control hypercall");
            return -1;
        }
    }

    set_xen_guest_handle(sysctl.u.tmem_op.u.buf, buf);

    rc = do_sysctl(xch, &sysctl);

    if ( len )
        xc_hypercall_bounce_post(xch, buf);

    return rc;
}

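/*
 * Identical to xc_tmem_control(), except that the caller supplies the
 * OID rather than having it zeroed.  Used by the restore path to put
 * and flush individual pages.
 */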
int xc_tmem_control_oid(xc_interface *xch,
                        int32_t pool_id,
                        uint32_t cmd,
                        uint32_t cli_id,
                        uint32_t len,
                        uint32_t arg,
                        struct xen_tmem_oid oid,
                        void *buf)
{
    DECLARE_SYSCTL;
    DECLARE_HYPERCALL_BOUNCE(buf, len, XC_HYPERCALL_BUFFER_BOUNCE_OUT);
    int rc;

    sysctl.cmd = XEN_SYSCTL_tmem_op;
    sysctl.u.tmem_op.pool_id = pool_id;
    sysctl.u.tmem_op.cmd = cmd;
    sysctl.u.tmem_op.cli_id = cli_id;
    sysctl.u.tmem_op.len = len;
    sysctl.u.tmem_op.arg = arg;
    sysctl.u.tmem_op.pad = 0;
    sysctl.u.tmem_op.oid = oid;

    if ( len )
    {
        if ( buf == NULL )
        {
            errno = EINVAL;
            return -1;
        }
        if ( xc_hypercall_bounce_pre(xch, buf) )
        {
            PERROR("Could not bounce buffer for tmem control (OID) hypercall");
            return -1;
        }
    }

    set_xen_guest_handle(sysctl.u.tmem_op.u.buf, buf);

    rc = do_sysctl(xch, &sysctl);

    if ( len )
        xc_hypercall_bounce_post(xch, buf);

    return rc;
}

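/*
 * Parse a UUID of the canonical xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
 * form (hex digits, case-insensitive) into its high and low 64-bit
 * halves.  As a shorthand, a single hex digit (e.g. "0") is also
 * accepted.  Returns 0 on success, -1 on a malformed string.
 */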
static int xc_tmem_uuid_parse(char *uuid_str, uint64_t *uuid_lo, uint64_t *uuid_hi)
{
    char *p = uuid_str;
    uint64_t *x = uuid_hi;
    int i = 0, digit;

    *uuid_lo = 0; *uuid_hi = 0;
    for ( p = uuid_str, i = 0; i != 36 && *p != '\0'; p++, i++ )
    {
        if ( (i == 8 || i == 13 || i == 18 || i == 23) )
        {
            if ( *p != '-' )
                return -1;
            if ( i == 18 )
                x = uuid_lo;
            continue;
        }
        else if ( *p >= '0' && *p <= '9' )
            digit = *p - '0';
        else if ( *p >= 'A' && *p <= 'F' )
            digit = *p - 'A' + 10;
        else if ( *p >= 'a' && *p <= 'f' )
            digit = *p - 'a' + 10;
        else
            return -1;
        *x = (*x << 4) | digit;
    }
    if ( (i != 1 && i != 36) || *p != '\0' )
        return -1;
    return 0;
}

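/*
 * Authorise (enable != 0) or deauthorise a shared pool, identified by
 * its UUID string, for the given client.  A minimal usage sketch,
 * assuming an already-open 'xch' handle and a valid 'domid' (both
 * hypothetical names here); rc < 0 indicates failure with errno set:
 *
 *     rc = xc_tmem_auth(xch, domid,
 *                       "00000000-0000-0000-0000-000000000000", 1);
 */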
int xc_tmem_auth(xc_interface *xch,
                 int cli_id,
                 char *uuid_str,
                 int enable)
{
    xen_tmem_pool_info_t pool = {
        .flags.u.auth = enable,
        .id = 0,
        .n_pages = 0,
        .uuid[0] = 0,
        .uuid[1] = 0,
    };
    if ( xc_tmem_uuid_parse(uuid_str, &pool.uuid[0],
                            &pool.uuid[1]) < 0 )
    {
        PERROR("Can't parse uuid, use xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
        return -1;
    }
    return xc_tmem_control(xch, 0 /* pool_id */,
                           XEN_SYSCTL_TMEM_OP_SET_AUTH,
                           cli_id, sizeof(pool),
                           0 /* arg */, &pool);
}

/* Save/restore/live migrate */

/*
   Note that live migration complicates the save/restore format in
   multiple ways: though saving/migration can only occur when all
   tmem pools belonging to the domain being saved are frozen, which
   ensures that new pools can't be created or existing pools grown
   (in number of pages), it is possible during a live migration that
   pools may be destroyed and pages invalidated while the migration
   is in progress.  As a result, (1) it is not safe to pre-specify
   counts for these values precisely, but only as a "max", and (2) an
   "invalidation" list (of pools, objects, pages) must be appended
   when the domain is truly suspended.
 */

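/*
 * A sketch of the save stream layout produced below (derived from the
 * code, not from a separate specification):
 *
 *   field_marker                          (int)
 *   xen_tmem_client_t info
 *   -1                                    (uint32_t separator)
 *   for each pool:
 *     xen_tmem_pool_info_t                (n_pages forced to 0 unless
 *                                          the pool is persistent)
 *     for each page of a persistent pool:
 *       oid, index, page data
 *     oid of all -1s                      (page list terminator)
 *   -1                                    (uint32_t, pool list terminator)
 */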
/* returns 0 if nothing to save, -1 if error saving, 1 if saved successfully */
int xc_tmem_save(xc_interface *xch,
                 uint32_t domid, int io_fd, int live, int field_marker)
{
    int marker = field_marker;
    int i, j, rc;
    uint32_t minusone = -1;
    struct tmem_handle *h;
    xen_tmem_client_t info;
    xen_tmem_pool_info_t *pools;
    char *buf = NULL;

    rc = xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_SAVE_BEGIN,
                         domid, 0 /* len */, live, NULL);
    if ( rc )
    {
        /* Nothing to save - no tmem enabled. */
        if ( errno == ENOENT )
            return 0;

        return rc;
    }

    if ( xc_tmem_control(xch, 0 /* pool_id */,
                         XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO,
                         domid /* cli_id */, sizeof(info), 0 /* arg */,
                         &info) < 0 )
        return -1;

    /* Nothing to do. */
    if ( !info.nr_pools )
        return 0;

    pools = calloc(info.nr_pools, sizeof(*pools));
    if ( !pools )
        return -1;

    rc = xc_tmem_control(xch, 0 /* pool_id is ignored. */,
                         XEN_SYSCTL_TMEM_OP_GET_POOLS,
                         domid /* cli_id */, sizeof(*pools) * info.nr_pools,
                         0 /* arg */, pools);

    if ( rc < 0 || (uint32_t)rc > info.nr_pools )
        goto out_memory;

    /* Update it - the pool count may have shrunk between the two hypercalls. */
    info.nr_pools = (uint32_t)rc;

    if ( write_exact(io_fd, &marker, sizeof(marker)) )
        goto out_memory;

    if ( write_exact(io_fd, &info, sizeof(info)) )
        goto out_memory;

    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
        goto out_memory;

    for ( i = 0; i < info.nr_pools; i++ )
    {
        uint32_t pagesize;
        int bufsize = 0;
        int checksum = 0;
        xen_tmem_pool_info_t *pool = &pools[i];

        if ( pool->flags.raw != -1 )
        {
            if ( !pool->flags.u.persist )
                pool->n_pages = 0;

            if ( write_exact(io_fd, pool, sizeof(*pool)) )
                goto out_memory;

            if ( !pool->flags.u.persist )
                continue;

            pagesize = 1 << (pool->flags.u.pagebits + 12);
            if ( pagesize > bufsize )
            {
                bufsize = pagesize + sizeof(struct tmem_handle);
                if ( (buf = realloc(buf, bufsize)) == NULL )
                    goto out_memory;
            }
            for ( j = pool->n_pages; j > 0; j-- )
            {
                int ret;
                if ( (ret = xc_tmem_control(
                          xch, pool->id, XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE,
                          domid, bufsize, 0, buf)) > 0 )
                {
                    h = (struct tmem_handle *)buf;
                    if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
                        goto out_memory;

                    if ( write_exact(io_fd, &h->index, sizeof(h->index)) )
                        goto out_memory;
                    /* Step past the handle; the page data follows it. */
                    h++;
                    checksum += *(char *)h;
                    if ( write_exact(io_fd, h, pagesize) )
                        goto out_memory;
                } else if ( ret == 0 ) {
                    continue;
                } else {
                    /* page list terminator */
                    h = (struct tmem_handle *)buf;
                    h->oid.oid[0] = h->oid.oid[1] = h->oid.oid[2] = -1L;
                    if ( write_exact(io_fd, &h->oid, sizeof(h->oid)) )
                    {
                    out_memory:
                        free(pools);
                        free(buf);
                        return -1;
                    }
                    break;
                }
            }
            DPRINTF("saved %"PRId64" tmem pages for dom=%d pool=%d, checksum=%x\n",
                    pool->n_pages - j, domid, pool->id, checksum);
        }
    }
    free(pools);
    free(buf);

    /* pool list terminator */
    minusone = -1;
    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
        return -1;

    return 1;
}

/*
 * Write out the (pool_id, oid, index) record of every page invalidated
 * while the live migration was in progress, terminated by a -1 marker.
 * Only called for live migration.
 */
int xc_tmem_save_extra(xc_interface *xch, uint32_t domid, int io_fd, int field_marker)
{
    struct tmem_handle handle;
    int marker = field_marker;
    uint32_t minusone;
    int count = 0, checksum = 0;

    if ( write_exact(io_fd, &marker, sizeof(marker)) )
        return -1;
    while ( xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV, domid,
                            sizeof(handle), 0, &handle) > 0 ) {
        if ( write_exact(io_fd, &handle.pool_id, sizeof(handle.pool_id)) )
            return -1;
        if ( write_exact(io_fd, &handle.oid, sizeof(handle.oid)) )
            return -1;
        if ( write_exact(io_fd, &handle.index, sizeof(handle.index)) )
            return -1;
        count++;
        checksum += handle.pool_id + handle.oid.oid[0] + handle.oid.oid[1] +
                    handle.oid.oid[2] + handle.index;
    }
    if ( count )
        DPRINTF("needed %d tmem invalidates, check=%d\n", count, checksum);
    minusone = -1;
    if ( write_exact(io_fd, &minusone, sizeof(minusone)) )
        return -1;
    return 0;
}

/* Tell the hypervisor the save is finished; only called for live migration. */
void xc_tmem_save_done(xc_interface *xch, uint32_t domid)
{
    xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_SAVE_END, domid, 0, 0, NULL);
}

/* restore routines */

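/*
 * Recreate one pool on the restore side from the flags and UUID that
 * were saved with it, via XEN_SYSCTL_TMEM_OP_SET_POOLS.
 */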
static int xc_tmem_restore_new_pool(
    xc_interface *xch,
    int cli_id,
    uint32_t pool_id,
    uint32_t flags,
    uint64_t uuid_lo,
    uint64_t uuid_hi)
{
    xen_tmem_pool_info_t pool = {
        .flags.raw = flags,
        .id = pool_id,
        .n_pages = 0,
        .uuid[0] = uuid_lo,
        .uuid[1] = uuid_hi,
    };

    return xc_tmem_control(xch, pool_id,
                           XEN_SYSCTL_TMEM_OP_SET_POOLS,
                           cli_id, sizeof(pool),
                           0 /* arg */, &pool);
}

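/*
 * Read back the stream written by xc_tmem_save(): restore the client
 * info, recreate each pool, and put every saved page of the persistent
 * pools back into the hypervisor.  Returns 0 on success, -1 on error.
 */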
int xc_tmem_restore(xc_interface *xch, uint32_t domid, int io_fd)
{
    uint32_t minusone;
    xen_tmem_client_t info;
    int checksum = 0;
    unsigned int i;
    char *buf = NULL;

    if ( read_exact(io_fd, &info, sizeof(info)) )
        return -1;

    /* We would never save if there weren't any pools! */
    if ( !info.nr_pools )
        return -1;

    if ( xc_tmem_control(xch, 0, XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN, domid, 0, 0, NULL) < 0 )
        return -1;

    if ( xc_tmem_control(xch, 0 /* pool_id */,
                         XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO,
                         domid /* cli_id */, sizeof(info), 0 /* arg */,
                         &info) < 0 )
        return -1;

    if ( read_exact(io_fd, &minusone, sizeof(minusone)) )
        return -1;

    for ( i = 0; i < info.nr_pools; i++ )
    {
        int bufsize = 0, pagesize;
        int j;
        xen_tmem_pool_info_t pool;

        if ( read_exact(io_fd, &pool, sizeof(pool)) )
            goto out_memory;

        if ( xc_tmem_restore_new_pool(xch, domid, pool.id, pool.flags.raw,
                                      pool.uuid[0], pool.uuid[1]) < 0 )
            goto out_memory;

        if ( pool.n_pages <= 0 )
            continue;

        pagesize = 1 << (pool.flags.u.pagebits + 12);
        if ( pagesize > bufsize )
        {
            bufsize = pagesize;
            if ( (buf = realloc(buf, bufsize)) == NULL )
                goto out_memory;
        }
        for ( j = pool.n_pages; j > 0; j-- )
        {
            struct xen_tmem_oid oid;
            uint32_t index;
            int rc;

            if ( read_exact(io_fd, &oid, sizeof(oid)) )
                goto out_memory;

            /* An all -1s OID is the page list terminator. */
            if ( oid.oid[0] == -1L && oid.oid[1] == -1L && oid.oid[2] == -1L )
                break;
            if ( read_exact(io_fd, &index, sizeof(index)) )
                goto out_memory;

            if ( read_exact(io_fd, buf, pagesize) )
                goto out_memory;

            checksum += *buf;
            if ( (rc = xc_tmem_control_oid(
                      xch, pool.id, XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE,
                      domid, bufsize, index, oid, buf)) <= 0 )
            {
                DPRINTF("xc_tmem_restore: putting page failed, rc=%d\n", rc);
            out_memory:
                free(buf);
                return -1;
            }
        }
        if ( pool.n_pages )
            DPRINTF("restored %"PRId64" tmem pages for dom=%d pool=%d, check=%x\n",
                    pool.n_pages - j, domid, pool.id, checksum);
    }
    free(buf);

    return 0;
}

/*
 * Flush the pages that were invalidated while the live migration was in
 * progress, as recorded by xc_tmem_save_extra().  Only called for live
 * migration; must be called after suspend.
 */
int xc_tmem_restore_extra(xc_interface *xch, uint32_t domid, int io_fd)
{
    uint32_t pool_id;
    struct xen_tmem_oid oid;
    uint32_t index;
    int count = 0;
    int checksum = 0;

    while ( read_exact(io_fd, &pool_id, sizeof(pool_id)) == 0 && pool_id != -1 )
    {
        if ( read_exact(io_fd, &oid, sizeof(oid)) )
            return -1;
        if ( read_exact(io_fd, &index, sizeof(index)) )
            return -1;
        if ( xc_tmem_control_oid(
                 xch, pool_id, XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE,
                 domid, 0, index, oid, NULL) <= 0 )
            return -1;
        count++;
        checksum += pool_id + oid.oid[0] + oid.oid[1] + oid.oid[2] + index;
    }
    if ( pool_id != -1 )
        return -1;
    if ( count )
        DPRINTF("invalidated %d tmem pages, check=%d\n", count, checksum);

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */