/*
 * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
 *
 */

#include <xen/init.h>
#include <xen/list.h>
#include <xen/radix-tree.h>
#include <xen/rbtree.h>
#include <xen/rwlock.h>
#include <xen/tmem_control.h>
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <public/sysctl.h>

/************ TMEM CONTROL OPERATIONS ************************************/

/* Freeze/thaw all pools belonging to client cli_id (all domains if -1). */
static int tmemc_freeze_pools(domid_t cli_id, int arg)
{
    struct client *client;
    bool freeze = arg == XEN_SYSCTL_TMEM_OP_FREEZE;
    bool destroy = arg == XEN_SYSCTL_TMEM_OP_DESTROY;
    char *s;

    s = destroy ? "destroyed" : ( freeze ? "frozen" : "thawed" );
    if ( cli_id == TMEM_CLI_ID_NULL )
    {
        list_for_each_entry(client,&tmem_global.client_list,client_list)
            client->info.flags.u.frozen = freeze;
        tmem_client_info("tmem: all pools %s for all %ss\n", s, tmem_client_str);
    }
    else
    {
        if ( (client = tmem_client_from_cli_id(cli_id)) == NULL)
            return -1;
        client->info.flags.u.frozen = freeze;
        tmem_client_info("tmem: all pools %s for %s=%d\n",
                         s, tmem_cli_id_str, cli_id);
    }
    return 0;
}

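/*
 * Evict ephemeral pages until at least @n are queued on tmem_page_list,
 * then scrub and free every queued page.  Returns the number of pages
 * that were available before freeing, which may be fewer than @n if
 * eviction gives out early.
 */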
static unsigned long tmem_flush_npages(unsigned long n)
{
    unsigned long avail_pages = 0;

    while ( (avail_pages = tmem_page_list_pages) < n )
    {
        if ( !tmem_evict() )
            break;
    }
    if ( avail_pages )
    {
        spin_lock(&tmem_page_list_lock);
        while ( !page_list_empty(&tmem_page_list) )
        {
            struct page_info *pg = page_list_remove_head(&tmem_page_list);
            scrub_one_page(pg);
            tmem_page_list_pages--;
            free_domheap_page(pg);
        }
        ASSERT(tmem_page_list_pages == 0);
        INIT_PAGE_LIST_HEAD(&tmem_page_list);
        spin_unlock(&tmem_page_list_lock);
    }
    return avail_pages;
}

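/*
 * Release up to @kb kilobytes of tmem memory back to the domheap.  Only
 * the global (TMEM_CLI_ID_NULL) form is implemented; returns the number
 * of kilobytes actually flushed, or -1 for a per-client request.
 */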
static int tmemc_flush_mem(domid_t cli_id, uint32_t kb)
{
    uint32_t npages, flushed_pages, flushed_kb;

    if ( cli_id != TMEM_CLI_ID_NULL )
    {
        tmem_client_warn("tmem: %s-specific flush not supported yet, use --all\n",
                         tmem_client_str);
        return -1;
    }
    /* Convert kb to pages, rounding up if necessary. */
    npages = (kb + ((1 << (PAGE_SHIFT-10))-1)) >> (PAGE_SHIFT-10);
    flushed_pages = tmem_flush_npages(npages);
    flushed_kb = flushed_pages << (PAGE_SHIFT-10);
    return flushed_kb;
}

/*
 * These tmemc_list* routines output lots of stats in a format that is
 * intended to be program-parseable, not human-readable.  Further, by
 * tying each group of stats to a line format indicator (e.g. G= for
 * global stats) and each individual stat to a two-letter specifier
 * (e.g. Ec:nnnnn in the G= line says there are nnnnn pages in the
 * global ephemeral pool), it should allow the stats reported to be
 * forward and backwards compatible as tmem evolves.
 */
#define BSIZE 1024

static int tmemc_list_client(struct client *c, tmem_cli_va_param_t buf,
                             int off, uint32_t len, bool use_long)
{
    char info[BSIZE];
    int i, n = 0, sum = 0;
    struct tmem_pool *p;
    bool s;

    n = scnprintf(info,BSIZE,"C=CI:%d,ww:%d,co:%d,fr:%d,"
        "Tc:%"PRIu64",Ge:%ld,Pp:%ld,Gp:%ld%c",
        c->cli_id, c->info.weight, c->info.flags.u.compress, c->info.flags.u.frozen,
        c->total_cycles, c->succ_eph_gets, c->succ_pers_puts, c->succ_pers_gets,
        use_long ? ',' : '\n');
    if (use_long)
        n += scnprintf(info+n,BSIZE-n,
             "Ec:%ld,Em:%ld,cp:%ld,cb:%"PRId64",cn:%ld,cm:%ld\n",
             c->eph_count, c->eph_count_max,
             c->compressed_pages, c->compressed_sum_size,
             c->compress_poor, c->compress_nomem);
    if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
        sum += n;
    for ( i = 0; i < MAX_POOLS_PER_DOMAIN; i++ )
    {
        if ( (p = c->pools[i]) == NULL )
            continue;
        s = is_shared(p);
        n = scnprintf(info,BSIZE,"P=CI:%d,PI:%d,"
                      "PT:%c%c,U0:%"PRIx64",U1:%"PRIx64"%c",
                      c->cli_id, p->pool_id,
                      is_persistent(p) ? 'P' : 'E', s ? 'S' : 'P',
                      (uint64_t)(s ? p->uuid[0] : 0),
                      (uint64_t)(s ? p->uuid[1] : 0LL),
                      use_long ? ',' : '\n');
        if (use_long)
            n += scnprintf(info+n,BSIZE-n,
                 "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu,"
                 "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu,"
                 "fs:%lu,ft:%lu,os:%lu,ot:%lu\n",
                 _atomic_read(p->pgp_count), p->pgp_count_max,
                 p->obj_count, p->obj_count_max,
                 p->objnode_count, p->objnode_count_max,
                 p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced,
                 p->no_mem_puts,
                 p->found_gets, p->gets,
                 p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs);
        if ( sum + n >= len )
            return sum;
        if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
            sum += n;
    }
    return sum;
}

static int tmemc_list_shared(tmem_cli_va_param_t buf, int off, uint32_t len,
                             bool use_long)
{
    char info[BSIZE];
    int i, n = 0, sum = 0;
    struct tmem_pool *p;
    struct share_list *sl;

    for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ )
    {
        if ( (p = tmem_global.shared_pools[i]) == NULL )
            continue;
        /* Each pool's record starts at the beginning of the buffer. */
        n = scnprintf(info,BSIZE,"S=SI:%d,PT:%c%c,U0:%"PRIx64",U1:%"PRIx64,
                      i, is_persistent(p) ? 'P' : 'E',
                      is_shared(p) ? 'S' : 'P',
                      p->uuid[0], p->uuid[1]);
        list_for_each_entry(sl,&p->share_list, share_list)
            n += scnprintf(info+n,BSIZE-n,",SC:%d",sl->client->cli_id);
        n += scnprintf(info+n,BSIZE-n,"%c", use_long ? ',' : '\n');
        if (use_long)
            n += scnprintf(info+n,BSIZE-n,
                 "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu,"
                 "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu,"
                 "fs:%lu,ft:%lu,os:%lu,ot:%lu\n",
                 _atomic_read(p->pgp_count), p->pgp_count_max,
                 p->obj_count, p->obj_count_max,
                 p->objnode_count, p->objnode_count_max,
                 p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced,
                 p->no_mem_puts,
                 p->found_gets, p->gets,
                 p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs);
        if ( sum + n >= len )
            return sum;
        if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
            sum += n;
    }
    return sum;
}

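/*
 * No global performance counters are currently reported; this emits just
 * a bare "T" record (the '=' is overwritten by the newline below).
 */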
static int tmemc_list_global_perf(tmem_cli_va_param_t buf, int off,
                                  uint32_t len, bool use_long)
{
    char info[BSIZE];
    int n = 0, sum = 0;

    n = scnprintf(info+n,BSIZE-n,"T=");
    n--; /* No stats are printed: back up over the '='. */
    n += scnprintf(info+n,BSIZE-n,"\n");
    if ( sum + n >= len )
        return sum;
    if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
        sum += n;
    return sum;
}

static int tmemc_list_global(tmem_cli_va_param_t buf, int off, uint32_t len,
                             bool use_long)
{
    char info[BSIZE];
    int n = 0, sum = 0; /* Bytes written, relative to @off. */

    n += scnprintf(info,BSIZE,"G="
        "Tt:%lu,Te:%lu,Cf:%lu,Af:%lu,Pf:%lu,Ta:%lu,"
        "Lm:%lu,Et:%lu,Ea:%lu,Rt:%lu,Ra:%lu,Rx:%lu,Fp:%lu%c",
        tmem_stats.total_tmem_ops, tmem_stats.errored_tmem_ops, tmem_stats.failed_copies,
        tmem_stats.alloc_failed, tmem_stats.alloc_page_failed, tmem_page_list_pages,
        tmem_stats.low_on_memory, tmem_stats.evicted_pgs,
        tmem_stats.evict_attempts, tmem_stats.relinq_pgs, tmem_stats.relinq_attempts,
        tmem_stats.max_evicts_per_relinq,
        tmem_stats.total_flush_pool, use_long ? ',' : '\n');
    if (use_long)
        n += scnprintf(info+n,BSIZE-n,
             "Ec:%ld,Em:%ld,Oc:%d,Om:%d,Nc:%d,Nm:%d,Pc:%d,Pm:%d,"
             "Fc:%d,Fm:%d,Sc:%d,Sm:%d,Ep:%lu,Gd:%lu,Zt:%lu,Gz:%lu\n",
             tmem_global.eph_count, tmem_stats.global_eph_count_max,
             _atomic_read(tmem_stats.global_obj_count), tmem_stats.global_obj_count_max,
             _atomic_read(tmem_stats.global_rtree_node_count), tmem_stats.global_rtree_node_count_max,
             _atomic_read(tmem_stats.global_pgp_count), tmem_stats.global_pgp_count_max,
             _atomic_read(tmem_stats.global_page_count), tmem_stats.global_page_count_max,
             _atomic_read(tmem_stats.global_pcd_count), tmem_stats.global_pcd_count_max,
             tmem_stats.tot_good_eph_puts,tmem_stats.deduped_puts,tmem_stats.pcd_tot_tze_size,
             tmem_stats.pcd_tot_csize);
    if ( sum + n >= len )
        return sum;
    if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
        sum += n;
    return sum;
}

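/*
 * Emit the full stats listing: for TMEM_CLI_ID_NULL, global stats followed
 * by shared pools, each client, and the perf record; otherwise only the
 * stats for the given client.
 */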
static int tmemc_list(domid_t cli_id, tmem_cli_va_param_t buf, uint32_t len,
                      bool use_long)
{
    struct client *client;
    int off = 0;

    if ( cli_id == TMEM_CLI_ID_NULL ) {
        off = tmemc_list_global(buf,0,len,use_long);
        off += tmemc_list_shared(buf,off,len-off,use_long);
        list_for_each_entry(client,&tmem_global.client_list,client_list)
            off += tmemc_list_client(client, buf, off, len-off, use_long);
        off += tmemc_list_global_perf(buf,off,len-off,use_long);
    }
    else if ( (client = tmem_client_from_cli_id(cli_id)) == NULL)
        return -1;
    else
        off = tmemc_list_client(client, buf, 0, len, use_long);

    return 0;
}

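/*
 * Validate a guest-supplied xen_tmem_client_t and apply any weight or
 * compression change to @client.  nr_pools is deliberately ignored.
 */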
static int __tmemc_set_client_info(struct client *client,
                                   XEN_GUEST_HANDLE(xen_tmem_client_t) buf)
{
    domid_t cli_id;
    uint32_t old_weight;
    xen_tmem_client_t info = { };

    ASSERT(client);

    if ( copy_from_guest(&info, buf, 1) )
        return -EFAULT;

    if ( info.version != TMEM_SPEC_VERSION )
        return -EOPNOTSUPP;

    if ( info.maxpools > MAX_POOLS_PER_DOMAIN )
        return -ERANGE;

    /* Ignore info.nr_pools. */
    cli_id = client->cli_id;

    if ( info.weight != client->info.weight )
    {
        old_weight = client->info.weight;
        client->info.weight = info.weight;
        tmem_client_info("tmem: weight set to %d for %s=%d\n",
                         info.weight, tmem_cli_id_str, cli_id);
        atomic_sub(old_weight,&tmem_global.client_weight_total);
        atomic_add(client->info.weight,&tmem_global.client_weight_total);
    }

    if ( info.flags.u.compress != client->info.flags.u.compress )
    {
        client->info.flags.u.compress = info.flags.u.compress;
        tmem_client_info("tmem: compression %s for %s=%d\n",
                         info.flags.u.compress ? "enabled" : "disabled",
                         tmem_cli_id_str,cli_id);
    }
    return 0;
}

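/*
 * Apply client info to one client, or to every client when cli_id is
 * TMEM_CLI_ID_NULL (stopping at the first failure).
 */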
static int tmemc_set_client_info(domid_t cli_id,
                                 XEN_GUEST_HANDLE(xen_tmem_client_t) info)
{
    struct client *client;
    int ret = -ENOENT;

    if ( cli_id == TMEM_CLI_ID_NULL )
    {
        list_for_each_entry(client,&tmem_global.client_list,client_list)
        {
            ret = __tmemc_set_client_info(client, info);
            if (ret)
                break;
        }
    }
    else
    {
        client = tmem_client_from_cli_id(cli_id);
        if ( client )
            ret = __tmemc_set_client_info(client, info);
    }
    return ret;
}

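/*
 * Copy a client's info to the guest.  For a not (yet) existing client,
 * report a generic template so the toolstack can still learn the spec
 * version and pool limit.
 */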
static int tmemc_get_client_info(int cli_id,
                                 XEN_GUEST_HANDLE(xen_tmem_client_t) info)
{
    struct client *client = tmem_client_from_cli_id(cli_id);

    if ( client )
    {
        if ( copy_to_guest(info, &client->info, 1) )
            return -EFAULT;
    }
    else
    {
        static const xen_tmem_client_t generic = {
            .version = TMEM_SPEC_VERSION,
            .maxpools = MAX_POOLS_PER_DOMAIN
        };

        if ( copy_to_guest(info, &generic, 1) )
            return -EFAULT;
    }

    return 0;
}

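/*
 * Fill the guest's array with one xen_tmem_pool_info_t per active pool,
 * skipping unused slots.  Returns the number of entries written, or a
 * negative errno value.
 */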
static int tmemc_get_pool(int cli_id,
                          XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
                          uint32_t len)
{
    struct client *client = tmem_client_from_cli_id(cli_id);
    unsigned int i, idx;
    int rc = 0;
    unsigned int nr = len / sizeof(xen_tmem_pool_info_t);

    if ( len % sizeof(xen_tmem_pool_info_t) )
        return -EINVAL;

    if ( nr > MAX_POOLS_PER_DOMAIN )
        return -E2BIG;

    if ( !guest_handle_okay(pools, nr) )
        return -EINVAL;

    if ( !client )
        return -EINVAL;

    for ( idx = 0, i = 0; i < MAX_POOLS_PER_DOMAIN; i++ )
    {
        struct tmem_pool *pool = client->pools[i];
        xen_tmem_pool_info_t out;

        if ( pool == NULL )
            continue;

        out.flags.raw = (pool->persistent ? TMEM_POOL_PERSIST : 0) |
                        (pool->shared ? TMEM_POOL_SHARED : 0) |
                        (POOL_PAGESHIFT << TMEM_POOL_PAGESIZE_SHIFT) |
                        (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT);
        out.n_pages = _atomic_read(pool->pgp_count);
        out.uuid[0] = pool->uuid[0];
        out.uuid[1] = pool->uuid[1];
        out.id = i;

        /* N.B. 'idx' != 'i'. */
        if ( __copy_to_guest_offset(pools, idx, &out, 1) )
        {
            rc = -EFAULT;
            break;
        }
        idx++;
        /* Don't try to put more than what was requested. */
        if ( idx >= nr )
            break;
    }

    /* And how many we have processed. */
    return rc ? : idx;
}

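/*
 * (Re)create pools described by the guest's array, e.g. when restoring a
 * domain (hence the legacy TMEM_RESTORE_NEW name).  A missing client is
 * created first.  Returns the number of pools processed, or a negative
 * errno value.
 */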
static int tmemc_set_pools(int cli_id,
                           XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
                           uint32_t len)
{
    unsigned int i;
    int rc = 0;
    unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
    struct client *client = tmem_client_from_cli_id(cli_id);

    if ( len % sizeof(xen_tmem_pool_info_t) )
        return -EINVAL;

    if ( nr > MAX_POOLS_PER_DOMAIN )
        return -E2BIG;

    if ( !guest_handle_okay(pools, nr) )
        return -EINVAL;

    if ( !client )
    {
        client = client_create(cli_id);
        if ( !client )
            return -ENOMEM;
    }
    for ( i = 0; i < nr; i++ )
    {
        xen_tmem_pool_info_t pool;

        if ( __copy_from_guest_offset(&pool, pools, i, 1) )
            return -EFAULT;

        if ( pool.n_pages )
            return -EINVAL;

        rc = do_tmem_new_pool(cli_id, pool.id, pool.flags.raw,
                              pool.uuid[0], pool.uuid[1]);
        if ( rc < 0 )
            break;

        pool.id = rc;
        if ( __copy_to_guest_offset(pools, i, &pool, 1) )
            return -EFAULT;
    }

    /* And how many we have processed. */
    return rc ? : i;
}

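/*
 * Set shared pool authentication for the client: each entry's UUID is
 * (de)authorized according to pool.flags.u.auth.  Returns the number of
 * entries processed, or a negative errno value.
 */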
static int tmemc_auth_pools(int cli_id,
                            XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
                            uint32_t len)
{
    unsigned int i;
    int rc = 0;
    unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
    struct client *client = tmem_client_from_cli_id(cli_id);

    if ( len % sizeof(xen_tmem_pool_info_t) )
        return -EINVAL;

    if ( nr > MAX_POOLS_PER_DOMAIN )
        return -E2BIG;

    if ( !guest_handle_okay(pools, nr) )
        return -EINVAL;

    if ( !client )
    {
        client = client_create(cli_id);
        if ( !client )
            return -ENOMEM;
    }

    for ( i = 0; i < nr; i++ )
    {
        xen_tmem_pool_info_t pool;

        if ( __copy_from_guest_offset(&pool, pools, i, 1) )
            return -EFAULT;

        if ( pool.n_pages )
            return -EINVAL;

        rc = tmemc_shared_pool_auth(cli_id, pool.uuid[0], pool.uuid[1],
                                    pool.flags.u.auth);
        if ( rc < 0 )
            break;
    }

    /* And how many we have processed. */
    return rc ? : i;
}

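/*
 * Entry point for XEN_SYSCTL_tmem_op.  All control operations run under
 * the tmem write lock; commands not handled here fall through to
 * do_tmem_control().
 */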
int tmem_control(struct xen_sysctl_tmem_op *op)
{
    int ret;
    uint32_t cmd = op->cmd;

    if ( op->pad != 0 )
        return -EINVAL;

    write_lock(&tmem_rwlock);

    switch (cmd)
    {
    case XEN_SYSCTL_TMEM_OP_THAW:
    case XEN_SYSCTL_TMEM_OP_FREEZE:
    case XEN_SYSCTL_TMEM_OP_DESTROY:
        ret = tmemc_freeze_pools(op->cli_id, cmd);
        break;
    case XEN_SYSCTL_TMEM_OP_FLUSH:
        ret = tmemc_flush_mem(op->cli_id, op->arg);
        break;
    case XEN_SYSCTL_TMEM_OP_LIST:
        ret = tmemc_list(op->cli_id,
                         guest_handle_cast(op->u.buf, char), op->len, op->arg);
        break;
    case XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO:
        ret = tmemc_set_client_info(op->cli_id, op->u.client);
        break;
    case XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB:
        ret = tmem_freeable_pages() >> (20 - PAGE_SHIFT);
        break;
    case XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO:
        ret = tmemc_get_client_info(op->cli_id, op->u.client);
        break;
    case XEN_SYSCTL_TMEM_OP_GET_POOLS:
        ret = tmemc_get_pool(op->cli_id, op->u.pool, op->len);
        break;
    case XEN_SYSCTL_TMEM_OP_SET_POOLS: /* TMEM_RESTORE_NEW */
        ret = tmemc_set_pools(op->cli_id, op->u.pool, op->len);
        break;
    case XEN_SYSCTL_TMEM_OP_SET_AUTH: /* TMEM_AUTH */
        ret = tmemc_auth_pools(op->cli_id, op->u.pool, op->len);
        break;
    default:
        ret = do_tmem_control(op);
        break;
    }

    write_unlock(&tmem_rwlock);

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */