/*
 * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
 *
 */

#include <xen/init.h>
#include <xen/list.h>
#include <xen/radix-tree.h>
#include <xen/rbtree.h>
#include <xen/rwlock.h>
#include <xen/tmem_control.h>
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <public/sysctl.h>

/************ TMEM CONTROL OPERATIONS ************************************/

/* Freeze/thaw all pools belonging to client cli_id (all domains if -1). */
static int tmemc_freeze_pools(domid_t cli_id, int arg)
{
    struct client *client;
    bool freeze = arg == XEN_SYSCTL_TMEM_OP_FREEZE;
    bool destroy = arg == XEN_SYSCTL_TMEM_OP_DESTROY;
    char *s;

    s = destroy ? "destroyed" : ( freeze ? "frozen" : "thawed" );
    if ( cli_id == TMEM_CLI_ID_NULL )
    {
        list_for_each_entry(client, &tmem_global.client_list, client_list)
            client->info.flags.u.frozen = freeze;
        tmem_client_info("tmem: all pools %s for all %ss\n", s, tmem_client_str);
    }
    else
    {
        if ( (client = tmem_client_from_cli_id(cli_id)) == NULL )
            return -1;
        client->info.flags.u.frozen = freeze;
        tmem_client_info("tmem: all pools %s for %s=%d\n",
                         s, tmem_cli_id_str, cli_id);
    }
    return 0;
}

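/*
 * Evict tmem pages until at least n are queued on tmem_page_list, then
 * scrub and free everything on the list.  Returns the number of pages
 * that were available before freeing.
 */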
static unsigned long tmem_flush_npages(unsigned long n)
{
    unsigned long avail_pages = 0;

    while ( (avail_pages = tmem_page_list_pages) < n )
    {
        if ( !tmem_evict() )
            break;
    }
    if ( avail_pages )
    {
        spin_lock(&tmem_page_list_lock);
        while ( !page_list_empty(&tmem_page_list) )
        {
            struct page_info *pg = page_list_remove_head(&tmem_page_list);
            scrub_one_page(pg);
            tmem_page_list_pages--;
            free_domheap_page(pg);
        }
        ASSERT(tmem_page_list_pages == 0);
        INIT_PAGE_LIST_HEAD(&tmem_page_list);
        spin_unlock(&tmem_page_list_lock);
    }
    return avail_pages;
}

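/*
 * Flush up to kb kilobytes of tmem-held memory back to the hypervisor.
 * Only a system-wide flush (cli_id == TMEM_CLI_ID_NULL) is supported;
 * returns the number of kilobytes actually flushed, or -1 on error.
 */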
static int tmemc_flush_mem(domid_t cli_id, uint32_t kb)
{
    uint32_t npages, flushed_pages, flushed_kb;

    if ( cli_id != TMEM_CLI_ID_NULL )
    {
        tmem_client_warn("tmem: %s-specific flush not supported yet, use --all\n",
                         tmem_client_str);
        return -1;
    }
    /* Convert kb to pages, rounding up if necessary. */
    npages = (kb + ((1 << (PAGE_SHIFT-10))-1)) >> (PAGE_SHIFT-10);
    flushed_pages = tmem_flush_npages(npages);
    flushed_kb = flushed_pages << (PAGE_SHIFT-10);
    return flushed_kb;
}

/*
 * These tmemc_list* routines output lots of stats in a format that is
 * intended to be program-parseable, not human-readable. Further, by
 * tying each group of stats to a line format indicator (e.g. G= for
 * global stats) and each individual stat to a two-letter specifier
 * (e.g. Ec:nnnnn in the G= line says there are nnnnn pages in the
 * global ephemeral pool), it should allow the stats reported to be
 * forward and backward compatible as tmem evolves.
 */
#define BSIZE 1024

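/*
 * Emit the C= (per-client) line and a P= line for each of the client's
 * pools into the guest buffer at offset off, stopping once len bytes
 * would be exceeded.  Returns the number of bytes written.
 */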
static int tmemc_list_client(struct client *c, tmem_cli_va_param_t buf,
                             int off, uint32_t len, bool use_long)
{
    char info[BSIZE];
    int i, n = 0, sum = 0;
    struct tmem_pool *p;
    bool s;

    n = scnprintf(info, BSIZE, "C=CI:%d,ww:%d,co:%d,fr:%d,"
        "Tc:%"PRIu64",Ge:%ld,Pp:%ld,Gp:%ld%c",
        c->cli_id, c->info.weight, c->info.flags.u.compress, c->info.flags.u.frozen,
        c->total_cycles, c->succ_eph_gets, c->succ_pers_puts, c->succ_pers_gets,
        use_long ? ',' : '\n');
    if ( use_long )
        n += scnprintf(info + n, BSIZE - n,
             "Ec:%ld,Em:%ld,cp:%ld,cb:%"PRId64",cn:%ld,cm:%ld\n",
             c->eph_count, c->eph_count_max,
             c->compressed_pages, c->compressed_sum_size,
             c->compress_poor, c->compress_nomem);
    if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
        sum += n;
    for ( i = 0; i < MAX_POOLS_PER_DOMAIN; i++ )
    {
        if ( (p = c->pools[i]) == NULL )
            continue;
        s = is_shared(p);
        n = scnprintf(info, BSIZE, "P=CI:%d,PI:%d,"
                      "PT:%c%c,U0:%"PRIx64",U1:%"PRIx64"%c",
                      c->cli_id, p->pool_id,
                      is_persistent(p) ? 'P' : 'E', s ? 'S' : 'P',
                      (uint64_t)(s ? p->uuid[0] : 0),
                      (uint64_t)(s ? p->uuid[1] : 0LL),
                      use_long ? ',' : '\n');
        if ( use_long )
            n += scnprintf(info + n, BSIZE - n,
             "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu,"
             "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu,"
             "fs:%lu,ft:%lu,os:%lu,ot:%lu\n",
             _atomic_read(p->pgp_count), p->pgp_count_max,
             p->obj_count, p->obj_count_max,
             p->objnode_count, p->objnode_count_max,
             p->good_puts, p->puts, p->dup_puts_flushed, p->dup_puts_replaced,
             p->no_mem_puts,
             p->found_gets, p->gets,
             p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs);
        if ( sum + n >= len )
            return sum;
        if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
            sum += n;
    }
    return sum;
}

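/*
 * Emit an S= line for each globally shared pool, including the list of
 * clients attached to it.  Returns the number of bytes written.
 */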
static int tmemc_list_shared(tmem_cli_va_param_t buf, int off, uint32_t len,
                             bool use_long)
{
    char info[BSIZE];
    int i, n = 0, sum = 0;
    struct tmem_pool *p;
    struct share_list *sl;

    for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ )
    {
        if ( (p = tmem_global.shared_pools[i]) == NULL )
            continue;
        n = scnprintf(info + n, BSIZE - n, "S=SI:%d,PT:%c%c,U0:%"PRIx64",U1:%"PRIx64,
                      i, is_persistent(p) ? 'P' : 'E',
                      is_shared(p) ? 'S' : 'P',
                      p->uuid[0], p->uuid[1]);
        list_for_each_entry(sl, &p->share_list, share_list)
            n += scnprintf(info + n, BSIZE - n, ",SC:%d", sl->client->cli_id);
        n += scnprintf(info + n, BSIZE - n, "%c", use_long ? ',' : '\n');
        if ( use_long )
            n += scnprintf(info + n, BSIZE - n,
             "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu,"
             "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu,"
             "fs:%lu,ft:%lu,os:%lu,ot:%lu\n",
             _atomic_read(p->pgp_count), p->pgp_count_max,
             p->obj_count, p->obj_count_max,
             p->objnode_count, p->objnode_count_max,
             p->good_puts, p->puts, p->dup_puts_flushed, p->dup_puts_replaced,
             p->no_mem_puts,
             p->found_gets, p->gets,
             p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs);
        if ( sum + n >= len )
            return sum;
        if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
            sum += n;
    }
    return sum;
}

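/* Emit the T= performance line; no individual counters are currently reported. */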
static int tmemc_list_global_perf(tmem_cli_va_param_t buf, int off,
                                  uint32_t len, bool use_long)
{
    char info[BSIZE];
    int n = 0, sum = 0;

    n = scnprintf(info + n, BSIZE - n, "T=");
    n--; /* Overwrite trailing comma. */
    n += scnprintf(info + n, BSIZE - n, "\n");
    if ( sum + n >= len )
        return sum;
    if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
        sum += n;
    return sum;
}

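/*
 * Emit the G= line of global tmem statistics (plus the extended counters
 * when use_long is set).  Returns the number of bytes written.
 */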
static int tmemc_list_global(tmem_cli_va_param_t buf, int off, uint32_t len,
                             bool use_long)
{
    char info[BSIZE];
    int n = 0, sum = off;

    n += scnprintf(info, BSIZE, "G="
      "Tt:%lu,Te:%lu,Cf:%lu,Af:%lu,Pf:%lu,Ta:%lu,"
      "Lm:%lu,Et:%lu,Ea:%lu,Rt:%lu,Ra:%lu,Rx:%lu,Fp:%lu%c",
      tmem_stats.total_tmem_ops, tmem_stats.errored_tmem_ops, tmem_stats.failed_copies,
      tmem_stats.alloc_failed, tmem_stats.alloc_page_failed, tmem_page_list_pages,
      tmem_stats.low_on_memory, tmem_stats.evicted_pgs,
      tmem_stats.evict_attempts, tmem_stats.relinq_pgs, tmem_stats.relinq_attempts,
      tmem_stats.max_evicts_per_relinq,
      tmem_stats.total_flush_pool, use_long ? ',' : '\n');
    if ( use_long )
        n += scnprintf(info + n, BSIZE - n,
          "Ec:%ld,Em:%ld,Oc:%d,Om:%d,Nc:%d,Nm:%d,Pc:%d,Pm:%d,"
          "Fc:%d,Fm:%d,Sc:%d,Sm:%d,Ep:%lu,Gd:%lu,Zt:%lu,Gz:%lu\n",
          tmem_global.eph_count, tmem_stats.global_eph_count_max,
          _atomic_read(tmem_stats.global_obj_count), tmem_stats.global_obj_count_max,
          _atomic_read(tmem_stats.global_rtree_node_count), tmem_stats.global_rtree_node_count_max,
          _atomic_read(tmem_stats.global_pgp_count), tmem_stats.global_pgp_count_max,
          _atomic_read(tmem_stats.global_page_count), tmem_stats.global_page_count_max,
          _atomic_read(tmem_stats.global_pcd_count), tmem_stats.global_pcd_count_max,
          tmem_stats.tot_good_eph_puts, tmem_stats.deduped_puts, tmem_stats.pcd_tot_tze_size,
          tmem_stats.pcd_tot_csize);
    if ( sum + n >= len )
        return sum;
    if ( !copy_to_guest_offset(buf, off + sum, info, n + 1) )
        sum += n;
    return sum;
}

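/*
 * Top-level LIST handler: for TMEM_CLI_ID_NULL dump global, shared,
 * per-client and performance stats; otherwise dump a single client.
 */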
static int tmemc_list(domid_t cli_id, tmem_cli_va_param_t buf, uint32_t len,
                      bool use_long)
{
    struct client *client;
    int off = 0;

    if ( cli_id == TMEM_CLI_ID_NULL ) {
        off = tmemc_list_global(buf, 0, len, use_long);
        off += tmemc_list_shared(buf, off, len - off, use_long);
        list_for_each_entry(client, &tmem_global.client_list, client_list)
            off += tmemc_list_client(client, buf, off, len - off, use_long);
        off += tmemc_list_global_perf(buf, off, len - off, use_long);
    }
    else if ( (client = tmem_client_from_cli_id(cli_id)) == NULL )
        return -1;
    else
        off = tmemc_list_client(client, buf, 0, len, use_long);

    return 0;
}

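/*
 * Apply a guest-supplied xen_tmem_client_t to one client: validate the
 * spec version and pool limit, then update the weight and compression
 * settings.
 */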
static int __tmemc_set_client_info(struct client *client,
                                   XEN_GUEST_HANDLE(xen_tmem_client_t) buf)
{
    domid_t cli_id;
    uint32_t old_weight;
    xen_tmem_client_t info = { };

    ASSERT(client);

    if ( copy_from_guest(&info, buf, 1) )
        return -EFAULT;

    if ( info.version != TMEM_SPEC_VERSION )
        return -EOPNOTSUPP;

    if ( info.maxpools > MAX_POOLS_PER_DOMAIN )
        return -ERANGE;

    /* Ignore info.nr_pools. */
    cli_id = client->cli_id;

    if ( info.weight != client->info.weight )
    {
        old_weight = client->info.weight;
        client->info.weight = info.weight;
        tmem_client_info("tmem: weight set to %d for %s=%d\n",
                         info.weight, tmem_cli_id_str, cli_id);
        atomic_sub(old_weight, &tmem_global.client_weight_total);
        atomic_add(client->info.weight, &tmem_global.client_weight_total);
    }

    if ( info.flags.u.compress != client->info.flags.u.compress )
    {
        client->info.flags.u.compress = info.flags.u.compress;
        tmem_client_info("tmem: compression %s for %s=%d\n",
                         info.flags.u.compress ? "enabled" : "disabled",
                         tmem_cli_id_str, cli_id);
    }
    return 0;
}

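/*
 * SET_CLIENT_INFO handler: apply the settings to every client when
 * cli_id is TMEM_CLI_ID_NULL, otherwise to the named client only.
 */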
static int tmemc_set_client_info(domid_t cli_id,
                                 XEN_GUEST_HANDLE(xen_tmem_client_t) info)
{
    struct client *client;
    int ret = -ENOENT;

    if ( cli_id == TMEM_CLI_ID_NULL )
    {
        list_for_each_entry(client, &tmem_global.client_list, client_list)
        {
            ret = __tmemc_set_client_info(client, info);
            if ( ret )
                break;
        }
    }
    else
    {
        client = tmem_client_from_cli_id(cli_id);
        if ( client )
            ret = __tmemc_set_client_info(client, info);
    }
    return ret;
}

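/*
 * GET_CLIENT_INFO handler: copy the client's current settings to the
 * guest, or a generic template if the client does not exist yet.
 */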
static int tmemc_get_client_info(int cli_id,
                                 XEN_GUEST_HANDLE(xen_tmem_client_t) info)
{
    struct client *client = tmem_client_from_cli_id(cli_id);

    if ( client )
    {
        if ( copy_to_guest(info, &client->info, 1) )
            return -EFAULT;
    }
    else
    {
        static const xen_tmem_client_t generic = {
            .version = TMEM_SPEC_VERSION,
            .maxpools = MAX_POOLS_PER_DOMAIN
        };

        if ( copy_to_guest(info, &generic, 1) )
            return -EFAULT;
    }

    return 0;
}

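/*
 * GET_POOLS handler: fill the guest array with one xen_tmem_pool_info_t
 * per existing pool of the client.  Returns the number of entries
 * written, or a negative errno value.
 */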
static int tmemc_get_pool(int cli_id,
                          XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
                          uint32_t len)
{
    struct client *client = tmem_client_from_cli_id(cli_id);
    unsigned int i, idx;
    int rc = 0;
    unsigned int nr = len / sizeof(xen_tmem_pool_info_t);

    if ( len % sizeof(xen_tmem_pool_info_t) )
        return -EINVAL;

    if ( nr > MAX_POOLS_PER_DOMAIN )
        return -E2BIG;

    if ( !guest_handle_okay(pools, nr) )
        return -EINVAL;

    if ( !client )
        return -EINVAL;

    for ( idx = 0, i = 0; i < MAX_POOLS_PER_DOMAIN; i++ )
    {
        struct tmem_pool *pool = client->pools[i];
        xen_tmem_pool_info_t out;

        if ( pool == NULL )
            continue;

        out.flags.raw = (pool->persistent ? TMEM_POOL_PERSIST : 0) |
              (pool->shared ? TMEM_POOL_SHARED : 0) |
              (POOL_PAGESHIFT << TMEM_POOL_PAGESIZE_SHIFT) |
              (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT);
        out.n_pages = _atomic_read(pool->pgp_count);
        out.uuid[0] = pool->uuid[0];
        out.uuid[1] = pool->uuid[1];
        out.id = i;

        /* N.B. 'idx' != 'i'. */
        if ( __copy_to_guest_offset(pools, idx, &out, 1) )
        {
            rc = -EFAULT;
            break;
        }
        idx++;
        /* Don't try to put more than what was requested. */
        if ( idx >= nr )
            break;
    }

    /* Return the number of pool entries written. */
    return rc ? : idx;
}

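/*
 * SET_POOLS handler (TMEM_RESTORE_NEW): create the pools described in
 * the guest array for the given client, creating the client itself
 * first if necessary.  Returns the number of pools processed, or a
 * negative errno value.
 */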
static int tmemc_set_pools(int cli_id,
                           XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
                           uint32_t len)
{
    unsigned int i;
    int rc = 0;
    unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
    struct client *client = tmem_client_from_cli_id(cli_id);

    if ( len % sizeof(xen_tmem_pool_info_t) )
        return -EINVAL;

    if ( nr > MAX_POOLS_PER_DOMAIN )
        return -E2BIG;

    if ( !guest_handle_okay(pools, nr) )
        return -EINVAL;

    if ( !client )
    {
        client = client_create(cli_id);
        if ( !client )
            return -ENOMEM;
    }
    for ( i = 0; i < nr; i++ )
    {
        xen_tmem_pool_info_t pool;

        if ( __copy_from_guest_offset(&pool, pools, i, 1) )
            return -EFAULT;

        if ( pool.n_pages )
            return -EINVAL;

        rc = do_tmem_new_pool(cli_id, pool.id, pool.flags.raw,
                              pool.uuid[0], pool.uuid[1]);
        if ( rc < 0 )
            break;

        pool.id = rc;
        if ( __copy_to_guest_offset(pools, i, &pool, 1) )
            return -EFAULT;
    }

    /* Return the number of pools processed. */
    return rc ? : i;
}

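/*
 * SET_AUTH handler (TMEM_AUTH): authorize (or deauthorize) the client for
 * each shared-pool UUID in the guest array.  Returns the number of
 * entries processed, or a negative errno value.
 */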
static int tmemc_auth_pools(int cli_id,
                            XEN_GUEST_HANDLE(xen_tmem_pool_info_t) pools,
                            uint32_t len)
{
    unsigned int i;
    int rc = 0;
    unsigned int nr = len / sizeof(xen_tmem_pool_info_t);
    struct client *client = tmem_client_from_cli_id(cli_id);

    if ( len % sizeof(xen_tmem_pool_info_t) )
        return -EINVAL;

    if ( nr > MAX_POOLS_PER_DOMAIN )
        return -E2BIG;

    if ( !guest_handle_okay(pools, nr) )
        return -EINVAL;

    if ( !client )
    {
        client = client_create(cli_id);
        if ( !client )
            return -ENOMEM;
    }

    for ( i = 0; i < nr; i++ )
    {
        xen_tmem_pool_info_t pool;

        if ( __copy_from_guest_offset(&pool, pools, i, 1) )
            return -EFAULT;

        if ( pool.n_pages )
            return -EINVAL;

        rc = tmemc_shared_pool_auth(cli_id, pool.uuid[0], pool.uuid[1],
                                    pool.flags.u.auth);
        if ( rc < 0 )
            break;
    }

    /* Return the number of entries processed. */
    return rc ? : i;
}

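/*
 * Entry point for the tmem sysctl control operations: dispatch the
 * command to the appropriate handler under the tmem write lock.
 */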
int tmem_control(struct xen_sysctl_tmem_op *op)
{
    int ret;
    uint32_t cmd = op->cmd;

    if ( op->pad != 0 )
        return -EINVAL;

    write_lock(&tmem_rwlock);

    switch ( cmd )
    {
    case XEN_SYSCTL_TMEM_OP_THAW:
    case XEN_SYSCTL_TMEM_OP_FREEZE:
    case XEN_SYSCTL_TMEM_OP_DESTROY:
        ret = tmemc_freeze_pools(op->cli_id, cmd);
        break;
    case XEN_SYSCTL_TMEM_OP_FLUSH:
        ret = tmemc_flush_mem(op->cli_id, op->arg);
        break;
    case XEN_SYSCTL_TMEM_OP_LIST:
        ret = tmemc_list(op->cli_id,
                         guest_handle_cast(op->u.buf, char), op->len, op->arg);
        break;
    case XEN_SYSCTL_TMEM_OP_SET_CLIENT_INFO:
        ret = tmemc_set_client_info(op->cli_id, op->u.client);
        break;
    case XEN_SYSCTL_TMEM_OP_QUERY_FREEABLE_MB:
        ret = tmem_freeable_pages() >> (20 - PAGE_SHIFT);
        break;
    case XEN_SYSCTL_TMEM_OP_GET_CLIENT_INFO:
        ret = tmemc_get_client_info(op->cli_id, op->u.client);
        break;
    case XEN_SYSCTL_TMEM_OP_GET_POOLS:
        ret = tmemc_get_pool(op->cli_id, op->u.pool, op->len);
        break;
    case XEN_SYSCTL_TMEM_OP_SET_POOLS: /* TMEM_RESTORE_NEW */
        ret = tmemc_set_pools(op->cli_id, op->u.pool, op->len);
        break;
    case XEN_SYSCTL_TMEM_OP_SET_AUTH: /* TMEM_AUTH */
        ret = tmemc_auth_pools(op->cli_id, op->u.pool, op->len);
        break;
    default:
        ret = do_tmem_control(op);
        break;
    }

    write_unlock(&tmem_rwlock);

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */