/******************************************************************************
 * sysctl.c
 *
 * System management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K Fraser
 */

#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/grant_table.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/current.h>
#include <xen/hypercall.h>
#include <public/sysctl.h>
#include <xen/nodemask.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#include <xen/pmstat.h>
#include <xen/livepatch.h>
#include <xen/coverage.h>

long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
{
    long ret = 0;
    int copyback = -1;
    struct xen_sysctl curop, *op = &curop;
    static DEFINE_SPINLOCK(sysctl_lock);

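    /*
     * Fetch the request from the guest and refuse to service callers built
     * against a different sysctl interface version.
     */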
    if ( copy_from_guest(op, u_sysctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_SYSCTL_INTERFACE_VERSION )
        return -EACCES;

    ret = xsm_sysctl(XSM_PRIV, op->cmd);
    if ( ret )
        return ret;

    /*
     * Trylock here avoids deadlock with an existing sysctl critical section
     * which might (for some current or future reason) want to synchronise
     * with this vcpu.
     */
    while ( !spin_trylock(&sysctl_lock) )
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_sysctl, "h", u_sysctl);

    switch ( op->cmd )
    {
    case XEN_SYSCTL_readconsole:
        ret = xsm_readconsole(XSM_HOOK, op->u.readconsole.clear);
        if ( ret )
            break;

        ret = read_console_ring(&op->u.readconsole);
        break;

    case XEN_SYSCTL_tbuf_op:
        ret = tb_control(&op->u.tbuf_op);
        break;

    case XEN_SYSCTL_sched_id:
        op->u.sched_id.sched_id = scheduler_id();
        break;

    case XEN_SYSCTL_getdomaininfolist:
    {
        struct domain *d;
        struct xen_domctl_getdomaininfo info;
        u32 num_domains = 0;

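        /*
         * Walk the domain list under the RCU read lock, copying one info
         * record per domain the caller is permitted to see, starting at
         * first_domain and stopping after max_domains entries.
         */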
        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id < op->u.getdomaininfolist.first_domain )
                continue;
            if ( num_domains == op->u.getdomaininfolist.max_domains )
                break;

            if ( xsm_getdomaininfo(XSM_XS_PRIV, d) )
                continue;

            getdomaininfo(d, &info);

            if ( copy_to_guest_offset(op->u.getdomaininfolist.buffer,
                                      num_domains, &info, 1) )
            {
                ret = -EFAULT;
                break;
            }

            num_domains++;
        }

        rcu_read_unlock(&domlist_read_lock);

        if ( ret != 0 )
            break;

        op->u.getdomaininfolist.num_domains = num_domains;
    }
    break;

#ifdef CONFIG_PERF_COUNTERS
    case XEN_SYSCTL_perfc_op:
        ret = perfc_control(&op->u.perfc_op);
        break;
#endif

#ifdef CONFIG_DEBUG_LOCK_PROFILE
    case XEN_SYSCTL_lockprof_op:
        ret = spinlock_profile_control(&op->u.lockprof_op);
        break;
#endif
    case XEN_SYSCTL_debug_keys:
    {
        char c;
        uint32_t i;

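        /* Feed each key to Xen's debug-key handler, as if typed at the console. */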
        ret = -EFAULT;
        for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
        {
            if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
                goto out;
            handle_keypress(c, false);
        }
        ret = 0;
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_getcpuinfo:
    {
        uint32_t i, nr_cpus;
        struct xen_sysctl_cpuinfo cpuinfo = { 0 };

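        /*
         * Clamp the request to the number of CPU slots Xen tracks; only the
         * per-CPU idle time is reported here.
         */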
        nr_cpus = min(op->u.getcpuinfo.max_cpus, nr_cpu_ids);

        ret = -EFAULT;
        for ( i = 0; i < nr_cpus; i++ )
        {
            cpuinfo.idletime = get_cpu_idle_time(i);

            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
                goto out;
        }

        op->u.getcpuinfo.nr_cpus = i;
        ret = 0;
    }
    break;

    case XEN_SYSCTL_availheap:
        op->u.availheap.avail_bytes = avail_domheap_pages_region(
            op->u.availheap.node,
            op->u.availheap.min_bitwidth,
            op->u.availheap.max_bitwidth);
        op->u.availheap.avail_bytes <<= PAGE_SHIFT;
        break;

#ifdef CONFIG_PM_STATS
    case XEN_SYSCTL_get_pmstat:
        ret = do_get_pm_info(&op->u.get_pmstat);
        break;
#endif

#ifdef CONFIG_PM_OP
    case XEN_SYSCTL_pm_op:
        ret = do_pm_op(&op->u.pm_op);
        if ( ret == -EAGAIN )
            copyback = 1;
        break;
#endif

    case XEN_SYSCTL_page_offline_op:
    {
        uint32_t *status, *ptr;
        mfn_t mfn;

        ret = -EINVAL;
        if ( op->u.page_offline.end < op->u.page_offline.start )
            break;

        ret = xsm_page_offline(XSM_HOOK, op->u.page_offline.cmd);
        if ( ret )
            break;

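        /* One status word is returned per page in the requested MFN range. */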
        ptr = status = xmalloc_array(uint32_t,
                                     (op->u.page_offline.end -
                                      op->u.page_offline.start + 1));
        if ( !status )
        {
            dprintk(XENLOG_WARNING, "Out of memory for page offline op\n");
            ret = -ENOMEM;
            break;
        }

        memset(status, PG_OFFLINE_INVALID, sizeof(uint32_t) *
                      (op->u.page_offline.end - op->u.page_offline.start + 1));

        for ( mfn = _mfn(op->u.page_offline.start);
              mfn_x(mfn) <= op->u.page_offline.end;
              mfn = mfn_add(mfn, 1) )
        {
            switch ( op->u.page_offline.cmd )
            {
                /* Should we revert here on failure, or leave that to the caller? */
                case sysctl_page_offline:
                    ret = offline_page(mfn, 0, ptr++);
                    break;
                case sysctl_page_online:
                    ret = online_page(mfn, ptr++);
                    break;
                case sysctl_query_page_offline:
                    ret = query_page_offline(mfn, ptr++);
                    break;
                default:
                    ret = -EINVAL;
                    break;
            }

            if ( ret )
                break;
        }

        if ( copy_to_guest(
                 op->u.page_offline.status, status,
                 op->u.page_offline.end - op->u.page_offline.start + 1) )
            ret = -EFAULT;

        xfree(status);
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_cpupool_op:
        ret = cpupool_do_sysctl(&op->u.cpupool_op);
        break;

    case XEN_SYSCTL_scheduler_op:
        ret = sched_adjust_global(&op->u.scheduler_op);
        break;

    case XEN_SYSCTL_physinfo:
    {
        struct xen_sysctl_physinfo *pi = &op->u.physinfo;

        memset(pi, 0, sizeof(*pi));
        pi->threads_per_core =
            cpumask_weight(per_cpu(cpu_sibling_mask, 0));
        pi->cores_per_socket =
            cpumask_weight(per_cpu(cpu_core_mask, 0)) / pi->threads_per_core;
        pi->nr_cpus = num_online_cpus();
        pi->nr_nodes = num_online_nodes();
        pi->max_node_id = MAX_NUMNODES-1;
        pi->max_cpu_id = nr_cpu_ids - 1;
        pi->total_pages = total_pages;
        /* Protected by lock */
        get_outstanding_claims(&pi->free_pages, &pi->outstanding_pages);
        pi->scrub_pages = 0;
        pi->cpu_khz = cpu_khz;
        pi->max_mfn = get_upper_mfn_bound();
        arch_do_physinfo(pi);
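        /* Report optional host features as XEN_SYSCTL_PHYSCAP_* capability bits. */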
        if ( iommu_enabled )
        {
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_directio;
            if ( iommu_hap_pt_share )
                pi->capabilities |= XEN_SYSCTL_PHYSCAP_iommu_hap_pt_share;
        }
        if ( vmtrace_available )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_vmtrace;

        if ( vpmu_is_available )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_vpmu;

        if ( opt_gnttab_max_version >= 1 )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_gnttab_v1;
        if ( opt_gnttab_max_version >= 2 )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_gnttab_v2;

        if ( copy_to_guest(u_sysctl, op, 1) )
            ret = -EFAULT;
    }
    break;

    case XEN_SYSCTL_numainfo:
    {
        unsigned int i, j, num_nodes;
        struct xen_sysctl_numainfo *ni = &op->u.numainfo;
        bool do_meminfo = !guest_handle_is_null(ni->meminfo);
        bool do_distance = !guest_handle_is_null(ni->distance);

        num_nodes = last_node(node_online_map) + 1;

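        /*
         * NULL meminfo and distance handles mean the caller only wants the
         * number of nodes reported back.
         */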
        if ( do_meminfo || do_distance )
        {
            struct xen_sysctl_meminfo meminfo = { };

            if ( num_nodes > ni->num_nodes )
                num_nodes = ni->num_nodes;
            for ( i = 0; i < num_nodes; ++i )
            {
                static uint32_t distance[MAX_NUMNODES];

                if ( do_meminfo )
                {
                    if ( node_online(i) )
                    {
                        meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT;
                        meminfo.memfree = avail_node_heap_pages(i) << PAGE_SHIFT;
                    }
                    else
                        meminfo.memsize = meminfo.memfree = XEN_INVALID_MEM_SZ;

                    if ( copy_to_guest_offset(ni->meminfo, i, &meminfo, 1) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }

                if ( do_distance )
                {
                    for ( j = 0; j < num_nodes; j++ )
                    {
                        distance[j] = __node_distance(i, j);
                        if ( distance[j] == NUMA_NO_DISTANCE )
                            distance[j] = XEN_INVALID_NODE_DIST;
                    }

                    if ( copy_to_guest_offset(ni->distance, i * num_nodes,
                                              distance, num_nodes) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }
            }
        }
        else
            i = num_nodes;

        if ( !ret && (ni->num_nodes != i) )
        {
            ni->num_nodes = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.numainfo.num_nodes) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

    case XEN_SYSCTL_cputopoinfo:
    {
        unsigned int i, num_cpus;
        struct xen_sysctl_cputopoinfo *ti = &op->u.cputopoinfo;

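        /* As for numainfo, a NULL handle requests only the count of present CPUs. */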
        num_cpus = cpumask_last(&cpu_present_map) + 1;
        if ( !guest_handle_is_null(ti->cputopo) )
        {
            struct xen_sysctl_cputopo cputopo = { };

            if ( num_cpus > ti->num_cpus )
                num_cpus = ti->num_cpus;
            for ( i = 0; i < num_cpus; ++i )
            {
                if ( cpu_present(i) )
                {
                    cputopo.core = cpu_to_core(i);
                    cputopo.socket = cpu_to_socket(i);
                    cputopo.node = cpu_to_node(i);
                    if ( cputopo.node == NUMA_NO_NODE )
                        cputopo.node = XEN_INVALID_NODE_ID;
                }
                else
                {
                    cputopo.core = XEN_INVALID_CORE_ID;
                    cputopo.socket = XEN_INVALID_SOCKET_ID;
                    cputopo.node = XEN_INVALID_NODE_ID;
                }

                if ( copy_to_guest_offset(ti->cputopo, i, &cputopo, 1) )
                {
                    ret = -EFAULT;
                    break;
                }
            }
        }
        else
            i = num_cpus;

        if ( !ret && (ti->num_cpus != i) )
        {
            ti->num_cpus = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.cputopoinfo.num_cpus) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

    case XEN_SYSCTL_coverage_op:
        ret = sysctl_cov_op(&op->u.coverage_op);
        copyback = 1;
        break;

#ifdef CONFIG_HAS_PCI
    case XEN_SYSCTL_pcitopoinfo:
    {
        struct xen_sysctl_pcitopoinfo *ti = &op->u.pcitopoinfo;
        unsigned int i = 0;

        if ( guest_handle_is_null(ti->devs) ||
             guest_handle_is_null(ti->nodes) )
        {
            ret = -EINVAL;
            break;
        }

        while ( i < ti->num_devs )
        {
            physdev_pci_device_t dev;
            uint32_t node;
            const struct pci_dev *pdev;

            if ( copy_from_guest_offset(&dev, ti->devs, i, 1) )
            {
                ret = -EFAULT;
                break;
            }

            pcidevs_lock();
            pdev = pci_get_pdev(NULL, PCI_SBDF(dev.seg, dev.bus, dev.devfn));
            if ( !pdev )
                node = XEN_INVALID_DEV;
            else if ( pdev->node == NUMA_NO_NODE )
                node = XEN_INVALID_NODE_ID;
            else
                node = pdev->node;
            pcidevs_unlock();

            if ( copy_to_guest_offset(ti->nodes, i, &node, 1) )
            {
                ret = -EFAULT;
                break;
            }

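            /*
             * Avoid long-running loops: once 64 devices have been handled,
             * drop out early if preemption is pending; num_devs below tells
             * the caller how far we got.
             */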
            if ( (++i > 0x3f) && hypercall_preempt_check() )
                break;
        }

        if ( !ret && (ti->num_devs != i) )
        {
            ti->num_devs = i;
            if ( __copy_field_to_guest(u_sysctl, op, u.pcitopoinfo.num_devs) )
                ret = -EFAULT;
        }
        break;
    }
#endif

    case XEN_SYSCTL_livepatch_op:
        ret = livepatch_op(&op->u.livepatch);
        if ( ret != -ENOSYS && ret != -EOPNOTSUPP )
            copyback = 1;
        break;

    default:
        ret = arch_do_sysctl(op, u_sysctl);
        copyback = 0;
        break;
    }

 out:
    spin_unlock(&sysctl_lock);

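    /*
     * copyback semantics: -1 (the default) copies the possibly-updated op
     * back to the guest only on success, 1 forces a copy even on error, and
     * 0 suppresses the copy entirely.
     */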
    if ( copyback && (!ret || copyback > 0) &&
         __copy_to_guest(u_sysctl, op, 1) )
        ret = -EFAULT;

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */