/******************************************************************************
 * sysctl.c
 *
 * System management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K Fraser
 */

#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/grant_table.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/current.h>
#include <xen/hypercall.h>
#include <public/sysctl.h>
#include <xen/nodemask.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#include <xen/pmstat.h>
#include <xen/livepatch.h>
#include <xen/coverage.h>

long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
{
    long ret = 0;
    int copyback = -1;
    struct xen_sysctl curop, *op = &curop;
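    /* Serialises all sysctl operations against one another. */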
    static DEFINE_SPINLOCK(sysctl_lock);

    if ( copy_from_guest(op, u_sysctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_SYSCTL_INTERFACE_VERSION )
        return -EACCES;

    ret = xsm_sysctl(XSM_PRIV, op->cmd);
    if ( ret )
        return ret;

    /*
     * Trylock here avoids deadlock with an existing sysctl critical section
     * which might (for some current or future reason) want to synchronise
     * with this vcpu.
     */
    while ( !spin_trylock(&sysctl_lock) )
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_sysctl, "h", u_sysctl);

    switch ( op->cmd )
    {
    case XEN_SYSCTL_readconsole:
        ret = xsm_readconsole(XSM_HOOK, op->u.readconsole.clear);
        if ( ret )
            break;

        ret = read_console_ring(&op->u.readconsole);
        break;

    case XEN_SYSCTL_tbuf_op:
        ret = tb_control(&op->u.tbuf_op);
        break;

    case XEN_SYSCTL_sched_id:
        op->u.sched_id.sched_id = scheduler_id();
        break;

    case XEN_SYSCTL_getdomaininfolist:
    {
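        /*
         * Walk the domain list under the RCU read lock, reporting up to
         * max_domains domains with IDs at or above first_domain.
         */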
        struct domain *d;
        struct xen_domctl_getdomaininfo info;
        u32 num_domains = 0;

        rcu_read_lock(&domlist_read_lock);

        for_each_domain ( d )
        {
            if ( d->domain_id < op->u.getdomaininfolist.first_domain )
                continue;
            if ( num_domains == op->u.getdomaininfolist.max_domains )
                break;

            if ( xsm_getdomaininfo(XSM_XS_PRIV, d) )
                continue;

            getdomaininfo(d, &info);

            if ( copy_to_guest_offset(op->u.getdomaininfolist.buffer,
                                      num_domains, &info, 1) )
            {
                ret = -EFAULT;
                break;
            }

            num_domains++;
        }

        rcu_read_unlock(&domlist_read_lock);

        if ( ret != 0 )
            break;

        op->u.getdomaininfolist.num_domains = num_domains;
    }
    break;

#ifdef CONFIG_PERF_COUNTERS
    case XEN_SYSCTL_perfc_op:
        ret = perfc_control(&op->u.perfc_op);
        break;
#endif

#ifdef CONFIG_DEBUG_LOCK_PROFILE
    case XEN_SYSCTL_lockprof_op:
        ret = spinlock_profile_control(&op->u.lockprof_op);
        break;
#endif
    case XEN_SYSCTL_debug_keys:
    {
        char c;
        uint32_t i;

        ret = -EFAULT;
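        /* Inject each requested key into Xen's debug-key handler. */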
        for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
        {
            if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
                goto out;
            handle_keypress(c, false);
        }
        ret = 0;
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_getcpuinfo:
    {
        uint32_t i, nr_cpus;
        struct xen_sysctl_cpuinfo cpuinfo = { 0 };

        nr_cpus = min(op->u.getcpuinfo.max_cpus, nr_cpu_ids);

        ret = -EFAULT;
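        /* Report the accumulated idle time for each of the first nr_cpus CPUs. */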
        for ( i = 0; i < nr_cpus; i++ )
        {
            cpuinfo.idletime = get_cpu_idle_time(i);

            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
                goto out;
        }

        op->u.getcpuinfo.nr_cpus = i;
        ret = 0;
    }
    break;

    case XEN_SYSCTL_availheap:
        op->u.availheap.avail_bytes = avail_domheap_pages_region(
            op->u.availheap.node,
            op->u.availheap.min_bitwidth,
            op->u.availheap.max_bitwidth);
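        /* avail_domheap_pages_region() returns a page count; convert it to bytes. */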
        op->u.availheap.avail_bytes <<= PAGE_SHIFT;
        break;

#ifdef CONFIG_PM_STATS
    case XEN_SYSCTL_get_pmstat:
        ret = do_get_pm_info(&op->u.get_pmstat);
        break;
#endif

#ifdef CONFIG_PM_OP
    case XEN_SYSCTL_pm_op:
        ret = do_pm_op(&op->u.pm_op);
        if ( ret == -EAGAIN )
            copyback = 1;
        break;
#endif

    case XEN_SYSCTL_page_offline_op:
    {
        uint32_t *status, *ptr;
        mfn_t mfn;

        ret = -EINVAL;
        if ( op->u.page_offline.end < op->u.page_offline.start )
            break;

        ret = xsm_page_offline(XSM_HOOK, op->u.page_offline.cmd);
        if ( ret )
            break;

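        /* One status word is returned for every page in [start, end]. */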
        ptr = status = xmalloc_array(uint32_t,
                                     (op->u.page_offline.end -
                                      op->u.page_offline.start + 1));
        if ( !status )
        {
            dprintk(XENLOG_WARNING, "Out of memory for page offline op\n");
            ret = -ENOMEM;
            break;
        }

        memset(status, PG_OFFLINE_INVALID, sizeof(uint32_t) *
               (op->u.page_offline.end - op->u.page_offline.start + 1));

        for ( mfn = _mfn(op->u.page_offline.start);
              mfn_x(mfn) <= op->u.page_offline.end;
              mfn = mfn_add(mfn, 1) )
        {
            switch ( op->u.page_offline.cmd )
            {
            /* Should we revert here on failure, or leave that to the caller? */
            case sysctl_page_offline:
                ret = offline_page(mfn, 0, ptr++);
                break;
            case sysctl_page_online:
                ret = online_page(mfn, ptr++);
                break;
            case sysctl_query_page_offline:
                ret = query_page_offline(mfn, ptr++);
                break;
            default:
                ret = -EINVAL;
                break;
            }

            if ( ret )
                break;
        }

        if ( copy_to_guest(
                 op->u.page_offline.status, status,
                 op->u.page_offline.end - op->u.page_offline.start + 1) )
            ret = -EFAULT;

        xfree(status);
        copyback = 0;
    }
    break;

    case XEN_SYSCTL_cpupool_op:
        ret = cpupool_do_sysctl(&op->u.cpupool_op);
        break;

    case XEN_SYSCTL_scheduler_op:
        ret = sched_adjust_global(&op->u.scheduler_op);
        break;

    case XEN_SYSCTL_physinfo:
    {
        struct xen_sysctl_physinfo *pi = &op->u.physinfo;

        memset(pi, 0, sizeof(*pi));
        pi->threads_per_core =
            cpumask_weight(per_cpu(cpu_sibling_mask, 0));
        pi->cores_per_socket =
            cpumask_weight(per_cpu(cpu_core_mask, 0)) / pi->threads_per_core;
        pi->nr_cpus = num_online_cpus();
        pi->nr_nodes = num_online_nodes();
        pi->max_node_id = MAX_NUMNODES-1;
        pi->max_cpu_id = nr_cpu_ids - 1;
        pi->total_pages = total_pages;
        /* Both values are read consistently under the heap lock. */
        get_outstanding_claims(&pi->free_pages, &pi->outstanding_pages);
        pi->scrub_pages = 0;
        pi->cpu_khz = cpu_khz;
        pi->max_mfn = get_upper_mfn_bound();
        arch_do_physinfo(pi);
        if ( iommu_enabled )
        {
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_directio;
            if ( iommu_hap_pt_share )
                pi->capabilities |= XEN_SYSCTL_PHYSCAP_iommu_hap_pt_share;
        }
        if ( vmtrace_available )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_vmtrace;

        if ( vpmu_is_available )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_vpmu;

        if ( opt_gnttab_max_version >= 1 )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_gnttab_v1;
        if ( opt_gnttab_max_version >= 2 )
            pi->capabilities |= XEN_SYSCTL_PHYSCAP_gnttab_v2;

        if ( copy_to_guest(u_sysctl, op, 1) )
            ret = -EFAULT;
    }
    break;

    case XEN_SYSCTL_numainfo:
    {
        unsigned int i, j, num_nodes;
        struct xen_sysctl_numainfo *ni = &op->u.numainfo;
        bool do_meminfo = !guest_handle_is_null(ni->meminfo);
        bool do_distance = !guest_handle_is_null(ni->distance);

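        /*
         * A NULL guest handle means the corresponding array is not wanted;
         * with both handles NULL only the node count is reported back.
         */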
        num_nodes = last_node(node_online_map) + 1;

        if ( do_meminfo || do_distance )
        {
            struct xen_sysctl_meminfo meminfo = { };

            if ( num_nodes > ni->num_nodes )
                num_nodes = ni->num_nodes;
            for ( i = 0; i < num_nodes; ++i )
            {
                static uint32_t distance[MAX_NUMNODES];

                if ( do_meminfo )
                {
                    if ( node_online(i) )
                    {
                        meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT;
                        meminfo.memfree = avail_node_heap_pages(i) << PAGE_SHIFT;
                    }
                    else
                        meminfo.memsize = meminfo.memfree = XEN_INVALID_MEM_SZ;

                    if ( copy_to_guest_offset(ni->meminfo, i, &meminfo, 1) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }

                if ( do_distance )
                {
                    for ( j = 0; j < num_nodes; j++ )
                    {
                        distance[j] = __node_distance(i, j);
                        if ( distance[j] == NUMA_NO_DISTANCE )
                            distance[j] = XEN_INVALID_NODE_DIST;
                    }

                    if ( copy_to_guest_offset(ni->distance, i * num_nodes,
                                              distance, num_nodes) )
                    {
                        ret = -EFAULT;
                        break;
                    }
                }
            }
        }
        else
            i = num_nodes;

        if ( !ret && (ni->num_nodes != i) )
        {
            ni->num_nodes = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.numainfo.num_nodes) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

    case XEN_SYSCTL_cputopoinfo:
    {
        unsigned int i, num_cpus;
        struct xen_sysctl_cputopoinfo *ti = &op->u.cputopoinfo;

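        /* A NULL cputopo handle means only the CPU count is wanted. */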
        num_cpus = cpumask_last(&cpu_present_map) + 1;
        if ( !guest_handle_is_null(ti->cputopo) )
        {
            struct xen_sysctl_cputopo cputopo = { };

            if ( num_cpus > ti->num_cpus )
                num_cpus = ti->num_cpus;
            for ( i = 0; i < num_cpus; ++i )
            {
                if ( cpu_present(i) )
                {
                    cputopo.core = cpu_to_core(i);
                    cputopo.socket = cpu_to_socket(i);
                    cputopo.node = cpu_to_node(i);
                    if ( cputopo.node == NUMA_NO_NODE )
                        cputopo.node = XEN_INVALID_NODE_ID;
                }
                else
                {
                    cputopo.core = XEN_INVALID_CORE_ID;
                    cputopo.socket = XEN_INVALID_SOCKET_ID;
                    cputopo.node = XEN_INVALID_NODE_ID;
                }

                if ( copy_to_guest_offset(ti->cputopo, i, &cputopo, 1) )
                {
                    ret = -EFAULT;
                    break;
                }
            }
        }
        else
            i = num_cpus;

        if ( !ret && (ti->num_cpus != i) )
        {
            ti->num_cpus = i;
            if ( __copy_field_to_guest(u_sysctl, op,
                                       u.cputopoinfo.num_cpus) )
            {
                ret = -EFAULT;
                break;
            }
        }
    }
    break;

    case XEN_SYSCTL_coverage_op:
        ret = sysctl_cov_op(&op->u.coverage_op);
        copyback = 1;
        break;

#ifdef CONFIG_HAS_PCI
    case XEN_SYSCTL_pcitopoinfo:
    {
        struct xen_sysctl_pcitopoinfo *ti = &op->u.pcitopoinfo;
        unsigned int i = 0;

        if ( guest_handle_is_null(ti->devs) ||
             guest_handle_is_null(ti->nodes) )
        {
            ret = -EINVAL;
            break;
        }

        while ( i < ti->num_devs )
        {
            physdev_pci_device_t dev;
            uint32_t node;
            const struct pci_dev *pdev;

            if ( copy_from_guest_offset(&dev, ti->devs, i, 1) )
            {
                ret = -EFAULT;
                break;
            }

            pcidevs_lock();
            pdev = pci_get_pdev(NULL, PCI_SBDF(dev.seg, dev.bus, dev.devfn));
            if ( !pdev )
                node = XEN_INVALID_DEV;
            else if ( pdev->node == NUMA_NO_NODE )
                node = XEN_INVALID_NODE_ID;
            else
                node = pdev->node;
            pcidevs_unlock();

            if ( copy_to_guest_offset(ti->nodes, i, &node, 1) )
            {
                ret = -EFAULT;
                break;
            }

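            /*
             * Voluntarily preempt after every 64 devices: bail out and
             * report how many entries were processed via num_devs below.
             */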
            if ( (++i > 0x3f) && hypercall_preempt_check() )
                break;
        }

        if ( !ret && (ti->num_devs != i) )
        {
            ti->num_devs = i;
            if ( __copy_field_to_guest(u_sysctl, op, u.pcitopoinfo.num_devs) )
                ret = -EFAULT;
        }
        break;
    }
#endif

    case XEN_SYSCTL_livepatch_op:
        ret = livepatch_op(&op->u.livepatch);
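        /* Copy the (possibly updated) payload back whenever the sub-op was handled. */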
        if ( ret != -ENOSYS && ret != -EOPNOTSUPP )
            copyback = 1;
        break;

    default:
        ret = arch_do_sysctl(op, u_sysctl);
        copyback = 0;
        break;
    }

 out:
    spin_unlock(&sysctl_lock);

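    /*
     * copyback < 0 (the default): copy the op back only on success;
     * copyback == 0: never copy back; copyback > 0: copy back even on error.
     */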
    if ( copyback && (!ret || copyback > 0) &&
         __copy_to_guest(u_sysctl, op, 1) )
        ret = -EFAULT;

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */