/******************************************************************************
 * xc_cpuid_x86.c
 *
 * Compute cpuid of a domain.
 *
 * Copyright (c) 2008, Citrix Systems, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdlib.h>
#include <stdbool.h>
#include <limits.h>
#include "xc_private.h"
#include "xc_bitops.h"
#include <xen/hvm/params.h>
#include <xen-tools/libs.h>

enum {
#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
#include <xen/arch-x86/cpufeatureset.h>
};
#include "_xc_cpuid_autogen.h"

#define bitmaskof(idx)      (1u << ((idx) & 31))
#define featureword_of(idx) ((idx) >> 5)
#define clear_feature(idx, dst) ((dst) &= ~bitmaskof(idx))
#define set_feature(idx, dst)   ((dst) |=  bitmaskof(idx))
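
/*
 * The X86_FEATURE_* values are linear bit indices into the featureset
 * array.  For example, feature index 34 lives in featureset word
 * featureword_of(34) == 1, at bit position (34 & 31) == 2, so
 * bitmaskof(34) == (1u << 2).
 */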

#define DEF_MAX_BASE 0x0000000du
#define DEF_MAX_INTELEXT  0x80000008u
#define DEF_MAX_AMDEXT    0x8000001cu

int xc_get_cpu_levelling_caps(xc_interface *xch, uint32_t *caps)
{
    DECLARE_SYSCTL;
    int ret;

    sysctl.cmd = XEN_SYSCTL_get_cpu_levelling_caps;
    ret = do_sysctl(xch, &sysctl);

    if ( !ret )
        *caps = sysctl.u.cpu_levelling_caps.caps;

    return ret;
}

int xc_get_cpu_featureset(xc_interface *xch, uint32_t index,
                          uint32_t *nr_features, uint32_t *featureset)
{
    DECLARE_SYSCTL;
    DECLARE_HYPERCALL_BOUNCE(featureset,
                             *nr_features * sizeof(*featureset),
                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
    int ret;

    if ( xc_hypercall_bounce_pre(xch, featureset) )
        return -1;

    sysctl.cmd = XEN_SYSCTL_get_cpu_featureset;
    sysctl.u.cpu_featureset.index = index;
    sysctl.u.cpu_featureset.nr_features = *nr_features;
    set_xen_guest_handle(sysctl.u.cpu_featureset.features, featureset);

    ret = do_sysctl(xch, &sysctl);

    xc_hypercall_bounce_post(xch, featureset);

    if ( !ret )
        *nr_features = sysctl.u.cpu_featureset.nr_features;

    return ret;
}
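
/*
 * Minimal usage sketch for the interfaces above (illustrative only; error
 * handling elided).  A caller sizes the buffer with
 * xc_get_cpu_featureset_size() before asking for one of the calculated
 * featuresets, e.g. the PV one:
 *
 *     uint32_t nr = xc_get_cpu_featureset_size();
 *     uint32_t *fs = calloc(nr, sizeof(*fs));
 *
 *     rc = xc_get_cpu_featureset(xch, XEN_SYSCTL_cpu_featureset_pv, &nr, fs);
 *
 * On success, nr is updated to the number of entries Xen actually wrote.
 */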

uint32_t xc_get_cpu_featureset_size(void)
{
    return FEATURESET_NR_ENTRIES;
}

const uint32_t *xc_get_static_cpu_featuremask(
    enum xc_static_cpu_featuremask mask)
{
    const static uint32_t known[FEATURESET_NR_ENTRIES] = INIT_KNOWN_FEATURES,
        special[FEATURESET_NR_ENTRIES] = INIT_SPECIAL_FEATURES,
        pv[FEATURESET_NR_ENTRIES] = INIT_PV_FEATURES,
        hvm_shadow[FEATURESET_NR_ENTRIES] = INIT_HVM_SHADOW_FEATURES,
        hvm_hap[FEATURESET_NR_ENTRIES] = INIT_HVM_HAP_FEATURES,
        deep_features[FEATURESET_NR_ENTRIES] = INIT_DEEP_FEATURES;

    BUILD_BUG_ON(ARRAY_SIZE(known) != FEATURESET_NR_ENTRIES);
    BUILD_BUG_ON(ARRAY_SIZE(special) != FEATURESET_NR_ENTRIES);
    BUILD_BUG_ON(ARRAY_SIZE(pv) != FEATURESET_NR_ENTRIES);
    BUILD_BUG_ON(ARRAY_SIZE(hvm_shadow) != FEATURESET_NR_ENTRIES);
    BUILD_BUG_ON(ARRAY_SIZE(hvm_hap) != FEATURESET_NR_ENTRIES);
    BUILD_BUG_ON(ARRAY_SIZE(deep_features) != FEATURESET_NR_ENTRIES);

    switch ( mask )
    {
    case XC_FEATUREMASK_KNOWN:
        return known;

    case XC_FEATUREMASK_SPECIAL:
        return special;

    case XC_FEATUREMASK_PV:
        return pv;

    case XC_FEATUREMASK_HVM_SHADOW:
        return hvm_shadow;

    case XC_FEATUREMASK_HVM_HAP:
        return hvm_hap;

    case XC_FEATUREMASK_DEEP_FEATURES:
        return deep_features;

    default:
        return NULL;
    }
}

const uint32_t *xc_get_feature_deep_deps(uint32_t feature)
{
    static const struct {
        uint32_t feature;
        uint32_t fs[FEATURESET_NR_ENTRIES];
    } deep_deps[] = INIT_DEEP_DEPS;

    unsigned int start = 0, end = ARRAY_SIZE(deep_deps);

    BUILD_BUG_ON(ARRAY_SIZE(deep_deps) != NR_DEEP_DEPS);

    /* deep_deps[] is sorted.  Perform a binary search. */
    while ( start < end )
    {
        unsigned int mid = start + ((end - start) / 2);

        if ( deep_deps[mid].feature > feature )
            end = mid;
        else if ( deep_deps[mid].feature < feature )
            start = mid + 1;
        else
            return deep_deps[mid].fs;
    }

    return NULL;
}

struct cpuid_domain_info
{
    enum
    {
        VENDOR_UNKNOWN,
        VENDOR_INTEL,
        VENDOR_AMD,
    } vendor;

    bool hvm;
    uint64_t xfeature_mask;

    uint32_t *featureset;
    unsigned int nr_features;

    /* PV-only information. */
    bool pv64;

    /* HVM-only information. */
    bool pae;
    bool nestedhvm;
};

static void cpuid(const unsigned int *input, unsigned int *regs)
{
    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
#ifdef __i386__
    /* Use the stack to avoid reg constraint failures with some gcc flags */
    asm (
        "push %%ebx; push %%edx\n\t"
        "cpuid\n\t"
        "mov %%ebx,4(%4)\n\t"
        "mov %%edx,12(%4)\n\t"
        "pop %%edx; pop %%ebx\n\t"
        : "=a" (regs[0]), "=c" (regs[2])
        : "0" (input[0]), "1" (count), "S" (regs)
        : "memory" );
#else
    asm (
        "cpuid"
        : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
        : "0" (input[0]), "2" (count) );
#endif
}

static int get_cpuid_domain_info(xc_interface *xch, uint32_t domid,
                                 struct cpuid_domain_info *info,
                                 uint32_t *featureset,
                                 unsigned int nr_features)
{
    struct xen_domctl domctl = {};
    xc_dominfo_t di;
    unsigned int in[2] = { 0, ~0U }, regs[4];
    unsigned int i, host_nr_features = xc_get_cpu_featureset_size();
    int rc;

    cpuid(in, regs);
    if ( regs[1] == 0x756e6547U &&      /* "GenuineIntel" */
         regs[2] == 0x6c65746eU &&
         regs[3] == 0x49656e69U )
        info->vendor = VENDOR_INTEL;
    else if ( regs[1] == 0x68747541U && /* "AuthenticAMD" */
              regs[2] == 0x444d4163U &&
              regs[3] == 0x69746e65U )
        info->vendor = VENDOR_AMD;
    else
        info->vendor = VENDOR_UNKNOWN;

    if ( xc_domain_getinfo(xch, domid, 1, &di) != 1 ||
         di.domid != domid )
        return -ESRCH;

    info->hvm = di.hvm;

    info->featureset = calloc(host_nr_features, sizeof(*info->featureset));
    if ( !info->featureset )
        return -ENOMEM;

    info->nr_features = host_nr_features;

    if ( featureset )
    {
        memcpy(info->featureset, featureset,
               min(host_nr_features, nr_features) * sizeof(*info->featureset));

        /* Check for truncated set bits. */
        for ( i = nr_features; i < host_nr_features; ++i )
            if ( featureset[i] != 0 )
                return -EOPNOTSUPP;
    }

    /* Get xstate information. */
    domctl.cmd = XEN_DOMCTL_getvcpuextstate;
    domctl.domain = domid;
    rc = do_domctl(xch, &domctl);
    if ( rc )
        return rc;

    info->xfeature_mask = domctl.u.vcpuextstate.xfeature_mask;

    if ( di.hvm )
    {
        uint64_t val;

        rc = xc_hvm_param_get(xch, domid, HVM_PARAM_PAE_ENABLED, &val);
        if ( rc )
            return rc;

        info->pae = !!val;

        rc = xc_hvm_param_get(xch, domid, HVM_PARAM_NESTEDHVM, &val);
        if ( rc )
            return rc;

        info->nestedhvm = !!val;

        if ( !featureset )
        {
            rc = xc_get_cpu_featureset(xch, XEN_SYSCTL_cpu_featureset_hvm,
                                       &host_nr_features, info->featureset);
            if ( rc )
                return rc;
        }
    }
    else
    {
        unsigned int width;

        rc = xc_domain_get_guest_width(xch, domid, &width);
        if ( rc )
            return rc;

        info->pv64 = (width == 8);

        if ( !featureset )
        {
            rc = xc_get_cpu_featureset(xch, XEN_SYSCTL_cpu_featureset_pv,
                                       &host_nr_features, info->featureset);
            if ( rc )
                return rc;
        }
    }

    return 0;
}

static void free_cpuid_domain_info(struct cpuid_domain_info *info)
{
    free(info->featureset);
}

static void amd_xc_cpuid_policy(xc_interface *xch,
                                const struct cpuid_domain_info *info,
                                const unsigned int *input, unsigned int *regs)
{
    switch ( input[0] )
    {
    case 0x00000002:
    case 0x00000004:
        regs[0] = regs[1] = regs[2] = 0;
        break;

    case 0x80000000:
        if ( regs[0] > DEF_MAX_AMDEXT )
            regs[0] = DEF_MAX_AMDEXT;
        break;

    case 0x80000008:
        /*
         * ECX[15:12] is ApicIdCoreSize: ECX[7:0] is NumberOfCores (minus one).
         * Update to reflect vLAPIC_ID = vCPU_ID * 2.
         */
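        /*
         * For example (illustrative arithmetic only): a host value of
         * ApicIdCoreSize = 4 and NumberOfCores-1 = 3 becomes
         * ApicIdCoreSize = 5 and NumberOfCores-1 = 7, i.e. the advertised
         * core count doubles and one extra APIC ID bit is reserved per core.
         */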
        regs[2] = ((regs[2] + (1u << 12)) & 0xf000u) |
                  ((regs[2] & 0xffu) << 1) | 1u;
        break;

    case 0x8000000a: {
        if ( !info->nestedhvm )
        {
            regs[0] = regs[1] = regs[2] = regs[3] = 0;
            break;
        }

#define SVM_FEATURE_NPT            0x00000001 /* Nested page table support */
#define SVM_FEATURE_LBRV           0x00000002 /* LBR virtualization support */
#define SVM_FEATURE_SVML           0x00000004 /* SVM locking MSR support */
#define SVM_FEATURE_NRIPS          0x00000008 /* Next RIP save on VMEXIT */
#define SVM_FEATURE_TSCRATEMSR     0x00000010 /* TSC ratio MSR support */
#define SVM_FEATURE_VMCBCLEAN      0x00000020 /* VMCB clean bits support */
#define SVM_FEATURE_FLUSHBYASID    0x00000040 /* TLB flush by ASID support */
#define SVM_FEATURE_DECODEASSISTS  0x00000080 /* Decode assists support */
#define SVM_FEATURE_PAUSEFILTER    0x00000400 /* Pause intercept filter */

        /* Pass 1: Only pass through SVM features which are
         * available in hardware and which are implemented.
         */
        regs[3] &= (SVM_FEATURE_NPT | SVM_FEATURE_LBRV | \
            SVM_FEATURE_NRIPS | SVM_FEATURE_PAUSEFILTER | \
            SVM_FEATURE_DECODEASSISTS);

        /* Pass 2: Always enable SVM features which are emulated */
        regs[3] |= SVM_FEATURE_VMCBCLEAN | SVM_FEATURE_TSCRATEMSR;
        break;
    }

    }
}

static void intel_xc_cpuid_policy(xc_interface *xch,
                                  const struct cpuid_domain_info *info,
                                  const unsigned int *input, unsigned int *regs)
{
    switch ( input[0] )
    {
    case 0x00000004:
        /*
         * EAX[31:26] is Maximum Cores Per Package (minus one).
         * Update to reflect vLAPIC_ID = vCPU_ID * 2.
         */
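        /*
         * For example (illustrative arithmetic only): a host value of 3
         * (i.e. 4 cores) in EAX[31:26] becomes 7 (8 cores), matching the
         * doubled vLAPIC_ID space; EAX[25:10] is cleared and only the cache
         * type/level bits in EAX[9:0] are preserved.
         */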
        regs[0] = (((regs[0] & 0x7c000000u) << 1) | 0x04000000u |
                   (regs[0] & 0x3ffu));
        regs[3] &= 0x3ffu;
        break;

    case 0x80000000:
        if ( regs[0] > DEF_MAX_INTELEXT )
            regs[0] = DEF_MAX_INTELEXT;
        break;

    case 0x80000005:
        regs[0] = regs[1] = regs[2] = 0;
        break;

    case 0x80000008:
        /* Mask AMD Number of Cores information. */
        regs[2] = 0;
        break;
    }
}

static void xc_cpuid_hvm_policy(xc_interface *xch,
                                const struct cpuid_domain_info *info,
                                const unsigned int *input, unsigned int *regs)
{
    switch ( input[0] )
    {
    case 0x00000000:
        if ( regs[0] > DEF_MAX_BASE )
            regs[0] = DEF_MAX_BASE;
        break;

    case 0x00000001:
        /*
         * EBX[23:16] is Maximum Logical Processors Per Package.
         * Update to reflect vLAPIC_ID = vCPU_ID * 2.
         */
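        /*
         * For example (illustrative arithmetic only): a host reporting 8
         * logical processors in EBX[23:16] is rewritten to report 16, and
         * the host's initial APIC ID in EBX[31:24] is discarded.
         */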
        regs[1] = (regs[1] & 0x0000ffffu) | ((regs[1] & 0x007f0000u) << 1);

        regs[2] = info->featureset[featureword_of(X86_FEATURE_SSE3)];
        regs[3] = (info->featureset[featureword_of(X86_FEATURE_FPU)] |
                   bitmaskof(X86_FEATURE_HTT));
        break;

    case 0x00000007: /* Intel-defined CPU features */
        if ( input[1] == 0 )
        {
            regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)];
            regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)];
            regs[3] = info->featureset[featureword_of(X86_FEATURE_AVX512_4VNNIW)];
        }
        else
        {
            regs[1] = 0;
            regs[2] = 0;
            regs[3] = 0;
        }
        regs[0] = 0;
        break;

    case 0x0000000d: /* Xen automatically calculates almost everything. */
        if ( input[1] == 1 )
            regs[0] = info->featureset[featureword_of(X86_FEATURE_XSAVEOPT)];
        else
            regs[0] = 0;
        regs[1] = regs[2] = regs[3] = 0;
        break;

    case 0x80000000:
        /* Pass through to the CPU vendor-specific functions. */
        break;

    case 0x80000001:
        regs[2] = (info->featureset[featureword_of(X86_FEATURE_LAHF_LM)] &
                   ~bitmaskof(X86_FEATURE_CMP_LEGACY));
        regs[3] = info->featureset[featureword_of(X86_FEATURE_SYSCALL)];
        break;

    case 0x80000007:
        /*
         * Keep only TSCInvariant. This may be cleared by the hypervisor
         * depending on guest TSC and migration settings.
         */
        regs[0] = regs[1] = regs[2] = 0;
        regs[3] &= 1u<<8;
        break;

    case 0x80000008:
        regs[0] &= 0x0000ffffu;
        regs[1] = regs[3] = 0;
        break;

    case 0x00000002: /* Intel cache info (dumped by AMD policy) */
    case 0x00000004: /* Intel cache info (dumped by AMD policy) */
    case 0x0000000a: /* Architectural Performance Monitor Features */
    case 0x80000002: /* Processor name string */
    case 0x80000003: /* ... continued         */
    case 0x80000004: /* ... continued         */
    case 0x80000005: /* AMD L1 cache/TLB info (dumped by Intel policy) */
    case 0x80000006: /* AMD L2/3 cache/TLB info ; Intel L2 cache features */
    case 0x8000000a: /* AMD SVM feature bits */
    case 0x80000019: /* AMD 1G TLB */
    case 0x8000001a: /* AMD perf hints */
    case 0x8000001c: /* AMD lightweight profiling */
        break;

    default:
        regs[0] = regs[1] = regs[2] = regs[3] = 0;
        break;
    }

    if ( info->vendor == VENDOR_AMD )
        amd_xc_cpuid_policy(xch, info, input, regs);
    else
        intel_xc_cpuid_policy(xch, info, input, regs);
}

static void xc_cpuid_pv_policy(xc_interface *xch,
                               const struct cpuid_domain_info *info,
                               const unsigned int *input, unsigned int *regs)
{
    switch ( input[0] )
    {
    case 0x00000000:
        if ( regs[0] > DEF_MAX_BASE )
            regs[0] = DEF_MAX_BASE;
        break;

    case 0x00000001:
    {
        /* Host topology exposed to PV guest.  Provide host value. */
        bool host_htt = regs[3] & bitmaskof(X86_FEATURE_HTT);

        /*
         * Don't pick host's Initial APIC ID which can change from run
         * to run.
         */
        regs[1] &= 0x00ffffffu;

        regs[2] = info->featureset[featureword_of(X86_FEATURE_SSE3)];
        regs[3] = (info->featureset[featureword_of(X86_FEATURE_FPU)] &
                   ~bitmaskof(X86_FEATURE_HTT));

        if ( host_htt )
            regs[3] |= bitmaskof(X86_FEATURE_HTT);
        break;
    }

    case 0x00000007:
        if ( input[1] == 0 )
        {
            regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)];
            regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)];
            regs[3] = info->featureset[featureword_of(X86_FEATURE_AVX512_4VNNIW)];
        }
        else
        {
            regs[1] = 0;
            regs[2] = 0;
            regs[3] = 0;
        }
        regs[0] = 0;
        break;

    case 0x0000000d: /* Xen automatically calculates almost everything. */
        if ( input[1] == 1 )
            regs[0] = info->featureset[featureword_of(X86_FEATURE_XSAVEOPT)];
        else
            regs[0] = 0;
        regs[1] = regs[2] = regs[3] = 0;
        break;

    case 0x80000000:
    {
        unsigned int max = info->vendor == VENDOR_AMD
            ? DEF_MAX_AMDEXT : DEF_MAX_INTELEXT;

        if ( regs[0] > max )
            regs[0] = max;
        break;
    }

    case 0x80000001:
    {
        /* Host topology exposed to PV guest.  Provide host CMP_LEGACY value. */
        bool host_cmp_legacy = regs[2] & bitmaskof(X86_FEATURE_CMP_LEGACY);

        regs[2] = (info->featureset[featureword_of(X86_FEATURE_LAHF_LM)] &
                   ~bitmaskof(X86_FEATURE_CMP_LEGACY));
        regs[3] = info->featureset[featureword_of(X86_FEATURE_SYSCALL)];

        if ( host_cmp_legacy )
            regs[2] |= bitmaskof(X86_FEATURE_CMP_LEGACY);

        break;
    }

    case 0x00000005: /* MONITOR/MWAIT */
    case 0x0000000b: /* Extended Topology Enumeration */
    case 0x8000000a: /* SVM revision and features */
    case 0x8000001b: /* Instruction Based Sampling */
    case 0x8000001c: /* Light Weight Profiling */
    case 0x8000001e: /* Extended topology reporting */
        regs[0] = regs[1] = regs[2] = regs[3] = 0;
        break;
    }
}

static int xc_cpuid_policy(xc_interface *xch,
                           const struct cpuid_domain_info *info,
                           const unsigned int *input, unsigned int *regs)
{
    /*
     * For hypervisor leaves (0x4000XXXX) only 0x4000xx00.EAX[7:0] bits (max
     * number of leaves) can be set by user. Hypervisor will enforce this so
     * all other bits are don't-care and we can set them to zero.
     */
    if ( (input[0] & 0xffff0000) == 0x40000000 )
    {
        regs[0] = regs[1] = regs[2] = regs[3] = 0;
        return 0;
    }

    if ( info->hvm )
        xc_cpuid_hvm_policy(xch, info, input, regs);
    else
        xc_cpuid_pv_policy(xch, info, input, regs);

    return 0;
}

static int xc_cpuid_do_domctl(
    xc_interface *xch, uint32_t domid,
    const unsigned int *input, const unsigned int *regs)
{
    DECLARE_DOMCTL;

    memset(&domctl, 0, sizeof (domctl));
    domctl.domain = domid;
    domctl.cmd = XEN_DOMCTL_set_cpuid;
    domctl.u.cpuid.input[0] = input[0];
    domctl.u.cpuid.input[1] = input[1];
    domctl.u.cpuid.eax = regs[0];
    domctl.u.cpuid.ebx = regs[1];
    domctl.u.cpuid.ecx = regs[2];
    domctl.u.cpuid.edx = regs[3];

    return do_domctl(xch, &domctl);
}

static char *alloc_str(void)
{
    char *s = malloc(33);
    if ( s == NULL )
        return s;
    memset(s, 0, 33);
    return s;
}

void xc_cpuid_to_str(const unsigned int *regs, char **strs)
{
    int i, j;

    for ( i = 0; i < 4; i++ )
    {
        strs[i] = alloc_str();
        if ( strs[i] == NULL )
            continue;
        for ( j = 0; j < 32; j++ )
            strs[i][j] = !!((regs[i] & (1U << (31 - j)))) ? '1' : '0';
    }
}
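
/*
 * Each register is rendered by xc_cpuid_to_str() as a 32-character '0'/'1'
 * string, most significant bit first.  For example (illustrative only),
 * regs[0] == 0x80000001 becomes "10000000000000000000000000000001".  This
 * is the same per-bit layout accepted in the config strings of
 * xc_cpuid_set() below.
 */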

static void sanitise_featureset(struct cpuid_domain_info *info)
{
    const uint32_t fs_size = xc_get_cpu_featureset_size();
    uint32_t disabled_features[fs_size];
    static const uint32_t deep_features[] = INIT_DEEP_FEATURES;
    unsigned int i, b;

    if ( info->hvm )
    {
        /* HVM Guest */

        if ( !info->pae )
            clear_bit(X86_FEATURE_PAE, info->featureset);

        if ( !info->nestedhvm )
        {
            clear_bit(X86_FEATURE_SVM, info->featureset);
            clear_bit(X86_FEATURE_VMX, info->featureset);
        }
    }
    else
    {
        /* PV or PVH Guest */

        if ( !info->pv64 )
        {
            clear_bit(X86_FEATURE_LM, info->featureset);
            if ( info->vendor != VENDOR_AMD )
                clear_bit(X86_FEATURE_SYSCALL, info->featureset);
        }

        clear_bit(X86_FEATURE_PSE, info->featureset);
        clear_bit(X86_FEATURE_PSE36, info->featureset);
        clear_bit(X86_FEATURE_PGE, info->featureset);
        clear_bit(X86_FEATURE_PAGE1GB, info->featureset);
    }

    if ( info->xfeature_mask == 0 )
        clear_bit(X86_FEATURE_XSAVE, info->featureset);

    /* Disable deep dependencies of disabled features. */
    for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i )
        disabled_features[i] = ~info->featureset[i] & deep_features[i];

    for ( b = 0; b < sizeof(disabled_features) * CHAR_BIT; ++b )
    {
        const uint32_t *dfs;

        if ( !test_bit(b, disabled_features) ||
             !(dfs = xc_get_feature_deep_deps(b)) )
             continue;

        for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i )
        {
            info->featureset[i] &= ~dfs[i];
            disabled_features[i] &= ~dfs[i];
        }
    }
}

int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid,
                          uint32_t *featureset,
                          unsigned int nr_features)
{
    struct cpuid_domain_info info = {};
    unsigned int input[2] = { 0, 0 }, regs[4];
    unsigned int base_max, ext_max;
    int rc;

    rc = get_cpuid_domain_info(xch, domid, &info, featureset, nr_features);
    if ( rc )
        goto out;

    cpuid(input, regs);
    base_max = (regs[0] <= DEF_MAX_BASE) ? regs[0] : DEF_MAX_BASE;
    input[0] = 0x80000000;
    cpuid(input, regs);

    if ( info.vendor == VENDOR_AMD )
        ext_max = (regs[0] <= DEF_MAX_AMDEXT) ? regs[0] : DEF_MAX_AMDEXT;
    else
        ext_max = (regs[0] <= DEF_MAX_INTELEXT) ? regs[0] : DEF_MAX_INTELEXT;

    sanitise_featureset(&info);

    input[0] = 0;
    input[1] = XEN_CPUID_INPUT_UNUSED;
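    /*
     * Walk the basic leaves 0x00000000..base_max and then the extended
     * leaves 0x80000000..ext_max, applying the policy above and programming
     * any non-zero result into Xen.  Leaf 4 is iterated subleaf by subleaf;
     * leaf 7 is programmed with subleaf 0 only, and for leaf 0xd only
     * subleaf 1 is set explicitly, as Xen derives the rest itself.
     */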
    for ( ; ; )
    {
        cpuid(input, regs);
        xc_cpuid_policy(xch, &info, input, regs);

        if ( regs[0] || regs[1] || regs[2] || regs[3] )
        {
            rc = xc_cpuid_do_domctl(xch, domid, input, regs);
            if ( rc )
                goto out;
        }

        /* Intel cache descriptor leaves. */
        if ( input[0] == 4 )
        {
            input[1]++;
            /* More to do? Then loop keeping %%eax==0x00000004. */
            if ( (regs[0] & 0x1f) != 0 )
                continue;
        }

        input[0]++;
        if ( !(input[0] & 0x80000000u) && (input[0] > base_max ) )
            input[0] = 0x80000000u;

        input[1] = XEN_CPUID_INPUT_UNUSED;
        if ( (input[0] == 4) || (input[0] == 7) )
            input[1] = 0;
        else if ( input[0] == 0xd )
            input[1] = 1; /* Xen automatically calculates almost everything. */

        if ( (input[0] & 0x80000000u) && (input[0] > ext_max) )
            break;
    }

 out:
    free_cpuid_domain_info(&info);
    return rc;
}

/*
 * Configure a single input with the information from config.
 *
 * Config is an array of strings:
 *   config[0] = eax
 *   config[1] = ebx
 *   config[2] = ecx
 *   config[3] = edx
 *
 * The format of the string is the following:
 *   '1' -> force to 1
 *   '0' -> force to 0
 *   'x' -> we don't care (use default)
 *   'k' -> pass through host value
 *   's' -> pass through the first time and then keep the same value
 *          across save/restore and migration.
 *
 * For 's' and 'x' the configuration is overwritten with the value applied.
 */
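
/*
 * Illustrative sketch only (values are hypothetical): to force bit 31 of
 * EDX on for leaf 1 while keeping the policy defaults for every other bit
 * and register, a caller might pass:
 *
 *     const char *config[4] = {
 *         NULL, NULL, NULL,
 *         "1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
 *     };
 *     char *config_transformed[4];
 *     unsigned int input[2] = { 0x00000001, XEN_CPUID_INPUT_UNUSED };
 *
 *     rc = xc_cpuid_set(xch, domid, input, config, config_transformed);
 *
 * Each string is 32 characters long, bit 31 first, in the same layout
 * produced by xc_cpuid_to_str().  A NULL entry means "use the policy value
 * for that register".
 */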
int xc_cpuid_set(
    xc_interface *xch, uint32_t domid, const unsigned int *input,
    const char **config, char **config_transformed)
{
    int rc;
    unsigned int i, j, regs[4], polregs[4];
    struct cpuid_domain_info info = {};

    memset(config_transformed, 0, 4 * sizeof(*config_transformed));

    rc = get_cpuid_domain_info(xch, domid, &info, NULL, 0);
    if ( rc )
        goto out;

    cpuid(input, regs);

    memcpy(polregs, regs, sizeof(regs));
    xc_cpuid_policy(xch, &info, input, polregs);

    for ( i = 0; i < 4; i++ )
    {
        if ( config[i] == NULL )
        {
            regs[i] = polregs[i];
            continue;
        }

        config_transformed[i] = alloc_str();
        if ( config_transformed[i] == NULL )
        {
            rc = -ENOMEM;
            goto fail;
        }

        for ( j = 0; j < 32; j++ )
        {
            unsigned char val = !!((regs[i] & (1U << (31 - j))));
            unsigned char polval = !!((polregs[i] & (1U << (31 - j))));

            rc = -EINVAL;
            if ( !strchr("10xks", config[i][j]) )
                goto fail;

            if ( config[i][j] == '1' )
                val = 1;
            else if ( config[i][j] == '0' )
                val = 0;
            else if ( config[i][j] == 'x' )
                val = polval;

            if ( val )
                set_feature(31 - j, regs[i]);
            else
                clear_feature(31 - j, regs[i]);

            config_transformed[i][j] = config[i][j];
            if ( config[i][j] == 's' )
                config_transformed[i][j] = '0' + val;
        }
    }

    rc = xc_cpuid_do_domctl(xch, domid, input, regs);
    if ( rc == 0 )
        goto out;

 fail:
    for ( i = 0; i < 4; i++ )
    {
        free(config_transformed[i]);
        config_transformed[i] = NULL;
    }

 out:
    free_cpuid_domain_info(&info);
    return rc;
}