1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4import sys, os, re
5
# Python 2's range() builds a complete list up front.  Alias it to the lazy
# xrange() there, so range() behaves the same way on both major versions.
if sys.version_info[0] < 3:
    range = xrange
8
class Fail(Exception):
    """Error in the input or environment which aborts the generation."""
11
class State(object):
    """
    Everything the generator knows: the opened input and output streams,
    the data parsed from the input, and the results calculated from it.
    """

    def __init__(self, input, output):
        """
        Open @param input for reading and @param output for writing (each
        is handed to open_file_or_fd()), and start with all parsed and
        calculated state empty.
        """

        # Streams.  The input is opened before the output.
        self.source = input
        self.input = open_file_or_fd(input, "r", 2)
        self.output = open_file_or_fd(output, "w", 2)

        # State parsed from input
        self.names = {}   # Value => Name mapping
        self.values = {}  # Name => Value mapping
        # One (initially empty) featureset per recognised attribute letter
        self.raw = dict((letter, set()) for letter in "!|ASHash")

        # State calculated: scalars
        self.nr_entries = 0    # Number of words in a featureset
        self.common_1d = 0     # Common features between 1d and e1d
        self.nr_deep_deps = 0  # Number of entries in deep_deps

        # State calculated: default and max featuresets per guest type
        self.pv_def = set()          # PV default features
        self.pv_max = set()          # PV max features
        self.hvm_shadow_def = set()  # HVM shadow default features
        self.hvm_shadow_max = set()  # HVM shadow max features
        self.hvm_hap_def = set()     # HVM HAP default features
        self.hvm_hap_max = set()     # HVM HAP max features

        # State calculated: dependency information and C declarations
        self.deep_deps = {}          # { feature num => dependant features }
        self.deep_features = set()   # featureset of keys in deep_deps
        self.bitfields = []          # Text to declare named bitfields in C
42
def parse_definitions(state):
    """
    Parse featureset information from state.input.

    Two kinds of line are recognised; every other line is skipped:

      * "/* ... word N ... */" comments, which declare featureset word N.
        Words must appear consecutively, starting from 0.
      * "XEN_CPUFEATURE(NAME, A*B+C) /*ATTRS ..." lines, which define the
        feature NAME with bit index A*B+C and per-character attributes.

    Fills in state.names, state.values, state.raw and state.nr_entries,
    and mutates the global namespace of this module with one symbol per
    feature, holding its bit index.  The symbol is NAME, prefixed with an
    underscore if NAME starts with a digit.

    Raises Fail for uninterpretable feature lines, out of order words,
    duplicate symbols, aliased values, unrecognised attributes, missing
    features or words, and features beyond the last declared word.
    """
    feat_regex = re.compile(
        r"^XEN_CPUFEATURE\(([A-Z0-9_]+),"
        r"\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)"
        r"\s+/\*([\w!|]*) .*$")

    word_regex = re.compile(
        r"^/\* .* word (\d*) .*\*/$")
    last_word = -1

    this = sys.modules[__name__]

    # Iterate the stream directly; there is no need to hold every line of
    # the input in memory at once.
    for line in state.input:

        # Handle /* ... word $N */ lines
        if line.startswith("/* "):

            res = word_regex.match(line)
            if res is None:
                continue # Some other comment

            word = int(res.group(1))

            if word != last_word + 1:
                raise Fail("Featureset word %u out of order (last word %u)"
                           % (word, last_word))

            last_word = word
            state.nr_entries = word + 1
            continue

        # Short circuit the regex for anything which isn't a feature
        if not line.startswith("XEN_CPUFEATURE("):
            continue

        # Handle XEN_CPUFEATURE( lines
        res = feat_regex.match(line)

        if res is None:
            raise Fail("Failed to interpret '%s'" % (line.strip(), ))

        name, expr, attr = res.groups()

        # NOTE(review): eval() of text taken from the input file.  The regex
        # confines it to digits, whitespace, '*' and '+', i.e. a very simple
        # arithmetic expression.
        val = eval(expr)

        # The symbol inserted into the namespace gets an underscore
        # prepended if the name starts with a digit.
        if name[0] in "0123456789":
            this_name = "_" + name
        else:
            this_name = name

        # Check the symbol which will actually be inserted.  Checking the
        # bare name would never spot a repeated digit-leading feature.
        if hasattr(this, this_name):
            raise Fail("Duplicate symbol %s" % (name,))

        if val in state.names:
            raise Fail("Aliased value between %s and %s" %
                       (name, state.names[val]))

        # Mutate the current namespace to insert a feature literal with its
        # bit index.
        setattr(this, this_name, val)

        # Construct forward and reverse mappings between name and value
        state.names[val] = name
        state.values[name.lower().replace("_", "-")] = val

        for a in attr:
            try:
                state.raw[a].add(val)
            except KeyError:
                raise Fail("Unrecognised attribute '%s' for %s" % (a, name))

    if len(state.names) == 0:
        raise Fail("No features found")

    if state.nr_entries == 0:
        raise Fail("No featureset word info found")

    # Bits 5 and up of a bit index select the featureset word
    max_val = max(state.names.keys())
    if (max_val >> 5) >= state.nr_entries:
        max_name = state.names[max_val]
        raise Fail("Feature %s (%d*32+%d) exceeds FEATURESET_NR_ENTRIES (%d)"
                   % (max_name, max_val >> 5, max_val & 31, state.nr_entries))
129
def featureset_to_uint32s(fs, nr):
    """
    Represent the featureset @param fs (an iterable of bit indices) as
    @param nr C-compatible uint32_t literals, least significant word first.

    Returns a generator of strings.  Asserts that the featureset fits in
    nr words.
    """

    # Accumulate every feature bit into one arbitrarily large integer...
    mask = 0
    for bit in fs:
        mask |= 1 << bit

    # ... then peel it apart 32 bits at a time.
    chunks = []
    while mask:
        mask, low = divmod(mask, 1 << 32)
        chunks.append(low)

    assert len(chunks) <= nr

    # Zero-fill up to the requested number of words
    chunks += [0] * (nr - len(chunks))

    return ("0x%08xU" % c for c in chunks)
148
def format_uint32s(state, featureset, indent):
    """
    Format @param featureset as state.nr_entries uint32_t's suitable for a
    macro definition: one word per line, each line indented by @param indent
    spaces and ending in a comma and a line continuation.
    """
    pad = " " * indent
    separator = ", \\\n" + pad
    body = separator.join(featureset_to_uint32s(featureset, state.nr_entries))
    return "%s%s, \\" % (pad, body)
154
155
def crunch_numbers(state):
    """
    Calculate the derived featureset information from the parsed input.

    Relies on parse_definitions() having inserted one symbol per feature
    into the namespace of this module.  Fills in state.common_1d, the
    default/max featuresets for each guest type, state.deep_deps (every
    feature transitively depending on each key of the dependency table),
    state.deep_features, state.nr_deep_deps, and appends the C bitfield
    declarations to state.bitfields.

    Raises Fail if the dependency table contains a cycle.
    """

    # Features common between 1d and e1d.
    common_1d = (FPU, VME, DE, PSE, TSC, MSR, PAE, MCE, CX8, APIC,
                 MTRR, PGE, MCA, CMOV, PAT, PSE36, MMX, FXSR)
    state.common_1d = common_1d

    state.pv_def =                                state.raw['A']
    state.hvm_shadow_def = state.pv_def         | state.raw['S']
    state.hvm_hap_def =    state.hvm_shadow_def | state.raw['H']

    state.pv_max =                                state.raw['A'] | state.raw['a']
    state.hvm_shadow_max = state.pv_max         | state.raw['S'] | state.raw['s']
    state.hvm_hap_max =    state.hvm_shadow_max | state.raw['H'] | state.raw['h']

    #
    # Feature dependency information.
    #
    # !!! WARNING !!!
    #
    # A lot of this information is derived from the written text of vendors
    # software manuals, rather than directly from a statement.  As such, it
    # does contain guesswork and assumptions, and may not accurately match
    # hardware implementations.
    #
    # It is however designed to create an end result for a guest which does
    # plausibly match real hardware.
    #
    # !!! WARNING !!!
    #
    # The format of this dictionary is that the feature in the key is a direct
    # prerequisite of each feature in the value.
    #
    # The first consideration is about which functionality is physically built
    # on top of other features.  The second consideration, which is more
    # subjective, is whether real hardware would ever be found supporting
    # feature X but not Y.
    #
    deps = {
        # FPU is taken to mean support for the x87 registers as well as the
        # instructions.  MMX is documented to alias the %MM registers over the
        # x87 %ST registers in hardware.  Correct restoring of error pointers
        # of course makes no sense without there being anything to restore.
        FPU: [MMX, RSTR_FP_ERR_PTRS],

        # The PSE36 feature indicates that reserved bits in a PSE superpage
        # may be used as extra physical address bits.
        PSE: [PSE36],

        # Entering Long Mode requires that %CR4.PAE is set.  The NX pagetable
        # bit is only representable in the 64bit PTE format offered by PAE.
        PAE: [LM, NX],

        TSC: [TSC_DEADLINE, RDTSCP, TSC_ADJUST, ITSC],

        # APIC is special, but X2APIC does depend on APIC being available in
        # the first place.
        APIC: [X2APIC, TSC_DEADLINE, EXTAPIC],

        # The CLZERO insn requires a means to determine the cache line size,
        # which is tied to the CLFLUSH insn.
        CLFLUSH: [CLZERO],

        # AMD built MMXExtensions and 3DNow as extensions to MMX.
        MMX: [MMXEXT, _3DNOW],

        # The FXSAVE/FXRSTOR instructions were introduced into hardware before
        # SSE, which is why they behave differently based on %CR4.OSFXSAVE and
        # have their own feature bit.  AMD however introduce the Fast FXSR
        # feature as an optimisation.
        FXSR: [FFXSR, SSE],

        # SSE is taken to mean support for the %XMM registers as well as the
        # instructions.  Several further instruction sets are built on core
        # %XMM support, without specific inter-dependencies.  Additionally
        # AMD has a special mis-alignment sub-mode.
        SSE: [SSE2, MISALIGNSSE],

        # SSE2 was re-specified as core instructions for 64bit.  Also ISA
        # extensions dealing with vectors of integers are added here rather
        # than to SSE.
        SSE2: [SSE3, LM, AESNI, PCLMULQDQ, SHA, GFNI],

        # Other SSEn each depend on their predecessor versions.  AMD
        # Lisbon/Magny-Cours processors implemented SSE4A without SSSE3.
        SSE3: [SSSE3, SSE4A],
        SSSE3: [SSE4_1],
        SSE4_1: [SSE4_2],

        # AMD specify no relationship between POPCNT and SSE4.2.  Intel
        # document that SSE4.2 should be checked for before checking for
        # POPCNT.  However, it has its own feature bit, and operates on GPRs
        # rather than %XMM state, so doesn't inherently depend on SSE.
        # Therefore, we do not specify a dependency between SSE4_2 and POPCNT.
        #
        # SSE4_2: [POPCNT]

        # XSAVE is an extra set of instructions for state management, but
        # doesn't constitute new state itself.  Some of the dependent features
        # are instructions built on top of base XSAVE, while others are new
        # instruction groups which are specified to require XSAVE for state
        # management.
        XSAVE: [XSAVEOPT, XSAVEC, XGETBV1, XSAVES,
                AVX, MPX, PKU, AMX_TILE, LWP],

        # AVX is taken to mean hardware support for 256bit registers (which in
        # practice depends on the VEX prefix to encode), and the instructions
        # themselves.
        #
        # AVX is not taken to mean support for the VEX prefix itself (nor XOP
        # for the XOP prefix).  VEX/XOP-encoded GPR instructions, such as
        # those from the BMI{1,2}, TBM and LWP sets function fine in the
        # absence of any enabled xstate.
        AVX: [FMA, FMA4, F16C, AVX2, XOP, AVX_NE_CONVERT, SM3],

        # This dependency exists solely for the shadow pagetable code.  If the
        # host doesn't have NX support, the shadow pagetable code can't handle
        # SMAP correctly for guests.
        NX: [SMAP],

        # CX16 is only encodable in Long Mode.  LAHF_LM indicates that the
        # SAHF/LAHF instructions are reintroduced in Long Mode.  1GB
        # superpages, PCID and PKU are only available in 4 level paging.
        # NO_LMSL indicates the absence of Long Mode Segment Limits, which
        # have been dropped in hardware.
        LM: [CX16, PCID, LAHF_LM, PAGE1GB, PKU, NO_LMSL, AMX_TILE, CMPCCXADD],

        # AMD K6-2+ and K6-III processors shipped with 3DNow+, beyond the
        # standard 3DNow in the earlier K6 processors.
        _3DNOW: [_3DNOWEXT],

        # This is just the dependency between AVX512 and AVX2 of XSTATE
        # feature flags.  If want to use AVX512, AVX2 must be supported and
        # enabled.  Certain later extensions, acting on 256-bit vectors of
        # integers, better depend on AVX2 than AVX.
        AVX2: [AVX512F, VAES, VPCLMULQDQ, AVX_VNNI, AVX_IFMA, AVX_VNNI_INT8,
               AVX_VNNI_INT16, SHA512, SM4],

        # AVX512F is taken to mean hardware support for 512bit registers
        # (which in practice depends on the EVEX prefix to encode) as well
        # as mask registers, and the instructions themselves. All further
        # AVX512 features are built on top of AVX512F
        AVX512F: [AVX512DQ, AVX512_IFMA, AVX512PF, AVX512ER, AVX512CD,
                  AVX512BW, AVX512VL, AVX512_4VNNIW, AVX512_4FMAPS,
                  AVX512_VNNI, AVX512_VPOPCNTDQ, AVX512_VP2INTERSECT],

        # AVX512 extensions acting on vectors of bytes/words are made
        # dependents of AVX512BW (as to requiring wider than 16-bit mask
        # registers), despite the SDM not formally making this connection.
        AVX512BW: [AVX512_VBMI, AVX512_VBMI2, AVX512_BITALG, AVX512_BF16,
                   AVX512_FP16],

        # Extensions with VEX/EVEX encodings keyed to a separate feature
        # flag are made dependents of their respective legacy feature.
        PCLMULQDQ: [VPCLMULQDQ],
        AESNI: [VAES],

        # The features:
        #   * Single Thread Indirect Branch Predictors
        #   * Speculative Store Bypass Disable
        #   * Predictive Store Forward Disable
        #
        # enumerate new bits in MSR_SPEC_CTRL, and technically enumerate
        # MSR_SPEC_CTRL itself.  AMD further enumerates hints to guide OS
        # behaviour.
        #
        # However, no real hardware will exist with e.g. SSBD but not
        # IBRSB/IBRS, and we pass this MSR directly to guests.  Treating them
        # as dependent features simplifies Xen's logic, and prevents the guest
        # from seeing implausible configurations.
        IBRSB: [STIBP, SSBD, INTEL_PSFD, EIBRS,
                IPRED_CTRL, RRSBA_CTRL, BHI_CTRL],
        IBRS: [AMD_STIBP, AMD_SSBD, PSFD, AUTO_IBRS,
               IBRS_ALWAYS, IBRS_FAST, IBRS_SAME_MODE],
        IBPB: [IBPB_RET, SBPB, IBPB_BRTYPE],
        AMD_STIBP: [STIBP_ALWAYS],

        # In principle the TSXLDTRK insns could also be considered independent.
        RTM: [TSXLDTRK],

        # Enhanced Predictive Store-Forwarding is an informational note on top
        # of PSF.
        PSFD: [EPSF],

        # The ARCH_CAPS CPUID bit enumerates the availability of the whole register.
        ARCH_CAPS: list(range(RDCL_NO, RDCL_NO + 64)),

        # The behaviour described by RRSBA depends on eIBRS being active.
        EIBRS: [RRSBA],

        # AMX-TILE means hardware support for tile registers and general non-
        # computational instructions.  All further AMX features are built on top
        # of AMX-TILE.
        AMX_TILE: [AMX_BF16, AMX_INT8, AMX_FP16, AMX_COMPLEX],
    }

    deep_features = tuple(sorted(deps.keys()))
    state.deep_deps = {}

    # Flatten the direct dependencies into, for each key, the complete list
    # of features which transitively depend on it.
    for feat in deep_features:

        reachable = []   # Transitive dependants of feat, in discovery order
        visited = set()  # Same content as reachable, for fast lookup
        to_process = list(deps[feat])

        while to_process:

            # To debug, uncomment the following lines:
            # def repl(l):
            #     return "[" + ", ".join((state.names[x] for x in l)) + "]"
            # sys.stderr.write("Feature %s, seen %s, to_process %s \n" % \
            #     (state.names[feat], repl(reachable), repl(to_process)))

            f = to_process.pop()

            # Getting back to the starting feature is a genuine cycle.  Every
            # member of a cycle has dependants, so is a key of deps, and is
            # therefore used as a starting feature at some point.
            if f == feat:
                raise Fail("ERROR: Cycle found with %s when processing %s"
                           % (state.names[f], state.names[feat]))

            # A feature reachable along several paths is not a cycle.  It
            # just doesn't need expanding a second time.
            if f in visited:
                continue

            visited.add(f)
            reachable.append(f)
            to_process.extend(deps.get(f, []))

        state.deep_deps[feat] = reachable

    state.deep_features = set(deps)
    state.nr_deep_deps = len(state.deep_deps)

    # Calculate the bitfield name declarations.  Leave 4 placeholders on the end
    for word in range(state.nr_entries + 4):

        names = []
        for bit in range(32):

            name = state.names.get(word * 32 + bit, "")

            # Prepend an underscore if the name starts with a digit.
            if name and name[0] in "0123456789":
                name = "_" + name

            # Don't generate names for features fast-forwarded from other
            # state
            if name in ("APIC", "OSXSAVE", "OSPKE"):
                name = ""

            names.append(name.lower())

        if any(names):
            # Unnamed bits become anonymous single bit fields
            state.bitfields.append("bool " + ":1, ".join(names) + ":1")
        else:
            state.bitfields.append("uint32_t _placeholder_%u" % (word, ))
405
406
def write_results(state):
    """
    Write the generated C header to state.output.

    In order: the include guard opening, the featureset macros, the deep
    dependency table, the name => value and value => name tables, one
    CPUID_BITFIELD_<n> macro per entry of state.bitfields, and the include
    guard closing.
    """
    emit = state.output.write

    def words(featureset):
        # Every top level featureset macro body is indented by 4 spaces
        return format_uint32s(state, featureset, 4)

    emit(
"""/*
 * Automatically generated by %s - Do not edit!
 * Source data: %s
 */
#ifndef __XEN_X86__FEATURESET_DATA__
#define __XEN_X86__FEATURESET_DATA__
""" % (sys.argv[0], state.source))

    emit(
"""
#define FEATURESET_NR_ENTRIES %s

#define CPUID_COMMON_1D_FEATURES %s

#define INIT_KNOWN_FEATURES { \\\n%s\n}

#define INIT_SPECIAL_FEATURES { \\\n%s\n}

#define INIT_SIMPLE_OR { \\\n%s\n}

#define INIT_PV_DEF_FEATURES { \\\n%s\n}

#define INIT_PV_MAX_FEATURES { \\\n%s\n}

#define INIT_HVM_SHADOW_DEF_FEATURES { \\\n%s\n}

#define INIT_HVM_SHADOW_MAX_FEATURES { \\\n%s\n}

#define INIT_HVM_HAP_DEF_FEATURES { \\\n%s\n}

#define INIT_HVM_HAP_MAX_FEATURES { \\\n%s\n}

#define NR_DEEP_DEPS %sU

#define INIT_DEEP_FEATURES { \\\n%s\n}

#define INIT_DEEP_DEPS { \\
""" % (state.nr_entries,
       # The common 1d features fit in a single word
       next(featureset_to_uint32s(state.common_1d, 1)),
       words(state.names.keys()),
       words(state.raw['!']),
       words(state.raw['|']),
       words(state.pv_def),
       words(state.pv_max),
       words(state.hvm_shadow_def),
       words(state.hvm_shadow_max),
       words(state.hvm_hap_def),
       words(state.hvm_hap_max),
       state.nr_deep_deps,
       words(state.deep_features),
       ))

    # One table entry per feature having dependants, in feature order
    for feat in sorted(state.deep_deps):
        dependants = format_uint32s(state, state.deep_deps[feat], 8)
        emit("    { %#xU, /* %s */ { \\\n%s\n    }, }, \\\n"
             % (feat, state.names[feat], dependants))

    emit(
"""}

#define INIT_FEATURE_NAME_TO_VAL { \\
""")

    # Both name tables are emitted sorted by name
    by_name = sorted(state.values.items())

    for name, bit in by_name:
        emit('    { "%s", %sU },\\\n' % (name, bit))

    emit(
"""}

""")

    emit(
"""
#define INIT_FEATURE_VAL_TO_NAME { \\
""")

    for name, bit in by_name:
        emit('    [%s] = "%s",\\\n' % (bit, name))

        # Add the other alias for 1d/e1d common bits.  64 is the difference
        # between 1d and e1d.
        if bit in state.common_1d:
            emit('    [%s] = "%s",\\\n' % (64 + bit, name))

    # Pad to an exact multiple of FEATURESET_SIZE if necessary
    last_bit = state.nr_entries * 32 - 1
    if not state.names.get(last_bit):
        emit('    [%s] = NULL,\\\n' % (last_bit, ))

    emit(
"""}

""")

    for nr, decl in enumerate(state.bitfields):
        emit("#define CPUID_BITFIELD_%d \\\n    %s\n\n" % (nr, decl))

    emit(
"""
#endif /* __XEN_X86__FEATURESET_DATA__ */
""")
522
523
def open_file_or_fd(val, mode, buffering):
    """
    If 'val' looks like a decimal integer, open it as an fd.  If not, try to
    open it as a regular file.

    fds 0, 1 and 2 are not reopened: the current sys.stdin, sys.stdout or
    sys.stderr object is returned, and 'mode' and 'buffering' are ignored.

    Returns the file object.  Raises Fail if the fd or file can't be opened.
    """

    fd = -1
    try:
        # Does it look like an integer?
        try:
            fd = int(val, 10)
        except ValueError:
            pass

        if fd == 0:
            return sys.stdin
        elif fd == 1:
            return sys.stdout
        elif fd == 2:
            return sys.stderr

        # Try to open it...
        if fd != -1:
            return os.fdopen(fd, mode, buffering)
        else:
            return open(val, mode, buffering)

    # StandardError only exists in Python 2.  Naming it here turned every
    # open failure into a NameError on Python 3.  Exception is available in
    # both, and still leaves SystemExit and KeyboardInterrupt alone.
    except Exception:
        e = sys.exc_info()[1]
        if fd != -1:
            raise Fail("Unable to open fd %d: %s: %s" %
                       (fd, e.__class__.__name__, e))
        else:
            raise Fail("Unable to open file '%s': %s: %s" %
                       (val, e.__class__.__name__, e))
561
def main():
    """
    Command line entry point: parse the options, open the input and output,
    then parse the definitions, crunch the numbers and write the results.
    """
    from optparse import OptionParser

    # Change stdout to be line-buffered.
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1)

    parser = OptionParser(
        usage = "%prog [options] -i INPUT -o OUTPUT",
        description = "Process featureset information")

    # (short flag, long flag, destination, default, help text).  The defaults
    # are the fd numbers of stdin and stdout.
    option_table = (
        ("-i", "--in", "fin", "0", "Featureset definitions"),
        ("-o", "--out", "fout", "1", "Featureset calculated information"),
    )
    for short_flag, long_flag, dest, default, help_text in option_table:
        parser.add_option(short_flag, long_flag, dest = dest,
                          metavar = "<FD or FILE>", default = default,
                          help = help_text)

    opts = parser.parse_args()[0]

    if opts.fin is None or opts.fout is None:
        parser.print_help(sys.stderr)
        raise SystemExit(1)

    state = State(opts.fin, opts.fout)

    # Each stage consumes what the previous one left in state
    for stage in (parse_definitions, crunch_numbers, write_results):
        stage(state)
590
591
# Script entry point.  Exit codes: main()'s result on success, 1 for a
# reported Fail, 2 for a keyboard interrupt.
if __name__ == "__main__":
    try:
        sys.exit(main())
    except Fail:
        # Report the failure against the full path of this script
        err = sys.exc_info()[1]
        script = os.path.abspath(sys.argv[0])
        sys.stderr.write("%s: Fail: %s\n" % (script, str(err)))
        sys.exit(1)
    except SystemExit:
        # Pass an explicit exit request through with its code intact
        err = sys.exc_info()[1]
        sys.exit(err.code)
    except KeyboardInterrupt:
        sys.exit(2)
605