"""
Process raw qstr file and output qstr data with length, hash and data bytes.

This script works with Python 2.6, 2.7, 3.3 and 3.4.
"""

from __future__ import print_function

import re
import sys

# Python 2/3 compatibility:
#   - iterating through bytes is different
#   - codepoint2name lives in a different module
import platform

if platform.python_version_tuple()[0] == "2":

    def bytes_cons(val, enc=None):
        """Return the bytes of `val` as a bytearray (`enc` is ignored)."""
        return bytearray(val)

    from htmlentitydefs import codepoint2name
elif platform.python_version_tuple()[0] == "3":
    bytes_cons = bytes
    from html.entities import codepoint2name
# end compatibility code

codepoint2name[ord("-")] = "hyphen"

# add some custom names to map characters that aren't in HTML
codepoint2name[ord(" ")] = "space"
codepoint2name[ord("'")] = "squot"
codepoint2name[ord(",")] = "comma"
codepoint2name[ord(".")] = "dot"
codepoint2name[ord(":")] = "colon"
codepoint2name[ord(";")] = "semicolon"
codepoint2name[ord("/")] = "slash"
codepoint2name[ord("%")] = "percent"
codepoint2name[ord("#")] = "hash"
codepoint2name[ord("(")] = "paren_open"
codepoint2name[ord(")")] = "paren_close"
codepoint2name[ord("[")] = "bracket_open"
codepoint2name[ord("]")] = "bracket_close"
codepoint2name[ord("{")] = "brace_open"
codepoint2name[ord("}")] = "brace_close"
codepoint2name[ord("*")] = "star"
codepoint2name[ord("!")] = "bang"
codepoint2name[ord("\\")] = "backslash"
codepoint2name[ord("+")] = "plus"
codepoint2name[ord("$")] = "dollar"
codepoint2name[ord("=")] = "equals"
codepoint2name[ord("?")] = "question"
codepoint2name[ord("@")] = "at_sign"
codepoint2name[ord("^")] = "caret"
codepoint2name[ord("|")] = "pipe"
codepoint2name[ord("~")] = "tilde"

# Patterns for the two kinds of input line recognised by parse_input_headers;
# compiled once here instead of being looked up for every line.
_QCFG_RE = re.compile(r"^QCFG\((.+), (.+)\)")
_QSTR_RE = re.compile(r"^Q\((.*)\)$")

# static qstrs, should be sorted

static_qstr_list = [
    "",
    "__dir__",  # Put __dir__ after empty qstr for builtin dir() to work
    "\n",
    " ",
    "*",
    "/",
    "<module>",
    "_",
    "__call__",
    "__class__",
    "__delitem__",
    "__enter__",
    "__exit__",
    "__getattr__",
    "__getitem__",
    "__hash__",
    "__init__",
    "__int__",
    "__iter__",
    "__len__",
    "__main__",
    "__module__",
    "__name__",
    "__new__",
    "__next__",
    "__qualname__",
    "__repr__",
    "__setitem__",
    "__str__",
    "ArithmeticError",
    "AssertionError",
    "AttributeError",
    "BaseException",
    "EOFError",
    "Ellipsis",
    "Exception",
    "GeneratorExit",
    "ImportError",
    "IndentationError",
    "IndexError",
    "KeyError",
    "KeyboardInterrupt",
    "LookupError",
    "MemoryError",
    "NameError",
    "NoneType",
    "NotImplementedError",
    "OSError",
    "OverflowError",
    "RuntimeError",
    "StopIteration",
    "SyntaxError",
    "SystemExit",
    "TypeError",
    "ValueError",
    "ZeroDivisionError",
    "abs",
    "all",
    "any",
    "append",
    "args",
    "bool",
    "builtins",
    "bytearray",
    "bytecode",
    "bytes",
    "callable",
    "chr",
    "classmethod",
    "clear",
    "close",
    "const",
    "copy",
    "count",
    "dict",
    "dir",
    "divmod",
    "end",
    "endswith",
    "eval",
    "exec",
    "extend",
    "find",
    "format",
    "from_bytes",
    "get",
    "getattr",
    "globals",
    "hasattr",
    "hash",
    "id",
    "index",
    "insert",
    "int",
    "isalpha",
    "isdigit",
    "isinstance",
    "islower",
    "isspace",
    "issubclass",
    "isupper",
    "items",
    "iter",
    "join",
    "key",
    "keys",
    "len",
    "list",
    "little",
    "locals",
    "lower",
    "lstrip",
    "main",
    "map",
    "micropython",
    "next",
    "object",
    "open",
    "ord",
    "pop",
    "popitem",
    "pow",
    "print",
    "range",
    "read",
    "readinto",
    "readline",
    "remove",
    "replace",
    "repr",
    "reverse",
    "rfind",
    "rindex",
    "round",
    "rsplit",
    "rstrip",
    "self",
    "send",
    "sep",
    "set",
    "setattr",
    "setdefault",
    "sort",
    "sorted",
    "split",
    "start",
    "startswith",
    "staticmethod",
    "step",
    "stop",
    "str",
    "strip",
    "sum",
    "super",
    "throw",
    "to_bytes",
    "tuple",
    "type",
    "update",
    "upper",
    "utf-8",
    "value",
    "values",
    "write",
    "zip",
]


# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
    """Return the hash of `qstr` truncated to `bytes_hash` bytes.

    `qstr` must yield integer byte values when iterated (for example the
    result of `bytes_cons`).  The result is never 0: a computed value of 0
    is replaced by 1.
    """
    acc = 5381
    for b in qstr:
        acc = (acc * 33) ^ b
    # Make sure that valid hash is never zero, zero means "hash not computed"
    return (acc & ((1 << (8 * bytes_hash)) - 1)) or 1


def qstr_escape(qst):
    """Turn the string `qst` into text usable as part of a C identifier.

    Every character outside [A-Za-z0-9_] is replaced by `_<name>_`, where
    <name> comes from `codepoint2name`, or is `0x..` (the code point in hex)
    when the character has no entry there.
    """

    def esc_char(m):
        c = ord(m.group(0))
        try:
            name = codepoint2name[c]
        except KeyError:
            name = "0x%02x" % c
        return "_" + name + "_"

    return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)


def parse_input_headers(infiles):
    """Collect QCFG settings and qstrs from the files named in `infiles`.

    Returns `(qcfgs, qstrs)`:
      - `qcfgs` maps each config name to its value string, with one pair of
        enclosing parentheses removed from the value if present.
      - `qstrs` maps each escaped identifier to `(order, ident, qstr)`, where
        `order` is the sort key later used to order the output.

    The entries of `static_qstr_list` are always added first.  If no QCFG
    line is found in any input an error is written to stderr and the process
    exits with status 1.
    """
    qcfgs = {}
    qstrs = {}

    # add static qstrs
    for qstr in static_qstr_list:
        # work out the corresponding qstr name
        ident = qstr_escape(qstr)

        # don't add duplicates
        assert ident not in qstrs

        # add the qstr to the list, with order number to retain original order in file
        order = len(qstrs) - 300000
        qstrs[ident] = (order, ident, qstr)

    # read the qstrs in from the input files
    for infile in infiles:
        with open(infile, "rt") as f:
            for line in f:
                line = line.strip()

                # is this a config line?
                match = _QCFG_RE.match(line)
                if match:
                    value = match.group(2)
                    if value[0] == "(" and value[-1] == ")":
                        # strip parenthesis from config value
                        value = value[1:-1]
                    qcfgs[match.group(1)] = value
                    continue

                # is this a QSTR line?
                match = _QSTR_RE.match(line)
                if not match:
                    continue

                # get the qstr value
                qstr = match.group(1)

                # special cases to specify control characters
                if qstr == "\\n":
                    qstr = "\n"
                elif qstr == "\\r\\n":
                    qstr = "\r\n"

                # work out the corresponding qstr name
                ident = qstr_escape(qstr)

                # don't add duplicates
                if ident in qstrs:
                    continue

                # add the qstr to the list, with order number to retain original order in file
                order = len(qstrs)
                # but put special method names like __add__ at the top of list, so
                # that their id's fit into a byte
                if ident == "":
                    # Sort empty qstr above all still
                    order = -200000
                elif ident == "__dir__":
                    # Put __dir__ after empty qstr for builtin dir() to work
                    order = -190000
                elif ident.startswith("__"):
                    order -= 100000
                qstrs[ident] = (order, ident, qstr)

    if not qcfgs:
        sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
        sys.exit(1)

    return qcfgs, qstrs


def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Return the C expression holding the hash, length and data of `qstr`.

    The result has the form `(const byte*)"<hash><len>" "<data>"`.  The hash
    takes `cfg_bytes_hash` bytes and the length `cfg_bytes_len` bytes, each
    written as `\\xNN` escapes with the least significant byte first.  The
    data is the qstr itself when it consists only of printable ASCII other
    than backslash and double quote, otherwise every byte is hex-escaped.

    Raises AssertionError if the encoded length does not fit in
    `cfg_bytes_len` bytes.
    """
    qbytes = bytes_cons(qstr, "utf8")
    qlen = len(qbytes)
    if qlen >= (1 << (8 * cfg_bytes_len)):
        # Raise explicitly rather than `assert False` so the check survives
        # `python -O`, and keep the message off stdout, which carries the
        # generated header.
        raise AssertionError("qstr is too long: %s" % qstr)
    qhash = compute_hash(qbytes, cfg_bytes_hash)
    if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr):
        # qstr is all printable ASCII so render it as-is (for easier debugging)
        qdata = qstr
    else:
        # qstr contains non-printable codes so render entire thing as hex pairs
        qdata = "".join(("\\x%02x" % b) for b in qbytes)
    qlen_str = ("\\x%02x" * cfg_bytes_len) % tuple(
        ((qlen >> (8 * i)) & 0xFF) for i in range(cfg_bytes_len)
    )
    qhash_str = ("\\x%02x" * cfg_bytes_hash) % tuple(
        ((qhash >> (8 * i)) & 0xFF) for i in range(cfg_bytes_hash)
    )
    return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)


def print_qstr_data(qcfgs, qstrs):
    """Print the generated header to stdout.

    `qcfgs` must contain "BYTES_IN_LEN" and "BYTES_IN_HASH"; `qstrs` is the
    mapping returned by `parse_input_headers`.  One QDEF line is printed for
    the null qstr, followed by one per entry of `qstrs` in ascending order
    of its order number.
    """
    # get config variables
    cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
    cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])

    # print out the starter of the generated C header file
    print("// This file was automatically generated by makeqstrdata.py")
    print("")

    # add NULL qstr with no hash or data
    print(
        'QDEF(MP_QSTRnull, (const byte*)"%s%s" "")'
        % ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len)
    )

    # go through each qstr and print it out
    for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
        qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
        print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))


def do_work(infiles):
    """Parse the input files in `infiles` and print the qstr data."""
    qcfgs, qstrs = parse_input_headers(infiles)
    print_qstr_data(qcfgs, qstrs)


if __name__ == "__main__":
    do_work(sys.argv[1:])