# Test a UTF-8 encoded literal: index each character and show its code point
s = "asdf©qwer"
for pos in range(len(s)):
    ch = s[pos]
    print("s[%d]: %s %X" % (pos, ch, ord(ch)))

# Test all three forms of Unicode escape (\x, \u and \U); the characters are
# chosen to cover the 1-, 2-, 3- and 4-byte UTF-8 patterns.  Indexing is done
# with negative as well as non-negative positions, and every slice form
# ([:i], [i:j], [i:]) is printed together with its length.
s = "a\xA9\xFF\u0123\u0800\uFFEE\U0001F44C"
length = len(s)
for start in range(-length, length):
    ch = s[start]
    print("s[%d]: %s %X" % (start, ch, ord(ch)))
    head = s[:start]
    print("s[:%d]: %d chars, '%s'" % (start, len(head), head))
    for stop in range(start, length):
        piece = s[start:stop]
        print("s[%d:%d]: %d chars, '%s'" % (start, stop, len(piece), piece))
    tail = s[start:]
    print("s[%d:]: %d chars, '%s'" % (start, len(tail), tail))

# Test UTF-8 encode and decode: the round trip must give back the same string
enc = s.encode()
decoded = enc.decode()
print(enc, decoded == s)

# Printing of unicode chars using repr
# NOTE: for some characters (e.g. \u10ff) the output differs from CPython
for sample in ("a\uffff", "a\U0001ffff"):
    print(repr(sample))

# Test an invalid escape code (beyond U+10FFFF); the literal is wrapped in
# eval() so that it is only parsed when this statement runs
try:
    eval('"\\U00110000"')
except SyntaxError:
    print("SyntaxError")

# Test a unicode string given to int: a non-digit character is rejected
try:
    int("\u0200")
except ValueError:
    print("ValueError")

# Test invalid UTF-8 input: a marker is printed for each buffer whose
# decoding raises UnicodeError
for raw in (b"ab\xa1", b"ab\xf8", bytearray(b"ab\xc0a"), b"\xf0\xe0\xed\xe8"):
    try:
        str(raw, "utf8")
    except UnicodeError:
        print("UnicodeError")