1 /* 2 * This file is part of the MicroPython project, http://micropython.org/ 3 * 4 * The MIT License (MIT) 5 * 6 * Copyright (c) 2013, 2014 Damien P. George 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a copy 9 * of this software and associated documentation files (the "Software"), to deal 10 * in the Software without restriction, including without limitation the rights 11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 * copies of the Software, and to permit persons to whom the Software is 13 * furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice shall be included in 16 * all copies or substantial portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 * THE SOFTWARE. 25 */ 26 #ifndef MICROPY_INCLUDED_PY_LEXER_H 27 #define MICROPY_INCLUDED_PY_LEXER_H 28 29 #include <stdint.h> 30 31 #include "py/mpconfig.h" 32 #include "py/qstr.h" 33 #include "py/reader.h" 34 35 /* lexer.h -- simple tokeniser for MicroPython 36 * 37 * Uses (byte) length instead of null termination. 38 * Tokens are the same - UTF-8 with (byte) length. 39 */ 40 41 typedef enum _mp_token_kind_t { 42 MP_TOKEN_END, 43 44 MP_TOKEN_INVALID, 45 MP_TOKEN_DEDENT_MISMATCH, 46 MP_TOKEN_LONELY_STRING_OPEN, 47 #if MICROPY_PY_FSTRINGS 48 MP_TOKEN_MALFORMED_FSTRING, 49 MP_TOKEN_FSTRING_RAW, 50 #endif 51 52 MP_TOKEN_NEWLINE, 53 MP_TOKEN_INDENT, 54 MP_TOKEN_DEDENT, 55 56 MP_TOKEN_NAME, 57 MP_TOKEN_INTEGER, 58 MP_TOKEN_FLOAT_OR_IMAG, 59 MP_TOKEN_STRING, 60 MP_TOKEN_BYTES, 61 62 MP_TOKEN_ELLIPSIS, 63 64 MP_TOKEN_KW_FALSE, 65 MP_TOKEN_KW_NONE, 66 MP_TOKEN_KW_TRUE, 67 MP_TOKEN_KW___DEBUG__, 68 MP_TOKEN_KW_AND, 69 MP_TOKEN_KW_AS, 70 MP_TOKEN_KW_ASSERT, 71 #if MICROPY_PY_ASYNC_AWAIT 72 MP_TOKEN_KW_ASYNC, 73 MP_TOKEN_KW_AWAIT, 74 #endif 75 MP_TOKEN_KW_BREAK, 76 MP_TOKEN_KW_CLASS, 77 MP_TOKEN_KW_CONTINUE, 78 MP_TOKEN_KW_DEF, 79 MP_TOKEN_KW_DEL, 80 MP_TOKEN_KW_ELIF, 81 MP_TOKEN_KW_ELSE, 82 MP_TOKEN_KW_EXCEPT, 83 MP_TOKEN_KW_FINALLY, 84 MP_TOKEN_KW_FOR, 85 MP_TOKEN_KW_FROM, 86 MP_TOKEN_KW_GLOBAL, 87 MP_TOKEN_KW_IF, 88 MP_TOKEN_KW_IMPORT, 89 MP_TOKEN_KW_IN, 90 MP_TOKEN_KW_IS, 91 MP_TOKEN_KW_LAMBDA, 92 MP_TOKEN_KW_NONLOCAL, 93 MP_TOKEN_KW_NOT, 94 MP_TOKEN_KW_OR, 95 MP_TOKEN_KW_PASS, 96 MP_TOKEN_KW_RAISE, 97 MP_TOKEN_KW_RETURN, 98 MP_TOKEN_KW_TRY, 99 MP_TOKEN_KW_WHILE, 100 MP_TOKEN_KW_WITH, 101 MP_TOKEN_KW_YIELD, 102 103 MP_TOKEN_OP_ASSIGN, 104 MP_TOKEN_OP_TILDE, 105 106 // Order of these 6 matches corresponding mp_binary_op_t operator 107 MP_TOKEN_OP_LESS, 108 MP_TOKEN_OP_MORE, 109 MP_TOKEN_OP_DBL_EQUAL, 110 MP_TOKEN_OP_LESS_EQUAL, 111 MP_TOKEN_OP_MORE_EQUAL, 112 MP_TOKEN_OP_NOT_EQUAL, 113 114 // Order of these 13 matches corresponding mp_binary_op_t operator 115 MP_TOKEN_OP_PIPE, 116 MP_TOKEN_OP_CARET, 117 MP_TOKEN_OP_AMPERSAND, 118 MP_TOKEN_OP_DBL_LESS, 119 MP_TOKEN_OP_DBL_MORE, 120 MP_TOKEN_OP_PLUS, 121 MP_TOKEN_OP_MINUS, 122 MP_TOKEN_OP_STAR, 123 MP_TOKEN_OP_AT, 124 MP_TOKEN_OP_DBL_SLASH, 125 MP_TOKEN_OP_SLASH, 126 MP_TOKEN_OP_PERCENT, 127 MP_TOKEN_OP_DBL_STAR, 128 129 // Order of these 13 matches corresponding mp_binary_op_t operator 130 MP_TOKEN_DEL_PIPE_EQUAL, 131 MP_TOKEN_DEL_CARET_EQUAL, 132 MP_TOKEN_DEL_AMPERSAND_EQUAL, 133 MP_TOKEN_DEL_DBL_LESS_EQUAL, 134 MP_TOKEN_DEL_DBL_MORE_EQUAL, 135 MP_TOKEN_DEL_PLUS_EQUAL, 136 MP_TOKEN_DEL_MINUS_EQUAL, 137 MP_TOKEN_DEL_STAR_EQUAL, 138 MP_TOKEN_DEL_AT_EQUAL, 139 MP_TOKEN_DEL_DBL_SLASH_EQUAL, 140 MP_TOKEN_DEL_SLASH_EQUAL, 141 MP_TOKEN_DEL_PERCENT_EQUAL, 142 MP_TOKEN_DEL_DBL_STAR_EQUAL, 143 144 MP_TOKEN_DEL_PAREN_OPEN, 145 MP_TOKEN_DEL_PAREN_CLOSE, 146 MP_TOKEN_DEL_BRACKET_OPEN, 147 MP_TOKEN_DEL_BRACKET_CLOSE, 148 MP_TOKEN_DEL_BRACE_OPEN, 149 MP_TOKEN_DEL_BRACE_CLOSE, 150 MP_TOKEN_DEL_COMMA, 151 MP_TOKEN_DEL_COLON, 152 MP_TOKEN_DEL_PERIOD, 153 MP_TOKEN_DEL_SEMICOLON, 154 MP_TOKEN_DEL_EQUAL, 155 MP_TOKEN_DEL_MINUS_MORE, 156 } mp_token_kind_t; 157 158 // this data structure is exposed for efficiency 159 // public members are: source_name, tok_line, tok_column, tok_kind, vstr 160 typedef struct _mp_lexer_t { 161 qstr source_name; // name of source 162 mp_reader_t reader; // stream source 163 164 unichar chr0, chr1, chr2; // current cached characters from source 165 #if MICROPY_PY_FSTRINGS 166 unichar chr0_saved, chr1_saved, chr2_saved; // current cached characters from alt source 167 #endif 168 169 size_t line; // current source line 170 size_t column; // current source column 171 172 mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit 173 mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines 174 175 size_t alloc_indent_level; 176 size_t num_indent_level; 177 uint16_t *indent_level; 178 179 size_t tok_line; // token source line 180 size_t tok_column; // token source column 181 mp_token_kind_t tok_kind; // token kind 182 vstr_t vstr; // token data 183 #if MICROPY_PY_FSTRINGS 184 vstr_t fstring_args; // extracted arguments to pass to .format() 185 size_t fstring_args_idx; // how many bytes of fstring_args have been read 186 #endif 187 } mp_lexer_t; 188 189 mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader); 190 mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, size_t len, size_t free_len); 191 192 void mp_lexer_free(mp_lexer_t *lex); 193 void mp_lexer_to_next(mp_lexer_t *lex); 194 195 /******************************************************************/ 196 // platform specific import function; must be implemented for a specific port 197 // TODO tidy up, rename, or put elsewhere 198 199 typedef enum { 200 MP_IMPORT_STAT_NO_EXIST, 201 MP_IMPORT_STAT_DIR, 202 MP_IMPORT_STAT_FILE, 203 } mp_import_stat_t; 204 205 mp_import_stat_t mp_import_stat(const char *path); 206 mp_lexer_t *mp_lexer_new_from_file(const char *filename); 207 208 #if MICROPY_HELPER_LEXER_UNIX 209 mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd); 210 #endif 211 212 #endif // MICROPY_INCLUDED_PY_LEXER_H 213