1 // Copyright 2007-2009 Russ Cox. All Rights Reserved. 2 // Copyright 2014 Paul Sokolovsky. 3 // Use of this source code is governed by a BSD-style 4 // license that can be found in the LICENSE file. 5 6 #ifndef _RE1_5_REGEXP__H 7 #define _RE1_5_REGEXP__H 8 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <stdarg.h> 13 #include <assert.h> 14 15 #define nil ((void*)0) 16 #define nelem(x) (sizeof(x)/sizeof((x)[0])) 17 18 typedef struct Regexp Regexp; 19 typedef struct Prog Prog; 20 typedef struct ByteProg ByteProg; 21 typedef struct Inst Inst; 22 typedef struct Subject Subject; 23 24 struct Regexp 25 { 26 int type; 27 int n; 28 int ch; 29 Regexp *left; 30 Regexp *right; 31 }; 32 33 enum /* Regexp.type */ 34 { 35 Alt = 1, 36 Cat, 37 Lit, 38 Dot, 39 Paren, 40 Quest, 41 Star, 42 Plus, 43 }; 44 45 Regexp *parse(char*); 46 Regexp *reg(int type, Regexp *left, Regexp *right); 47 void printre(Regexp*); 48 #ifndef re1_5_fatal 49 void re1_5_fatal(char*); 50 #endif 51 #ifndef re1_5_stack_chk 52 #define re1_5_stack_chk() 53 #endif 54 void *mal(int); 55 56 struct Prog 57 { 58 Inst *start; 59 int len; 60 }; 61 62 struct ByteProg 63 { 64 int bytelen; 65 int len; 66 int sub; 67 char insts[0]; 68 }; 69 70 struct Inst 71 { 72 int opcode; 73 int c; 74 int n; 75 Inst *x; 76 Inst *y; 77 int gen; // global state, oooh! 78 }; 79 80 enum /* Inst.opcode */ 81 { 82 // Instructions which consume input bytes (and thus fail if none left) 83 CONSUMERS = 1, 84 Char = CONSUMERS, 85 Any, 86 Class, 87 ClassNot, 88 NamedClass, 89 90 ASSERTS = 0x50, 91 Bol = ASSERTS, 92 Eol, 93 94 // Instructions which take relative offset as arg 95 JUMPS = 0x60, 96 Jmp = JUMPS, 97 Split, 98 RSplit, 99 100 // Other (special) instructions 101 Save = 0x7e, 102 Match = 0x7f, 103 }; 104 105 #define inst_is_consumer(inst) ((inst) < ASSERTS) 106 #define inst_is_jump(inst) ((inst) & 0x70 == JUMPS) 107 108 Prog *compile(Regexp*); 109 void printprog(Prog*); 110 111 extern int gen; 112 113 enum { 114 MAXSUB = 20 115 }; 116 117 typedef struct Sub Sub; 118 119 struct Sub 120 { 121 int ref; 122 int nsub; 123 const char *sub[MAXSUB]; 124 }; 125 126 Sub *newsub(int n); 127 Sub *incref(Sub*); 128 Sub *copy(Sub*); 129 Sub *update(Sub*, int, const char*); 130 void decref(Sub*); 131 132 struct Subject { 133 const char *begin; 134 const char *end; 135 }; 136 137 138 #define NON_ANCHORED_PREFIX 5 139 #define HANDLE_ANCHORED(bytecode, is_anchored) ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode)) 140 141 int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int); 142 int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int); 143 int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int); 144 int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int); 145 int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int); 146 147 int re1_5_sizecode(const char *re); 148 int re1_5_compilecode(ByteProg *prog, const char *re); 149 void re1_5_dumpcode(ByteProg *prog); 150 void cleanmarks(ByteProg *prog); 151 int _re1_5_classmatch(const char *pc, const char *sp); 152 int _re1_5_namedclassmatch(const char *pc, const char *sp); 153 154 #endif /*_RE1_5_REGEXP__H*/ 155