1 // Copyright 2007-2009 Russ Cox.  All Rights Reserved.
2 // Copyright 2014 Paul Sokolovsky.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 
6 #ifndef _RE1_5_REGEXP__H
7 #define _RE1_5_REGEXP__H
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdarg.h>
13 #include <assert.h>
14 
/* Null pointer constant spelled `nil`. */
#define nil ((void *)0)

/*
 * Element count of the array x.
 * Only meaningful when x is a real array object; applied to a pointer it
 * divides the pointer size by the element size instead.
 */
#define nelem(x) (sizeof(x) / sizeof(*(x)))
17 
/*
 * Short type names for the structs declared in this header, so the rest of
 * the file can write `Regexp *` instead of `struct Regexp *`.
 */

/* Regexp tree node. */
typedef struct Regexp Regexp;

/* Instruction record and the program that holds a run of them. */
typedef struct Inst Inst;
typedef struct Prog Prog;

/* Program with a trailing `insts` byte array. */
typedef struct ByteProg ByteProg;

/* Pair of begin/end pointers describing the text handed to a matcher. */
typedef struct Subject Subject;
23 
/*
 * One node of a regexp tree.
 *
 * Every node has up to two children (left/right).  Which of the scalar
 * fields are meaningful presumably depends on `type` -- TODO confirm
 * against the parser implementation.
 */
struct Regexp {
	int type;		/* one of the Regexp.type enum values */
	int n;			/* NOTE(review): meaning not visible in this header */
	int ch;			/* NOTE(review): presumably the character of a Lit node -- confirm */
	struct Regexp *left;	/* first child, or nil */
	struct Regexp *right;	/* second child, or nil */
};
32 
/*
 * Values stored in Regexp.type.
 * Numbering starts at 1; no enumerator has the value 0.
 */
enum {
	Alt   = 1,
	Cat   = 2,
	Lit   = 3,
	Dot   = 4,
	Paren = 5,
	Quest = 6,
	Star  = 7,
	Plus  = 8,
};
44 
45 Regexp *parse(char*);
46 Regexp *reg(int type, Regexp *left, Regexp *right);
47 void printre(Regexp*);
48 #ifndef re1_5_fatal
49 void re1_5_fatal(char*);
50 #endif
51 #ifndef re1_5_stack_chk
52 #define re1_5_stack_chk()
53 #endif
54 void *mal(int);
55 
/*
 * A program expressed as Inst records.
 * NOTE(review): `len` is presumably the number of Inst entries reachable
 * from `start` -- confirm against compile().
 */
struct Prog {
	struct Inst *start;	/* first instruction */
	int len;
};
61 
/*
 * A program with its code stored inline after a three-int header.
 *
 * `insts` is a C99 flexible array member (previously the GNU zero-length
 * array `insts[0]`): it contributes no storage to sizeof(struct ByteProg),
 * so whoever allocates the object must add room for the code bytes, e.g.
 * malloc(sizeof(ByteProg) + nbytes).
 */
struct ByteProg
{
	int bytelen;	/* NOTE(review): presumably the number of bytes in insts -- confirm */
	int len;	/* NOTE(review): presumably the instruction count -- confirm */
	int sub;	/* NOTE(review): presumably the number of capture slots -- confirm */
	char insts[];	/* code bytes, laid out directly after the header */
};
69 
/*
 * One instruction of a Prog.
 *
 * `x` and `y` point at other instructions.
 * NOTE(review): presumably the branch targets of jump/split opcodes --
 * confirm against the compiler.
 */
struct Inst {
	int opcode;		/* one of the Inst.opcode enum values */
	int c;			/* NOTE(review): presumably the byte matched by Char -- confirm */
	int n;			/* NOTE(review): meaning not visible in this header */
	struct Inst *x;
	struct Inst *y;
	int gen;		/* global state, oooh! */
};
79 
/*
 * Inst.opcode values.
 *
 * Opcodes are numbered in groups so that a whole group can be recognised
 * with a comparison or a mask (see inst_is_consumer / inst_is_jump below):
 * each group starts at a named base value (CONSUMERS, ASSERTS, JUMPS).
 */
enum
{
	// Instructions which consume input bytes (and thus fail if none left)
	CONSUMERS = 1,
	Char = CONSUMERS,
	Any,
	Class,
	ClassNot,
	NamedClass,

	// Assertion group.
	// NOTE(review): presumably zero-width begin/end-of-line checks -- confirm
	ASSERTS = 0x50,
	Bol = ASSERTS,
	Eol,

	// Instructions which take relative offset as arg
	JUMPS = 0x60,
	Jmp = JUMPS,
	Split,
	RSplit,

	// Other (special) instructions
	Save = 0x7e,
	Match = 0x7f,
};

/* Non-zero when the opcode value is below ASSERTS, i.e. in the CONSUMERS group. */
#define inst_is_consumer(inst) ((inst) < ASSERTS)

/*
 * Non-zero when bits 0x70 of the opcode equal JUMPS (0x60..0x6f), which
 * covers Jmp, Split and RSplit but not Save (0x7e) or Match (0x7f).
 *
 * The mask has to be parenthesised: `==` binds tighter than `&`, so the
 * unparenthesised `(inst) & 0x70 == JUMPS` parses as
 * `(inst) & (0x70 == JUMPS)` and always evaluates to 0.
 */
#define inst_is_jump(inst) (((inst) & 0x70) == JUMPS)
107 
108 Prog *compile(Regexp*);
109 void printprog(Prog*);
110 
111 extern int gen;
112 
/* Number of pointer slots in Sub.sub. */
enum { MAXSUB = 20 };

typedef struct Sub Sub;

/*
 * A fixed-capacity array of pointers into the subject text, plus bookkeeping.
 * NOTE(review): `ref` is presumably a reference count managed through
 * incref()/decref(), and `nsub` the number of slots in use -- confirm.
 */
struct Sub {
	int ref;
	int nsub;
	const char *sub[MAXSUB];
};
125 
126 Sub *newsub(int n);
127 Sub *incref(Sub*);
128 Sub *copy(Sub*);
129 Sub *update(Sub*, int, const char*);
130 void decref(Sub*);
131 
/*
 * The text handed to a matcher, described by a pair of pointers.
 * NOTE(review): `end` is presumably one past the last byte -- confirm
 * against the matchers.
 */
struct Subject
{
	const char *begin;	/* start of the text */
	const char *end;	/* end of the text */
};
136 
137 
/* Number of bytes skipped at the start of the bytecode for anchored matching. */
#define NON_ANCHORED_PREFIX 5

/*
 * Selects the entry point into `bytecode`: unchanged when `is_anchored` is
 * zero, otherwise advanced by NON_ANCHORED_PREFIX.
 * NOTE(review): presumably the skipped prefix implements the unanchored
 * search -- confirm against the bytecode compiler.
 */
#define HANDLE_ANCHORED(bytecode, is_anchored) \
	(!(is_anchored) ? (bytecode) : (bytecode) + NON_ANCHORED_PREFIX)
140 
141 int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int);
142 int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int);
143 int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int);
144 int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int);
145 int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int);
146 
147 int re1_5_sizecode(const char *re);
148 int re1_5_compilecode(ByteProg *prog, const char *re);
149 void re1_5_dumpcode(ByteProg *prog);
150 void cleanmarks(ByteProg *prog);
151 int _re1_5_classmatch(const char *pc, const char *sp);
152 int _re1_5_namedclassmatch(const char *pc, const char *sp);
153 
154 #endif /*_RE1_5_REGEXP__H*/
155