1 // class template regex -*- C++ -*-
2
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24
25 /**
26 * @file bits/regex_automaton.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
_GLIBCXX_VISIBILITY(default)31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36
37 /**
38 * @defgroup regex-detail Base and Implementation Classes
39 * @ingroup regex
40 * @{
41 */
42
43 typedef long _StateIdT;
44 static const _StateIdT _S_invalid_state_id = -1;
45
46 template<typename _CharT>
47 using _Matcher = std::function<bool (_CharT)>;
48
49 /// Operation codes that define the type of transitions within the base NFA
50 /// that represents the regular expression.
51 enum _Opcode : int
52 {
53 _S_opcode_unknown,
54 _S_opcode_alternative,
55 _S_opcode_backref,
56 _S_opcode_line_begin_assertion,
57 _S_opcode_line_end_assertion,
58 _S_opcode_word_boundary,
59 _S_opcode_subexpr_lookahead,
60 _S_opcode_subexpr_begin,
61 _S_opcode_subexpr_end,
62 _S_opcode_dummy,
63 _S_opcode_match,
64 _S_opcode_accept,
65 };
66
67 struct _State_base
68 {
69 _Opcode _M_opcode; // type of outgoing transition
70 _StateIdT _M_next; // outgoing transition
71 union // Since they are mutually exclusive.
72 {
73 size_t _M_subexpr; // for _S_opcode_subexpr_*
74 size_t _M_backref_index; // for _S_opcode_backref
75 struct
76 {
77 // for _S_opcode_alternative.
78 _StateIdT _M_quant_index;
79 // for _S_opcode_alternative or _S_opcode_subexpr_lookahead
80 _StateIdT _M_alt;
81 // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
82 // quantifiers (ungreedy if set true)
83 bool _M_neg;
84 };
85 };
86
87 explicit _State_base(_Opcode __opcode)
88 : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
89 { }
90
91 protected:
92 ~_State_base() = default;
93
94 public:
95 #ifdef _GLIBCXX_DEBUG
96 std::ostream&
97 _M_print(std::ostream& ostr) const;
98
99 // Prints graphviz dot commands for state.
100 std::ostream&
101 _M_dot(std::ostream& __ostr, _StateIdT __id) const;
102 #endif
103 };
104
105 template<typename _TraitsT>
106 struct _State : _State_base
107 {
108 typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
109
110 _MatcherT _M_matches; // for _S_opcode_match
111
112 explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
113 };
114
115 struct _NFA_base
116 {
117 typedef size_t _SizeT;
118 typedef regex_constants::syntax_option_type _FlagT;
119
120 explicit
121 _NFA_base(_FlagT __f)
122 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
123 _M_quant_count(0), _M_has_backref(false)
124 { }
125
126 _NFA_base(_NFA_base&&) = default;
127
128 protected:
129 ~_NFA_base() = default;
130
131 public:
132 _FlagT
133 _M_options() const
134 { return _M_flags; }
135
136 _StateIdT
137 _M_start() const
138 { return _M_start_state; }
139
140 _SizeT
141 _M_sub_count() const
142 { return _M_subexpr_count; }
143
144 std::vector<size_t> _M_paren_stack;
145 _FlagT _M_flags;
146 _StateIdT _M_start_state;
147 _SizeT _M_subexpr_count;
148 _SizeT _M_quant_count;
149 bool _M_has_backref;
150 };
151
152 template<typename _TraitsT>
153 struct _NFA
154 : _NFA_base, std::vector<_State<_TraitsT>>
155 {
156 typedef _State<_TraitsT> _StateT;
157 typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
158
159 using _NFA_base::_NFA_base;
160
161 // for performance reasons _NFA objects should only be moved not copied
162 _NFA(const _NFA&) = delete;
163 _NFA(_NFA&&) = default;
164
165 _StateIdT
166 _M_insert_accept()
167 {
168 auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
169 return __ret;
170 }
171
172 _StateIdT
173 _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
174 {
175 _StateT __tmp(_S_opcode_alternative);
176 // It labels every quantifier to make greedy comparison easier in BFS
177 // approach.
178 __tmp._M_quant_index = this->_M_quant_count++;
179 __tmp._M_next = __next;
180 __tmp._M_alt = __alt;
181 __tmp._M_neg = __neg;
182 return _M_insert_state(std::move(__tmp));
183 }
184
185 _StateIdT
186 _M_insert_matcher(_MatcherT __m)
187 {
188 _StateT __tmp(_S_opcode_match);
189 __tmp._M_matches = std::move(__m);
190 return _M_insert_state(std::move(__tmp));
191 }
192
193 _StateIdT
194 _M_insert_subexpr_begin()
195 {
196 auto __id = this->_M_subexpr_count++;
197 this->_M_paren_stack.push_back(__id);
198 _StateT __tmp(_S_opcode_subexpr_begin);
199 __tmp._M_subexpr = __id;
200 return _M_insert_state(std::move(__tmp));
201 }
202
203 _StateIdT
204 _M_insert_subexpr_end()
205 {
206 _StateT __tmp(_S_opcode_subexpr_end);
207 __tmp._M_subexpr = this->_M_paren_stack.back();
208 this->_M_paren_stack.pop_back();
209 return _M_insert_state(std::move(__tmp));
210 }
211
212 _StateIdT
213 _M_insert_backref(size_t __index);
214
215 _StateIdT
216 _M_insert_line_begin()
217 { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
218
219 _StateIdT
220 _M_insert_line_end()
221 { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
222
223 _StateIdT
224 _M_insert_word_bound(bool __neg)
225 {
226 _StateT __tmp(_S_opcode_word_boundary);
227 __tmp._M_neg = __neg;
228 return _M_insert_state(std::move(__tmp));
229 }
230
231 _StateIdT
232 _M_insert_lookahead(_StateIdT __alt, bool __neg)
233 {
234 _StateT __tmp(_S_opcode_subexpr_lookahead);
235 __tmp._M_alt = __alt;
236 __tmp._M_neg = __neg;
237 return _M_insert_state(std::move(__tmp));
238 }
239
240 _StateIdT
241 _M_insert_dummy()
242 { return _M_insert_state(_StateT(_S_opcode_dummy)); }
243
244 _StateIdT
245 _M_insert_state(_StateT __s)
246 {
247 this->push_back(std::move(__s));
248 return this->size()-1;
249 }
250
251 // Eliminate dummy node in this NFA to make it compact.
252 void
253 _M_eliminate_dummy();
254
255 #ifdef _GLIBCXX_DEBUG
256 std::ostream&
257 _M_dot(std::ostream& __ostr) const;
258 #endif
259 };
260
261 /// Describes a sequence of one or more %_State, its current start
262 /// and end(s). This structure contains fragments of an NFA during
263 /// construction.
264 template<typename _TraitsT>
265 class _StateSeq
266 {
267 public:
268 typedef _NFA<_TraitsT> _RegexT;
269
270 public:
271 _StateSeq(_RegexT& __nfa, _StateIdT __s)
272 : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
273 { }
274
275 _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
276 : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
277 { }
278
279 // Append a state on *this and change *this to the new sequence.
280 void
281 _M_append(_StateIdT __id)
282 {
283 _M_nfa[_M_end]._M_next = __id;
284 _M_end = __id;
285 }
286
287 // Append a sequence on *this and change *this to the new sequence.
288 void
289 _M_append(const _StateSeq& __s)
290 {
291 _M_nfa[_M_end]._M_next = __s._M_start;
292 _M_end = __s._M_end;
293 }
294
295 // Clones an entire sequence.
296 _StateSeq
297 _M_clone();
298
299 public:
300 _RegexT& _M_nfa;
301 _StateIdT _M_start;
302 _StateIdT _M_end;
303 };
304
305 //@} regex-detail
306 _GLIBCXX_END_NAMESPACE_VERSION
307 } // namespace __detail
308 } // namespace std
309
310 #include <bits/regex_automaton.tcc>
311