1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_automaton.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
_GLIBCXX_VISIBILITY(default)31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37   /**
38    *  @defgroup regex-detail Base and Implementation Classes
39    *  @ingroup regex
40    *  @{
41    */
42 
43   typedef long _StateIdT;
44   static const _StateIdT _S_invalid_state_id  = -1;
45 
46   template<typename _CharT>
47     using _Matcher = std::function<bool (_CharT)>;
48 
49   /// Operation codes that define the type of transitions within the base NFA
50   /// that represents the regular expression.
51   enum _Opcode : int
52   {
53       _S_opcode_unknown,
54       _S_opcode_alternative,
55       _S_opcode_backref,
56       _S_opcode_line_begin_assertion,
57       _S_opcode_line_end_assertion,
58       _S_opcode_word_boundary,
59       _S_opcode_subexpr_lookahead,
60       _S_opcode_subexpr_begin,
61       _S_opcode_subexpr_end,
62       _S_opcode_dummy,
63       _S_opcode_match,
64       _S_opcode_accept,
65   };
66 
67   struct _State_base
68   {
69     _Opcode      _M_opcode;           // type of outgoing transition
70     _StateIdT    _M_next;             // outgoing transition
71     union // Since they are mutually exclusive.
72     {
73       size_t _M_subexpr;        // for _S_opcode_subexpr_*
74       size_t _M_backref_index;  // for _S_opcode_backref
75       struct
76       {
77 	// for _S_opcode_alternative.
78 	_StateIdT  _M_quant_index;
79 	// for _S_opcode_alternative or _S_opcode_subexpr_lookahead
80 	_StateIdT  _M_alt;
81 	// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
82 	// quantifiers (ungreedy if set true)
83 	bool       _M_neg;
84       };
85     };
86 
87     explicit _State_base(_Opcode __opcode)
88     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
89     { }
90 
91   protected:
92     ~_State_base() = default;
93 
94   public:
95 #ifdef _GLIBCXX_DEBUG
96     std::ostream&
97     _M_print(std::ostream& ostr) const;
98 
99     // Prints graphviz dot commands for state.
100     std::ostream&
101     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
102 #endif
103   };
104 
105   template<typename _TraitsT>
106     struct _State : _State_base
107     {
108       typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
109 
110       _MatcherT      _M_matches;        // for _S_opcode_match
111 
112       explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
113     };
114 
115   struct _NFA_base
116   {
117     typedef size_t                              _SizeT;
118     typedef regex_constants::syntax_option_type _FlagT;
119 
120     explicit
121     _NFA_base(_FlagT __f)
122     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
123     _M_quant_count(0), _M_has_backref(false)
124     { }
125 
126     _NFA_base(_NFA_base&&) = default;
127 
128   protected:
129     ~_NFA_base() = default;
130 
131   public:
132     _FlagT
133     _M_options() const
134     { return _M_flags; }
135 
136     _StateIdT
137     _M_start() const
138     { return _M_start_state; }
139 
140     _SizeT
141     _M_sub_count() const
142     { return _M_subexpr_count; }
143 
144     std::vector<size_t>       _M_paren_stack;
145     _FlagT                    _M_flags;
146     _StateIdT                 _M_start_state;
147     _SizeT                    _M_subexpr_count;
148     _SizeT                    _M_quant_count;
149     bool                      _M_has_backref;
150   };
151 
152   template<typename _TraitsT>
153     struct _NFA
154     : _NFA_base, std::vector<_State<_TraitsT>>
155     {
156       typedef _State<_TraitsT>				_StateT;
157       typedef _Matcher<typename _TraitsT::char_type>	_MatcherT;
158 
159       using _NFA_base::_NFA_base;
160 
161       // for performance reasons _NFA objects should only be moved not copied
162       _NFA(const _NFA&) = delete;
163       _NFA(_NFA&&) = default;
164 
165       _StateIdT
166       _M_insert_accept()
167       {
168 	auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
169 	return __ret;
170       }
171 
172       _StateIdT
173       _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
174       {
175 	_StateT __tmp(_S_opcode_alternative);
176 	// It labels every quantifier to make greedy comparison easier in BFS
177 	// approach.
178 	__tmp._M_quant_index = this->_M_quant_count++;
179 	__tmp._M_next = __next;
180 	__tmp._M_alt = __alt;
181 	__tmp._M_neg = __neg;
182 	return _M_insert_state(std::move(__tmp));
183       }
184 
185       _StateIdT
186       _M_insert_matcher(_MatcherT __m)
187       {
188 	_StateT __tmp(_S_opcode_match);
189 	__tmp._M_matches = std::move(__m);
190 	return _M_insert_state(std::move(__tmp));
191       }
192 
193       _StateIdT
194       _M_insert_subexpr_begin()
195       {
196 	auto __id = this->_M_subexpr_count++;
197 	this->_M_paren_stack.push_back(__id);
198 	_StateT __tmp(_S_opcode_subexpr_begin);
199 	__tmp._M_subexpr = __id;
200 	return _M_insert_state(std::move(__tmp));
201       }
202 
203       _StateIdT
204       _M_insert_subexpr_end()
205       {
206 	_StateT __tmp(_S_opcode_subexpr_end);
207 	__tmp._M_subexpr = this->_M_paren_stack.back();
208 	this->_M_paren_stack.pop_back();
209 	return _M_insert_state(std::move(__tmp));
210       }
211 
212       _StateIdT
213       _M_insert_backref(size_t __index);
214 
215       _StateIdT
216       _M_insert_line_begin()
217       { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
218 
219       _StateIdT
220       _M_insert_line_end()
221       { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
222 
223       _StateIdT
224       _M_insert_word_bound(bool __neg)
225       {
226 	_StateT __tmp(_S_opcode_word_boundary);
227 	__tmp._M_neg = __neg;
228 	return _M_insert_state(std::move(__tmp));
229       }
230 
231       _StateIdT
232       _M_insert_lookahead(_StateIdT __alt, bool __neg)
233       {
234 	_StateT __tmp(_S_opcode_subexpr_lookahead);
235 	__tmp._M_alt = __alt;
236 	__tmp._M_neg = __neg;
237 	return _M_insert_state(std::move(__tmp));
238       }
239 
240       _StateIdT
241       _M_insert_dummy()
242       { return _M_insert_state(_StateT(_S_opcode_dummy)); }
243 
244       _StateIdT
245       _M_insert_state(_StateT __s)
246       {
247 	this->push_back(std::move(__s));
248 	return this->size()-1;
249       }
250 
251       // Eliminate dummy node in this NFA to make it compact.
252       void
253       _M_eliminate_dummy();
254 
255 #ifdef _GLIBCXX_DEBUG
256       std::ostream&
257       _M_dot(std::ostream& __ostr) const;
258 #endif
259     };
260 
261   /// Describes a sequence of one or more %_State, its current start
262   /// and end(s).  This structure contains fragments of an NFA during
263   /// construction.
264   template<typename _TraitsT>
265     class _StateSeq
266     {
267     public:
268       typedef _NFA<_TraitsT> _RegexT;
269 
270     public:
271       _StateSeq(_RegexT& __nfa, _StateIdT __s)
272       : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
273       { }
274 
275       _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
276       : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
277       { }
278 
279       // Append a state on *this and change *this to the new sequence.
280       void
281       _M_append(_StateIdT __id)
282       {
283 	_M_nfa[_M_end]._M_next = __id;
284 	_M_end = __id;
285       }
286 
287       // Append a sequence on *this and change *this to the new sequence.
288       void
289       _M_append(const _StateSeq& __s)
290       {
291 	_M_nfa[_M_end]._M_next = __s._M_start;
292 	_M_end = __s._M_end;
293       }
294 
295       // Clones an entire sequence.
296       _StateSeq
297       _M_clone();
298 
299     public:
300       _RegexT&  _M_nfa;
301       _StateIdT _M_start;
302       _StateIdT _M_end;
303     };
304 
305  //@} regex-detail
306 _GLIBCXX_END_NAMESPACE_VERSION
307 } // namespace __detail
308 } // namespace std
309 
310 #include <bits/regex_automaton.tcc>
311