libstdc++
regex_automaton.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_automaton.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  /**
38  * @defgroup regex-detail Base and Implementation Classes
39  * @ingroup regex
40  * @{
41  */
42 
43  typedef long _StateIdT;
44  typedef std::set<_StateIdT> _StateSet;
45  static const _StateIdT _S_invalid_state_id = -1;
46 
47  template<typename _CharT>
48  using _Matcher = std::function<bool (_CharT)>;
49 
50  /// Operation codes that define the type of transitions within the base NFA
51  /// that represents the regular expression.
52  enum _Opcode : int
53  {
54  _S_opcode_unknown,
55  _S_opcode_alternative,
56  _S_opcode_backref,
57  _S_opcode_line_begin_assertion,
58  _S_opcode_line_end_assertion,
59  _S_opcode_word_boundary,
60  _S_opcode_subexpr_lookahead,
61  _S_opcode_subexpr_begin,
62  _S_opcode_subexpr_end,
63  _S_opcode_dummy,
64  _S_opcode_match,
65  _S_opcode_accept,
66  };
67 
68  struct _State_base
69  {
70  _Opcode _M_opcode; // type of outgoing transition
71  _StateIdT _M_next; // outgoing transition
72  union // Since they are mutually exclusive.
73  {
74  size_t _M_subexpr; // for _S_opcode_subexpr_*
75  size_t _M_backref_index; // for _S_opcode_backref
76  struct
77  {
78  // for _S_opcode_alternative.
79  _StateIdT _M_quant_index;
80  // for _S_opcode_alternative or _S_opcode_subexpr_lookahead
81  _StateIdT _M_alt;
82  // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
83  // quantifiers (ungreedy if set true)
84  bool _M_neg;
85  };
86  };
87 
88  explicit _State_base(_Opcode __opcode)
89  : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
90  { }
91 
92  protected:
93  ~_State_base() = default;
94 
95  public:
96 #ifdef _GLIBCXX_DEBUG
98  _M_print(std::ostream& ostr) const;
99 
100  // Prints graphviz dot commands for state.
101  std::ostream&
102  _M_dot(std::ostream& __ostr, _StateIdT __id) const;
103 #endif
104  };
105 
106  template<typename _TraitsT>
107  struct _State : _State_base
108  {
109  typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
110 
111  _MatcherT _M_matches; // for _S_opcode_match
112 
113  explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
114  };
115 
116  struct _NFA_base
117  {
118  typedef size_t _SizeT;
120 
121  explicit
122  _NFA_base(_FlagT __f)
123  : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
124  _M_quant_count(0), _M_has_backref(false)
125  { }
126 
127  _NFA_base(_NFA_base&&) = default;
128 
129  protected:
130  ~_NFA_base() = default;
131 
132  public:
133  _FlagT
134  _M_options() const
135  { return _M_flags; }
136 
137  _StateIdT
138  _M_start() const
139  { return _M_start_state; }
140 
141  const _StateSet&
142  _M_final_states() const
143  { return _M_accepting_states; }
144 
145  _SizeT
146  _M_sub_count() const
147  { return _M_subexpr_count; }
148 
149  std::vector<size_t> _M_paren_stack;
150  _StateSet _M_accepting_states;
151  _FlagT _M_flags;
152  _StateIdT _M_start_state;
153  _SizeT _M_subexpr_count;
154  _SizeT _M_quant_count;
155  bool _M_has_backref;
156  };
157 
158  template<typename _TraitsT>
159  struct _NFA
160  : _NFA_base, std::vector<_State<_TraitsT>>
161  {
162  typedef _State<_TraitsT> _StateT;
163  typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
164 
165  using _NFA_base::_NFA_base;
166 
167  // for performance reasons _NFA objects should only be moved not copied
168  _NFA(const _NFA&) = delete;
169  _NFA(_NFA&&) = default;
170 
171  _StateIdT
172  _M_insert_accept()
173  {
174  auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
175  this->_M_accepting_states.insert(__ret);
176  return __ret;
177  }
178 
179  _StateIdT
180  _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
181  {
182  _StateT __tmp(_S_opcode_alternative);
183  // It labels every quantifier to make greedy comparison easier in BFS
184  // approach.
185  __tmp._M_quant_index = this->_M_quant_count++;
186  __tmp._M_next = __next;
187  __tmp._M_alt = __alt;
188  __tmp._M_neg = __neg;
189  return _M_insert_state(std::move(__tmp));
190  }
191 
192  _StateIdT
193  _M_insert_matcher(_MatcherT __m)
194  {
195  _StateT __tmp(_S_opcode_match);
196  __tmp._M_matches = std::move(__m);
197  return _M_insert_state(std::move(__tmp));
198  }
199 
200  _StateIdT
201  _M_insert_subexpr_begin()
202  {
203  auto __id = this->_M_subexpr_count++;
204  this->_M_paren_stack.push_back(__id);
205  _StateT __tmp(_S_opcode_subexpr_begin);
206  __tmp._M_subexpr = __id;
207  return _M_insert_state(std::move(__tmp));
208  }
209 
210  _StateIdT
211  _M_insert_subexpr_end()
212  {
213  _StateT __tmp(_S_opcode_subexpr_end);
214  __tmp._M_subexpr = this->_M_paren_stack.back();
215  this->_M_paren_stack.pop_back();
216  return _M_insert_state(std::move(__tmp));
217  }
218 
219  _StateIdT
220  _M_insert_backref(size_t __index);
221 
222  _StateIdT
223  _M_insert_line_begin()
224  { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
225 
226  _StateIdT
227  _M_insert_line_end()
228  { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
229 
230  _StateIdT
231  _M_insert_word_bound(bool __neg)
232  {
233  _StateT __tmp(_S_opcode_word_boundary);
234  __tmp._M_neg = __neg;
235  return _M_insert_state(std::move(__tmp));
236  }
237 
238  _StateIdT
239  _M_insert_lookahead(_StateIdT __alt, bool __neg)
240  {
241  _StateT __tmp(_S_opcode_subexpr_lookahead);
242  __tmp._M_alt = __alt;
243  __tmp._M_neg = __neg;
244  return _M_insert_state(std::move(__tmp));
245  }
246 
247  _StateIdT
248  _M_insert_dummy()
249  { return _M_insert_state(_StateT(_S_opcode_dummy)); }
250 
251  _StateIdT
252  _M_insert_state(_StateT __s)
253  {
254  this->push_back(std::move(__s));
255  return this->size()-1;
256  }
257 
258  // Eliminate dummy node in this NFA to make it compact.
259  void
260  _M_eliminate_dummy();
261 
262 #ifdef _GLIBCXX_DEBUG
263  std::ostream&
264  _M_dot(std::ostream& __ostr) const;
265 #endif
266  };
267 
268  /// Describes a sequence of one or more %_State, its current start
269  /// and end(s). This structure contains fragments of an NFA during
270  /// construction.
271  template<typename _TraitsT>
272  class _StateSeq
273  {
274  public:
275  typedef _NFA<_TraitsT> _RegexT;
276 
277  public:
278  _StateSeq(_RegexT& __nfa, _StateIdT __s)
279  : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
280  { }
281 
282  _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
283  : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
284  { }
285 
286  // Append a state on *this and change *this to the new sequence.
287  void
288  _M_append(_StateIdT __id)
289  {
290  _M_nfa[_M_end]._M_next = __id;
291  _M_end = __id;
292  }
293 
294  // Append a sequence on *this and change *this to the new sequence.
295  void
296  _M_append(const _StateSeq& __s)
297  {
298  _M_nfa[_M_end]._M_next = __s._M_start;
299  _M_end = __s._M_end;
300  }
301 
302  // Clones an entire sequence.
303  _StateSeq
304  _M_clone();
305 
306  public:
307  _RegexT& _M_nfa;
308  _StateIdT _M_start;
309  _StateIdT _M_end;
310  };
311 
312  //@} regex-detail
313 _GLIBCXX_END_NAMESPACE_VERSION
314 } // namespace __detail
315 } // namespace std
316 
317 #include <bits/regex_automaton.tcc>
constexpr size_t size() const noexcept
Returns the total number of bits.
Definition: bitset:1293
void push_back(const value_type &__x)
Add data to the end of the vector.
Definition: stl_vector.h:899
syntax_option_type
This is a bitmask type indicating how to interpret the regex.
Describes a sequence of one or more _State, its current start and end(s). This structure contains fra...
_Opcode
Operation codes that define the type of transitions within the base NFA that represents the regular e...