libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // See below __regex_algo_impl to get what this is talking about. The default
32 // value 1 indicated a conservative optimization without giving up worst case
33 // performance.
34 #ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
35 #define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
36 #endif
37 
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43 
44  // Result of merging regex_match and regex_search.
45  //
46  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47  // the other one if possible, for test purpose).
48  //
49  // That __match_mode is true means regex_match, else regex_search.
50  template<typename _BiIter, typename _Alloc,
51  typename _CharT, typename _TraitsT,
52  _RegexExecutorPolicy __policy,
53  bool __match_mode>
54  bool
55  __regex_algo_impl(_BiIter __s,
56  _BiIter __e,
57  match_results<_BiIter, _Alloc>& __m,
58  const basic_regex<_CharT, _TraitsT>& __re,
60  {
61  if (__re._M_automaton == nullptr)
62  return false;
63 
64  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65  __res.resize(__re._M_automaton->_M_sub_count() + 2);
66  for (auto& __it : __res)
67  __it.matched = false;
68 
69  // This function decide which executor to use under given circumstances.
70  // The _S_auto policy now is the following: if a NFA has no
71  // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
72  // quantifiers (*, +, ?), the BFS executor will be used, other wise
73  // DFS executor. This is because DFS executor has a exponential upper
74  // bound, but better best-case performace. Meanwhile, BFS executor can
75  // effectively prevent from exponential-long time matching (which must
76  // contains many quantifiers), but it's slower in average.
77  //
78  // For simple regex, BFS executor could be 2 or more times slower than
79  // DFS executor.
80  //
81  // Of course, BFS executor cannot handle back-references.
82  bool __ret;
83  if (!__re._M_automaton->_M_has_backref
84  && (__policy == _RegexExecutorPolicy::_S_alternate
85  || __re._M_automaton->_M_quant_count
86  > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
87  {
88  _Executor<_BiIter, _Alloc, _TraitsT, false>
89  __executor(__s, __e, __m, __re, __flags);
90  if (__match_mode)
91  __ret = __executor._M_match();
92  else
93  __ret = __executor._M_search();
94  }
95  else
96  {
97  _Executor<_BiIter, _Alloc, _TraitsT, true>
98  __executor(__s, __e, __m, __re, __flags);
99  if (__match_mode)
100  __ret = __executor._M_match();
101  else
102  __ret = __executor._M_search();
103  }
104  if (__ret)
105  {
106  for (auto __it : __res)
107  if (!__it.matched)
108  __it.first = __it.second = __e;
109  auto& __pre = __res[__res.size()-2];
110  auto& __suf = __res[__res.size()-1];
111  if (__match_mode)
112  {
113  __pre.matched = false;
114  __pre.first = __s;
115  __pre.second = __s;
116  __suf.matched = false;
117  __suf.first = __e;
118  __suf.second = __e;
119  }
120  else
121  {
122  __pre.first = __s;
123  __pre.second = __res[0].first;
124  __pre.matched = (__pre.first != __pre.second);
125  __suf.first = __res[0].second;
126  __suf.second = __e;
127  __suf.matched = (__suf.first != __suf.second);
128  }
129  if (__re.flags() & regex_constants::nosubs)
130  __res.resize(3);
131  }
132  return __ret;
133  }
134 
135 _GLIBCXX_END_NAMESPACE_VERSION
136 }
137 
138 _GLIBCXX_BEGIN_NAMESPACE_VERSION
139 
140  template<typename _Ch_type>
141  template<typename _Fwd_iter>
142  typename regex_traits<_Ch_type>::string_type
144  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
145  {
146  typedef std::ctype<char_type> __ctype_type;
147  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
148 
149  static const char* __collatenames[] =
150  {
151  "NUL",
152  "SOH",
153  "STX",
154  "ETX",
155  "EOT",
156  "ENQ",
157  "ACK",
158  "alert",
159  "backspace",
160  "tab",
161  "newline",
162  "vertical-tab",
163  "form-feed",
164  "carriage-return",
165  "SO",
166  "SI",
167  "DLE",
168  "DC1",
169  "DC2",
170  "DC3",
171  "DC4",
172  "NAK",
173  "SYN",
174  "ETB",
175  "CAN",
176  "EM",
177  "SUB",
178  "ESC",
179  "IS4",
180  "IS3",
181  "IS2",
182  "IS1",
183  "space",
184  "exclamation-mark",
185  "quotation-mark",
186  "number-sign",
187  "dollar-sign",
188  "percent-sign",
189  "ampersand",
190  "apostrophe",
191  "left-parenthesis",
192  "right-parenthesis",
193  "asterisk",
194  "plus-sign",
195  "comma",
196  "hyphen",
197  "period",
198  "slash",
199  "zero",
200  "one",
201  "two",
202  "three",
203  "four",
204  "five",
205  "six",
206  "seven",
207  "eight",
208  "nine",
209  "colon",
210  "semicolon",
211  "less-than-sign",
212  "equals-sign",
213  "greater-than-sign",
214  "question-mark",
215  "commercial-at",
216  "A",
217  "B",
218  "C",
219  "D",
220  "E",
221  "F",
222  "G",
223  "H",
224  "I",
225  "J",
226  "K",
227  "L",
228  "M",
229  "N",
230  "O",
231  "P",
232  "Q",
233  "R",
234  "S",
235  "T",
236  "U",
237  "V",
238  "W",
239  "X",
240  "Y",
241  "Z",
242  "left-square-bracket",
243  "backslash",
244  "right-square-bracket",
245  "circumflex",
246  "underscore",
247  "grave-accent",
248  "a",
249  "b",
250  "c",
251  "d",
252  "e",
253  "f",
254  "g",
255  "h",
256  "i",
257  "j",
258  "k",
259  "l",
260  "m",
261  "n",
262  "o",
263  "p",
264  "q",
265  "r",
266  "s",
267  "t",
268  "u",
269  "v",
270  "w",
271  "x",
272  "y",
273  "z",
274  "left-curly-bracket",
275  "vertical-line",
276  "right-curly-bracket",
277  "tilde",
278  "DEL",
279  ""
280  };
281 
282  // same as boost
283  //static const char* __digraphs[] =
284  // {
285  // "ae",
286  // "Ae",
287  // "AE",
288  // "ch",
289  // "Ch",
290  // "CH",
291  // "ll",
292  // "Ll",
293  // "LL",
294  // "ss",
295  // "Ss",
296  // "SS",
297  // "nj",
298  // "Nj",
299  // "NJ",
300  // "dz",
301  // "Dz",
302  // "DZ",
303  // "lj",
304  // "Lj",
305  // "LJ",
306  // ""
307  // };
308 
309  std::string __s(__last - __first, '?');
310  __fctyp.narrow(__first, __last, '?', &*__s.begin());
311 
312  for (unsigned int __i = 0; *__collatenames[__i]; __i++)
313  if (__s == __collatenames[__i])
314  return string_type(1, __fctyp.widen(static_cast<char>(__i)));
315 
316  //for (unsigned int __i = 0; *__digraphs[__i]; __i++)
317  // {
318  // const char* __now = __digraphs[__i];
319  // if (__s == __now)
320  // {
321  // string_type ret(__s.size(), __fctyp.widen('?'));
322  // __fctyp.widen(__now, __now + 2/* ouch */, &*ret.begin());
323  // return ret;
324  // }
325  // }
326  return string_type();
327  }
328 
329  template<typename _Ch_type>
330  template<typename _Fwd_iter>
331  typename regex_traits<_Ch_type>::char_class_type
333  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
334  {
335  typedef std::ctype<char_type> __ctype_type;
336  typedef std::ctype<char> __cctype_type;
337  typedef const pair<const char*, char_class_type> _ClassnameEntry;
338  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
339  const __cctype_type& __cctyp(use_facet<__cctype_type>(_M_locale));
340 
341  static _ClassnameEntry __classnames[] =
342  {
343  {"d", ctype_base::digit},
344  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
345  {"s", ctype_base::space},
346  {"alnum", ctype_base::alnum},
347  {"alpha", ctype_base::alpha},
348  {"blank", {0, _RegexMask::_S_blank}},
349  {"cntrl", ctype_base::cntrl},
350  {"digit", ctype_base::digit},
351  {"graph", ctype_base::graph},
352  {"lower", ctype_base::lower},
353  {"print", ctype_base::print},
354  {"punct", ctype_base::punct},
355  {"space", ctype_base::space},
356  {"upper", ctype_base::upper},
357  {"xdigit", ctype_base::xdigit},
358  };
359 
360  std::string __s(__last - __first, '?');
361  __fctyp.narrow(__first, __last, '?', &__s[0]);
362  __cctyp.tolower(&*__s.begin(), &*__s.begin() + __s.size());
363  for (_ClassnameEntry* __it = __classnames;
364  __it < *(&__classnames + 1);
365  ++__it)
366  {
367  if (__s == __it->first)
368  {
369  if (__icase
370  && ((__it->second
371  & (ctype_base::lower | ctype_base::upper)) != 0))
372  return ctype_base::alpha;
373  return __it->second;
374  }
375  }
376  return 0;
377  }
378 
379  template<typename _Ch_type>
380  bool
382  isctype(_Ch_type __c, char_class_type __f) const
383  {
384  typedef std::ctype<char_type> __ctype_type;
385  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
386 
387  return __fctyp.is(__f._M_base, __c)
388  // [[:w:]]
389  || ((__f._M_extended & _RegexMask::_S_under)
390  && __c == __fctyp.widen('_'))
391  // [[:blank:]]
392  || ((__f._M_extended & _RegexMask::_S_blank)
393  && (__c == __fctyp.widen(' ')
394  || __c == __fctyp.widen('\t')));
395  }
396 
397  template<typename _Ch_type>
398  int
400  value(_Ch_type __ch, int __radix) const
401  {
403  long __v;
404  if (__radix == 8)
405  __is >> std::oct;
406  else if (__radix == 16)
407  __is >> std::hex;
408  __is >> __v;
409  return __is.fail() ? -1 : __v;
410  }
411 
412  template<typename _Bi_iter, typename _Alloc>
413  template<typename _Out_iter>
415  format(_Out_iter __out,
416  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
417  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
418  match_flag_type __flags) const
419  {
420  _GLIBCXX_DEBUG_ASSERT( ready() );
421  regex_traits<char_type> __traits;
422  typedef std::ctype<char_type> __ctype_type;
423  const __ctype_type&
424  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
425 
426  auto __output = [&](size_t __idx)
427  {
428  auto& __sub = _Base_type::operator[](__idx);
429  if (__sub.matched)
430  std::copy(__sub.first, __sub.second, __out);
431  };
432 
433  if (__flags & regex_constants::format_sed)
434  {
435  for (; __fmt_first != __fmt_last;)
436  if (*__fmt_first == '&')
437  {
438  __output(0);
439  ++__fmt_first;
440  }
441  else if (*__fmt_first == '\\')
442  {
443  if (++__fmt_first != __fmt_last
444  && __fctyp.is(__ctype_type::digit, *__fmt_first))
445  __output(__traits.value(*__fmt_first++, 10));
446  else
447  *__out++ = '\\';
448  }
449  else
450  *__out++ = *__fmt_first++;
451  }
452  else
453  {
454  while (1)
455  {
456  auto __next = std::find(__fmt_first, __fmt_last, '$');
457  if (__next == __fmt_last)
458  break;
459 
460  std::copy(__fmt_first, __next, __out);
461 
462  auto __eat = [&](char __ch) -> bool
463  {
464  if (*__next == __ch)
465  {
466  ++__next;
467  return true;
468  }
469  return false;
470  };
471 
472  if (++__next == __fmt_last)
473  *__out++ = '$';
474  else if (__eat('$'))
475  *__out++ = '$';
476  else if (__eat('&'))
477  __output(0);
478  else if (__eat('`'))
479  __output(_Base_type::size()-2);
480  else if (__eat('\''))
481  __output(_Base_type::size()-1);
482  else if (__fctyp.is(__ctype_type::digit, *__next))
483  {
484  long __num = __traits.value(*__next, 10);
485  if (++__next != __fmt_last
486  && __fctyp.is(__ctype_type::digit, *__next))
487  {
488  __num *= 10;
489  __num += __traits.value(*__next++, 10);
490  }
491  if (0 <= __num && __num < this->size())
492  __output(__num);
493  }
494  else
495  *__out++ = '$';
496  __fmt_first = __next;
497  }
498  std::copy(__fmt_first, __fmt_last, __out);
499  }
500  return __out;
501  }
502 
503  template<typename _Out_iter, typename _Bi_iter,
504  typename _Rx_traits, typename _Ch_type>
505  _Out_iter
506  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
508  const _Ch_type* __fmt,
510  {
512  _IterT __i(__first, __last, __e, __flags);
513  _IterT __end;
514  if (__i == __end)
515  {
516  if (!(__flags & regex_constants::format_no_copy))
517  std::copy(__first, __last, __out);
518  }
519  else
520  {
521  sub_match<_Bi_iter> __last;
522  auto __len = char_traits<_Ch_type>::length(__fmt);
523  for (; __i != __end; ++__i)
524  {
525  if (!(__flags & regex_constants::format_no_copy))
526  std::copy(__i->prefix().first, __i->prefix().second, __out);
527  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
528  __last = __i->suffix();
530  break;
531  }
532  if (!(__flags & regex_constants::format_no_copy))
533  std::copy(__last.first, __last.second, __out);
534  }
535  return __out;
536  }
537 
538  template<typename _Bi_iter,
539  typename _Ch_type,
540  typename _Rx_traits>
541  bool
543  operator==(const regex_iterator& __rhs) const
544  {
545  return (_M_match.empty() && __rhs._M_match.empty())
546  || (_M_begin == __rhs._M_begin
547  && _M_end == __rhs._M_end
548  && _M_pregex == __rhs._M_pregex
549  && _M_flags == __rhs._M_flags
550  && _M_match[0] == __rhs._M_match[0]);
551  }
552 
553  template<typename _Bi_iter,
554  typename _Ch_type,
555  typename _Rx_traits>
559  {
560  // In all cases in which the call to regex_search returns true,
561  // match.prefix().first shall be equal to the previous value of
562  // match[0].second, and for each index i in the half-open range
563  // [0, match.size()) for which match[i].matched is true,
564  // match[i].position() shall return distance(begin, match[i].first).
565  // [28.12.1.4.5]
566  if (_M_match[0].matched)
567  {
568  auto __start = _M_match[0].second;
569  auto __prefix_first = _M_match[0].second;
570  if (_M_match[0].first == _M_match[0].second)
571  {
572  if (__start == _M_end)
573  {
574  _M_match = value_type();
575  return *this;
576  }
577  else
578  {
579  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
580  _M_flags
583  {
584  _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
585  _M_match.at(_M_match.size()).first = __prefix_first;
586  _M_match._M_in_iterator = true;
587  _M_match._M_begin = _M_begin;
588  return *this;
589  }
590  else
591  ++__start;
592  }
593  }
595  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
596  {
597  _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
598  _M_match.at(_M_match.size()).first = __prefix_first;
599  _M_match._M_in_iterator = true;
600  _M_match._M_begin = _M_begin;
601  }
602  else
603  _M_match = value_type();
604  }
605  return *this;
606  }
607 
608  template<typename _Bi_iter,
609  typename _Ch_type,
610  typename _Rx_traits>
614  {
615  _M_position = __rhs._M_position;
616  _M_subs = __rhs._M_subs;
617  _M_n = __rhs._M_n;
618  _M_result = __rhs._M_result;
619  _M_suffix = __rhs._M_suffix;
620  _M_has_m1 = __rhs._M_has_m1;
621  if (__rhs._M_result == &__rhs._M_suffix)
622  _M_result = &_M_suffix;
623  return *this;
624  }
625 
626  template<typename _Bi_iter,
627  typename _Ch_type,
628  typename _Rx_traits>
629  bool
632  {
633  if (_M_end_of_seq() && __rhs._M_end_of_seq())
634  return true;
635  if (_M_suffix.matched && __rhs._M_suffix.matched
636  && _M_suffix == __rhs._M_suffix)
637  return true;
638  if (_M_end_of_seq() || _M_suffix.matched
639  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
640  return false;
641  return _M_position == __rhs._M_position
642  && _M_n == __rhs._M_n
643  && _M_subs == __rhs._M_subs;
644  }
645 
646  template<typename _Bi_iter,
647  typename _Ch_type,
648  typename _Rx_traits>
652  {
653  _Position __prev = _M_position;
654  if (_M_suffix.matched)
655  *this = regex_token_iterator();
656  else if (_M_n + 1 < _M_subs.size())
657  {
658  _M_n++;
659  _M_result = &_M_current_match();
660  }
661  else
662  {
663  _M_n = 0;
664  ++_M_position;
665  if (_M_position != _Position())
666  _M_result = &_M_current_match();
667  else if (_M_has_m1 && __prev->suffix().length() != 0)
668  {
669  _M_suffix.matched = true;
670  _M_suffix.first = __prev->suffix().first;
671  _M_suffix.second = __prev->suffix().second;
672  _M_result = &_M_suffix;
673  }
674  else
675  *this = regex_token_iterator();
676  }
677  return *this;
678  }
679 
680  template<typename _Bi_iter,
681  typename _Ch_type,
682  typename _Rx_traits>
683  void
685  _M_init(_Bi_iter __a, _Bi_iter __b)
686  {
687  _M_has_m1 = false;
688  for (auto __it : _M_subs)
689  if (__it == -1)
690  {
691  _M_has_m1 = true;
692  break;
693  }
694  if (_M_position != _Position())
695  _M_result = &_M_current_match();
696  else if (_M_has_m1)
697  {
698  _M_suffix.matched = true;
699  _M_suffix.first = __a;
700  _M_suffix.second = __b;
701  _M_result = &_M_suffix;
702  }
703  else
704  _M_result = nullptr;
705  }
706 
707 _GLIBCXX_END_NAMESPACE_VERSION
708 } // namespace
709 
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range for multiple times, and replace the matched parts thro...
Definition: regex.h:2218
_T2 second
first is a copy of the first object
Definition: stl_pair.h:102
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:613
match_flag_type
This is a bitmask type indicating regex matching rules.
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:949
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2084
bool fail() const
Fast error checking.
Definition: basic_ios.h:195
_T1 first
second_type is the second bound type
Definition: stl_pair.h:101
constexpr size_t size() const noexcept
Returns the total number of bits.
Definition: bitset:1293
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:651
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:382
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:631
bool empty() const
Indicates if the match_results contains no results.
Definition: regex.h:1608
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Class regex_traits. Describes aspects of a regular expression.
Definition: regex.h:72
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
The ctype&lt;char&gt; specialization.This class defines classification and conversion functions for the cha...
Basis for explicit traits specializations.
Definition: char_traits.h:227
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:365
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:941
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:558
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:400
Controlling input for std::string.
Definition: iosfwd:97
bool operator==(const regex_iterator &__rhs) const
Tests the equivalence of two regex iterators.
Definition: regex.tcc:543
reference operator[](size_t __position)
Array-indexing support.
Definition: bitset:1156