libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2019 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37  // Result of merging regex_match and regex_search.
38  //
39  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40  // the other one if possible, for test purpose).
41  //
42  // That __match_mode is true means regex_match, else regex_search.
    //
    // Runs __re over [__s, __e), records the outcome in __m and returns
    // whether the chosen executor reported a match.
43  template<typename _BiIter, typename _Alloc,
44  typename _CharT, typename _TraitsT,
45  _RegexExecutorPolicy __policy,
46  bool __match_mode>
47  bool
48  __regex_algo_impl(_BiIter __s,
49  _BiIter __e,
50  match_results<_BiIter, _Alloc>& __m,
51  const basic_regex<_CharT, _TraitsT>& __re,
    // NOTE(review): this listing jumps from 51 to 53, so the last parameter
    // (the one declaring __flags, which the body uses) is not shown here.
53  {
    // A regex without a compiled automaton can never match.
54  if (__re._M_automaton == nullptr)
55  return false;
56 
    // Access __m through its base type so every sub_match slot can be
    // iterated, size it from the automaton's sub-expression count and
    // clear all "matched" flags before running.
57  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58  __m._M_begin = __s;
59  __m._M_resize(__re._M_automaton->_M_sub_count());
60  for (auto& __it : __res)
61  __it.matched = false;
62 
    // Choose the _Executor instantiation: the `false` variant when the
    // regex carries the __polynomial flag, or when __policy is
    // _S_alternate and the automaton has no back-references; the `true`
    // variant otherwise.
    // NOTE(review): the bool presumably selects the executor's search
    // strategy -- confirm against the _Executor declaration.
63  bool __ret;
64  if ((__re.flags() & regex_constants::__polynomial)
65  || (__policy == _RegexExecutorPolicy::_S_alternate
66  && !__re._M_automaton->_M_has_backref))
67  {
68  _Executor<_BiIter, _Alloc, _TraitsT, false>
69  __executor(__s, __e, __m, __re, __flags);
70  if (__match_mode)
71  __ret = __executor._M_match();
72  else
73  __ret = __executor._M_search();
74  }
75  else
76  {
77  _Executor<_BiIter, _Alloc, _TraitsT, true>
78  __executor(__s, __e, __m, __re, __flags);
79  if (__match_mode)
80  __ret = __executor._M_match();
81  else
82  __ret = __executor._M_search();
83  }
84  if (__ret)
85  {
    // Slots that did not participate become the empty range [__e, __e).
86  for (auto& __it : __res)
87  if (!__it.matched)
88  __it.first = __it.second = __e;
89  auto& __pre = __m._M_prefix();
90  auto& __suf = __m._M_suffix();
91  if (__match_mode)
92  {
    // Match mode: prefix and suffix are set to empty, unmatched ranges
    // at __s and __e respectively.
93  __pre.matched = false;
94  __pre.first = __s;
95  __pre.second = __s;
96  __suf.matched = false;
97  __suf.first = __e;
98  __suf.second = __e;
99  }
100  else
101  {
     // Search mode: prefix is [__s, start of match), suffix is
     // [end of match, __e); each counts as matched only if non-empty.
102  __pre.first = __s;
103  __pre.second = __res[0].first;
104  __pre.matched = (__pre.first != __pre.second);
105  __suf.first = __res[0].second;
106  __suf.second = __e;
107  __suf.matched = (__suf.first != __suf.second);
108  }
109  }
110  else
111  {
     // Failure: resize the results to 0, then reset whatever entries the
     // base container still exposes to unmatched [__e, __e).
112  __m._M_resize(0);
113  for (auto& __it : __res)
114  {
115  __it.matched = false;
116  __it.first = __it.second = __e;
117  }
118  }
119  return __ret;
120  }
121 }
122 
// Translates the collating-element name spelled by [__first, __last) into
// a one-character string. The name is narrowed with the ctype facet of
// _M_locale and compared against a fixed table; an unknown name yields an
// empty string_type.
123  template<typename _Ch_type>
124  template<typename _Fwd_iter>
125  typename regex_traits<_Ch_type>::string_type
     // NOTE(review): listing number 126 is absent; the class qualifier that
     // precedes the function name is not shown here.
127  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
128  {
129  typedef std::ctype<char_type> __ctype_type;
130  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
131 
     // Name table indexed by character code: the position of a name is the
     // value of the character it denotes (entry 0 is "NUL", entry 65 is
     // "A", entry 97 is "a", and the final entry, 127, is "DEL").
132  static const char* __collatenames[] =
133  {
134  "NUL",
135  "SOH",
136  "STX",
137  "ETX",
138  "EOT",
139  "ENQ",
140  "ACK",
141  "alert",
142  "backspace",
143  "tab",
144  "newline",
145  "vertical-tab",
146  "form-feed",
147  "carriage-return",
148  "SO",
149  "SI",
150  "DLE",
151  "DC1",
152  "DC2",
153  "DC3",
154  "DC4",
155  "NAK",
156  "SYN",
157  "ETB",
158  "CAN",
159  "EM",
160  "SUB",
161  "ESC",
162  "IS4",
163  "IS3",
164  "IS2",
165  "IS1",
166  "space",
167  "exclamation-mark",
168  "quotation-mark",
169  "number-sign",
170  "dollar-sign",
171  "percent-sign",
172  "ampersand",
173  "apostrophe",
174  "left-parenthesis",
175  "right-parenthesis",
176  "asterisk",
177  "plus-sign",
178  "comma",
179  "hyphen",
180  "period",
181  "slash",
182  "zero",
183  "one",
184  "two",
185  "three",
186  "four",
187  "five",
188  "six",
189  "seven",
190  "eight",
191  "nine",
192  "colon",
193  "semicolon",
194  "less-than-sign",
195  "equals-sign",
196  "greater-than-sign",
197  "question-mark",
198  "commercial-at",
199  "A",
200  "B",
201  "C",
202  "D",
203  "E",
204  "F",
205  "G",
206  "H",
207  "I",
208  "J",
209  "K",
210  "L",
211  "M",
212  "N",
213  "O",
214  "P",
215  "Q",
216  "R",
217  "S",
218  "T",
219  "U",
220  "V",
221  "W",
222  "X",
223  "Y",
224  "Z",
225  "left-square-bracket",
226  "backslash",
227  "right-square-bracket",
228  "circumflex",
229  "underscore",
230  "grave-accent",
231  "a",
232  "b",
233  "c",
234  "d",
235  "e",
236  "f",
237  "g",
238  "h",
239  "i",
240  "j",
241  "k",
242  "l",
243  "m",
244  "n",
245  "o",
246  "p",
247  "q",
248  "r",
249  "s",
250  "t",
251  "u",
252  "v",
253  "w",
254  "x",
255  "y",
256  "z",
257  "left-curly-bracket",
258  "vertical-line",
259  "right-curly-bracket",
260  "tilde",
261  "DEL",
262  };
263 
     // Build a narrow copy of the requested name; 0 is passed to narrow()
     // as the fallback character.
264  string __s;
265  for (; __first != __last; ++__first)
266  __s += __fctyp.narrow(*__first, 0);
267 
     // Linear scan of the table; on a hit the entry's index (its pointer
     // offset from the start of the array) is cast to char, widened, and
     // returned as a string of length one.
268  for (const auto& __it : __collatenames)
269  if (__s == __it)
270  return string_type(1, __fctyp.widen(
271  static_cast<char>(&__it - __collatenames)));
272 
273  // TODO Add digraph support:
274  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
275 
276  return string_type();
277  }
278 
// Maps the character-class name spelled by [__first, __last) to its class
// mask. The name is lower-cased and narrowed with the ctype facet of
// _M_locale before lookup; an unknown name returns 0.
279  template<typename _Ch_type>
280  template<typename _Fwd_iter>
281  typename regex_traits<_Ch_type>::char_class_type
     // NOTE(review): listing number 282 is absent; the class qualifier that
     // precedes the function name is not shown here.
283  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
284  {
285  typedef std::ctype<char_type> __ctype_type;
286  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
287 
288  // Mappings from class name to class mask.
     // "d", "w" and "s" are short names; "w" is the only entry built from
     // two values (alnum plus the _S_under extension bit).
289  static const pair<const char*, char_class_type> __classnames[] =
290  {
291  {"d", ctype_base::digit},
292  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
293  {"s", ctype_base::space},
294  {"alnum", ctype_base::alnum},
295  {"alpha", ctype_base::alpha},
296  {"blank", ctype_base::blank},
297  {"cntrl", ctype_base::cntrl},
298  {"digit", ctype_base::digit},
299  {"graph", ctype_base::graph},
300  {"lower", ctype_base::lower},
301  {"print", ctype_base::print},
302  {"punct", ctype_base::punct},
303  {"space", ctype_base::space},
304  {"upper", ctype_base::upper},
305  {"xdigit", ctype_base::xdigit},
306  };
307 
     // Normalise the requested name: lower-case each character, then
     // narrow it (0 is the fallback passed to narrow()).
308  string __s;
309  for (; __first != __last; ++__first)
310  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
311 
312  for (const auto& __it : __classnames)
313  if (__s == __it.first)
314  {
     // With __icase set, any mask that includes lower or upper is
     // replaced by alpha.
315  if (__icase
316  && ((__it.second
317  & (ctype_base::lower | ctype_base::upper)) != 0))
318  return ctype_base::alpha;
319  return __it.second;
320  }
321  return 0;
322  }
323 
// Reports whether __c belongs to the class described by __f: true when the
// ctype facet of _M_locale classifies __c under __f._M_base, or when __f
// has the _S_under extension bit set and __c is the widened '_'.
324  template<typename _Ch_type>
325  bool
     // NOTE(review): listing number 326 is absent; the class qualifier that
     // precedes the function name is not shown here.
327  isctype(_Ch_type __c, char_class_type __f) const
328  {
329  typedef std::ctype<char_type> __ctype_type;
330  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
331 
332  return __fctyp.is(__f._M_base, __c)
333  // [[:w:]]
334  || ((__f._M_extended & _RegexMask::_S_under)
335  && __c == __fctyp.widen('_'));
336  }
337 
// Converts the digit character __ch to its numeric value by stream
// extraction in base __radix; returns -1 when the extraction fails.
338  template<typename _Ch_type>
339  int
     // NOTE(review): listing number 340 is absent; the class qualifier that
     // precedes the function name is not shown here.
341  value(_Ch_type __ch, int __radix) const
342  {
     // NOTE(review): listing number 343 is absent, so the declaration of
     // __is is not shown -- presumably an input string stream holding just
     // __ch; confirm against the full source.
344  long __v;
     // Only radix 8 and 16 change the stream's base; any other radix
     // leaves the stream's base setting untouched.
345  if (__radix == 8)
346  __is >> std::oct;
347  else if (__radix == 16)
348  __is >> std::hex;
349  __is >> __v;
     // The long result is converted to the declared int return type.
350  return __is.fail() ? -1 : __v;
351  }
352 
// Expands the format string [__fmt_first, __fmt_last) against this match
// and writes the result through __out, returning the advanced output
// iterator. With format_sed set in __flags the sed-style rules apply
// ("\N" and "&"); otherwise the "$"-style rules apply.
353  template<typename _Bi_iter, typename _Alloc>
354  template<typename _Out_iter>
     // NOTE(review): listing number 355 is absent; the return type and the
     // class qualifier that precede the function name are not shown here.
356  format(_Out_iter __out,
357  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
358  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
359  match_flag_type __flags) const
360  {
361  __glibcxx_assert( ready() );
362  regex_traits<char_type> __traits;
363  typedef std::ctype<char_type> __ctype_type;
364  const __ctype_type&
365  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
366 
     // Copies sub-match __idx to the output, or writes nothing if that
     // sub-match did not participate.
367  auto __output = [&](size_t __idx)
368  {
369  auto& __sub = (*this)[__idx];
370  if (__sub.matched)
371  __out = std::copy(__sub.first, __sub.second, __out);
372  };
373 
374  if (__flags & regex_constants::format_sed)
375  {
     // sed rules: a backslash followed by a digit emits that sub-match, a
     // backslash followed by anything else emits that character
     // literally, and "&" emits the whole match. The digit is passed to
     // __output without a range check, unlike the "$N" branch below.
376  bool __escaping = false;
377  for (; __fmt_first != __fmt_last; __fmt_first++)
378  {
379  if (__escaping)
380  {
381  __escaping = false;
382  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
383  __output(__traits.value(*__fmt_first, 10));
384  else
385  *__out++ = *__fmt_first;
386  continue;
387  }
388  if (*__fmt_first == '\\')
389  {
390  __escaping = true;
391  continue;
392  }
393  if (*__fmt_first == '&')
394  {
395  __output(0);
396  continue;
397  }
398  *__out++ = *__fmt_first;
399  }
     // A trailing lone backslash is emitted as-is.
400  if (__escaping)
401  *__out++ = '\\';
402  }
403  else
404  {
     // "$" rules: copy literal text up to each '$', then interpret the
     // character(s) that follow it.
405  while (1)
406  {
407  auto __next = std::find(__fmt_first, __fmt_last, '$');
408  if (__next == __fmt_last)
409  break;
410 
411  __out = std::copy(__fmt_first, __next, __out);
412 
     // Consumes the character at __next if it equals __ch.
413  auto __eat = [&](char __ch) -> bool
414  {
415  if (*__next == __ch)
416  {
417  ++__next;
418  return true;
419  }
420  return false;
421  };
422 
     // '$' at the very end, or "$$": a literal '$'.
423  if (++__next == __fmt_last)
424  *__out++ = '$';
425  else if (__eat('$'))
426  *__out++ = '$';
     // "$&": the whole match.
427  else if (__eat('&'))
428  __output(0);
     // "$`": the text before the match, if any.
429  else if (__eat('`'))
430  {
431  auto& __sub = _M_prefix();
432  if (__sub.matched)
433  __out = std::copy(__sub.first, __sub.second, __out);
434  }
     // "$'": the text after the match, if any.
435  else if (__eat('\''))
436  {
437  auto& __sub = _M_suffix();
438  if (__sub.matched)
439  __out = std::copy(__sub.first, __sub.second, __out);
440  }
     // "$N" or "$NN": a one- or two-digit sub-match index; an index
     // outside [0, size()) produces no output.
441  else if (__fctyp.is(__ctype_type::digit, *__next))
442  {
443  long __num = __traits.value(*__next, 10);
444  if (++__next != __fmt_last
445  && __fctyp.is(__ctype_type::digit, *__next))
446  {
447  __num *= 10;
448  __num += __traits.value(*__next++, 10);
449  }
450  if (0 <= __num && __num < this->size())
451  __output(__num);
452  }
     // '$' followed by anything else: emit the '$'; the following
     // character is left in place and copied as literal text later.
453  else
454  *__out++ = '$';
455  __fmt_first = __next;
456  }
     // Copy whatever literal text remains after the last '$'.
457  __out = std::copy(__fmt_first, __fmt_last, __out);
458  }
459  return __out;
460  }
461 
// Writes to __out a copy of [__first, __last) in which matches of __e are
// replaced by the expansion of the NUL-terminated format string __fmt, and
// returns the advanced output iterator. Unmatched text is copied unless
// format_no_copy is set in __flags.
462  template<typename _Out_iter, typename _Bi_iter,
463  typename _Rx_traits, typename _Ch_type>
464  _Out_iter
465  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
     // NOTE(review): listing number 466 is absent; the parameter declaring
     // __e (used below) is not shown here.
467  const _Ch_type* __fmt,
     // NOTE(review): listing number 468 is absent; the parameter declaring
     // __flags (used below) is not shown here.
469  {
     // NOTE(review): listing number 470 is absent; the definition of _IterT
     // is not shown -- presumably a regex_iterator alias; confirm.
471  _IterT __i(__first, __last, __e, __flags);
472  _IterT __end;
473  if (__i == __end)
474  {
     // No match at all: the input passes through unchanged.
475  if (!(__flags & regex_constants::format_no_copy))
476  __out = std::copy(__first, __last, __out);
477  }
478  else
479  {
     // This local sub_match intentionally or not shadows the __last
     // parameter for the rest of this branch; it remembers the suffix of
     // the most recently processed match.
480  sub_match<_Bi_iter> __last;
481  auto __len = char_traits<_Ch_type>::length(__fmt);
482  for (; __i != __end; ++__i)
483  {
     // Emit the text preceding this match, then the formatted
     // replacement.
484  if (!(__flags & regex_constants::format_no_copy))
485  __out = std::copy(__i->prefix().first, __i->prefix().second,
486  __out);
487  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
488  __last = __i->suffix();
     // NOTE(review): listing number 489 is absent; the condition guarding
     // this break is not shown -- presumably a format_first_only test;
     // confirm against the full source.
490  break;
491  }
     // Emit the text following the last processed match.
492  if (!(__flags & regex_constants::format_no_copy))
493  __out = std::copy(__last.first, __last.second, __out);
494  }
495  return __out;
496  }
497 
// Equality for regex_iterator: two iterators whose _M_pregex is null
// compare equal; otherwise the regex pointer, range, flags and current
// whole match must all be equal.
498  template<typename _Bi_iter,
499  typename _Ch_type,
500  typename _Rx_traits>
501  bool
     // NOTE(review): listing number 502 is absent; the class qualifier that
     // precedes the operator name is not shown here.
503  operator==(const regex_iterator& __rhs) const noexcept
504  {
     // operator++ in this file nulls _M_pregex when iteration is exhausted,
     // so this case covers two exhausted iterators.
505  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
506  return true;
507  return _M_pregex == __rhs._M_pregex
508  && _M_begin == __rhs._M_begin
509  && _M_end == __rhs._M_end
510  && _M_flags == __rhs._M_flags
511  && _M_match[0] == __rhs._M_match[0];
512  }
513 
// Advances to the next match, or nulls _M_pregex when no further match
// exists. Does nothing if the current whole match is not marked matched.
514  template<typename _Bi_iter,
515  typename _Ch_type,
516  typename _Rx_traits>
     // NOTE(review): listing numbers 517-519 are absent; the return type and
     // declarator are not shown. The page's cross-reference index lists
     // regex_iterator::operator++() at listing number 519.
520  {
521  // In all cases in which the call to regex_search returns true,
522  // match.prefix().first shall be equal to the previous value of
523  // match[0].second, and for each index i in the half-open range
524  // [0, match.size()) for which match[i].matched is true,
525  // match[i].position() shall return distance(begin, match[i].first).
526  // [28.12.1.4.5]
527  if (_M_match[0].matched)
528  {
     // Both the next search start and the next prefix start begin at the
     // end of the current match.
529  auto __start = _M_match[0].second;
530  auto __prefix_first = _M_match[0].second;
     // The current match is empty: a plain search from the same position
     // could find it again, so it is handled separately.
531  if (_M_match[0].first == _M_match[0].second)
532  {
533  if (__start == _M_end)
534  {
     // Empty match at the very end: iteration is over.
535  _M_pregex = nullptr;
536  return *this;
537  }
538  else
539  {
540  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
541  _M_flags
     // NOTE(review): listing numbers 542-543 are absent; the rest of this
     // flag expression and the end of the call are not shown.
544  {
545  __glibcxx_assert(_M_match[0].matched);
     // Re-anchor the prefix at the end of the previous match and mark it
     // matched only if non-empty.
546  auto& __prefix = _M_match._M_prefix();
547  __prefix.first = __prefix_first;
548  __prefix.matched = __prefix.first != __prefix.second;
549  // [28.12.1.4.5]
550  _M_match._M_begin = _M_begin;
551  return *this;
552  }
553  else
     // That attempt failed: step one position forward before the
     // general search below.
554  ++__start;
555  }
556  }
     // NOTE(review): listing number 557 is absent; a statement between the
     // block above and this search is not shown.
558  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
559  {
560  __glibcxx_assert(_M_match[0].matched);
561  auto& __prefix = _M_match._M_prefix();
562  __prefix.first = __prefix_first;
563  __prefix.matched = __prefix.first != __prefix.second;
564  // [28.12.1.4.5]
565  _M_match._M_begin = _M_begin;
566  }
567  else
     // No further match: iteration is over.
568  _M_pregex = nullptr;
569  }
570  return *this;
571  }
572 
// Copy-assigns the iteration state from __rhs, calls _M_normalize_result()
// and returns *this.
573  template<typename _Bi_iter,
574  typename _Ch_type,
575  typename _Rx_traits>
     // NOTE(review): listing numbers 576-578 are absent; the return type and
     // declarator are not shown. The page's cross-reference index lists
     // regex_token_iterator::operator=(const regex_token_iterator&) at
     // listing number 578.
579  {
580  _M_position = __rhs._M_position;
581  _M_subs = __rhs._M_subs;
582  _M_n = __rhs._M_n;
583  _M_suffix = __rhs._M_suffix;
584  _M_has_m1 = __rhs._M_has_m1;
     // _M_result is not copied from __rhs; presumably
     // _M_normalize_result() re-derives it from the members copied above
     // -- confirm against its definition.
585  _M_normalize_result();
586  return *this;
587  }
588 
// Equality for regex_token_iterator. Two end-of-sequence iterators are
// equal; two iterators both holding a matched _M_suffix are equal when
// those suffixes are equal; any other combination involving an
// end-of-sequence iterator or a matched suffix is unequal; otherwise
// position, sub-match index and sub-match list are compared.
589  template<typename _Bi_iter,
590  typename _Ch_type,
591  typename _Rx_traits>
592  bool
     // NOTE(review): listing numbers 593-594 are absent; the declarator is
     // not shown. The page's cross-reference index lists
     // regex_token_iterator::operator==(const regex_token_iterator&) const
     // at listing number 594.
595  {
596  if (_M_end_of_seq() && __rhs._M_end_of_seq())
597  return true;
598  if (_M_suffix.matched && __rhs._M_suffix.matched
599  && _M_suffix == __rhs._M_suffix)
600  return true;
     // Reaching here with either side at end-of-sequence or on a matched
     // suffix means the two sides are in different states.
601  if (_M_end_of_seq() || _M_suffix.matched
602  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
603  return false;
604  return _M_position == __rhs._M_position
605  && _M_n == __rhs._M_n
606  && _M_subs == __rhs._M_subs;
607  }
608 
// Advances to the next token: the next requested sub-match of the current
// match, else the first requested sub-match of the next match, else (when
// -1 was requested and text remains after the last match) that trailing
// text, else the default-constructed iterator state.
609  template<typename _Bi_iter,
610  typename _Ch_type,
611  typename _Rx_traits>
     // NOTE(review): listing numbers 612-614 are absent; the return type and
     // declarator are not shown. The page's cross-reference index lists
     // regex_token_iterator::operator++() at listing number 614.
615  {
     // Keep the current position so its suffix is still reachable after
     // _M_position has been advanced.
616  _Position __prev = _M_position;
     // Already on the trailing-suffix token: nothing follows it.
617  if (_M_suffix.matched)
618  *this = regex_token_iterator();
     // More requested sub-matches remain for the current match.
619  else if (_M_n + 1 < _M_subs.size())
620  {
621  _M_n++;
622  _M_result = &_M_current_match();
623  }
624  else
625  {
     // Start over at the first requested sub-match of the next match.
626  _M_n = 0;
627  ++_M_position;
628  if (_M_position != _Position())
629  _M_result = &_M_current_match();
     // No next match: if -1 was requested and the previous match has a
     // non-empty suffix, that suffix becomes the final token.
630  else if (_M_has_m1 && __prev->suffix().length() != 0)
631  {
632  _M_suffix.matched = true;
633  _M_suffix.first = __prev->suffix().first;
634  _M_suffix.second = __prev->suffix().second;
635  _M_result = &_M_suffix;
636  }
637  else
638  *this = regex_token_iterator();
639  }
640  return *this;
641  }
642 
// Sets _M_has_m1 and _M_result for a newly set-up iterator over [__a, __b):
// records whether -1 is among the requested sub-match indices, then points
// _M_result at the first token, or at nullptr if there is none.
643  template<typename _Bi_iter,
644  typename _Ch_type,
645  typename _Rx_traits>
646  void
     // NOTE(review): listing number 647 is absent; the class qualifier that
     // precedes the function name is not shown here.
648  _M_init(_Bi_iter __a, _Bi_iter __b)
649  {
     // Look for the index -1 in the requested sub-match list.
650  _M_has_m1 = false;
651  for (auto __it : _M_subs)
652  if (__it == -1)
653  {
654  _M_has_m1 = true;
655  break;
656  }
     // _M_position differs from a default-constructed _Position: start on
     // the current match.
657  if (_M_position != _Position())
658  _M_result = &_M_current_match();
     // Otherwise, when -1 was requested, the whole range [__a, __b) is
     // stored in _M_suffix and becomes the single token.
659  else if (_M_has_m1)
660  {
661  _M_suffix.matched = true;
662  _M_suffix.first = __a;
663  _M_suffix.second = __b;
664  _M_result = &_M_suffix;
665  }
666  else
667  _M_result = nullptr;
668  }
669 
670 _GLIBCXX_END_NAMESPACE_VERSION
671 } // namespace
_GLIBCXX17_INLINE constexpr match_flag_type match_not_null
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:370
_T1 first
first is a copy of the first object
Definition: stl_pair.h:214
Basis for explicit traits specializations.
Definition: char_traits.h:284
ISO C++ entities toplevel namespace is std.
_T2 second
second is a copy of the second object
Definition: stl_pair.h:215
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:327
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Searches a range repeatedly for a regular expression and replaces each matched part by filling in a format string.
Definition: regex.h:2365
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:208
bool fail() const
Fast error checking.
Definition: basic_ios.h:201
_GLIBCXX17_INLINE constexpr match_flag_type match_continuous
Managing sequences of characters and character-like objects.
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:614
_GLIBCXX17_INLINE constexpr match_flag_type format_first_only
Controlling input for std::string.
Definition: iosfwd:100
match_flag_type
This is a bitmask type indicating regex matching rules.
_GLIBCXX17_INLINE constexpr syntax_option_type __polynomial
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:283
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition: regex.tcc:503
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:127
Primary class template ctype facet. This template class defines classification and conversion function...
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2217
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Describes aspects of a regular expression.
Definition: regex.h:80
_GLIBCXX17_INLINE constexpr match_flag_type match_prev_avail
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1039
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:519
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1031
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:578
_GLIBCXX17_INLINE constexpr match_flag_type format_sed
_GLIBCXX17_INLINE constexpr match_flag_type format_no_copy
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:594
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:341