libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37  /// @cond undocumented
38 
39  // Result of merging regex_match and regex_search.
40  //
41  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42  // the other one if possible, for test purpose).
43  //
44  // That __match_mode is true means regex_match, else regex_search.
45  template<typename _BiIter, typename _Alloc,
46  typename _CharT, typename _TraitsT,
47  _RegexExecutorPolicy __policy,
48  bool __match_mode>
49  bool
50  __regex_algo_impl(_BiIter __s,
51  _BiIter __e,
52  match_results<_BiIter, _Alloc>& __m,
53  const basic_regex<_CharT, _TraitsT>& __re,
55  {
56  if (__re._M_automaton == nullptr)
57  return false;
58 
59  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60  __m._M_begin = __s;
61  __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63  bool __ret;
64  if ((__re.flags() & regex_constants::__polynomial)
65  || (__policy == _RegexExecutorPolicy::_S_alternate
66  && !__re._M_automaton->_M_has_backref))
67  {
68  _Executor<_BiIter, _Alloc, _TraitsT, false>
69  __executor(__s, __e, __m, __re, __flags);
70  if (__match_mode)
71  __ret = __executor._M_match();
72  else
73  __ret = __executor._M_search();
74  }
75  else
76  {
77  _Executor<_BiIter, _Alloc, _TraitsT, true>
78  __executor(__s, __e, __m, __re, __flags);
79  if (__match_mode)
80  __ret = __executor._M_match();
81  else
82  __ret = __executor._M_search();
83  }
84  if (__ret)
85  {
86  for (auto& __it : __res)
87  if (!__it.matched)
88  __it.first = __it.second = __e;
89  auto& __pre = __m._M_prefix();
90  auto& __suf = __m._M_suffix();
91  if (__match_mode)
92  {
93  __pre.matched = false;
94  __pre.first = __s;
95  __pre.second = __s;
96  __suf.matched = false;
97  __suf.first = __e;
98  __suf.second = __e;
99  }
100  else
101  {
102  __pre.first = __s;
103  __pre.second = __res[0].first;
104  __pre.matched = (__pre.first != __pre.second);
105  __suf.first = __res[0].second;
106  __suf.second = __e;
107  __suf.matched = (__suf.first != __suf.second);
108  }
109  }
110  else
111  {
112  __m._M_establish_failed_match(__e);
113  }
114  return __ret;
115  }
116  /// @endcond
117 } // namespace __detail
118 
119  /// @cond
120 
121  template<typename _Ch_type>
122  template<typename _Fwd_iter>
123  typename regex_traits<_Ch_type>::string_type
125  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126  {
127  typedef std::ctype<char_type> __ctype_type;
128  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129 
130  static const char* __collatenames[] =
131  {
132  "NUL",
133  "SOH",
134  "STX",
135  "ETX",
136  "EOT",
137  "ENQ",
138  "ACK",
139  "alert",
140  "backspace",
141  "tab",
142  "newline",
143  "vertical-tab",
144  "form-feed",
145  "carriage-return",
146  "SO",
147  "SI",
148  "DLE",
149  "DC1",
150  "DC2",
151  "DC3",
152  "DC4",
153  "NAK",
154  "SYN",
155  "ETB",
156  "CAN",
157  "EM",
158  "SUB",
159  "ESC",
160  "IS4",
161  "IS3",
162  "IS2",
163  "IS1",
164  "space",
165  "exclamation-mark",
166  "quotation-mark",
167  "number-sign",
168  "dollar-sign",
169  "percent-sign",
170  "ampersand",
171  "apostrophe",
172  "left-parenthesis",
173  "right-parenthesis",
174  "asterisk",
175  "plus-sign",
176  "comma",
177  "hyphen",
178  "period",
179  "slash",
180  "zero",
181  "one",
182  "two",
183  "three",
184  "four",
185  "five",
186  "six",
187  "seven",
188  "eight",
189  "nine",
190  "colon",
191  "semicolon",
192  "less-than-sign",
193  "equals-sign",
194  "greater-than-sign",
195  "question-mark",
196  "commercial-at",
197  "A",
198  "B",
199  "C",
200  "D",
201  "E",
202  "F",
203  "G",
204  "H",
205  "I",
206  "J",
207  "K",
208  "L",
209  "M",
210  "N",
211  "O",
212  "P",
213  "Q",
214  "R",
215  "S",
216  "T",
217  "U",
218  "V",
219  "W",
220  "X",
221  "Y",
222  "Z",
223  "left-square-bracket",
224  "backslash",
225  "right-square-bracket",
226  "circumflex",
227  "underscore",
228  "grave-accent",
229  "a",
230  "b",
231  "c",
232  "d",
233  "e",
234  "f",
235  "g",
236  "h",
237  "i",
238  "j",
239  "k",
240  "l",
241  "m",
242  "n",
243  "o",
244  "p",
245  "q",
246  "r",
247  "s",
248  "t",
249  "u",
250  "v",
251  "w",
252  "x",
253  "y",
254  "z",
255  "left-curly-bracket",
256  "vertical-line",
257  "right-curly-bracket",
258  "tilde",
259  "DEL",
260  };
261 
262  string __s;
263  for (; __first != __last; ++__first)
264  __s += __fctyp.narrow(*__first, 0);
265 
266  for (const auto& __it : __collatenames)
267  if (__s == __it)
268  return string_type(1, __fctyp.widen(
269  static_cast<char>(&__it - __collatenames)));
270 
271  // TODO Add digraph support:
272  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273 
274  return string_type();
275  }
276 
277  template<typename _Ch_type>
278  template<typename _Fwd_iter>
279  typename regex_traits<_Ch_type>::char_class_type
281  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282  {
283  typedef std::ctype<char_type> __ctype_type;
284  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285 
286  // Mappings from class name to class mask.
287  static const pair<const char*, char_class_type> __classnames[] =
288  {
289  {"d", ctype_base::digit},
290  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
291  {"s", ctype_base::space},
292  {"alnum", ctype_base::alnum},
293  {"alpha", ctype_base::alpha},
294  {"blank", ctype_base::blank},
295  {"cntrl", ctype_base::cntrl},
296  {"digit", ctype_base::digit},
297  {"graph", ctype_base::graph},
298  {"lower", ctype_base::lower},
299  {"print", ctype_base::print},
300  {"punct", ctype_base::punct},
301  {"space", ctype_base::space},
302  {"upper", ctype_base::upper},
303  {"xdigit", ctype_base::xdigit},
304  };
305 
306  string __s;
307  for (; __first != __last; ++__first)
308  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 
310  for (const auto& __it : __classnames)
311  if (__s == __it.first)
312  {
313  if (__icase
314  && ((__it.second
315  & (ctype_base::lower | ctype_base::upper)) != 0))
316  return ctype_base::alpha;
317  return __it.second;
318  }
319  return 0;
320  }
321 
322  template<typename _Ch_type>
323  bool
325  isctype(_Ch_type __c, char_class_type __f) const
326  {
327  typedef std::ctype<char_type> __ctype_type;
328  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 
330  return __fctyp.is(__f._M_base, __c)
331  // [[:w:]]
332  || ((__f._M_extended & _RegexMask::_S_under)
333  && __c == __fctyp.widen('_'));
334  }
335 
336  template<typename _Ch_type>
337  int
339  value(_Ch_type __ch, int __radix) const
340  {
341  std::basic_istringstream<char_type> __is(string_type(1, __ch));
342  long __v;
343  if (__radix == 8)
344  __is >> std::oct;
345  else if (__radix == 16)
346  __is >> std::hex;
347  __is >> __v;
348  return __is.fail() ? -1 : __v;
349  }
350 
351  template<typename _Bi_iter, typename _Alloc>
352  template<typename _Out_iter>
353  _Out_iter
355  format(_Out_iter __out,
356  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358  match_flag_type __flags) const
359  {
360  __glibcxx_assert( ready() );
361  regex_traits<char_type> __traits;
362  typedef std::ctype<char_type> __ctype_type;
363  const __ctype_type&
364  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
365 
366  auto __output = [&](size_t __idx)
367  {
368  auto& __sub = (*this)[__idx];
369  if (__sub.matched)
370  __out = std::copy(__sub.first, __sub.second, __out);
371  };
372 
373  if (__flags & regex_constants::format_sed)
374  {
375  bool __escaping = false;
376  for (; __fmt_first != __fmt_last; __fmt_first++)
377  {
378  if (__escaping)
379  {
380  __escaping = false;
381  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382  __output(__traits.value(*__fmt_first, 10));
383  else
384  *__out++ = *__fmt_first;
385  continue;
386  }
387  if (*__fmt_first == '\\')
388  {
389  __escaping = true;
390  continue;
391  }
392  if (*__fmt_first == '&')
393  {
394  __output(0);
395  continue;
396  }
397  *__out++ = *__fmt_first;
398  }
399  if (__escaping)
400  *__out++ = '\\';
401  }
402  else
403  {
404  while (1)
405  {
406  auto __next = std::find(__fmt_first, __fmt_last, '$');
407  if (__next == __fmt_last)
408  break;
409 
410  __out = std::copy(__fmt_first, __next, __out);
411 
412  auto __eat = [&](char __ch) -> bool
413  {
414  if (*__next == __ch)
415  {
416  ++__next;
417  return true;
418  }
419  return false;
420  };
421 
422  if (++__next == __fmt_last)
423  *__out++ = '$';
424  else if (__eat('$'))
425  *__out++ = '$';
426  else if (__eat('&'))
427  __output(0);
428  else if (__eat('`'))
429  {
430  auto& __sub = _M_prefix();
431  if (__sub.matched)
432  __out = std::copy(__sub.first, __sub.second, __out);
433  }
434  else if (__eat('\''))
435  {
436  auto& __sub = _M_suffix();
437  if (__sub.matched)
438  __out = std::copy(__sub.first, __sub.second, __out);
439  }
440  else if (__fctyp.is(__ctype_type::digit, *__next))
441  {
442  long __num = __traits.value(*__next, 10);
443  if (++__next != __fmt_last
444  && __fctyp.is(__ctype_type::digit, *__next))
445  {
446  __num *= 10;
447  __num += __traits.value(*__next++, 10);
448  }
449  if (0 <= __num && __num < this->size())
450  __output(__num);
451  }
452  else
453  *__out++ = '$';
454  __fmt_first = __next;
455  }
456  __out = std::copy(__fmt_first, __fmt_last, __out);
457  }
458  return __out;
459  }
460 
// Searches [__first, __last) repeatedly for __e and writes the result of
// replacing the matches to __out, returning the advanced output iterator.
//
// __fmt is a null-terminated format string, expanded for each match with
// match_results::format under the same __flags.
//
// Flags in __flags that are examined here:
//   format_no_copy     text outside the matches is not copied
//   format_first_only  only the first match is replaced
//
// When nothing matches, the input is copied through unchanged (unless
// format_no_copy is set).
template<typename _Out_iter, typename _Bi_iter,
         typename _Rx_traits, typename _Ch_type>
  _Out_iter
  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
                const basic_regex<_Ch_type, _Rx_traits>& __e,
                const _Ch_type* __fmt,
                regex_constants::match_flag_type __flags)
  {
    typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
    _IterT __i(__first, __last, __e, __flags);
    _IterT __end;
    if (__i == __end)
      {
        if (!(__flags & regex_constants::format_no_copy))
          __out = std::copy(__first, __last, __out);
      }
    else
      {
        // Suffix of the most recently processed match; after the loop it
        // is the unmatched tail of the input.
        sub_match<_Bi_iter> __tail;
        auto __len = char_traits<_Ch_type>::length(__fmt);
        for (; __i != __end; ++__i)
          {
            // Text between the previous match (or the start) and this one.
            if (!(__flags & regex_constants::format_no_copy))
              __out = std::copy(__i->prefix().first, __i->prefix().second,
                                __out);
            __out = __i->format(__out, __fmt, __fmt + __len, __flags);
            __tail = __i->suffix();
            if (__flags & regex_constants::format_first_only)
              break;
          }
        if (!(__flags & regex_constants::format_no_copy))
          __out = std::copy(__tail.first, __tail.second, __out);
      }
    return __out;
  }
496 
497  template<typename _Bi_iter,
498  typename _Ch_type,
499  typename _Rx_traits>
500  bool
502  operator==(const regex_iterator& __rhs) const noexcept
503  {
504  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
505  return true;
506  return _M_pregex == __rhs._M_pregex
507  && _M_begin == __rhs._M_begin
508  && _M_end == __rhs._M_end
509  && _M_flags == __rhs._M_flags
510  && _M_match[0] == __rhs._M_match[0];
511  }
512 
513  template<typename _Bi_iter,
514  typename _Ch_type,
515  typename _Rx_traits>
516  regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
518  operator++()
519  {
520  // In all cases in which the call to regex_search returns true,
521  // match.prefix().first shall be equal to the previous value of
522  // match[0].second, and for each index i in the half-open range
523  // [0, match.size()) for which match[i].matched is true,
524  // match[i].position() shall return distance(begin, match[i].first).
525  // [28.12.1.4.5]
526  if (_M_match[0].matched)
527  {
528  auto __start = _M_match[0].second;
529  auto __prefix_first = _M_match[0].second;
530  if (_M_match[0].first == _M_match[0].second)
531  {
532  if (__start == _M_end)
533  {
534  _M_pregex = nullptr;
535  return *this;
536  }
537  else
538  {
539  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
540  _M_flags
543  {
544  __glibcxx_assert(_M_match[0].matched);
545  auto& __prefix = _M_match._M_prefix();
546  __prefix.first = __prefix_first;
547  __prefix.matched = __prefix.first != __prefix.second;
548  // [28.12.1.4.5]
549  _M_match._M_begin = _M_begin;
550  return *this;
551  }
552  else
553  ++__start;
554  }
555  }
557  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
558  {
559  __glibcxx_assert(_M_match[0].matched);
560  auto& __prefix = _M_match._M_prefix();
561  __prefix.first = __prefix_first;
562  __prefix.matched = __prefix.first != __prefix.second;
563  // [28.12.1.4.5]
564  _M_match._M_begin = _M_begin;
565  }
566  else
567  _M_pregex = nullptr;
568  }
569  return *this;
570  }
571 
572  template<typename _Bi_iter,
573  typename _Ch_type,
574  typename _Rx_traits>
575  regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
577  operator=(const regex_token_iterator& __rhs)
578  {
579  _M_position = __rhs._M_position;
580  _M_subs = __rhs._M_subs;
581  _M_n = __rhs._M_n;
582  _M_suffix = __rhs._M_suffix;
583  _M_has_m1 = __rhs._M_has_m1;
584  _M_normalize_result();
585  return *this;
586  }
587 
588  template<typename _Bi_iter,
589  typename _Ch_type,
590  typename _Rx_traits>
591  bool
593  operator==(const regex_token_iterator& __rhs) const
594  {
595  if (_M_end_of_seq() && __rhs._M_end_of_seq())
596  return true;
597  if (_M_suffix.matched && __rhs._M_suffix.matched
598  && _M_suffix == __rhs._M_suffix)
599  return true;
600  if (_M_end_of_seq() || _M_suffix.matched
601  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
602  return false;
603  return _M_position == __rhs._M_position
604  && _M_n == __rhs._M_n
605  && _M_subs == __rhs._M_subs;
606  }
607 
608  template<typename _Bi_iter,
609  typename _Ch_type,
610  typename _Rx_traits>
611  regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
613  operator++()
614  {
615  _Position __prev = _M_position;
616  if (_M_suffix.matched)
617  *this = regex_token_iterator();
618  else if (_M_n + 1 < _M_subs.size())
619  {
620  _M_n++;
621  _M_result = &_M_current_match();
622  }
623  else
624  {
625  _M_n = 0;
626  ++_M_position;
627  if (_M_position != _Position())
628  _M_result = &_M_current_match();
629  else if (_M_has_m1 && __prev->suffix().length() != 0)
630  {
631  _M_suffix.matched = true;
632  _M_suffix.first = __prev->suffix().first;
633  _M_suffix.second = __prev->suffix().second;
634  _M_result = &_M_suffix;
635  }
636  else
637  *this = regex_token_iterator();
638  }
639  return *this;
640  }
641 
642  template<typename _Bi_iter,
643  typename _Ch_type,
644  typename _Rx_traits>
645  void
646  regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
647  _M_init(_Bi_iter __a, _Bi_iter __b)
648  {
649  _M_has_m1 = false;
650  for (auto __it : _M_subs)
651  if (__it == -1)
652  {
653  _M_has_m1 = true;
654  break;
655  }
656  if (_M_position != _Position())
657  _M_result = &_M_current_match();
658  else if (_M_has_m1)
659  {
660  _M_suffix.matched = true;
661  _M_suffix.first = __a;
662  _M_suffix.second = __b;
663  _M_result = &_M_suffix;
664  }
665  else
666  _M_result = nullptr;
667  }
668 
669  /// @endcond
670 
671 _GLIBCXX_END_NAMESPACE_VERSION
672 } // namespace
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range multiple times, and replace the matched parts through filling a format string.
Definition: regex.h:2479
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2331
ISO C++ entities toplevel namespace is std.
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1062
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1054
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:245
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
Controlling input for std::string.
Definition: sstream:537
Primary class template ctype facet.
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
regex_iterator & operator++()
Increments a regex_iterator.
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
regex_token_iterator & operator++()
Increments a regex_token_iterator.
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.