This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch, libstdc++] Add specific error message into exceptions


On 27/08/15 22:18 -0700, Tim Shen wrote:
Bootstrapped and tested.

Thanks!


--
Regards,
Tim Shen

commit 53c1caff442e97a18652ec8b1d984341168fd98d
Author: Tim Shen <timshen@google.com>
Date:   Thu Aug 27 21:42:40 2015 -0700

   	PR libstdc++/67361
   	* include/bits/regex_error.h: Add __throw_regex_error that
   	supports string.
   	* include/bits/regex_automaton.h: Add more specific exception
   	messages.
   	* include/bits/regex_automaton.tcc: Likewise.
   	* include/bits/regex_compiler.h: Likewise.
   	* include/bits/regex_compiler.tcc: Likewise.
   	* include/bits/regex_scanner.h: Likewise.
   	* include/bits/regex_scanner.tcc: Likewise.

Nice, thanks for doing this!

@@ -158,10 +159,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
      // time, "\\2" is valid, but "\\1" and "\\3" are not.
      if (__index >= _M_subexpr_count)
-	__throw_regex_error(regex_constants::error_backref);
+	__throw_regex_error(
+	  regex_constants::error_backref,
+	  "Back-reference index exceeds current sub-expression count.");
      for (auto __it : this->_M_paren_stack)
	if (__index == __it)
-	  __throw_regex_error(regex_constants::error_backref);
+	  __throw_regex_error(
+	    regex_constants::error_backref,
+	    "Back-reference refered to an opened sub-expression.");

Should be "referred".

And one of the other strings in another throw says "befoer".


diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 0cb0c04..12ffabe 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -397,7 +397,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
	auto __st = _M_traits.lookup_collatename(__s.data(),
						 __s.data() + __s.size());
	if (__st.empty())
-	  __throw_regex_error(regex_constants::error_collate);
+	  __throw_regex_error(regex_constants::error_collate,
+			      string("Invalid collate element: "));
	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
	_M_is_ready = false;

There seems to be no need to construct a std::string here, just pass a
const char* (see below).

Also, this string ends in a colon, whereas most end in a period. Any
reason for the difference?


@@ -411,7 +412,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
	auto __st = _M_traits.lookup_collatename(__s.data(),
						 __s.data() + __s.size());
	if (__st.empty())
-	  __throw_regex_error(regex_constants::error_collate);
+	  __throw_regex_error(regex_constants::error_collate,
+			      string("Invalid equivalence class."));
	__st = _M_traits.transform_primary(__st.data(),
					   __st.data() + __st.size());
	_M_equiv_set.push_back(__st);

Just pass const char*.

@@ -428,7 +430,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
						 __s.data() + __s.size(),
						 __icase);
	if (__mask == 0)
-	  __throw_regex_error(regex_constants::error_ctype);
+	  __throw_regex_error(regex_constants::error_collate,
+			      string("Invalid character class."));
	if (!__neg)
	  _M_class_set |= __mask;
	else

Ditto.

@@ -442,7 +445,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _M_make_range(_CharT __l, _CharT __r)
      {
	if (__l > __r)
-	  __throw_regex_error(regex_constants::error_range);
+	  __throw_regex_error(regex_constants::error_range,
+			      string("Invalid range in bracket expression."));
	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
					 _M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG

Ditto.

diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 9a62311..019ca42 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -77,16 +77,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _M_traits(_M_nfa->_M_traits),
      _M_ctype(std::use_facet<_CtypeT>(__loc))
    {
-      _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
-      __r._M_append(_M_nfa->_M_insert_subexpr_begin());
-      this->_M_disjunction();
-      if (!_M_match_token(_ScannerT::_S_token_eof))
-	__throw_regex_error(regex_constants::error_paren);
-      __r._M_append(_M_pop());
-      _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
-      __r._M_append(_M_nfa->_M_insert_subexpr_end());
-      __r._M_append(_M_nfa->_M_insert_accept());
-      _M_nfa->_M_eliminate_dummy();
+      __try
+	{
+	  _StateSeqT __r(*_M_nfa, _M_nfa->_M_start());
+	  __r._M_append(_M_nfa->_M_insert_subexpr_begin());
+	  this->_M_disjunction();
+	  if (!_M_match_token(_ScannerT::_S_token_eof))
+	    __throw_regex_error(regex_constants::error_paren,
+				"Unexpected end of regex.");
+	  __r._M_append(_M_pop());
+	  _GLIBCXX_DEBUG_ASSERT(_M_stack.empty());
+	  __r._M_append(_M_nfa->_M_insert_subexpr_end());
+	  __r._M_append(_M_nfa->_M_insert_accept());
+	  _M_nfa->_M_eliminate_dummy();
+	}
+      __catch(std::regex_error __e)
+	{
+	  __throw_regex_error(__e.code(),
+			      string(__e.what()) + " Location: \""
+			      + _M_scanner._M_get_location_string() + "\"");
+	}
    }

  template<typename _TraitsT>

I wonder if we want to make this more efficient by adding a private
member to regex_error that would allow information to be appended to
the string, rather then creating a new regex_error with a new string.

diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index 778edd5..0dd1fdf 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -155,6 +155,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    regex_constants::error_type
    code() const
    { return _M_code; }
+
+  private:
+    regex_error(regex_constants::error_type __ecode, const string& __what)
+    : std::runtime_error(__what), _M_code(__ecode) { }
+
+    friend void __throw_regex_error(regex_constants::error_type, const string&);
  };

  //@} // group regex
@@ -162,5 +168,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  void
  __throw_regex_error(regex_constants::error_type __ecode);

+  inline void
+  __throw_regex_error(regex_constants::error_type __ecode, const string& __what)
+  { _GLIBCXX_THROW_OR_ABORT(regex_error(__ecode, __what)); }
+
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

I suggest adding another overload that takes a const char* rather than
std::string. The reason is that when using the new ABI this function
will take a std::__cxx11::string, so calling it will allocate memory
for the string data, then that string is passed to the regex_error
constructor which has to convert it internally to an old std::string,
which has to allocate a second time.

If there is an overload taking a const char* then that can be passed
to the regex_error constructor and only one allocation will be done.

(I have considered making it possible for exceptions to move the
memory from a new string into an their old string member, but that
isn't done currently.)


diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index b47103e..7795dd2 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -220,6 +220,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _M_get_value() const
      { return _M_value; }

+      string
+      _M_get_location_string() const
+      {
+	auto __left = std::max(_M_begin, _M_current - 2);
+	auto __right = std::min(_M_end, _M_current + 3);
+	static constexpr char __here[] = ">>><<<";

I don't think there's any advantage to using a static here, it doesn't
need to be a global symbol, and with optimisation enabled we get the
same code from just const char __here[] = ">>><<<";


@@ -247,6 +263,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      void
      _M_eat_class(char);

+      _IterT                        _M_begin;
      _IterT                        _M_current;
      _IterT                        _M_end;
      _CtypeT&                      _M_ctype;

This looks like an ABI change, as the size of the type changes.

If I understand correctly this is only needed for the location info,
we could still have nice human readable text in the exceptions without
this, right?



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]