Implement regex_traits::lookup_classname

Jonathan Wakely jwakely.gcc@gmail.com
Wed Oct 13 09:05:00 GMT 2010


Instead of just fixing libstdc++/45990 by applying the proposed
resolution of LWG 1337, I thought I'd have a go at providing the
missing implementation of lookup_classname.

Comments would be welcome, especially on one point: the WP talks
about using bitwise OR in regex_traits::isctype, whereas if
char_class_type is a Bitmask Type then testing whether the "w" and
"blank" elements are set requires bitwise AND.  Or am I missing
something?

Because the ctype_base::mask elements have different values on
different platforms, we cannot simply define two more constants for
"w" and "blank": we don't know which bits ctype_base::mask might
already use.  So I chose to use a 64-bit type, with the lowest two
bits for the new bitmask elements and the rest of the value holding
a ctype_base::mask value left-shifted by two bits.

Jonathan
-------------- next part --------------
Index: include/bits/regex.h
===================================================================
--- include/bits/regex.h	(revision 165394)
+++ include/bits/regex.h	(working copy)
@@ -37,6 +37,10 @@ namespace std
  */
  //@{
 
+  typedef unsigned long long __re_char_class_type; // ctype mask << 2 | flags
+  static const __re_char_class_type _S_char_class_blank = 1; // "blank" flag
+  static const __re_char_class_type _S_char_class_under = 2; // '_' in "w"
+
   // [7.7] Class regex_traits
   /**
    * @brief Describes aspects of a regular expression.
@@ -55,7 +59,7 @@ namespace std
       typedef _Ch_type                     char_type;
       typedef std::basic_string<char_type> string_type;
       typedef std::locale                  locale_type;
-      typedef std::ctype_base::mask        char_class_type;
+      typedef __re_char_class_type         char_class_type;
 
     public:
       /**
@@ -211,13 +215,11 @@ namespace std
        * - upper
        * - xdigit
        *
-       * @todo Implement this function.
        */
       template<typename _Fwd_iter>
         char_class_type
         lookup_classname(_Fwd_iter __first, _Fwd_iter __last,
-	                 bool __icase = false) const
-	{ return 0; }
+	                 bool __icase = false) const;
 
       /**
        * @brief Determines if @p c is a member of an identified class.
@@ -277,6 +279,100 @@ namespace std
       locale_type _M_locale;
     };
 
+  struct __re_class  // one row of the class-name lookup tables
+  {
+    const void* _M_name;  // char or wchar_t string literal, type-erased
+    __re_char_class_type _M_class;  // mask returned for that name
+  };
+
+  template<typename _Ch_type>  // "entry < key" for std::equal_range
+    inline bool
+    operator<(__re_class const& __lhs, basic_string<_Ch_type> const& __rhs)
+    { return static_cast<const _Ch_type*>(__lhs._M_name) < __rhs; }
+
+  template<typename _Ch_type>  // "key < entry" for std::equal_range
+    inline bool
+    operator<(basic_string<_Ch_type> const& __lhs, __re_class const& __rhs)
+    { return __lhs < static_cast<const _Ch_type*>(__rhs._M_name); }
+
+  template<typename _Ch_type>  // fallback for character types with no table
+    inline __re_char_class_type
+    __lookup_classname(std::basic_string<_Ch_type> __name)
+    { return __re_char_class_type(); }  // no class bits set
+
+  template<>
+    inline __re_char_class_type
+    __lookup_classname(std::basic_string<char> __name)
+    {
+      static const __re_class __masks[] = {
+        // elements must stay sorted by name: std::equal_range bisects them
+        { "alnum",  ctype_base::alnum << 2 },
+        { "alpha",  ctype_base::alpha << 2 },
+        { "blank",  _S_char_class_blank },  // flag bit only, no ctype mask
+        { "cntrl",  ctype_base::cntrl << 2 },
+        { "d",      ctype_base::digit << 2 },  // same mask as "digit"
+        { "digit",  ctype_base::digit << 2 },
+        { "graph",  ctype_base::graph << 2 },
+        { "lower",  ctype_base::lower << 2 },
+        { "print",  ctype_base::print << 2 },
+        { "punct",  ctype_base::punct << 2 },
+        { "s",      ctype_base::space << 2 },  // same mask as "space"
+        { "space",  ctype_base::space << 2 },
+        { "upper",  ctype_base::upper << 2 },
+        { "w",      (ctype_base::alnum << 2) | _S_char_class_under },
+        { "xdigit", ctype_base::xdigit << 2 }
+      };
+      auto __pos = std::equal_range(__masks, std::end(__masks), __name);
+      if (__pos.first != __pos.second)  // non-empty range: name was found
+        return __pos.first->_M_class;
+      return __re_char_class_type();  // unknown name: no class bits set
+    }
+
+  template<>
+    inline __re_char_class_type
+    __lookup_classname(std::basic_string<wchar_t> __name)
+    {
+      static const __re_class __masks[] = {
+        // elements must stay sorted by name: std::equal_range bisects them
+        { L"alnum",  ctype_base::alnum << 2 },
+        { L"alpha",  ctype_base::alpha << 2 },
+        { L"blank",  _S_char_class_blank },  // flag bit only, no ctype mask
+        { L"cntrl",  ctype_base::cntrl << 2 },
+        { L"d",      ctype_base::digit << 2 },  // same mask as L"digit"
+        { L"digit",  ctype_base::digit << 2 },
+        { L"graph",  ctype_base::graph << 2 },
+        { L"lower",  ctype_base::lower << 2 },
+        { L"print",  ctype_base::print << 2 },
+        { L"punct",  ctype_base::punct << 2 },
+        { L"s",      ctype_base::space << 2 },  // same mask as L"space"
+        { L"space",  ctype_base::space << 2 },
+        { L"upper",  ctype_base::upper << 2 },
+        { L"w",      (ctype_base::alnum << 2) | _S_char_class_under },
+        { L"xdigit", ctype_base::xdigit << 2 }
+      };
+      auto __pos = std::equal_range(__masks, std::end(__masks), __name);
+      if (__pos.first != __pos.second)  // non-empty range: name was found
+        return __pos.first->_M_class;
+      return __re_char_class_type();  // unknown name: no class bits set
+    }
+
+  // Look up a class name; with __icase, lower and upper each imply the other.
+  template<typename _Ch_type>
+  template<typename _Fwd_iter>
+    inline typename regex_traits<_Ch_type>::char_class_type
+    regex_traits<_Ch_type>::
+    lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
+    {
+      auto __res = __lookup_classname(string_type(__first, __last));
+      // ctype_base::mask bits live above the two low extension bits.
+      const char_class_type __lower = char_class_type(ctype_base::lower) << 2;
+      const char_class_type __upper = char_class_type(ctype_base::upper) << 2;
+      // Test with '&': '|' against a non-zero mask is true for every class.
+      if (__icase && (__res & (__lower | __upper)))
+        __res |= __lower | __upper;
+      return __res;
+    }
+
   template<typename _Ch_type>
     bool
     regex_traits<_Ch_type>::
@@ -286,30 +382,18 @@ namespace std
       using std::use_facet;
       const ctype<_Ch_type>& __ctype(use_facet<
 				     ctype<_Ch_type> >(_M_locale));
-      
-      if (__ctype.is(__c, __f))
+
+      if (__ctype.is(__f >> 2, __c))
 	return true;
-      
+
       // special case of underscore in [[:w:]]
-      if (__c == __ctype.widen('_'))
-	{
-	  const char __wb[] = "w";
-	  char_class_type __wt = this->lookup_classname(__wb,
-							__wb + sizeof(__wb));
-	  if (__f | __wt)
-	    return true;
-	}
-    
+      if (__f & _S_char_class_under && __c == __ctype.widen('_'))
+        return true;
+
       // special case of [[:space:]] in [[:blank:]]
-      if (__ctype.is(std::ctype_base::space, __c))
-	{
-	  const char __bb[] = "blank";
-	  char_class_type __bt = this->lookup_classname(__bb,
-							__bb + sizeof(__bb));
-	  if (__f | __bt)
-	    return true;
-	}
-      
+      if (__f & _S_char_class_blank && __ctype.is(std::ctype_base::space, __c))
+        return true;
+
       return false;
     }
 
Index: testsuite/28_regex/07_traits/char/isctype.cc
===================================================================
--- testsuite/28_regex/07_traits/char/isctype.cc	(revision 165394)
+++ testsuite/28_regex/07_traits/char/isctype.cc	(working copy)
@@ -1,4 +1,4 @@
-// { dg-do run { xfail *-*-* } }
+// { dg-do run }
 // { dg-options "-std=c++0x" }
 
 //
@@ -35,14 +35,16 @@ test01()
   typedef char CharT;
   typedef std::regex_traits<CharT> traits;
 
-	char name[] = "lower";
-	traits t;
+  char name[] = "lower";
+  traits t;
 
-	VERIFY( t.isctype('e',  t.lookup_classname(name, name+sizeof(name)-1)) );
+  VERIFY( t.isctype('e', t.lookup_classname(name, name+sizeof(name)-1)) );
+  VERIFY( t.isctype('E', t.lookup_classname(name, name+sizeof(name)-1, true)) );
+  VERIFY( !t.isctype('E', t.lookup_classname(name, name+sizeof(name)-1)) );
 }
 
 int main()
 {
-	test01();
-	return 0;
+  test01();
+  return 0;
 }


More information about the Libstdc++ mailing list