This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch] Speed up parsing of ints and floats (in the "C" locale)


Benjamin Kosnik wrote:

+ template<typename _CharT2>
+ typename __enable_if<int, __is_char<_CharT2>::__value>::__type
+ _M_find(const _CharT2*, size_t __len, _CharT2 __c) const


very nice!


Thanks, Benjamin ;) The slight variant below is going in... Tested on x86/ia64-linux.

Paolo.

///////////////
2006-04-29  Paolo Carlini  <pcarlini@suse.de>

	* include/bits/locale_facets.tcc (num_get<>::_M_extract_float):
	Special case main parsing loop for !_M_allocated (i.e., "C" locale).
	(num_get<>::_M_extract_int): Likewise.
	* include/bits/locale_facets.h (num_get<>::_M_find): New.
Index: include/bits/locale_facets.tcc
===================================================================
--- include/bits/locale_facets.tcc	(revision 113192)
+++ include/bits/locale_facets.tcc	(working copy)
@@ -340,94 +340,147 @@
       string __found_grouping;
       if (__lc->_M_use_grouping)
 	__found_grouping.reserve(32);
-      const char_type* __q;
       const char_type* __lit_zero = __lit + __num_base::_S_izero;
-      while (!__testeof)
-        {
-	  // According to 22.2.2.1.2, p8-9, first look for thousands_sep
-	  // and decimal_point.
-          if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
-	    {
-	      if (!__found_dec && !__found_sci)
-		{
-		  // NB: Thousands separator at the beginning of a string
-		  // is a no-no, as is two consecutive thousands separators.
-		  if (__sep_pos)
-		    {
-		      __found_grouping += static_cast<char>(__sep_pos);
-		      __sep_pos = 0;
-		    }
-		  else
-		    {
-		      // NB: __convert_to_v will not assign __v and will
-		      // set the failbit.
-		      __xtrc.clear();
-		      break;
-		    }
-		}
-	      else
-		break;
-            }
-	  else if (__c == __lc->_M_decimal_point)
-	    {
-	      if (!__found_dec && !__found_sci)
-		{
-		  // If no grouping chars are seen, no grouping check
-		  // is applied. Therefore __found_grouping is adjusted
-		  // only if decimal_point comes after some thousands_sep.
-		  if (__found_grouping.size())
-		    __found_grouping += static_cast<char>(__sep_pos);
-		  __xtrc += '.';
-		  __found_dec = true;
-		}
-	      else
-		break;
-	    }
-          else if ((__q = __traits_type::find(__lit_zero, 10, __c)))
-	    {
-	      __xtrc += __num_base::_S_atoms_in[__q - __lit];
-	      __found_mantissa = true;
-	      ++__sep_pos;
-	    }
-	  else if ((__c == __lit[__num_base::_S_ie] 
-		    || __c == __lit[__num_base::_S_iE])
-		   && !__found_sci && __found_mantissa)
-	    {
-	      // Scientific notation.
-	      if (__found_grouping.size() && !__found_dec)
-		__found_grouping += static_cast<char>(__sep_pos);
-	      __xtrc += 'e';
-	      __found_sci = true;
 
-	      // Remove optional plus or minus sign, if they exist.
-	      if (++__beg != __end)
-		{
-		  __c = *__beg;
-		  const bool __plus = __c == __lit[__num_base::_S_iplus];
-		  if ((__plus || __c == __lit[__num_base::_S_iminus])
-		      && !(__lc->_M_use_grouping
-			   && __c == __lc->_M_thousands_sep)
-		      && !(__c == __lc->_M_decimal_point))
-		    __xtrc += __plus ? '+' : '-';
-		  else
-		    continue;
-		}
-	      else
-		{
-		  __testeof = true;
+      if (!__lc->_M_allocated)
+	// "C" locale
+	while (!__testeof)
+	  {
+	    const int __digit = _M_find(__lit_zero, 10, __c);
+	    if (__digit != -1)
+	      {
+		__xtrc += '0' + __digit;
+		__found_mantissa = true;
+	      }
+	    else if (__c == __lc->_M_decimal_point
+		     && !__found_dec && !__found_sci)
+	      {
+		__xtrc += '.';
+		__found_dec = true;
+	      }
+	    else if ((__c == __lit[__num_base::_S_ie] 
+		      || __c == __lit[__num_base::_S_iE])
+		     && !__found_sci && __found_mantissa)
+	      {
+		// Scientific notation.
+		__xtrc += 'e';
+		__found_sci = true;
+		
+		// Remove optional plus or minus sign, if they exist.
+		if (++__beg != __end)
+		  {
+		    __c = *__beg;
+		    const bool __plus = __c == __lit[__num_base::_S_iplus];
+		    if (__plus || __c == __lit[__num_base::_S_iminus])
+		      __xtrc += __plus ? '+' : '-';
+		    else
+		      continue;
+		  }
+		else
+		  {
+		    __testeof = true;
+		    break;
+		  }
+	      }
+	    else
+	      break;
+
+	    if (++__beg != __end)
+	      __c = *__beg;
+	    else
+	      __testeof = true;
+	  }
+      else
+	while (!__testeof)
+	  {
+	    // According to 22.2.2.1.2, p8-9, first look for thousands_sep
+	    // and decimal_point.
+	    if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
+	      {
+		if (!__found_dec && !__found_sci)
+		  {
+		    // NB: Thousands separator at the beginning of a string
+		    // is a no-no, as is two consecutive thousands separators.
+		    if (__sep_pos)
+		      {
+			__found_grouping += static_cast<char>(__sep_pos);
+			__sep_pos = 0;
+		      }
+		    else
+		      {
+			// NB: __convert_to_v will not assign __v and will
+			// set the failbit.
+			__xtrc.clear();
+			break;
+		      }
+		  }
+		else
 		  break;
-		}
-	    }
-	  else
-	    // Not a valid input item.
-	    break;
+	      }
+	    else if (__c == __lc->_M_decimal_point)
+	      {
+		if (!__found_dec && !__found_sci)
+		  {
+		    // If no grouping chars are seen, no grouping check
+		    // is applied. Therefore __found_grouping is adjusted
+		    // only if decimal_point comes after some thousands_sep.
+		    if (__found_grouping.size())
+		      __found_grouping += static_cast<char>(__sep_pos);
+		    __xtrc += '.';
+		    __found_dec = true;
+		  }
+		else
+		  break;
+	      }
+	    else
+	      {
+		const char_type* __q =
+		  __traits_type::find(__lit_zero, 10, __c);
+		if (__q)
+		  {
+		    __xtrc += '0' + (__q - __lit_zero);
+		    __found_mantissa = true;
+		    ++__sep_pos;
+		  }
+		else if ((__c == __lit[__num_base::_S_ie] 
+			  || __c == __lit[__num_base::_S_iE])
+			 && !__found_sci && __found_mantissa)
+		  {
+		    // Scientific notation.
+		    if (__found_grouping.size() && !__found_dec)
+		      __found_grouping += static_cast<char>(__sep_pos);
+		    __xtrc += 'e';
+		    __found_sci = true;
+		    
+		    // Remove optional plus or minus sign, if they exist.
+		    if (++__beg != __end)
+		      {
+			__c = *__beg;
+			const bool __plus = __c == __lit[__num_base::_S_iplus];
+			if ((__plus || __c == __lit[__num_base::_S_iminus])
+			    && !(__lc->_M_use_grouping
+				 && __c == __lc->_M_thousands_sep)
+			    && !(__c == __lc->_M_decimal_point))
+		      __xtrc += __plus ? '+' : '-';
+			else
+			  continue;
+		      }
+		    else
+		      {
+			__testeof = true;
+			break;
+		      }
+		  }
+		else
+		  break;
+	      }
+	    
+	    if (++__beg != __end)
+	      __c = *__beg;
+	    else
+	      __testeof = true;
+	  }
 
-	  if (++__beg != __end)
-	    __c = *__beg;
-	  else
-	    __testeof = true;
-        }
-
       // Digit grouping is checked. If grouping and found_grouping don't
       // match, then get very very upset, and set failbit.
       if (__found_grouping.size())
@@ -569,54 +622,81 @@
 	  -numeric_limits<_ValueT>::min() : numeric_limits<_ValueT>::max();
 	const __unsigned_type __smax = __max / __base;
 	__unsigned_type __result = 0;
-	const char_type* __q;
+	int __digit = 0;
 	const char_type* __lit_zero = __lit + __num_base::_S_izero;
-	while (!__testeof)
-	  {
-	    // According to 22.2.2.1.2, p8-9, first look for thousands_sep
-	    // and decimal_point.
-	    if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
-	      {
-		// NB: Thousands separator at the beginning of a string
-		// is a no-no, as is two consecutive thousands separators.
-		if (__sep_pos)
-		  {
-		    __found_grouping += static_cast<char>(__sep_pos);
-		    __sep_pos = 0;
-		  }
-		else
-		  {
+
+	if (!__lc->_M_allocated)
+	  // "C" locale
+	  while (!__testeof)
+	    {
+	      __digit = _M_find(__lit_zero, __len, __c);
+	      if (__digit == -1)
+		break;
+	      
+	      if (__result > __smax)
+		__testfail = true;
+	      else
+		{
+		  __result *= __base;
+		  __testfail |= __result > __max - __digit;
+		  __result += __digit;
+		  ++__sep_pos;
+		}
+	      
+	      if (++__beg != __end)
+		__c = *__beg;
+	      else
+		__testeof = true;
+	    }
+	else
+	  while (!__testeof)
+	    {
+	      // According to 22.2.2.1.2, p8-9, first look for thousands_sep
+	      // and decimal_point.
+	      if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
+		{
+		  // NB: Thousands separator at the beginning of a string
+		  // is a no-no, as is two consecutive thousands separators.
+		  if (__sep_pos)
+		    {
+		      __found_grouping += static_cast<char>(__sep_pos);
+		      __sep_pos = 0;
+		    }
+		  else
+		    {
+		      __testfail = true;
+		      break;
+		    }
+		}
+	      else if (__c == __lc->_M_decimal_point)
+		break;
+	      else
+		{
+		  const char_type* __q =
+		    __traits_type::find(__lit_zero, __len, __c);
+		  if (!__q)
+		    break;
+		  
+		  __digit = __q - __lit_zero;
+		  if (__digit > 15)
+		    __digit -= 6;
+		  if (__result > __smax)
 		    __testfail = true;
-		    break;
-		  }
-	      }
-	    else if (__c == __lc->_M_decimal_point)
-	      break;
-	    else if ((__q = __traits_type::find(__lit_zero, __len, __c)))
-	      {
-		int __digit = __q - __lit_zero;
-		if (__digit > 15)
-		  __digit -= 6;
-		if (__result > __smax)
-		  __testfail = true;
-		else
-		  {
-		    __result *= __base;
-		    __testfail |= __result > __max - __digit;
-		    __result += __digit;
-		    ++__sep_pos;
-		  }
-	      }
-	    else
-	      // Not a valid input item.	      
-	      break;
-	    
-	    if (++__beg != __end)
-	      __c = *__beg;
-	    else
-	      __testeof = true;
-	  }
-
+		  else
+		    {
+		      __result *= __base;
+		      __testfail |= __result > __max - __digit;
+		      __result += __digit;
+		      ++__sep_pos;
+		    }
+		}
+	      
+	      if (++__beg != __end)
+		__c = *__beg;
+	      else
+		__testeof = true;
+	    }
+	
 	// Digit grouping is checked. If grouping and found_grouping don't
 	// match, then get very very upset, and set failbit.
 	if (__found_grouping.size())
Index: include/bits/locale_facets.h
===================================================================
--- include/bits/locale_facets.h	(revision 113192)
+++ include/bits/locale_facets.h	(working copy)
@@ -47,6 +47,7 @@
 #include <iosfwd>
 #include <bits/ios_base.h>  // For ios_base, ios_base::iostate
 #include <streambuf>
+#include <bits/cpp_type_traits.h>
 
 _GLIBCXX_BEGIN_NAMESPACE(std)
 
@@ -2125,6 +2126,43 @@
         _M_extract_int(iter_type, iter_type, ios_base&, ios_base::iostate&,
 		       _ValueT& __v) const;
 
+      template<typename _CharT2>  // overload enabled on __is_char<_CharT2>::__value
+        typename __enable_if<int, __is_char<_CharT2>::__value>::__type
+        _M_find(const _CharT2*, size_t __len, _CharT2 __c) const  // 1st argument is unnamed and unused
+        {
+	  int __ret = -1;  // -1 is returned when __c is not accepted as a digit
+	  if (__len <= 10)  // only '0' .. '0' + __len - 1 are accepted
+	    {
+	      if (__c >= _CharT2('0') && __c < _CharT2(_CharT2('0') + __len))
+		__ret = __c - _CharT2('0');  // offset from '0'
+	    }
+	  else  // __len > 10: 'a'-'f' and 'A'-'F' are accepted as well
+	    {
+	      if (__c >= _CharT2('0') && __c <= _CharT2('9'))
+		__ret = __c - _CharT2('0');  // offset from '0'
+	      else if (__c >= _CharT2('a') && __c <= _CharT2('f'))
+		__ret = 10 + (__c - _CharT2('a'));  // 10 + offset from 'a'
+	      else if (__c >= _CharT2('A') && __c <= _CharT2('F'))
+		__ret = 10 + (__c - _CharT2('A'));  // 10 + offset from 'A'
+	    }
+	  return __ret;  // digit value, or -1
+	}
+
+      template<typename _CharT2>  // overload enabled on !__is_char<_CharT2>::__value
+        typename __enable_if<int, !__is_char<_CharT2>::__value>::__type
+        _M_find(const _CharT2* __zero, size_t __len, _CharT2 __c) const  // searches __zero[0 .. __len)
+        {
+	  int __ret = -1;  // -1 is returned when __c is not found
+	  const _CharT2* __q = char_traits<_CharT2>::find(__zero, __len, __c);  // typed by _CharT2, as find returns
+	  if (__q)  // null means __c is not among the __len characters
+	    {
+	      __ret = __q - __zero;  // index of the match within the table
+	      if (__ret > 15)  // presumably the table is 0-9, a-f, A-F -- confirm against caller
+		__ret -= 6;  // fold indices above 15 down by 6 (16 becomes 10)
+	    }
+	  return __ret;  // index-derived digit value, or -1
+	}
+
       //@{
       /**
        *  @brief  Numeric parsing.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]