This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[v3] Codecvt::do_length rewrite and other tweaks


Hi,

tested x86-linux, committed.

Paolo.

////////////
2003-11-25  Paolo Carlini  <pcarlini@suse.de>

	* config/locale/gnu/codecvt_members.cc (codecvt::do_length):
	Rewrite, using both mbsnrtowcs and mbrtowc in a loop: the
	former is very fast, but stops if it encounters a NUL.
	(codecvt::do_out): Tweaks.
	(codecvt::do_in): Tweaks, improve the code dealing with errors.
	* testsuite/performance/wchar_t_length.cc: New.
diff -prN libstdc++-v3-orig/config/locale/gnu/codecvt_members.cc libstdc++-v3/config/locale/gnu/codecvt_members.cc
*** libstdc++-v3-orig/config/locale/gnu/codecvt_members.cc	Mon Nov 24 22:13:38 2003
--- libstdc++-v3/config/locale/gnu/codecvt_members.cc	Tue Nov 25 18:37:37 2003
***************
*** 1,6 ****
  // std::codecvt implementation details, GNU version -*- C++ -*-
  
! // Copyright (C) 2002 Free Software Foundation, Inc.
  //
  // This file is part of the GNU ISO C++ Library.  This library is free
  // software; you can redistribute it and/or modify it under the
--- 1,6 ----
  // std::codecvt implementation details, GNU version -*- C++ -*-
  
! // Copyright (C) 2002, 2003 Free Software Foundation, Inc.
  //
  // This file is part of the GNU ISO C++ Library.  This library is free
  // software; you can redistribute it and/or modify it under the
*************** namespace std
*** 57,73 ****
      // wcsnrtombs is *very* fast but stops if encounters NUL characters:
      // in case we fall back to wcrtomb and then continue, in a loop.
      // NB: wcsnrtombs is a GNU extension
!     __from_next = __from;
!     __to_next = __to;
!     while (__from_next < __from_end && __to_next < __to_end
! 	   && __ret == ok)
        {
  	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
  						      __from_end - __from_next);
  	if (!__from_chunk_end)
  	  __from_chunk_end = __from_end;
  
! 	const intern_type* __tmp_from = __from_next;
  	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
  					 __from_chunk_end - __from_next,
  					 __to_end - __to_next, &__state);
--- 57,72 ----
      // wcsnrtombs is *very* fast but stops if encounters NUL characters:
      // in case we fall back to wcrtomb and then continue, in a loop.
      // NB: wcsnrtombs is a GNU extension
!     for (__from_next = __from, __to_next = __to;
! 	 __from_next < __from_end && __to_next < __to_end
! 	 && __ret == ok;)
        {
  	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
  						      __from_end - __from_next);
  	if (!__from_chunk_end)
  	  __from_chunk_end = __from_end;
  
! 	__from = __from_next;
  	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
  					 __from_chunk_end - __from_next,
  					 __to_end - __to_next, &__state);
*************** namespace std
*** 76,84 ****
  	    // In case of error, in order to stop at the exact place we
  	    // have to start again from the beginning with a series of
  	    // wcrtomb.
! 	    while (__tmp_from < __from_next)
! 	      __to_next += wcrtomb(__to_next, *__tmp_from++, &__tmp_state);
! 	    __state = __tmp_state;	    
  	    __ret = error;
  	  }
  	else if (__from_next && __from_next < __from_chunk_end)
--- 75,83 ----
  	    // In case of error, in order to stop at the exact place we
  	    // have to start again from the beginning with a series of
  	    // wcrtomb.
! 	    for (; __from < __from_next; ++__from)
! 	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
! 	    __state = __tmp_state;
  	    __ret = error;
  	  }
  	else if (__from_next && __from_next < __from_chunk_end)
*************** namespace std
*** 133,142 ****
      // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
      // in case we store a L'\0' and then continue, in a loop.
      // NB: mbsnrtowcs is a GNU extension
!     __from_next = __from;
!     __to_next = __to;
!     while (__from_next < __from_end && __to_next < __to_end
! 	   && __ret == ok)
        {
  	const extern_type* __from_chunk_end;
  	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
--- 132,140 ----
      // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
      // in case we store a L'\0' and then continue, in a loop.
      // NB: mbsnrtowcs is a GNU extension
!     for (__from_next = __from, __to_next = __to;
! 	 __from_next < __from_end && __to_next < __to_end
! 	 && __ret == ok;)
        {
  	const extern_type* __from_chunk_end;
  	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
*************** namespace std
*** 145,151 ****
  	if (!__from_chunk_end)
  	  __from_chunk_end = __from_end;
  
! 	const extern_type* __tmp_from = __from_next;
  	const size_t __conv = mbsnrtowcs(__to_next, &__from_next,
  					 __from_chunk_end - __from_next,
  					 __to_end - __to_next, &__state);
--- 143,149 ----
  	if (!__from_chunk_end)
  	  __from_chunk_end = __from_end;
  
! 	__from = __from_next;
  	const size_t __conv = mbsnrtowcs(__to_next, &__from_next,
  					 __from_chunk_end - __from_next,
  					 __to_end - __to_next, &__state);
*************** namespace std
*** 154,162 ****
  	    // In case of error, in order to stop at the exact place we
  	    // have to start again from the beginning with a series of
  	    // mbrtowc.
! 	    while (__tmp_from < __from_next)
! 	      __tmp_from += mbrtowc(__to_next++, __tmp_from,
! 				    __from_next - __tmp_from, &__tmp_state);
  	    __state = __tmp_state;	    
  	    __ret = error;
  	  }
--- 152,169 ----
  	    // In case of error, in order to stop at the exact place we
  	    // have to start again from the beginning with a series of
  	    // mbrtowc.
! 	    for (;;)
! 	      {
! 		const size_t __conv_err = mbrtowc(__to_next, __from,
! 						  __from_end - __from,
! 						  &__tmp_state);
! 		if (__conv_err == static_cast<size_t>(-1)
! 		    || __conv_err == static_cast<size_t>(-2))
! 		  break;
! 		__from += __conv_err;
! 		++__to_next;
! 	      }
! 	    __from_next = __from;
  	    __state = __tmp_state;	    
  	    __ret = error;
  	  }
*************** namespace std
*** 177,182 ****
--- 184,190 ----
  	    if (__to_next < __to_end)
  	      {
  		// XXX Probably wrong for stateful encodings
+ 		__tmp_state = __state;		
  		++__from_next;
  		*__to_next++ = L'\0';
  	      }
*************** namespace std
*** 232,270 ****
    {
      int __ret = 0;
      state_type __tmp_state(__state);
  #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
      __c_locale __old = __uselocale(_M_c_locale_codecvt);
  #endif
  
      while (__from < __end && __max)
        {
! 	size_t __conv = mbrtowc(NULL, __from, __end - __from, &__tmp_state);
  	if (__conv == static_cast<size_t>(-1))
  	  {
! 	    // Invalid source character
! 	    break;
! 	  }
! 	else if (__conv == static_cast<size_t>(-2))
! 	  {
! 	    // Remainder of input does not form a complete destination
! 	    // character.
  	    break;
  	  }
! 	else if (__conv == 0)
  	  {
  	    // XXX Probably wrong for stateful encodings
! 	    __conv = 1;
  	  }
- 
- 	__state = __tmp_state;
- 	__from += __conv;
- 	__ret += __conv;
- 	__max--;
        }
  
  #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
      __uselocale(__old);
  #endif
      return __ret; 
    }
  #endif
--- 240,309 ----
    {
      int __ret = 0;
      state_type __tmp_state(__state);
+ 
  #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
      __c_locale __old = __uselocale(_M_c_locale_codecvt);
  #endif
  
+     // mbsnrtowcs is *very* fast but stops if it encounters NUL characters:
+     // in that case we advance past the NUL and then continue, in a loop.
+     // NB: mbsnrtowcs is a GNU extension
+   
+     // A dummy internal buffer is needed in order for mbsnrtowcs to consider
+     // its fourth parameter (it wouldn't with NULL as first parameter).
+     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) 
+ 							   * __max));
      while (__from < __end && __max)
        {
! 	const extern_type* __from_chunk_end;
! 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
! 								  __end
! 								  - __from));
! 	if (!__from_chunk_end)
! 	  __from_chunk_end = __end;
! 
! 	const extern_type* __tmp_from = __from;
! 	const size_t __conv = mbsnrtowcs(__to, &__from,
! 					 __from_chunk_end - __from,
! 					 __max, &__state);
  	if (__conv == static_cast<size_t>(-1))
  	  {
! 	    // In case of error, in order to stop at the exact place we
! 	    // have to start again from the beginning with a series of
! 	    // mbrtowc.
! 	    for (__from = __tmp_from;;)
! 	      {
! 		const size_t __conv_err = mbrtowc(NULL, __from, __end - __from,
! 						  &__tmp_state);
! 		if (__conv_err == static_cast<size_t>(-1)
! 		    || __conv_err == static_cast<size_t>(-2))
! 		  break;
! 		__from += __conv_err;
! 	      }
! 	    __state = __tmp_state;
! 	    __ret += __from - __tmp_from;
  	    break;
  	  }
! 	if (!__from)
! 	  __from = __from_chunk_end;
! 	
! 	__ret += __from - __tmp_from;
! 	__max -= __conv;
! 
! 	if (__from < __end && __max)
  	  {
  	    // XXX Probably wrong for stateful encodings
! 	    __tmp_state = __state;
! 	    ++__from;
! 	    ++__ret;
! 	    --__max;
  	  }
        }
  
  #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
      __uselocale(__old);
  #endif
+ 
      return __ret; 
    }
  #endif
diff -prN libstdc++-v3-orig/testsuite/performance/wchar_t_length.cc libstdc++-v3/testsuite/performance/wchar_t_length.cc
*** libstdc++-v3-orig/testsuite/performance/wchar_t_length.cc	Thu Jan  1 01:00:00 1970
--- libstdc++-v3/testsuite/performance/wchar_t_length.cc	Tue Nov 25 17:55:21 2003
***************
*** 0 ****
--- 1,62 ----
+ // Copyright (C) 2003 Free Software Foundation, Inc.
+ //
+ // This file is part of the GNU ISO C++ Library.  This library is free
+ // software; you can redistribute it and/or modify it under the
+ // terms of the GNU General Public License as published by the
+ // Free Software Foundation; either version 2, or (at your option)
+ // any later version.
+ 
+ // This library is distributed in the hope that it will be useful,
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ // GNU General Public License for more details.
+ 
+ // You should have received a copy of the GNU General Public License along
+ // with this library; see the file COPYING.  If not, write to the Free
+ // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ // USA.
+ 
+ // As a special exception, you may use this file as part of a free software
+ // library without restriction.  Specifically, if other files instantiate
+ // templates or use macros or inline functions from this file, or you compile
+ // this file and link it with other files to produce an executable, this
+ // file does not by itself cause the resulting executable to be covered by
+ // the GNU General Public License.  This exception does not however
+ // invalidate any other reasons why the executable file might be covered by
+ // the GNU General Public License.
+ 
+ #include <cstdio>
+ #include <cstring>
+ #include <fstream>
+ #include <langinfo.h>
+ #include <iconv.h>
+ #include <testsuite_performance.h>
+ 
+ // libstdc++/11602 (do_length)
+ int main(int argc, char** argv)
+ {
+   using namespace std;
+   using namespace __gnu_test;
+ 
+   time_counter time;
+   resource_counter resource;
+   const int iters = 400000;
+ 
+   char cbuf[1024];
+   
+   memset(cbuf, 'a', 1024);
+ 
+   // C++ (codecvt)
+   locale loc;
+   const codecvt<wchar_t, char, mbstate_t>& cvt =
+     use_facet<codecvt<wchar_t, char, mbstate_t> >(loc);
+   mbstate_t state;
+   memset(&state, 0, sizeof(state));
+   start_counters(time, resource);
+   for (int i = 0; i < iters; ++i)
+     cvt.length(state, cbuf, cbuf + 1024, 1024);
+   stop_counters(time, resource);
+   report_performance(__FILE__, "C++ (codecvt)", time, resource);
+ 
+   return 0;
+ }

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]