This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[Patch] Codecvt::do_length rewrite
- From: Paolo Carlini <pcarlini at suse dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>
- Date: Tue, 25 Nov 2003 17:50:14 +0100
- Subject: [Patch] Codecvt::do_length rewrite
Hi,
this is the final part of the work, do_length... turned out to be
slightly more difficult than expected, since mbsnrtowcs ignores
its fourth parameter if the first is NULL. Therefore, the best I
could do was to add a temporary dummy buffer. Interestingly, there
is *no* runtime penalty for this! Even more interesting and
unexpected, I couldn't do a fair comparison with iconv, since,
_contrary to the official glibc docs_, passing a fourth NULL
parameter doesn't work and leads to seg faults or asserts. Weird.
Took the occasion to refine and uniformize here and there the existing
code for do_in and do_out. In the process I have discovered some
subtleties not completely covered by the current testsuite entries
and therefore mean to add a couple of new tests asap.
Tested x86-linux, will commit soon...
Paolo.
////////////
2003-11-25 Paolo Carlini <pcarlini@suse.de>
* config/locale/gnu/codecvt_members.cc (codecvt::do_length):
Rewrite, using both mbsnrtowcs and mbrtowc in a loop: the
former is very fast, but stops if it encounters a NUL.
(codecvt::do_out): Tweaks.
(codecvt::do_in): Tweaks, improve the code dealing with errors.
* testsuite/performance/wchar_t_length.cc: New.
diff -prN libstdc++-v3-orig/config/locale/gnu/codecvt_members.cc libstdc++-v3/config/locale/gnu/codecvt_members.cc
*** libstdc++-v3-orig/config/locale/gnu/codecvt_members.cc Mon Nov 24 22:13:38 2003
--- libstdc++-v3/config/locale/gnu/codecvt_members.cc Tue Nov 25 16:36:55 2003
*************** namespace std
*** 57,73 ****
// wcsnrtombs is *very* fast but stops if encounters NUL characters:
// in case we fall back to wcrtomb and then continue, in a loop.
// NB: wcsnrtombs is a GNU extension
! __from_next = __from;
! __to_next = __to;
! while (__from_next < __from_end && __to_next < __to_end
! && __ret == ok)
{
const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
__from_end - __from_next);
if (!__from_chunk_end)
__from_chunk_end = __from_end;
! const intern_type* __tmp_from = __from_next;
const size_t __conv = wcsnrtombs(__to_next, &__from_next,
__from_chunk_end - __from_next,
__to_end - __to_next, &__state);
--- 57,72 ----
// wcsnrtombs is *very* fast but stops if encounters NUL characters:
// in case we fall back to wcrtomb and then continue, in a loop.
// NB: wcsnrtombs is a GNU extension
! for (__from_next = __from, __to_next = __to;
! __from_next < __from_end && __to_next < __to_end
! && __ret == ok;)
{
const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
__from_end - __from_next);
if (!__from_chunk_end)
__from_chunk_end = __from_end;
! __from = __from_next;
const size_t __conv = wcsnrtombs(__to_next, &__from_next,
__from_chunk_end - __from_next,
__to_end - __to_next, &__state);
*************** namespace std
*** 76,84 ****
// In case of error, in order to stop at the exact place we
// have to start again from the beginning with a series of
// wcrtomb.
! while (__tmp_from < __from_next)
! __to_next += wcrtomb(__to_next, *__tmp_from++, &__tmp_state);
! __state = __tmp_state;
__ret = error;
}
else if (__from_next && __from_next < __from_chunk_end)
--- 75,83 ----
// In case of error, in order to stop at the exact place we
// have to start again from the beginning with a series of
// wcrtomb.
! for (; __from < __from_next; ++__from)
! __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
! __state = __tmp_state;
__ret = error;
}
else if (__from_next && __from_next < __from_chunk_end)
*************** namespace std
*** 133,142 ****
// mbsnrtowcs is *very* fast but stops if encounters NUL characters:
// in case we store a L'\0' and then continue, in a loop.
// NB: mbsnrtowcs is a GNU extension
! __from_next = __from;
! __to_next = __to;
! while (__from_next < __from_end && __to_next < __to_end
! && __ret == ok)
{
const extern_type* __from_chunk_end;
__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
--- 132,140 ----
// mbsnrtowcs is *very* fast but stops if encounters NUL characters:
// in case we store a L'\0' and then continue, in a loop.
// NB: mbsnrtowcs is a GNU extension
! for (__from_next = __from, __to_next = __to;
! __from_next < __from_end && __to_next < __to_end
! && __ret == ok;)
{
const extern_type* __from_chunk_end;
__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
*************** namespace std
*** 145,151 ****
if (!__from_chunk_end)
__from_chunk_end = __from_end;
! const extern_type* __tmp_from = __from_next;
const size_t __conv = mbsnrtowcs(__to_next, &__from_next,
__from_chunk_end - __from_next,
__to_end - __to_next, &__state);
--- 143,149 ----
if (!__from_chunk_end)
__from_chunk_end = __from_end;
! __from = __from_next;
const size_t __conv = mbsnrtowcs(__to_next, &__from_next,
__from_chunk_end - __from_next,
__to_end - __to_next, &__state);
*************** namespace std
*** 154,162 ****
// In case of error, in order to stop at the exact place we
// have to start again from the beginning with a series of
// mbrtowc.
! while (__tmp_from < __from_next)
! __tmp_from += mbrtowc(__to_next++, __tmp_from,
! __from_next - __tmp_from, &__tmp_state);
__state = __tmp_state;
__ret = error;
}
--- 152,169 ----
// In case of error, in order to stop at the exact place we
// have to start again from the beginning with a series of
// mbrtowc.
! for (;;)
! {
! const size_t __conv_err = mbrtowc(__to_next, __from,
! __from_end - __from,
! &__tmp_state);
! if (__conv_err == static_cast<size_t>(-1)
! || __conv_err == static_cast<size_t>(-2))
! break;
! __from += __conv_err;
! ++__to_next;
! }
! __from_next = __from;
__state = __tmp_state;
__ret = error;
}
*************** namespace std
*** 177,182 ****
--- 184,190 ----
if (__to_next < __to_end)
{
// XXX Probably wrong for stateful encodings
+ __tmp_state = __state;
++__from_next;
*__to_next++ = L'\0';
}
*************** namespace std
*** 232,270 ****
{
int __ret = 0;
state_type __tmp_state(__state);
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
__c_locale __old = __uselocale(_M_c_locale_codecvt);
#endif
while (__from < __end && __max)
{
! size_t __conv = mbrtowc(NULL, __from, __end - __from, &__tmp_state);
if (__conv == static_cast<size_t>(-1))
{
! // Invalid source character
! break;
! }
! else if (__conv == static_cast<size_t>(-2))
! {
! // Remainder of input does not form a complete destination
! // character.
break;
}
! else if (__conv == 0)
{
// XXX Probably wrong for stateful encodings
! __conv = 1;
}
!
! __state = __tmp_state;
! __from += __conv;
! __ret += __conv;
! __max--;
}
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
__uselocale(__old);
#endif
return __ret;
}
#endif
--- 240,311 ----
{
int __ret = 0;
state_type __tmp_state(__state);
+
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
__c_locale __old = __uselocale(_M_c_locale_codecvt);
#endif
+ // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
+ // in case we advance past it and then continue, in a loop.
+ // NB: mbsnrtowcs is a GNU extension
+
+ // A dummy internal buffer is needed in order for mbsnrtowcs to consider
+ // its fourth parameter (it wouldn't with NULL as first parameter).
+ wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
+ * __max));
while (__from < __end && __max)
{
! const extern_type* __from_chunk_end;
! __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
! __end
! - __from));
! if (!__from_chunk_end)
! __from_chunk_end = __end;
!
! const extern_type* __tmp_from = __from;
! const size_t __conv = mbsnrtowcs(__to, &__from,
! __from_chunk_end - __from,
! __max, &__state);
if (__conv == static_cast<size_t>(-1))
{
! // In case of error, in order to stop at the exact place we
! // have to start again from the beginning with a series of
! // mbrtowc.
! for (__from = __tmp_from;;)
! {
! const size_t __conv_err = mbrtowc(NULL, __from, __end - __from,
! &__tmp_state);
! if (__conv_err == static_cast<size_t>(-1)
! || __conv_err == static_cast<size_t>(-2))
! break;
! __from += __conv_err;
! }
! __state = __tmp_state;
! __ret += __from - __tmp_from;
break;
}
! if (!__from)
! __from = __from_chunk_end;
!
! __ret += __from - __tmp_from;
! __max -= __conv;
!
! if (__from < __end && __max)
{
// XXX Probably wrong for stateful encodings
! __tmp_state = __state;
! ++__from;
! ++__ret;
! --__max;
}
! else
! break;
}
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ > 2)
__uselocale(__old);
#endif
+
return __ret;
}
#endif
diff -prN libstdc++-v3-orig/testsuite/performance/wchar_t_length.cc libstdc++-v3/testsuite/performance/wchar_t_length.cc
*** libstdc++-v3-orig/testsuite/performance/wchar_t_length.cc Thu Jan 1 01:00:00 1970
--- libstdc++-v3/testsuite/performance/wchar_t_length.cc Tue Nov 25 17:25:57 2003
***************
*** 0 ****
--- 1,67 ----
+ // Copyright (C) 2003 Free Software Foundation, Inc.
+ //
+ // This file is part of the GNU ISO C++ Library. This library is free
+ // software; you can redistribute it and/or modify it under the
+ // terms of the GNU General Public License as published by the
+ // Free Software Foundation; either version 2, or (at your option)
+ // any later version.
+
+ // This library is distributed in the hope that it will be useful,
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ // GNU General Public License for more details.
+
+ // You should have received a copy of the GNU General Public License along
+ // with this library; see the file COPYING. If not, write to the Free
+ // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ // USA.
+
+ // As a special exception, you may use this file as part of a free software
+ // library without restriction. Specifically, if other files instantiate
+ // templates or use macros or inline functions from this file, or you compile
+ // this file and link it with other files to produce an executable, this
+ // file does not by itself cause the resulting executable to be covered by
+ // the GNU General Public License. This exception does not however
+ // invalidate any other reasons why the executable file might be covered by
+ // the GNU General Public License.
+
+ #include <cstdio>
+ #include <cstring>
+ #include <fstream>
+ #include <langinfo.h>
+ #include <iconv.h>
+ #include <testsuite_performance.h>
+
+ // libstdc++/11602 (do_length): performance test that repeatedly calls codecvt<wchar_t, char, mbstate_t>::length on a 1024-byte buffer of 'a'.
+ int main(int argc, char** argv)
+ {
+ using namespace std;
+ using namespace __gnu_test;
+
+ time_counter time;
+ resource_counter resource;
+ const int iters = 400000; // number of length() calls made between start_counters and stop_counters
+
+ wchar_t wbuf[1024]; // NOTE(review): not referenced anywhere else in this function
+ char cbuf[1024]; // external (narrow) input handed to length()
+
+ memset(cbuf, 'a', 1024); // fill the entire input with 'a', so it contains no NUL bytes
+
+ // C++ (codecvt)
+ locale loc; // default-constructed locale
+ const codecvt<wchar_t, char, mbstate_t>& cvt =
+ use_facet<codecvt<wchar_t, char, mbstate_t> >(loc);
+ mbstate_t state;
+ memset(&state, 0, sizeof(state)); // zero the conversion state before the first call
+ start_counters(time, resource);
+ for (int i = 0; i < iters; ++i)
+ {
+ const char* from_next; // NOTE(review): declared but never used in this loop
+ wchar_t* to_next; // NOTE(review): declared but never used in this loop
+ cvt.length(state, cbuf, cbuf + 1024, 1024); // return value is discarded; the same state object is passed on every iteration
+ }
+ stop_counters(time, resource);
+ report_performance(__FILE__, "C++ (codecvt)", time, resource); // report under the label "C++ (codecvt)"
+
+ return 0;
+ }