This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[Patch] To _M_extract_float for redundant operator*() and more
- From: Paolo Carlini <pcarlini at suse dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>
- Date: Sun, 31 Oct 2004 19:21:17 +0100
- Subject: [Patch] To _M_extract_float for redundant operator*() and more
Hi,
this patch deals with the most subtle issue I have faced during the
last weeks... Fiuuuu! I tried to summarize it in the ChangeLog.
Consider an istreambuf_iterator: operator*() is defined to return
sgetc() of the underlying streambuf. Now, in the unbuffered case,
underflow() is not guaranteed to return the same value if called
many times in a row! This means that, in principle, we *cannot*
dereference many times a generic input iterator and assume that
the value is always the same. Indeed, consistently, the standard
in 22.2.2.1.2, Stage 2, mandates to dereference 'in' one time
for each increment and do the whole work.
There is also a performance issue, indeed: just have a look to
the implementation of istreambuf_iterator to see what I mean:
both operator*() and operator!= are *really* expensive!
Thus the below.
The basic idea is introducing __testeof which becomes true as soon
as __beg == __end and is propagated downstream. Every time we have
to increment __beg we do:
if (++__beg != __end)
__c = *__beg;
else
__testeof = true;
and everything becomes correct.
There is also a nice performance implication: statically linked
testcases parsing floats become about 5% faster (1-2% in the
dynamically linked case).
Tested x86-linux, will test further on x86_64 and, if nobody objects,
commit tomorrow. Of course, integers and money_get will need the
same treatment...
Paolo.
////////////////
2004-11-01 Paolo Carlini <pcarlini@suse.de>
* include/bits/locale_facets.tcc (num_get<>::_M_extract_float):
Evaluate *__beg the exact strict minimum number of times; likewise
for __beg != __end; in the main parsing loop, call ++__beg in two
places only. The former is also a correctness issue, because,
according to the standard (22.2.2.1.2, Stage 2), 'in' shall be
dereferenced only one time for each increment.
diff -prN libstdc++-v3-orig/include/bits/locale_facets.tcc libstdc++-v3/include/bits/locale_facets.tcc
*** libstdc++-v3-orig/include/bits/locale_facets.tcc Thu Oct 28 14:47:36 2004
--- libstdc++-v3/include/bits/locale_facets.tcc Sun Oct 31 18:47:29 2004
*************** namespace std
*** 279,306 ****
const locale& __loc = __io._M_getloc();
const __cache_type* __lc = __uc(__loc);
const _CharT* __lit = __lc->_M_atoms_in;
! // True if a mantissa is found.
! bool __found_mantissa = false;
// First check for sign.
! if (__beg != __end)
{
! const char_type __c = *__beg;
const bool __plus = __c == __lit[__num_base::_S_iplus];
if ((__plus || __c == __lit[__num_base::_S_iminus])
&& !(__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
&& !(__c == __lc->_M_decimal_point))
{
__xtrc += __plus ? '+' : '-';
! ++__beg;
}
}
// Next, look for leading zeros.
! while (__beg != __end)
{
- const char_type __c = *__beg;
if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep
|| __c == __lc->_M_decimal_point)
break;
--- 279,312 ----
const locale& __loc = __io._M_getloc();
const __cache_type* __lc = __uc(__loc);
const _CharT* __lit = __lc->_M_atoms_in;
+ char_type __c = char_type();
! // True if __beg becomes equal to __end.
! bool __testeof = __beg == __end;
// First check for sign.
! if (!__testeof)
{
! __c = *__beg;
const bool __plus = __c == __lit[__num_base::_S_iplus];
if ((__plus || __c == __lit[__num_base::_S_iminus])
&& !(__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
&& !(__c == __lc->_M_decimal_point))
{
__xtrc += __plus ? '+' : '-';
! if (++__beg != __end)
! __c = *__beg;
! else
! __testeof = true;
}
}
+ // True if a mantissa is found.
+ bool __found_mantissa = false;
+
// Next, look for leading zeros.
! while (!__testeof)
{
if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep
|| __c == __lc->_M_decimal_point)
break;
*************** namespace std
*** 311,317 ****
__xtrc += '0';
__found_mantissa = true;
}
! ++__beg;
}
else
break;
--- 317,326 ----
__xtrc += '0';
__found_mantissa = true;
}
! if (++__beg != __end)
! __c = *__beg;
! else
! __testeof = true;
}
else
break;
*************** namespace std
*** 324,335 ****
if (__lc->_M_use_grouping)
__found_grouping.reserve(32);
int __sep_pos = 0;
const char_type* __lit_zero = __lit + __num_base::_S_izero;
! while (__beg != __end)
{
// According to 22.2.2.1.2, p8-9, first look for thousands_sep
// and decimal_point.
- char_type __c = *__beg;
if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
{
if (!__found_dec && !__found_sci)
--- 333,344 ----
if (__lc->_M_use_grouping)
__found_grouping.reserve(32);
int __sep_pos = 0;
+ const char_type* __q;
const char_type* __lit_zero = __lit + __num_base::_S_izero;
! while (!__testeof)
{
// According to 22.2.2.1.2, p8-9, first look for thousands_sep
// and decimal_point.
if (__lc->_M_use_grouping && __c == __lc->_M_thousands_sep)
{
if (!__found_dec && !__found_sci)
*************** namespace std
*** 340,346 ****
{
__found_grouping += static_cast<char>(__sep_pos);
__sep_pos = 0;
- ++__beg;
}
else
{
--- 349,354 ----
*************** namespace std
*** 362,411 ****
__found_grouping += static_cast<char>(__sep_pos);
__xtrc += '.';
__found_dec = true;
- ++__beg;
}
else
break;
}
! else
{
! const char_type* __q = __traits_type::find(__lit_zero, 10, __c);
! if (__q)
{
! __xtrc += __num_base::_S_atoms_in[__q - __lit];
! __found_mantissa = true;
! ++__sep_pos;
! ++__beg;
}
! else if ((__c == __lit[__num_base::_S_ie]
! || __c == __lit[__num_base::_S_iE])
! && __found_mantissa && !__found_sci)
{
! // Scientific notation.
! if (__found_grouping.size() && !__found_dec)
! __found_grouping += static_cast<char>(__sep_pos);
! __xtrc += 'e';
! __found_sci = true;
!
! // Remove optional plus or minus sign, if they exist.
! if (++__beg != __end)
! {
! __c = *__beg;
! const bool __plus = __c == __lit[__num_base::_S_iplus];
! if ((__plus || __c == __lit[__num_base::_S_iminus])
! && !(__lc->_M_use_grouping
! && __c == __lc->_M_thousands_sep)
! && !(__c == __lc->_M_decimal_point))
! {
! __xtrc += __plus ? '+' : '-';
! ++__beg;
! }
! }
}
- else
- // Not a valid input item.
- break;
}
}
// Digit grouping is checked. If grouping and found_grouping don't
--- 370,422 ----
__found_grouping += static_cast<char>(__sep_pos);
__xtrc += '.';
__found_dec = true;
}
else
break;
}
! else if ((__q = __traits_type::find(__lit_zero, 10, __c)))
{
! __xtrc += __num_base::_S_atoms_in[__q - __lit];
! __found_mantissa = true;
! ++__sep_pos;
! }
! else if ((__c == __lit[__num_base::_S_ie]
! || __c == __lit[__num_base::_S_iE])
! && __found_mantissa && !__found_sci)
! {
! // Scientific notation.
! if (__found_grouping.size() && !__found_dec)
! __found_grouping += static_cast<char>(__sep_pos);
! __xtrc += 'e';
! __found_sci = true;
!
! // Remove optional plus or minus sign, if they exist.
! if (++__beg != __end)
{
! __c = *__beg;
! const bool __plus = __c == __lit[__num_base::_S_iplus];
! if ((__plus || __c == __lit[__num_base::_S_iminus])
! && !(__lc->_M_use_grouping
! && __c == __lc->_M_thousands_sep)
! && !(__c == __lc->_M_decimal_point))
! __xtrc += __plus ? '+' : '-';
! else
! continue;
}
! else
{
! __testeof = true;
! break;
}
}
+ else
+ // Not a valid input item.
+ break;
+
+ if (++__beg != __end)
+ __c = *__beg;
+ else
+ __testeof = true;
}
// Digit grouping is checked. If grouping and found_grouping don't
*************** namespace std
*** 423,429 ****
}
// Finish up.
! if (__beg == __end)
__err |= ios_base::eofbit;
return __beg;
}
--- 434,440 ----
}
// Finish up.
! if (__testeof)
__err |= ios_base::eofbit;
return __beg;
}