This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[Patch] libsdtc++/32908
- From: Paolo Carlini <pcarlini at suse dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>
- Date: Sun, 29 Jul 2007 19:35:50 +0200
- Subject: [Patch] libsdtc++/32908
Hi,
this work turned out to be slightly less trivial than I thought because
I didn't want to add too much duplicated code. Thus the idea of using
the separate struct __lc_rai. Maybe a bit of analysis is in order to
explain why a separate __newlast1 is not needed and we can simply assign
to __last1. Note first that __last1 is only used in the final line of
each function, in case it doesn't return early from inside the loop. In
that case, clearly, __first1 == __newlast1. But then there is only one
interesting possibility, __newlast1 < __last1. In that case, however,
the loop ended with __first2 == __last2 and the function shall return
false anyway.
After this interesting tweak (many thanks to Chris!), which actually, in
my measurements, at least assuming the data is in the cache, speeds-up
vector<int> comparison operators even more than the reported 15%, more
like 20-25%, I would really leave these algorithms alone. We have bigger
fish to fry, as Bjarne often repeats ;)
Patch tested x86/x86_64-linux, performance too.
Paolo.
//////////////////////
2007-07-30 Paolo Carlini <pcarlini@suse.de>
PR libstdc++/32908
* include/bits/stl_algobase.h (struct __lc_rai): New.
(lexicographical_compare(_II1, _II1, _II2, _II2),
lexicographical_compare(_II1, _II1, _II2, _II2, _Compare)): Use it.
* testsuite/performance/25_algorithms/lexicographical_compare.cc: New.
Index: include/bits/stl_algobase.h
===================================================================
--- include/bits/stl_algobase.h (revision 126980)
+++ include/bits/stl_algobase.h (working copy)
@@ -778,6 +778,43 @@
return true;
}
+
+ template<typename, typename>
+ struct __lc_rai
+ {
+ template<typename _II1, typename _II2>
+ static _II1
+ __newlast1(_II1, _II1 __last1, _II2, _II2)
+ { return __last1; }
+
+ template<typename _II>
+ static bool
+ __cnd2(_II __first, _II __last)
+ { return __first != __last; }
+ };
+
+ template<>
+ struct __lc_rai<random_access_iterator_tag,
+ random_access_iterator_tag>
+ {
+ template<typename _RAI1, typename _RAI2>
+ static _RAI1
+ __newlast1(_RAI1 __first1, _RAI1 __last1,
+ _RAI2 __first2, _RAI2 __last2)
+ {
+ const typename iterator_traits<_RAI1>::difference_type
+ __diff1 = __last1 - __first1;
+ const typename iterator_traits<_RAI2>::difference_type
+ __diff2 = __last2 - __first2;
+ return __diff2 < __diff1 ? __first1 + __diff2 : __last1;
+ }
+
+ template<typename _RAI>
+ static bool
+ __cnd2(_RAI, _RAI)
+ { return true; }
+ };
+
/**
* @brief Performs "dictionary" comparison on ranges.
* @param first1 An input iterator.
@@ -792,24 +829,30 @@
* (Quoted from [25.3.8]/1.) If the iterators are all character pointers,
* then this is an inline call to @c memcmp.
*/
- template<typename _InputIterator1, typename _InputIterator2>
+ template<typename _II1, typename _II2>
bool
- lexicographical_compare(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _InputIterator2 __last2)
+ lexicographical_compare(_II1 __first1, _II1 __last1,
+ _II2 __first2, _II2 __last2)
{
+ typedef typename iterator_traits<_II1>::iterator_category _Category1;
+ typedef typename iterator_traits<_II2>::iterator_category _Category2;
+
// concept requirements
- __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
- __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
- __glibcxx_function_requires(_LessThanOpConcept<
- typename iterator_traits<_InputIterator1>::value_type,
- typename iterator_traits<_InputIterator2>::value_type>)
- __glibcxx_function_requires(_LessThanOpConcept<
- typename iterator_traits<_InputIterator2>::value_type,
- typename iterator_traits<_InputIterator1>::value_type>)
+ typedef typename iterator_traits<_II1>::value_type _ValueType1;
+ typedef typename iterator_traits<_II2>::value_type _ValueType2;
+ __glibcxx_function_requires(_InputIteratorConcept<_II1>)
+ __glibcxx_function_requires(_InputIteratorConcept<_II2>)
+ __glibcxx_function_requires(_LessThanOpConcept<_ValueType1, _ValueType2>)
+ __glibcxx_function_requires(_LessThanOpConcept<_ValueType2, _ValueType1>)
__glibcxx_requires_valid_range(__first1, __last1);
__glibcxx_requires_valid_range(__first2, __last2);
- for (; __first1 != __last1 && __first2 != __last2;
+ __last1 = __lc_rai<_Category1, _Category2>::__newlast1(__first1,
+ __last1,
+ __first2,
+ __last2);
+ for (; __first1 != __last1
+ && __lc_rai<_Category1, _Category2>::__cnd2(__first2, __last2);
++__first1, ++__first2)
{
if (*__first1 < *__first2)
@@ -829,23 +872,29 @@
* @param comp A @link s20_3_3_comparisons comparison functor@endlink.
* @return A boolean true or false.
*
- * The same as the four-parameter @c lexigraphical_compare, but uses the
+ * The same as the four-parameter @c lexicographical_compare, but uses the
* comp parameter instead of @c <.
*/
- template<typename _InputIterator1, typename _InputIterator2,
- typename _Compare>
+ template<typename _II1, typename _II2, typename _Compare>
bool
- lexicographical_compare(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _InputIterator2 __last2,
- _Compare __comp)
+ lexicographical_compare(_II1 __first1, _II1 __last1,
+ _II2 __first2, _II2 __last2, _Compare __comp)
{
+ typedef typename iterator_traits<_II1>::iterator_category _Category1;
+ typedef typename iterator_traits<_II2>::iterator_category _Category2;
+
// concept requirements
- __glibcxx_function_requires(_InputIteratorConcept<_InputIterator1>)
- __glibcxx_function_requires(_InputIteratorConcept<_InputIterator2>)
+ __glibcxx_function_requires(_InputIteratorConcept<_II1>)
+ __glibcxx_function_requires(_InputIteratorConcept<_II2>)
__glibcxx_requires_valid_range(__first1, __last1);
__glibcxx_requires_valid_range(__first2, __last2);
- for (; __first1 != __last1 && __first2 != __last2;
+ __last1 = __lc_rai<_Category1, _Category2>::__newlast1(__first1,
+ __last1,
+ __first2,
+ __last2);
+ for (; __first1 != __last1
+ && __lc_rai<_Category1, _Category2>::__cnd2(__first2, __last2);
++__first1, ++__first2)
{
if (__comp(*__first1, *__first2))
Index: testsuite/performance/25_algorithms/lexicographical_compare.cc
===================================================================
--- testsuite/performance/25_algorithms/lexicographical_compare.cc (revision 0)
+++ testsuite/performance/25_algorithms/lexicographical_compare.cc (revision 0)
@@ -0,0 +1,55 @@
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING. If not, write to the Free
+// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction. Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License. This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+#include <vector>
+#include <testsuite_performance.h>
+
+// libstdc++/32908
+int main()
+{
+ using namespace __gnu_test;
+
+ time_counter time;
+ resource_counter resource;
+
+ int cnt = 0;
+ std::vector<int> a(10000), b(10000);
+
+ start_counters(time, resource);
+ for (int i = 0; i < 100000; ++i)
+ {
+ if (a < b)
+ ++cnt;
+ if (a > b)
+ ++cnt;
+ }
+ stop_counters(time, resource);
+ report_performance(__FILE__, "", time, resource);
+ clear_counters(time, resource);
+
+ return cnt;
+}