This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[PATCH][libstdc++-v3 parallel mode] Fix parallel functions not called by default
- From: Johannes Singler <singler at ira dot uka dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>, gcc-patches at gcc dot gnu dot org
- Date: Tue, 03 Feb 2009 14:52:54 +0100
- Subject: [PATCH][libstdc++-v3 parallel mode] Fix parallel functions not called by default
The first patch fixes a bug that caused some functions to be executed
sequentially by default, although _GLIBCXX_PARALLEL was switched on.
It dates back to this thread:
<http://gcc.gnu.org/ml/libstdc++/2008-09/msg00116.html>
The second patch fixes regressions that surfaced because of the better
test coverage provided by the first patch. The bugs existed before. And
overall, only the experimental parallel mode is affected; there is no
change for the normal case.
Tested x86_64-unknown-linux-gnu: No regressions
Please approve for mainline and gcc-4_3-branch. I know that GCC 4.4
currently is in a late stage, but this patch is important for the
parallel mode, and it only affects an experimental part.
In case of approval, should the two patches be committed together or separately?
2009-02-03 Johannes Singler <singler@ira.uka.de>
* include/parallel/algorithmfwd.h:
Add parallelism default for many declarations.
* include/parallel/numericfwd.h: Likewise.
2009-02-03 Johannes Singler <singler@ira.uka.de>
* include/parallel/iterator.h: Use iterator_traits as usual.
* include/parallel/par_loop.h:
Include equally_split.h.
(for_each_template_random_access_ed)
Avoid calling the Result default constructor.
* include/parallel/numeric: Replace
for_each_template_random_access by
for_each_template_random_access_ed in numeric functions.
Johannes
Index: include/parallel/algorithmfwd.h
===================================================================
--- include/parallel/algorithmfwd.h (revision 143763)
+++ include/parallel/algorithmfwd.h (working copy)
@@ -99,7 +99,8 @@
template<typename _RAIter, typename _Tp>
typename iterator_traits<_RAIter>::difference_type
count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_unbalanced);
template<typename _IIter, typename _Predicate>
@@ -121,7 +122,8 @@
template<typename _RAIter, typename _Predicate>
typename iterator_traits<_RAIter>::difference_type
count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_unbalanced);
// algobase.h
template<typename _IIter1, typename _IIter2>
@@ -228,7 +230,8 @@
template<typename _RAIter, typename _Function>
_Function
for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _FIter, typename _Generator>
@@ -250,7 +253,8 @@
template<typename _RAIter, typename _Generator>
void
generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _OIter, typename _Size, typename _Generator>
_OIter
@@ -272,7 +276,8 @@
template<typename _RAIter, typename _Size, typename _Generator>
_RAIter
generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _IIter1, typename _IIter2>
bool
@@ -429,7 +434,8 @@
_RAOIter
transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation,
random_access_iterator_tag, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _IIter1, typename _IIter2, typename _OIter,
@@ -455,7 +461,8 @@
transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _IIter1, typename _IIter2, typename _OIter,
typename _BiOperation, typename _Tag1,
@@ -546,7 +553,8 @@
template<typename _RAIter, typename _Compare>
_RAIter
max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _IIter1, typename _IIter2, typename _OIter>
@@ -615,7 +623,8 @@
template<typename _RAIter, typename _Compare>
_RAIter
min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_balanced);
template<typename _RAIter>
void
Index: include/parallel/numericfwd.h
===================================================================
--- include/parallel/numericfwd.h (revision 143763)
+++ include/parallel/numericfwd.h (working copy)
@@ -83,7 +83,8 @@
_Tp
accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_unbalanced);
template<typename _IIter, typename _OIter>
_OIter
@@ -124,7 +125,8 @@
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag,
random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism parallelism
+ = __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp
@@ -164,7 +166,8 @@
inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag,
- __gnu_parallel::_Parallelism);
+ __gnu_parallel::_Parallelism
+ = __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2,
Index: include/parallel/iterator.h
===================================================================
--- include/parallel/iterator.h (revision 143763)
+++ include/parallel/iterator.h (working copy)
@@ -131,7 +131,8 @@
public:
typedef IteratorCategory iterator_category;
typedef void value_type;
- typedef typename Iterator1::difference_type difference_type;
+ typedef typename std::iterator_traits<Iterator1>::difference_type
+ difference_type;
typedef type* pointer;
typedef type& reference;
Index: include/parallel/par_loop.h
===================================================================
--- include/parallel/par_loop.h (revision 143763)
+++ include/parallel/par_loop.h (working copy)
@@ -42,6 +42,7 @@
#include <omp.h>
#include <parallel/settings.h>
#include <parallel/base.h>
+#include <parallel/equally_split.h>
namespace __gnu_parallel
{
@@ -80,9 +81,9 @@
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
-
const difference_type length = end - begin;
Result *thread_results;
+ bool* constructed;
thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length);
@@ -92,13 +93,15 @@
# pragma omp single
{
num_threads = omp_get_num_threads();
- thread_results = new Result[num_threads];
+ thread_results = static_cast<Result*>(
+ ::operator new(num_threads * sizeof(Result)));
+ constructed = new bool[num_threads];
}
thread_index_t iam = omp_get_thread_num();
// Neutral element.
- Result reduct = Result();
+ Result* reduct = static_cast<Result*>(::operator new(sizeof(Result)));
difference_type
start = equally_split_point(length, num_threads, iam),
@@ -106,23 +109,30 @@
if (start < stop)
{
- reduct = f(o, begin + start);
+ new(reduct) Result(f(o, begin + start));
++start;
+ constructed[iam] = true;
}
+ else
+ constructed[iam] = false;
for (; start < stop; ++start)
- reduct = r(reduct, f(o, begin + start));
+ *reduct = r(*reduct, f(o, begin + start));
- thread_results[iam] = reduct;
+ thread_results[iam] = *reduct;
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
- output = r(output, thread_results[i]);
+ if (constructed[i])
+ output = r(output, thread_results[i]);
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
+ delete[] thread_results;
+ delete[] constructed;
+
return o;
}
Index: include/parallel/numeric
===================================================================
--- include/parallel/numeric (revision 143763)
+++ include/parallel/numeric (working copy)
@@ -103,13 +103,13 @@
__gnu_parallel::accumulate_selector<_RandomAccessIterator>
my_selector;
__gnu_parallel::
- for_each_template_random_access(begin, end,
+ for_each_template_random_access_ed(begin, end,
__gnu_parallel::nothing(),
my_selector,
__gnu_parallel::
accumulate_binop_reduct
<BinaryOperation>(binary_op),
- res, res, -1, parallelism_tag);
+ res, res, -1);
return res;
}
else
@@ -211,9 +211,9 @@
inner_product_selector<RandomAccessIterator1,
RandomAccessIterator2, T> my_selector(first1, first2);
__gnu_parallel::
- for_each_template_random_access(first1, last1, binary_op2,
+ for_each_template_random_access_ed(first1, last1, binary_op2,
my_selector, binary_op1,
- res, res, -1, parallelism_tag);
+ res, res, -1);
return res;
}
else
@@ -432,10 +432,10 @@
end_pair(end, result + (end - begin));
__gnu_parallel::adjacent_difference_selector<ip> functionality;
__gnu_parallel::
- for_each_template_random_access(begin_pair, end_pair, bin_op,
+ for_each_template_random_access_ed(begin_pair, end_pair, bin_op,
functionality,
__gnu_parallel::dummy_reduct(),
- dummy, dummy, -1, parallelism_tag);
+ dummy, dummy, -1);
return functionality.finish_iterator;
}
else