This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][libstdc++-v3 parallel mode] Fix parallel functions not called by default


The first patch fixes a bug that caused some functions to be executed
sequentially by default, although _GLIBCXX_PARALLEL was switched on.
It dates back to this thread:

<http://gcc.gnu.org/ml/libstdc++/2008-09/msg00116.html>

The second patch fixes regressions that came up because of the better
test coverage due to the first patch.  The bugs were there before.  And
overall, only the experimental parallel mode is affected, there is no
change for the normal case.

Tested x86_64-unknown-linux-gnu: No regressions

Please approve for mainline and gcc-4_3-branch.  I know that GCC 4.4
currently is in a late stage, but this patch is important for the
parallel mode, and it only affects an experimental part.
In case of approval, should I commit the two patches together or separately?

2009-02-03  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/algorithmfwd.h:
          Add parallelism default for many declarations.
        * include/parallel/numericfwd.h: Likewise.


2009-02-03  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/iterator.h: Use iterator_traits as usual.
        * include/parallel/par_loop.h:
          Include equally_split.h.
          (for_each_template_random_access_ed)
          Avoid calling the Result default constructor.
        * include/parallel/numeric: Replace
          for_each_template_random_access by
          for_each_template_random_access_ed in numeric functions.

Johannes

Index: include/parallel/algorithmfwd.h
===================================================================
--- include/parallel/algorithmfwd.h	(revision 143763)
+++ include/parallel/algorithmfwd.h	(working copy)
@@ -99,7 +99,8 @@
   template<typename _RAIter, typename _Tp>
     typename iterator_traits<_RAIter>::difference_type
     count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
-		 __gnu_parallel::_Parallelism);
+		 __gnu_parallel::_Parallelism parallelism
+		 = __gnu_parallel::parallel_unbalanced);
 
 
   template<typename _IIter, typename _Predicate>
@@ -121,7 +122,8 @@
   template<typename _RAIter, typename _Predicate>
     typename iterator_traits<_RAIter>::difference_type
     count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
-		    __gnu_parallel::_Parallelism);
+		    __gnu_parallel::_Parallelism parallelism
+		    = __gnu_parallel::parallel_unbalanced);
 
   // algobase.h
   template<typename _IIter1, typename _IIter2>
@@ -228,7 +230,8 @@
   template<typename _RAIter, typename _Function>
     _Function
     for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag, 
-		    __gnu_parallel::_Parallelism);
+		    __gnu_parallel::_Parallelism  parallelism
+		    = __gnu_parallel::parallel_balanced);
 
 
   template<typename _FIter, typename _Generator>
@@ -250,7 +253,8 @@
   template<typename _RAIter, typename _Generator>
     void
     generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag, 
-		    __gnu_parallel::_Parallelism);
+		    __gnu_parallel::_Parallelism parallelism
+		    = __gnu_parallel::parallel_balanced);
 
   template<typename _OIter, typename _Size, typename _Generator>
     _OIter
@@ -272,7 +276,8 @@
   template<typename _RAIter, typename _Size, typename _Generator>
     _RAIter
     generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag, 
-		      __gnu_parallel::_Parallelism);
+		      __gnu_parallel::_Parallelism parallelism
+		      = __gnu_parallel::parallel_balanced);
 
   template<typename _IIter1, typename _IIter2>
     bool
@@ -429,7 +434,8 @@
     _RAOIter
     transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation, 
 		      random_access_iterator_tag, random_access_iterator_tag, 
-		      __gnu_parallel::_Parallelism);
+		      __gnu_parallel::_Parallelism parallelism
+		      = __gnu_parallel::parallel_balanced);
 
 
   template<typename _IIter1, typename _IIter2, typename _OIter,
@@ -455,7 +461,8 @@
     transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation, 
 		      random_access_iterator_tag, random_access_iterator_tag, 
 		      random_access_iterator_tag,
-		      __gnu_parallel::_Parallelism);
+		      __gnu_parallel::_Parallelism parallelism
+		      = __gnu_parallel::parallel_balanced);
 
   template<typename _IIter1, typename _IIter2, typename _OIter,
 	   typename _BiOperation, typename _Tag1,
@@ -546,7 +553,8 @@
   template<typename _RAIter, typename _Compare>
     _RAIter
     max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, 
-		       __gnu_parallel::_Parallelism);
+		       __gnu_parallel::_Parallelism parallelism
+		       = __gnu_parallel::parallel_balanced);
 
 
   template<typename _IIter1, typename _IIter2, typename _OIter>
@@ -615,7 +623,8 @@
   template<typename _RAIter, typename _Compare>
     _RAIter
     min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, 
-		       __gnu_parallel::_Parallelism);
+		       __gnu_parallel::_Parallelism parallelism
+		       = __gnu_parallel::parallel_balanced);
 
   template<typename _RAIter>
     void
Index: include/parallel/numericfwd.h
===================================================================
--- include/parallel/numericfwd.h	(revision 143763)
+++ include/parallel/numericfwd.h	(working copy)
@@ -83,7 +83,8 @@
     _Tp
     accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
 		      random_access_iterator_tag,
-		      __gnu_parallel::_Parallelism);
+		      __gnu_parallel::_Parallelism parallelism
+		      = __gnu_parallel::parallel_unbalanced);
 
   template<typename _IIter, typename _OIter>
     _OIter
@@ -124,7 +125,8 @@
     adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, 
 			       random_access_iterator_tag, 
 			       random_access_iterator_tag, 
-			       __gnu_parallel::_Parallelism);
+			       __gnu_parallel::_Parallelism parallelism
+			       = __gnu_parallel::parallel_unbalanced);
 
   template<typename _IIter1, typename _IIter2, typename _Tp>
     _Tp
@@ -164,7 +166,8 @@
     inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1, 
 			 BinaryFunction2, random_access_iterator_tag, 
 			 random_access_iterator_tag, 
-			 __gnu_parallel::_Parallelism);
+			 __gnu_parallel::_Parallelism
+			 = __gnu_parallel::parallel_unbalanced);
 
   template<typename _IIter1, typename _IIter2, typename _Tp,
 	   typename _BinaryFunction1, typename _BinaryFunction2,
Index: include/parallel/iterator.h
===================================================================
--- include/parallel/iterator.h	(revision 143763)
+++ include/parallel/iterator.h	(working copy)
@@ -131,7 +131,8 @@
     public:
       typedef IteratorCategory iterator_category;
       typedef void value_type;
-      typedef typename Iterator1::difference_type difference_type;
+      typedef typename std::iterator_traits<Iterator1>::difference_type
+                                                            difference_type;
       typedef type* pointer;
       typedef type& reference;
 
Index: include/parallel/par_loop.h
===================================================================
--- include/parallel/par_loop.h	(revision 143763)
+++ include/parallel/par_loop.h	(working copy)
@@ -42,6 +42,7 @@
 #include <omp.h>
 #include <parallel/settings.h>
 #include <parallel/base.h>
+#include <parallel/equally_split.h>
 
 namespace __gnu_parallel
 {
@@ -80,9 +81,9 @@
   {
     typedef std::iterator_traits<RandomAccessIterator> traits_type;
     typedef typename traits_type::difference_type difference_type;
-
     const difference_type length = end - begin;
     Result *thread_results;
+    bool* constructed;
 
     thread_index_t num_threads =
       __gnu_parallel::min<difference_type>(get_max_threads(), length);
@@ -92,13 +93,15 @@
 #       pragma omp single
           {
             num_threads = omp_get_num_threads();
-            thread_results = new Result[num_threads];
+            thread_results = static_cast<Result*>(
+                                ::operator new(num_threads * sizeof(Result)));
+            constructed = new bool[num_threads];
           }
 
         thread_index_t iam = omp_get_thread_num();
 
         // Neutral element.
-        Result reduct = Result();
+        Result* reduct = static_cast<Result*>(::operator new(sizeof(Result)));
 
         difference_type
             start = equally_split_point(length, num_threads, iam),
@@ -106,23 +109,30 @@
 
         if (start < stop)
           {
-            reduct = f(o, begin + start);
+            new(reduct) Result(f(o, begin + start));
             ++start;
+            constructed[iam] = true;
           }
+        else
+          constructed[iam] = false;
 
         for (; start < stop; ++start)
-          reduct = r(reduct, f(o, begin + start));
+          *reduct = r(*reduct, f(o, begin + start));
 
-        thread_results[iam] = reduct;
+        thread_results[iam] = *reduct;
       } //parallel
 
     for (thread_index_t i = 0; i < num_threads; ++i)
-      output = r(output, thread_results[i]);
+        if (constructed[i])
+            output = r(output, thread_results[i]);
 
     // Points to last element processed (needed as return value for
     // some algorithms like transform).
     f.finish_iterator = begin + length;
 
+    delete[] thread_results;
+    delete[] constructed;
+
     return o;
   }
 
Index: include/parallel/numeric
===================================================================
--- include/parallel/numeric	(revision 143763)
+++ include/parallel/numeric	(working copy)
@@ -103,13 +103,13 @@
 	  __gnu_parallel::accumulate_selector<_RandomAccessIterator>
 	    my_selector;
 	  __gnu_parallel::
-	    for_each_template_random_access(begin, end,
+	    for_each_template_random_access_ed(begin, end,
 					    __gnu_parallel::nothing(),
 					    my_selector,
 					    __gnu_parallel::
 					    accumulate_binop_reduct
 					    <BinaryOperation>(binary_op),
-					    res, res, -1, parallelism_tag);
+					    res, res, -1);
 	  return res;
 	}
       else
@@ -211,9 +211,9 @@
 	    inner_product_selector<RandomAccessIterator1,
 	    RandomAccessIterator2, T> my_selector(first1, first2);
 	  __gnu_parallel::
-	    for_each_template_random_access(first1, last1, binary_op2,
+	    for_each_template_random_access_ed(first1, last1, binary_op2,
 					    my_selector, binary_op1,
-					    res, res, -1, parallelism_tag);
+					    res, res, -1);
 	  return res;
 	}
       else
@@ -432,10 +432,10 @@
 	    end_pair(end, result + (end - begin));
 	  __gnu_parallel::adjacent_difference_selector<ip> functionality;
 	  __gnu_parallel::
-	    for_each_template_random_access(begin_pair, end_pair, bin_op,
+	    for_each_template_random_access_ed(begin_pair, end_pair, bin_op,
 					    functionality,
 					    __gnu_parallel::dummy_reduct(),
-					    dummy, dummy, -1, parallelism_tag);
+					    dummy, dummy, -1);
 	  return functionality.finish_iterator;
 	}
       else

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]