libstdc++
partial_sum.h
Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/partial_sum.h
00026  *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
00027 *  sums.
00028  *  This file is a GNU parallel extension to the Standard C++ Library.
00029  */
00030 
00031 // Written by Johannes Singler.
00032 
00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00035 
00036 #include <omp.h>
00037 #include <new>
00038 #include <bits/stl_algobase.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/numericfwd.h>
00041 
00042 namespace __gnu_parallel
00043 {
00044   // Problem: there is no 0-element given.
00045 
00046   /** @brief Base case prefix sum routine.
00047    *  @param __begin Begin iterator of input sequence.
00048    *  @param __end End iterator of input sequence.
00049    *  @param __result Begin iterator of output sequence.
00050    *  @param __bin_op Associative binary function.
00051    *  @param __value Start value. Must be passed since the neutral
00052    *  element is unknown in general.
00053    *  @return End iterator of output sequence. */
00054   template<typename _IIter,
00055        typename _OutputIterator,
00056        typename _BinaryOperation>
00057     _OutputIterator
00058     __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
00059                     _OutputIterator __result,
00060                     _BinaryOperation __bin_op,
00061       typename std::iterator_traits <_IIter>::value_type __value)
00062     {
00063       if (__begin == __end)
00064     return __result;
00065 
00066       while (__begin != __end)
00067     {
00068       __value = __bin_op(__value, *__begin);
00069       *__result = __value;
00070       ++__result;
00071       ++__begin;
00072     }
00073       return __result;
00074     }
00075 
00076   /** @brief Parallel partial sum implementation, two-phase approach,
00077       no recursion.
00078       *  @param __begin Begin iterator of input sequence.
00079       *  @param __end End iterator of input sequence.
00080       *  @param __result Begin iterator of output sequence.
00081       *  @param __bin_op Associative binary function.
00082       *  @param __n Length of sequence.
00083       *  @param __num_threads Number of threads to use.
00084       *  @return End iterator of output sequence.
00085       */
00086   template<typename _IIter,
00087        typename _OutputIterator,
00088        typename _BinaryOperation>
00089     _OutputIterator
00090     __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
00091                   _OutputIterator __result,
00092                   _BinaryOperation __bin_op,
00093       typename std::iterator_traits<_IIter>::difference_type __n)
00094     {
00095       typedef std::iterator_traits<_IIter> _TraitsType;
00096       typedef typename _TraitsType::value_type _ValueType;
00097       typedef typename _TraitsType::difference_type _DifferenceType;
00098 
00099       if (__begin == __end)
00100     return __result;
00101 
00102       _ThreadIndex __num_threads =
00103         std::min<_DifferenceType>(__get_max_threads(), __n - 1);
00104 
00105       if (__num_threads < 2)
00106     {
00107       *__result = *__begin;
00108       return __parallel_partial_sum_basecase(__begin + 1, __end,
00109                          __result + 1, __bin_op,
00110                          *__begin);
00111     }
00112 
00113       _DifferenceType* __borders;
00114       _ValueType* __sums;
00115 
00116       const _Settings& __s = _Settings::get();
00117 
00118 #     pragma omp parallel num_threads(__num_threads)
00119       {
00120 #       pragma omp single
00121     {
00122       __num_threads = omp_get_num_threads();
00123         
00124       __borders = new _DifferenceType[__num_threads + 2];
00125 
00126       if (__s.partial_sum_dilation == 1.0f)
00127         equally_split(__n, __num_threads + 1, __borders);
00128       else
00129         {
00130           _DifferenceType __first_part_length =
00131           std::max<_DifferenceType>(1,
00132             __n / (1.0f + __s.partial_sum_dilation * __num_threads));
00133           _DifferenceType __chunk_length =
00134           (__n - __first_part_length) / __num_threads;
00135           _DifferenceType __borderstart =
00136           __n - __num_threads * __chunk_length;
00137           __borders[0] = 0;
00138           for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
00139         {
00140           __borders[__i] = __borderstart;
00141           __borderstart += __chunk_length;
00142         }
00143           __borders[__num_threads + 1] = __n;
00144         }
00145 
00146       __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00147                                                            * __num_threads));
00148       _OutputIterator __target_end;
00149     } //single
00150 
00151         _ThreadIndex __iam = omp_get_thread_num();
00152         if (__iam == 0)
00153           {
00154             *__result = *__begin;
00155             __parallel_partial_sum_basecase(__begin + 1,
00156                         __begin + __borders[1],
00157                         __result + 1,
00158                         __bin_op, *__begin);
00159             ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
00160           }
00161         else
00162           {
00163             ::new(&(__sums[__iam]))
00164               _ValueType(__gnu_parallel::accumulate(
00165                                          __begin + __borders[__iam] + 1,
00166                                          __begin + __borders[__iam + 1],
00167                                          *(__begin + __borders[__iam]),
00168                                          __bin_op,
00169                                          __gnu_parallel::sequential_tag()));
00170           }
00171 
00172 #       pragma omp barrier
00173 
00174 #       pragma omp single
00175     __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
00176                     __sums + 1, __bin_op, __sums[0]);
00177 
00178 #       pragma omp barrier
00179 
00180     // Still same team.
00181         __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
00182                     __begin + __borders[__iam + 2],
00183                     __result + __borders[__iam + 1],
00184                     __bin_op, __sums[__iam]);
00185       } //parallel
00186 
00187       for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
00188     __sums[__i].~_ValueType();
00189       ::operator delete(__sums);
00190 
00191       delete[] __borders;
00192 
00193       return __result + __n;
00194     }
00195 
00196   /** @brief Parallel partial sum front-__end.
00197    *  @param __begin Begin iterator of input sequence.
00198    *  @param __end End iterator of input sequence.
00199    *  @param __result Begin iterator of output sequence.
00200    *  @param __bin_op Associative binary function.
00201    *  @return End iterator of output sequence. */
00202   template<typename _IIter,
00203        typename _OutputIterator,
00204        typename _BinaryOperation>
00205     _OutputIterator
00206     __parallel_partial_sum(_IIter __begin, _IIter __end,
00207                _OutputIterator __result, _BinaryOperation __bin_op)
00208     {
00209       _GLIBCXX_CALL(__begin - __end)
00210 
00211       typedef std::iterator_traits<_IIter> _TraitsType;
00212       typedef typename _TraitsType::value_type _ValueType;
00213       typedef typename _TraitsType::difference_type _DifferenceType;
00214 
00215       _DifferenceType __n = __end - __begin;
00216 
00217       switch (_Settings::get().partial_sum_algorithm)
00218     {
00219     case LINEAR:
00220       // Need an initial offset.
00221       return __parallel_partial_sum_linear(__begin, __end, __result,
00222                            __bin_op, __n);
00223     default:
00224       // Partial_sum algorithm not implemented.
00225       _GLIBCXX_PARALLEL_ASSERT(0);
00226       return __result + __n;
00227     }
00228     }
00229 }
00230 
00231 #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */