00001 // -*- C++ -*- 00002 00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the terms 00007 // of the GNU General Public License as published by the Free Software 00008 // Foundation; either version 2, or (at your option) any later 00009 // version. 00010 00011 // This library is distributed in the hope that it will be useful, but 00012 // WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 // General Public License for more details. 00015 00016 // You should have received a copy of the GNU General Public License 00017 // along with this library; see the file COPYING. If not, write to 00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, 00019 // MA 02111-1307, USA. 00020 00021 // As a special exception, you may use this file as part of a free 00022 // software library without restriction. Specifically, if other files 00023 // instantiate templates or use macros or inline functions from this 00024 // file, or you compile this file and link it with other files to 00025 // produce an executable, this file does not by itself cause the 00026 // resulting executable to be covered by the GNU General Public 00027 // License. This exception does not however invalidate any other 00028 // reasons why the executable file might be covered by the GNU General 00029 // Public License. 00030 00031 /** @file parallel/par_loop.h 00032 * @brief Parallelization of embarrassingly parallel execution by 00033 * means of equal splitting. 00034 * This file is a GNU parallel extension to the Standard C++ Library. 00035 */ 00036 00037 // Written by Felix Putze. 00038 00039 #ifndef _GLIBCXX_PARALLEL_PAR_LOOP_H 00040 #define _GLIBCXX_PARALLEL_PAR_LOOP_H 1 00041 00042 #include <omp.h> 00043 #include <parallel/settings.h> 00044 #include <parallel/base.h> 00045 00046 namespace __gnu_parallel 00047 { 00048 00049 /** @brief Embarrassingly parallel algorithm for random access 00050 * iterators, using hand-crafted parallelization by equal splitting 00051 * the work. 00052 * 00053 * @param begin Begin iterator of element sequence. 00054 * @param end End iterator of element sequence. 00055 * @param o User-supplied functor (comparator, predicate, adding 00056 * functor, ...) 00057 * @param f Functor to "process" an element with op (depends on 00058 * desired functionality, e. g. for std::for_each(), ...). 00059 * @param r Functor to "add" a single result to the already 00060 * processed elements (depends on functionality). 00061 * @param base Base value for reduction. 00062 * @param output Pointer to position where final result is written to 00063 * @param bound Maximum number of elements processed (e. g. for 00064 * std::count_n()). 00065 * @return User-supplied functor (that may contain a part of the result). 00066 */ 00067 template<typename RandomAccessIterator, 00068 typename Op, 00069 typename Fu, 00070 typename Red, 00071 typename Result> 00072 Op 00073 for_each_template_random_access_ed(RandomAccessIterator begin, 00074 RandomAccessIterator end, 00075 Op o, Fu& f, Red r, Result base, 00076 Result& output, 00077 typename std::iterator_traits 00078 <RandomAccessIterator>:: 00079 difference_type bound) 00080 { 00081 typedef std::iterator_traits<RandomAccessIterator> traits_type; 00082 typedef typename traits_type::difference_type difference_type; 00083 00084 const difference_type length = end - begin; 00085 Result *thread_results; 00086 00087 thread_index_t num_threads = 00088 __gnu_parallel::min<difference_type>(get_max_threads(), length); 00089 00090 # pragma omp parallel num_threads(num_threads) 00091 { 00092 # pragma omp single 00093 { 00094 num_threads = omp_get_num_threads(); 00095 thread_results = new Result[num_threads]; 00096 } 00097 00098 thread_index_t iam = omp_get_thread_num(); 00099 00100 // Neutral element. 00101 Result reduct = Result(); 00102 00103 difference_type 00104 start = equally_split_point(length, num_threads, iam), 00105 stop = equally_split_point(length, num_threads, iam + 1); 00106 00107 if (start < stop) 00108 { 00109 reduct = f(o, begin + start); 00110 ++start; 00111 } 00112 00113 for (; start < stop; ++start) 00114 reduct = r(reduct, f(o, begin + start)); 00115 00116 thread_results[iam] = reduct; 00117 } //parallel 00118 00119 for (thread_index_t i = 0; i < num_threads; ++i) 00120 output = r(output, thread_results[i]); 00121 00122 // Points to last element processed (needed as return value for 00123 // some algorithms like transform). 00124 f.finish_iterator = begin + length; 00125 00126 return o; 00127 } 00128 00129 } // end namespace 00130 00131 #endif