libstdc++: partition.h Source File

00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 2, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this library; see the file COPYING.  If not, write to
00018 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
00019 // MA 02111-1307, USA.
00020 
00021 // As a special exception, you may use this file as part of a free
00022 // software library without restriction.  Specifically, if other files
00023 // instantiate templates or use macros or inline functions from this
00024 // file, or you compile this file and link it with other files to
00025 // produce an executable, this file does not by itself cause the
00026 // resulting executable to be covered by the GNU General Public
00027 // License.  This exception does not however invalidate any other
00028 // reasons why the executable file might be covered by the GNU General
00029 // Public License.
00030 
00031 /** @file parallel/partition.h
00032  *  @brief Parallel implementation of std::partition(),
00033  *  std::nth_element(), and std::partial_sort().
00034  *  This file is a GNU parallel extension to the Standard C++ Library.
00035  */
00036 
00037 // Written by Johannes Singler and Felix Putze.
00038 
00039 #ifndef _GLIBCXX_PARALLEL_PARTITION_H
00040 #define _GLIBCXX_PARALLEL_PARTITION_H 1
00041 
00042 #include <parallel/basic_iterator.h>
00043 #include <parallel/sort.h>
00044 #include <parallel/random_number.h>
00045 #include <bits/stl_algo.h>
00046 #include <parallel/parallel.h>
00047 
00048 /** @brief Decide whether to declare certain variables volatile. */
00049 #define _GLIBCXX_VOLATILE volatile
00050 
00051 namespace __gnu_parallel
00052 {
00053 /** @brief Parallel implementation of std::partition.
00054   *  @param begin Begin iterator of input sequence to split.
00055   *  @param end End iterator of input sequence to split.
00056   *  @param pred Partition predicate, possibly including some kind of pivot.
00057   *  @param num_threads Maximum number of threads to use for this task.
00058   *  @return Number of elements not fulfilling the predicate. */
00059 template<typename RandomAccessIterator, typename Predicate>
00060   typename std::iterator_traits<RandomAccessIterator>::difference_type
00061   parallel_partition(RandomAccessIterator begin, RandomAccessIterator end,
00062                      Predicate pred, thread_index_t num_threads)
00063   {
00064     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00065     typedef typename traits_type::value_type value_type;
00066     typedef typename traits_type::difference_type difference_type;
00067 
00068     difference_type n = end - begin;
00069 
00070     _GLIBCXX_CALL(n)
00071 
00072     const _Settings& __s = _Settings::get();
00073 
00074     // Shared.
00075     _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
00076     _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
00077     _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
00078 
00079     bool* reserved_left = NULL, * reserved_right = NULL;
00080 
00081     difference_type chunk_size;
00082 
00083     omp_lock_t result_lock;
00084     omp_init_lock(&result_lock);
00085 
00086     //at least two chunks per thread
00087     if(right - left + 1 >= 2 * num_threads * chunk_size)
00088 #   pragma omp parallel num_threads(num_threads)
00089       {
00090 #       pragma omp single
00091           {
00092             num_threads = omp_get_num_threads();
00093             reserved_left = new bool[num_threads];
00094             reserved_right = new bool[num_threads];
00095 
00096             if (__s.partition_chunk_share > 0.0)
00097               chunk_size = std::max<difference_type>(__s.partition_chunk_size,
00098                     (double)n * __s.partition_chunk_share
00099                              / (double)num_threads);
00100             else
00101               chunk_size = __s.partition_chunk_size;
00102           }
00103 
00104         while (right - left + 1 >= 2 * num_threads * chunk_size)
00105           {
00106 #           pragma omp single
00107               {
00108                 difference_type num_chunks = (right - left + 1) / chunk_size;
00109 
00110                 for (int r = 0; r < num_threads; ++r)
00111                   {
00112                     reserved_left[r] = false;
00113                     reserved_right[r] = false;
00114                   }
00115                 leftover_left = 0;
00116                 leftover_right = 0;
00117               } //implicit barrier
00118 
00119             // Private.
00120             difference_type thread_left, thread_left_border,
00121                             thread_right, thread_right_border;
00122             thread_left = left + 1;
00123 
00124             // Just to satisfy the condition below.
00125             thread_left_border = thread_left - 1;
00126             thread_right = n - 1;
00127             thread_right_border = thread_right + 1;
00128 
00129             bool iam_finished = false;
00130             while (!iam_finished)
00131               {
00132                 if (thread_left > thread_left_border)
00133                   {
00134                     omp_set_lock(&result_lock);
00135                     if (left + (chunk_size - 1) > right)
00136                       iam_finished = true;
00137                     else
00138                       {
00139                         thread_left = left;
00140                         thread_left_border = left + (chunk_size - 1);
00141                         left += chunk_size;
00142                       }
00143                     omp_unset_lock(&result_lock);
00144                   }
00145 
00146                 if (thread_right < thread_right_border)
00147                   {
00148                     omp_set_lock(&result_lock);
00149                     if (left > right - (chunk_size - 1))
00150                       iam_finished = true;
00151                     else
00152                       {
00153                         thread_right = right;
00154                         thread_right_border = right - (chunk_size - 1);
00155                         right -= chunk_size;
00156                       }
00157                     omp_unset_lock(&result_lock);
00158                   }
00159 
00160                 if (iam_finished)
00161                   break;
00162 
00163                 // Swap as usual.
00164                 while (thread_left < thread_right)
00165                   {
00166                     while (pred(begin[thread_left])
00167                             && thread_left <= thread_left_border)
00168                       ++thread_left;
00169                     while (!pred(begin[thread_right])
00170                             && thread_right >= thread_right_border)
00171                       --thread_right;
00172 
00173                     if (thread_left > thread_left_border
00174                         || thread_right < thread_right_border)
00175                       // Fetch new chunk(s).
00176                       break;
00177 
00178                     std::swap(begin[thread_left], begin[thread_right]);
00179                     ++thread_left;
00180                     --thread_right;
00181                   }
00182               }
00183 
00184             // Now swap the leftover chunks to the right places.
00185             if (thread_left <= thread_left_border)
00186 #             pragma omp atomic
00187               ++leftover_left;
00188             if (thread_right >= thread_right_border)
00189 #             pragma omp atomic
00190               ++leftover_right;
00191 
00192 #           pragma omp barrier
00193 
00194 #           pragma omp single
00195               {
00196                 leftnew = left - leftover_left * chunk_size;
00197                 rightnew = right + leftover_right * chunk_size;
00198               }
00199 
00200 #           pragma omp barrier
00201 
00202             // <=> thread_left_border + (chunk_size - 1) >= leftnew
00203             if (thread_left <= thread_left_border
00204                 && thread_left_border >= leftnew)
00205               {
00206                 // Chunk already in place, reserve spot.
00207                 reserved_left[(left - (thread_left_border + 1)) / chunk_size]
00208                     = true;
00209               }
00210 
00211             // <=> thread_right_border - (chunk_size - 1) <= rightnew
00212             if (thread_right >= thread_right_border
00213                 && thread_right_border <= rightnew)
00214               {
00215                 // Chunk already in place, reserve spot.
00216                 reserved_right[((thread_right_border - 1) - right)
00217                    / chunk_size] = true;
00218               }
00219 
00220 #           pragma omp barrier
00221 
00222             if (thread_left <= thread_left_border
00223                 && thread_left_border < leftnew)
00224               {
00225                 // Find spot and swap.
00226                 difference_type swapstart = -1;
00227                 omp_set_lock(&result_lock);
00228                 for (int r = 0; r < leftover_left; ++r)
00229                   if (!reserved_left[r])
00230                     {
00231                       reserved_left[r] = true;
00232                       swapstart = left - (r + 1) * chunk_size;
00233                       break;
00234                     }
00235                 omp_unset_lock(&result_lock);
00236 
00237 #if _GLIBCXX_ASSERTIONS
00238                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
00239 #endif
00240 
00241                 std::swap_ranges(begin + thread_left_border
00242                  - (chunk_size - 1),
00243                  begin + thread_left_border + 1,
00244                  begin + swapstart);
00245               }
00246 
00247             if (thread_right >= thread_right_border
00248                 && thread_right_border > rightnew)
00249               {
00250                 // Find spot and swap
00251                 difference_type swapstart = -1;
00252                 omp_set_lock(&result_lock);
00253                 for (int r = 0; r < leftover_right; ++r)
00254                   if (!reserved_right[r])
00255                     {
00256                       reserved_right[r] = true;
00257                       swapstart = right + r * chunk_size + 1;
00258                       break;
00259                     }
00260                 omp_unset_lock(&result_lock);
00261 
00262 #if _GLIBCXX_ASSERTIONS
00263                 _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
00264 #endif
00265 
00266                 std::swap_ranges(begin + thread_right_border,
00267                  begin + thread_right_border + chunk_size,
00268                  begin + swapstart);
00269               }
00270 #if _GLIBCXX_ASSERTIONS
00271 #             pragma omp barrier
00272 
00273 #             pragma omp single
00274                 {
00275                   for (int r = 0; r < leftover_left; ++r)
00276                     _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
00277                   for (int r = 0; r < leftover_right; ++r)
00278                     _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
00279                 }
00280 
00281 #             pragma omp barrier
00282 #endif
00283 
00284 #             pragma omp barrier
00285 
00286               left = leftnew;
00287               right = rightnew;
00288           }
00289 #         pragma omp flush(left, right)
00290       } // end "recursion" //parallel
00291 
00292     difference_type final_left = left, final_right = right;
00293 
00294     while (final_left < final_right)
00295       {
00296         // Go right until key is geq than pivot.
00297         while (pred(begin[final_left]) && final_left < final_right)
00298           ++final_left;
00299 
00300         // Go left until key is less than pivot.
00301         while (!pred(begin[final_right]) && final_left < final_right)
00302           --final_right;
00303 
00304         if (final_left == final_right)
00305           break;
00306         std::swap(begin[final_left], begin[final_right]);
00307         ++final_left;
00308         --final_right;
00309       }
00310 
00311     // All elements on the left side are < piv, all elements on the
00312     // right are >= piv
00313     delete[] reserved_left;
00314     delete[] reserved_right;
00315 
00316     omp_destroy_lock(&result_lock);
00317 
00318     // Element "between" final_left and final_right might not have
00319     // been regarded yet
00320     if (final_left < n && !pred(begin[final_left]))
00321       // Really swapped.
00322       return final_left;
00323     else
00324       return final_left + 1;
00325   }
00326 
00327 /**
00328   *  @brief Parallel implementation of std::nth_element().
00329   *  @param begin Begin iterator of input sequence.
00330   *  @param nth Iterator of element that must be in position afterwards.
00331   *  @param end End iterator of input sequence.
00332   *  @param comp Comparator.
00333   */
00334 template<typename RandomAccessIterator, typename Comparator>
00335   void 
00336   parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, 
00337                RandomAccessIterator end, Comparator comp)
00338   {
00339     typedef std::iterator_traits<RandomAccessIterator> traits_type;
00340     typedef typename traits_type::value_type value_type;
00341     typedef typename traits_type::difference_type difference_type;
00342 
00343     _GLIBCXX_CALL(end - begin)
00344 
00345     RandomAccessIterator split;
00346     random_number rng;
00347 
00348     difference_type minimum_length =
00349       std::max<difference_type>(2, _Settings::get().partition_minimal_n);
00350 
00351     // Break if input range to small.
00352     while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
00353       {
00354         difference_type n = end - begin;
00355 
00356         RandomAccessIterator pivot_pos = begin +  rng(n);
00357 
00358         // Swap pivot_pos value to end.
00359         if (pivot_pos != (end - 1))
00360           std::swap(*pivot_pos, *(end - 1));
00361         pivot_pos = end - 1;
00362 
00363         // XXX Comparator must have first_value_type, second_value_type,
00364     // result_type
00365         // Comparator == __gnu_parallel::lexicographic<S, int,
00366     // __gnu_parallel::less<S, S> >
00367         // pivot_pos == std::pair<S, int>*
00368         // XXX binder2nd only for RandomAccessIterators??
00369         __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
00370       pred(comp, *pivot_pos);
00371 
00372         // Divide, leave pivot unchanged in last place.
00373         RandomAccessIterator split_pos1, split_pos2;
00374         split_pos1 = begin + parallel_partition(begin, end - 1, pred,
00375                         get_max_threads());
00376 
00377         // Left side: < pivot_pos; right side: >= pivot_pos
00378 
00379         // Swap pivot back to middle.
00380         if (split_pos1 != pivot_pos)
00381           std::swap(*split_pos1, *pivot_pos);
00382         pivot_pos = split_pos1;
00383 
00384         // In case all elements are equal, split_pos1 == 0
00385         if ((split_pos1 + 1 - begin) < (n >> 7)
00386         || (end - split_pos1) < (n >> 7))
00387           {
00388             // Very unequal split, one part smaller than one 128th
00389             // elements not strictly larger than the pivot.
00390             __gnu_parallel::unary_negate<__gnu_parallel::
00391           binder1st<Comparator, value_type, value_type, bool>, value_type>
00392           pred(__gnu_parallel::binder1st<Comparator, value_type,
00393            value_type, bool>(comp, *pivot_pos));
00394 
00395             // Find other end of pivot-equal range.
00396             split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
00397                              end, pred);
00398           }
00399         else
00400           // Only skip the pivot.
00401           split_pos2 = split_pos1 + 1;
00402 
00403         // Compare iterators.
00404         if (split_pos2 <= nth)
00405           begin = split_pos2;
00406         else if (nth < split_pos1)
00407           end = split_pos1;
00408         else
00409           break;
00410       }
00411 
00412     // Only at most _Settings::partition_minimal_n elements left.
00413     __gnu_sequential::sort(begin, end, comp);
00414   }
00415 
00416 /** @brief Parallel implementation of std::partial_sort().
00417 *  @param begin Begin iterator of input sequence.
00418 *  @param middle Sort until this position.
00419 *  @param end End iterator of input sequence.
00420 *  @param comp Comparator. */
00421 template<typename RandomAccessIterator, typename Comparator>
00422   void
00423   parallel_partial_sort(RandomAccessIterator begin,
00424             RandomAccessIterator middle,
00425             RandomAccessIterator end, Comparator comp)
00426   {
00427     parallel_nth_element(begin, middle, end, comp);
00428     std::sort(begin, middle, comp);
00429   }
00430 
00431 } //namespace __gnu_parallel
00432 
00433 #undef _GLIBCXX_VOLATILE
00434 
00435 #endif