libstdc++/api/a01080_source.html

 // -*- C++ -*-


 // Copyright (C) 2007-2015 Free Software Foundation, Inc.

 //

 // This file is part of the GNU ISO C++ Library.  This library is free

 // software; you can redistribute it and/or modify it under the terms

 // of the GNU General Public License as published by the Free Software

 // Foundation; either version 3, or (at your option) any later

 // version.


 // This library is distributed in the hope that it will be useful, but

 // WITHOUT ANY WARRANTY; without even the implied warranty of

 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 // General Public License for more details.


 // Under Section 7 of GPL version 3, you are granted additional

 // permissions described in the GCC Runtime Library Exception, version

 // 3.1, as published by the Free Software Foundation.


 // You should have received a copy of the GNU General Public License and

 // a copy of the GCC Runtime Library Exception along with this program;

 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see

 // <http://www.gnu.org/licenses/>.


 /** @file parallel/multiway_mergesort.h

  *  @brief Parallel multiway merge sort.

  *  This file is a GNU parallel extension to the Standard C++ Library.

  */


 // Written by Johannes Singler.


 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H

 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1


 #include <vector>


 #include <parallel/basic_iterator.h>

 #include <bits/stl_algo.h>

 #include <parallel/parallel.h>

 #include <parallel/multiway_merge.h>


 namespace __gnu_parallel

 {

   /**
    *  @brief Subsequence description.
    *
    *  A pair of positions delimiting the half-open range
    *  [@c _M_begin, @c _M_end).  Within this file the positions are used
    *  as offsets into one thread's temporary buffer.
    */
   template<typename _DifferenceTp>
     struct _Piece
     {
       /// Type of the two bounds; an alias of the template argument.
       typedef _DifferenceTp _DifferenceType;

       /// Position of the first element of the subsequence.
       _DifferenceTp _M_begin;

       /// Position one past the last element of the subsequence.
       _DifferenceTp _M_end;
     };


   /**
    *  @brief Data accessed by all threads.
    *
    *  PMWMS = parallel multiway mergesort.  One object of this type is
    *  filled in by parallel_sort_mwms() and handed by pointer to every
    *  thread running parallel_sort_mwms_pu().
    */
   template<typename _RAIter>
     struct _PMWMSSortingData
     {
       typedef std::iterator_traits<_RAIter> _TraitsType;
       typedef typename std::iterator_traits<_RAIter>::value_type
         _ValueType;
       typedef typename std::iterator_traits<_RAIter>::difference_type
         _DifferenceType;

       /// Number of threads taking part in the sort.
       _ThreadIndex _M_num_threads;

       /// Iterator to the beginning of the input sequence.
       _RAIter _M_source;

       /// Start index of each thread's chunk within the input; holds
       /// @c _M_num_threads + 1 entries, the last being the total length.
       _DifferenceType* _M_starts;

       /// One raw buffer per thread in which that thread sorts its chunk.
       _ValueType** _M_temporary;

       /// Sample elements (null when exact splitting is used).
       _ValueType* _M_samples;

       /// Offsets to add to the found positions.
       _DifferenceType* _M_offsets;

       /// Pieces of data to merge, indexed @c [thread][sequence].
       std::vector<_Piece<_DifferenceType> >* _M_pieces;
     };


   /**
    *  @brief Select samples from the calling thread's chunk.
    *
    *  The chunk [@c _M_starts[iam], @c _M_starts[iam + 1]) of the calling
    *  thread is divided by __equally_split() into @c __num_samples + 1
    *  parts; the element found at each of the @c __num_samples inner
    *  split positions is copy-constructed (placement new) into
    *  @c __sd->_M_samples[iam * __num_samples + i].
    *
    *  @param __sd Pointer to algorithm data.  The result is placed in
    *  @c __sd->_M_samples.
    *  @param __num_samples Number of samples to select.
    */
   template<typename _RAIter, typename _DifferenceTp>
     void
     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
                         _DifferenceTp __num_samples)
     {
       typedef std::iterator_traits<_RAIter> _TraitsType;
       typedef typename _TraitsType::value_type _ValueType;
       typedef _DifferenceTp _DifferenceType;

       const _ThreadIndex __iam = omp_get_thread_num();

       // Split positions.  Held in a vector rather than a raw new[] array
       // so that the storage is released on every exit path, including a
       // throwing _ValueType copy constructor below.
       std::vector<_DifferenceType> __es(__num_samples + 2);

       __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
                       __num_samples + 1, __es.begin());

       // _M_samples is raw storage, hence placement new.  __es[0] is
       // skipped: only the positions from index 1 on are sampled.
       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
         ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
             _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
                                        + __es[__i + 1]]);
     }


   /**
    *  @brief Split consistently.
    *
    *  Primary template, deliberately without members: the splitting code
    *  is provided by the partial specializations for @c __exact equal to
    *  @c true and @c false.
    */
   template<bool __exact, typename _RAIter,
            typename _Compare, typename _SortingPlacesIterator>
     struct _SplitConsistently
     {
     };


   /** @brief Split by exact splitting. */

   template<typename _RAIter, typename _Compare,

            typename _SortingPlacesIterator>

     struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>

     {

       void

       operator()(const _ThreadIndex __iam,

                  _PMWMSSortingData<_RAIter>* __sd,

                  _Compare& __comp,

                  const typename

                  std::iterator_traits<_RAIter>::difference_type

                  __num_samples) const

       {

 #       pragma omp barrier


         std::vector<std::pair<_SortingPlacesIterator,

                               _SortingPlacesIterator> >

           __seqs(__sd->_M_num_threads);

         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)

           __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],

                                        __sd->_M_temporary[__s]

                                        + (__sd->_M_starts[__s + 1]

                                           - __sd->_M_starts[__s]));


         std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);


         // if not last thread

         if (__iam < __sd->_M_num_threads - 1)

           multiseq_partition(__seqs.begin(), __seqs.end(),

                              __sd->_M_starts[__iam + 1], __offsets.begin(),

                              __comp);


         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)

           {

             // for each sequence

             if (__iam < (__sd->_M_num_threads - 1))

               __sd->_M_pieces[__iam][__seq]._M_end

                 = __offsets[__seq] - __seqs[__seq].first;

             else

               // very end of this sequence

               __sd->_M_pieces[__iam][__seq]._M_end =

                 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];

           }


 #       pragma omp barrier


         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)

           {

             // For each sequence.

             if (__iam > 0)

               __sd->_M_pieces[__iam][__seq]._M_begin =

                 __sd->_M_pieces[__iam - 1][__seq]._M_end;

             else

               // Absolute beginning.

               __sd->_M_pieces[__iam][__seq]._M_begin = 0;

           }

       }

   };


   /** @brief Split by sampling. */

   template<typename _RAIter, typename _Compare,

            typename _SortingPlacesIterator>

     struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>

     {

       void

       operator()(const _ThreadIndex __iam,

                  _PMWMSSortingData<_RAIter>* __sd,

                  _Compare& __comp,

                  const typename

                  std::iterator_traits<_RAIter>::difference_type

                  __num_samples) const

       {

         typedef std::iterator_traits<_RAIter> _TraitsType;

         typedef typename _TraitsType::value_type _ValueType;

         typedef typename _TraitsType::difference_type _DifferenceType;


         __determine_samples(__sd, __num_samples);


 #       pragma omp barrier


 #       pragma omp single

         __gnu_sequential::sort(__sd->_M_samples,

                                __sd->_M_samples

                                + (__num_samples * __sd->_M_num_threads),

                                __comp);


 #       pragma omp barrier


         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)

           {

             // For each sequence.

             if (__num_samples * __iam > 0)

               __sd->_M_pieces[__iam][__s]._M_begin =

                 std::lower_bound(__sd->_M_temporary[__s],

                                  __sd->_M_temporary[__s]

                                  + (__sd->_M_starts[__s + 1]

                                     - __sd->_M_starts[__s]),

                                  __sd->_M_samples[__num_samples * __iam],

                                  __comp)

                 - __sd->_M_temporary[__s];

             else

               // Absolute beginning.

               __sd->_M_pieces[__iam][__s]._M_begin = 0;


             if ((__num_samples * (__iam + 1)) <

                 (__num_samples * __sd->_M_num_threads))

               __sd->_M_pieces[__iam][__s]._M_end =

                 std::lower_bound(__sd->_M_temporary[__s],

                                  __sd->_M_temporary[__s]

                                  + (__sd->_M_starts[__s + 1]

                                     - __sd->_M_starts[__s]),

                                  __sd->_M_samples[__num_samples * (__iam + 1)],

                                  __comp)

                 - __sd->_M_temporary[__s];

             else

               // Absolute end.

               __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]

                                                     - __sd->_M_starts[__s]);

           }

       }

   };


   /**
    *  @brief Sort functor chosen at compile time by @c __stable.
    *
    *  Primary template, deliberately without members; the call operator
    *  is supplied by the @c true and @c false partial specializations.
    */
   template<bool __stable, typename _RAIter, typename _Compare>
     struct __possibly_stable_sort
     {
     };


   /**
    *  @brief Stable case: forwards to @c __gnu_sequential::stable_sort.
    */
   template<typename _RAIter, typename _Compare>
     struct __possibly_stable_sort<true, _RAIter, _Compare>
     {
       /// Sort [__begin, __end) sequentially with @c __comp.
       void
       operator()(const _RAIter& __begin,
                  const _RAIter& __end,
                  _Compare& __comp) const
       {
         __gnu_sequential::stable_sort(__begin, __end, __comp);
       }
     };


   /**
    *  @brief Non-stable case: forwards to @c __gnu_sequential::sort.
    */
   template<typename _RAIter, typename _Compare>
     struct __possibly_stable_sort<false, _RAIter, _Compare>
     {
       /// Sort [__begin, __end) sequentially with @c __comp.
       void
       operator()(const _RAIter __begin,
                  const _RAIter __end,
                  _Compare& __comp) const
       {
         __gnu_sequential::sort(__begin, __end, __comp);
       }
     };


   /**
    *  @brief Multiway-merge functor chosen at compile time by @c __stable.
    *
    *  Primary template, deliberately without members; the call operator
    *  is supplied by the @c true and @c false partial specializations.
    *
    *  All template parameter names use reserved identifiers so that a
    *  user-defined macro cannot collide with them.
    */
   template<bool __stable, typename _Seq_RAIter,
            typename _RAIter, typename _Compare,
            typename _DiffType>
     struct __possibly_stable_multiway_merge
     { };


   /**
    *  @brief Stable case: forwards to the sequential
    *  @c stable_multiway_merge.
    *
    *  The sequence-iterator parameter is spelled @c _Seq_RAIter (a
    *  reserved identifier) so that a user-defined macro cannot collide
    *  with it.
    */
   template<typename _Seq_RAIter, typename _RAIter,
            typename _Compare, typename _DiffType>
     struct __possibly_stable_multiway_merge<true, _Seq_RAIter,
                                             _RAIter, _Compare, _DiffType>
     {
       /**
        *  @param __seqs_begin Begin of the range of input sequences.
        *  @param __seqs_end End of the range of input sequences.
        *  @param __target Where the merged output is written.
        *  @param __comp Comparator.
        *  @param __length_am Length passed on to the merge routine.
        */
       void operator()(const _Seq_RAIter& __seqs_begin,
                       const _Seq_RAIter& __seqs_end,
                       const _RAIter& __target,
                       _Compare& __comp,
                       _DiffType __length_am) const
       { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
                               __length_am, __comp, sequential_tag()); }
     };


   /**
    *  @brief Non-stable case: forwards to the sequential
    *  @c multiway_merge.
    *
    *  The sequence-iterator parameter is spelled @c _Seq_RAIter (a
    *  reserved identifier) so that a user-defined macro cannot collide
    *  with it.
    */
   template<typename _Seq_RAIter, typename _RAIter,
            typename _Compare, typename _DiffType>
     struct __possibly_stable_multiway_merge<false, _Seq_RAIter,
                                             _RAIter, _Compare, _DiffType>
     {
       /**
        *  @param __seqs_begin Begin of the range of input sequences.
        *  @param __seqs_end End of the range of input sequences.
        *  @param __target Where the merged output is written.
        *  @param __comp Comparator.
        *  @param __length_am Length passed on to the merge routine.
        */
       void operator()(const _Seq_RAIter& __seqs_begin,
                       const _Seq_RAIter& __seqs_end,
                       const _RAIter& __target,
                       _Compare& __comp,
                       _DiffType __length_am) const
       { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
                        __comp, sequential_tag()); }
     };


   /**
    *  @brief PMWMS code executed by each thread.
    *
    *  Steps, per thread: copy the thread's chunk of the input into a
    *  freshly allocated raw buffer and sort it there; run the splitting
    *  functor to obtain this thread's piece of every buffer; merge those
    *  pieces back into the input sequence; finally destroy and release
    *  the buffer.
    *
    *  @param __sd Pointer to algorithm data.
    *  @param __comp Comparator.
    */
   template<bool __stable, bool __exact, typename _RAIter,
            typename _Compare>
     void
     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
                           _Compare& __comp)
     {
       typedef std::iterator_traits<_RAIter> _TraitsType;
       typedef typename _TraitsType::value_type _ValueType;
       typedef typename _TraitsType::difference_type _DifferenceType;
       typedef _ValueType* _SortingPlacesIterator;
       typedef std::vector<
         std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
         _SeqVector;

       const _ThreadIndex __iam = omp_get_thread_num();

       // Bounds of this thread's chunk, before merging.
       const _DifferenceType __chunk_begin = __sd->_M_starts[__iam];
       const _DifferenceType __length_local =
         __sd->_M_starts[__iam + 1] - __chunk_begin;

       // Raw storage to sort in; one extra slot leaves space for a
       // sentinel.
       _ValueType* const __buf =
         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
                                                 * (__length_local + 1)));
       __sd->_M_temporary[__iam] = __buf;

       // Copy the chunk there.
       const _RAIter __chunk = __sd->_M_source + __chunk_begin;
       std::uninitialized_copy(__chunk, __chunk + __length_local, __buf);

       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
         (__buf, __buf + __length_local, __comp);

       // Invariant: [__buf, __buf + __length_local) is now sorted.

       // No barrier here: synchronization is done by the splitting
       // routine.

       _DifferenceType __num_samples =
         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
       _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
         (__iam, __sd, __comp, __num_samples);

       // In one pass over the sequences: offset from the target begin,
       // length after merging, and the iterator pairs to merge.
       _DifferenceType __offset = 0, __length_am = 0;
       _SeqVector __seqs(__sd->_M_num_threads);
       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
         {
           const _Piece<_DifferenceType>& __piece =
             __sd->_M_pieces[__iam][__s];

           __length_am += (__piece._M_end - __piece._M_begin);
           __offset += __piece._M_begin;

           __seqs[__s] =
             std::make_pair(__sd->_M_temporary[__s] + __piece._M_begin,
                            __sd->_M_temporary[__s] + __piece._M_end);
         }

       __possibly_stable_multiway_merge<
         __stable, typename _SeqVector::iterator,
         _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
                                      __sd->_M_source + __offset, __comp,
                                      __length_am);

       // Other threads merge out of this buffer; wait for all of them
       // before tearing it down.
 #     pragma omp barrier

       for (_ValueType* __p = __buf; __p != __buf + __length_local; ++__p)
         __p->~_ValueType();
       ::operator delete(__buf);
     }


   /**
    *  @brief PMWMS main call.
    *
    *  Sequences of at most one element are returned untouched.  Otherwise
    *  the thread count is capped at the number of elements, the shared
    *  data is set up by one thread inside the parallel region, every
    *  thread runs parallel_sort_mwms_pu(), and the shared data is released
    *  after the region.
    *
    *  @param __begin Begin iterator of sequence.
    *  @param __end End iterator of sequence.
    *  @param __comp Comparator.
    *  @param __num_threads Number of threads to use.
    */
   template<bool __stable, bool __exact, typename _RAIter,
            typename _Compare>
     void
     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
                        _Compare __comp,
                        _ThreadIndex __num_threads)
     {
       _GLIBCXX_CALL(__end - __begin)

       typedef std::iterator_traits<_RAIter> _TraitsType;
       typedef typename _TraitsType::value_type _ValueType;
       typedef typename _TraitsType::difference_type _DifferenceType;

       _DifferenceType __n = __end - __begin;

       if (__n <= 1)
         return;

       // at least one element per thread
       if (__num_threads > __n)
         __num_threads = static_cast<_ThreadIndex>(__n);

       // shared variables
       _PMWMSSortingData<_RAIter> __sd;
       _DifferenceType* __starts = 0;
       // Number of sample slots; stays 0 when exact splitting is used.
       _DifferenceType __size = 0;

 #     pragma omp parallel num_threads(__num_threads)
       {
 #       pragma omp single
         {
           // __num_threads is shared.  It is updated here, by the one
           // thread executing the single region, instead of by every
           // thread of the team: concurrent unsynchronized writes to a
           // shared variable are a data race.  The runtime may grant
           // fewer threads than requested, never more.
           __num_threads = omp_get_num_threads();

           __sd._M_num_threads = __num_threads;
           __sd._M_source = __begin;

           __sd._M_temporary = new _ValueType*[__num_threads];

           if (!__exact)
             {
               // Raw storage for the samples; the elements are
               // constructed later by the threads.
               __size =
                 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
                 * __num_threads;
               __sd._M_samples = static_cast<_ValueType*>
                 (::operator new(__size * sizeof(_ValueType)));
             }
           else
             __sd._M_samples = 0;

           __sd._M_offsets = new _DifferenceType[__num_threads - 1];
           __sd._M_pieces
             = new std::vector<_Piece<_DifferenceType> >[__num_threads];
           for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
             __sd._M_pieces[__s].resize(__num_threads);
           __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];

           // Chunk boundaries: the first (__n % __num_threads) chunks get
           // one extra element.
           _DifferenceType __chunk_length = __n / __num_threads;
           _DifferenceType __split = __n % __num_threads;
           _DifferenceType __pos = 0;
           for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
             {
               __starts[__i] = __pos;
               __pos += ((__i < __split)
                         ? (__chunk_length + 1) : __chunk_length);
             }
           __starts[__num_threads] = __pos;
         } //single (implicit barrier: __sd is complete for all threads)

         // Now sort in parallel.
         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
       } //parallel

       delete[] __starts;
       delete[] __sd._M_temporary;

       if (!__exact)
         {
           // The samples were placement-constructed; destroy them before
           // releasing the raw storage.
           for (_DifferenceType __i = 0; __i < __size; ++__i)
             __sd._M_samples[__i].~_ValueType();
           ::operator delete(__sd._M_samples);
         }

       delete[] __sd._M_offsets;
       delete[] __sd._M_pieces;
     }


 } //namespace __gnu_parallel


 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */

__gnu_parallel::_Settings::get
static const _Settings & get()
Get the global settings.

__gnu_parallel::_Settings::sort_mwms_oversampling
unsigned int sort_mwms_oversampling
Oversampling factor for parallel std::sort (MWMS).
Definition: settings.h:234

__gnu_parallel::_ThreadIndex
uint16_t _ThreadIndex
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in that integer.
Definition: types.h:123

std::vector::begin
iterator begin() noexcept
Definition: stl_vector.h:547

std::vector
A standard container which offers fixed time access to individual elements in any order.
Definition: stl_vector.h:214

__gnu_parallel::__determine_samples
void __determine_samples(_PMWMSSortingData< _RAIter > *__sd, _DifferenceTp __num_samples)
Select _M_samples from a sequence.
Definition: multiway_mergesort.h:97

std::uninitialized_copy
_ForwardIterator uninitialized_copy(_InputIterator __first, _InputIterator __last, _ForwardIterator __result)
Copies the range [first,last) into result.
Definition: stl_uninitialized.h:107

__gnu_parallel::_SplitConsistently
Split consistently.
Definition: multiway_mergesort.h:122

__gnu_parallel::parallel_sort_mwms_pu
void parallel_sort_mwms_pu(_PMWMSSortingData< _RAIter > *__sd, _Compare &__comp)
PMWMS code executed by each thread.
Definition: multiway_mergesort.h:308

multiway_merge.h
Implementation of sequential and parallel multiway merge.

__gnu_parallel::_PMWMSSortingData
Data accessed by all threads.
Definition: multiway_mergesort.h:61

__gnu_parallel::_Piece::_M_begin
_DifferenceType _M_begin
Begin of subsequence.
Definition: multiway_mergesort.h:51

__gnu_parallel
GNU parallel code for public use.
Definition: balanced_quicksort.h:58

__gnu_parallel::_PMWMSSortingData::_M_samples
_ValueType * _M_samples
Samples.
Definition: multiway_mergesort.h:80

_GLIBCXX_CALL
#define _GLIBCXX_CALL(__n)
Macro to produce log message when entering a function.
Definition: compiletime_settings.h:44

parallel.h
End-user include file. Provides advanced settings and tuning options. This file is a GNU parallel extension to the Standard C++ Library.

__gnu_parallel::parallel_sort_mwms
void parallel_sort_mwms(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
PMWMS main call.
Definition: multiway_mergesort.h:395

basic_iterator.h
Includes the original header files concerned with iterators except for stream iterators. This file is a GNU parallel extension to the Standard C++ Library.

__gnu_parallel::multiseq_partition
void multiseq_partition(_RanSeqs __begin_seqs, _RanSeqs __end_seqs, _RankType __rank, _RankIterator __begin_offsets, _Compare __comp=std::less< typename std::iterator_traits< typename std::iterator_traits< _RanSeqs >::value_type::first_type >::value_type >())
Splits several sorted sequences at a certain global __rank, resulting in a splitting point for each sequence.
Definition: multiseq_selection.h:122

__gnu_parallel::_PMWMSSortingData::_M_source
_RAIter _M_source
Input __begin.
Definition: multiway_mergesort.h:71

__gnu_parallel::_PMWMSSortingData::_M_num_threads
_ThreadIndex _M_num_threads
Number of threads involved.
Definition: multiway_mergesort.h:68

std::pair
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:96

__gnu_parallel::_PMWMSSortingData::_M_offsets
_DifferenceType * _M_offsets
Offsets to add to the found positions.
Definition: multiway_mergesort.h:83

__gnu_parallel::__equally_split
_OutputIterator __equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s)
function to split a sequence into parts of almost equal size.
Definition: equally_split.h:48

__gnu_parallel::multiway_merge
_RAIterOut multiway_merge(_RAIterPairIterator __seqs_begin, _RAIterPairIterator __seqs_end, _RAIterOut __target, _DifferenceTp __length, _Compare __comp, __gnu_parallel::sequential_tag)
Multiway Merge Frontend.
Definition: multiway_merge.h:1418

std::make_pair
constexpr pair< typename __decay_and_strip< _T1 >::__type, typename __decay_and_strip< _T2 >::__type > make_pair(_T1 &&__x, _T2 &&__y)
A convenience wrapper for creating a pair from two objects.
Definition: stl_pair.h:276

__gnu_parallel::_PMWMSSortingData::_M_starts
_DifferenceType * _M_starts
Start indices, per thread.
Definition: multiway_mergesort.h:74

stl_algo.h

__gnu_parallel::_Piece::_M_end
_DifferenceType _M_end
End of subsequence.
Definition: multiway_mergesort.h:54

__gnu_parallel::_PMWMSSortingData::_M_temporary
_ValueType ** _M_temporary
Storage in which to sort.
Definition: multiway_mergesort.h:77

__gnu_parallel::_PMWMSSortingData::_M_pieces
std::vector< _Piece< _DifferenceType > > * _M_pieces
Pieces of data to merge [thread][__sequence].
Definition: multiway_mergesort.h:86

__gnu_parallel::_Piece
Subsequence description.
Definition: multiway_mergesort.h:46