00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
00033 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
00034
00035 #include <vector>
00036
00037 #include <parallel/basic_iterator.h>
00038 #include <bits/stl_algo.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/multiway_merge.h>
00041
00042 namespace __gnu_parallel
00043 {
00044
00045
00046 template<typename _DifferenceTp>
00047 struct Piece
00048 {
00049 typedef _DifferenceTp difference_type;
00050
00051
00052 difference_type begin;
00053
00054
00055 difference_type end;
00056 };
00057
00058
00059
00060
00061 template<typename RandomAccessIterator>
00062 struct PMWMSSortingData
00063 {
00064 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00065 typedef typename traits_type::value_type value_type;
00066 typedef typename traits_type::difference_type difference_type;
00067
00068
00069 thread_index_t num_threads;
00070
00071
00072 RandomAccessIterator source;
00073
00074
00075 difference_type* starts;
00076
00077
00078 value_type** temporary;
00079
00080
00081 value_type* samples;
00082
00083
00084 difference_type* offsets;
00085
00086
00087 std::vector<Piece<difference_type> >* pieces;
00088 };
00089
00090
00091
00092
00093
00094
00095
00096 template<typename RandomAccessIterator, typename _DifferenceTp>
00097 void
00098 determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
00099 _DifferenceTp num_samples)
00100 {
00101 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00102 typedef typename traits_type::value_type value_type;
00103 typedef _DifferenceTp difference_type;
00104
00105 thread_index_t iam = omp_get_thread_num();
00106
00107 difference_type* es = new difference_type[num_samples + 2];
00108
00109 equally_split(sd->starts[iam + 1] - sd->starts[iam],
00110 num_samples + 1, es);
00111
00112 for (difference_type i = 0; i < num_samples; ++i)
00113 ::new(&(sd->samples[iam * num_samples + i]))
00114 value_type(sd->source[sd->starts[iam] + es[i + 1]]);
00115
00116 delete[] es;
00117 }
00118
00119
00120 template<bool exact, typename RandomAccessIterator,
00121 typename Comparator, typename SortingPlacesIterator>
00122 struct split_consistently
00123 {
00124 };
00125
00126
00127 template<typename RandomAccessIterator, typename Comparator,
00128 typename SortingPlacesIterator>
00129 struct split_consistently
00130 <true, RandomAccessIterator, Comparator, SortingPlacesIterator>
00131 {
00132 void operator()(
00133 const thread_index_t iam,
00134 PMWMSSortingData<RandomAccessIterator>* sd,
00135 Comparator& comp,
00136 const typename
00137 std::iterator_traits<RandomAccessIterator>::difference_type
00138 num_samples)
00139 const
00140 {
00141 # pragma omp barrier
00142
00143 std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
00144 seqs(sd->num_threads);
00145 for (thread_index_t s = 0; s < sd->num_threads; s++)
00146 seqs[s] = std::make_pair(sd->temporary[s],
00147 sd->temporary[s]
00148 + (sd->starts[s + 1] - sd->starts[s]));
00149
00150 std::vector<SortingPlacesIterator> offsets(sd->num_threads);
00151
00152
00153 if (iam < sd->num_threads - 1)
00154 multiseq_partition(seqs.begin(), seqs.end(),
00155 sd->starts[iam + 1], offsets.begin(), comp);
00156
00157 for (int seq = 0; seq < sd->num_threads; seq++)
00158 {
00159
00160 if (iam < (sd->num_threads - 1))
00161 sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
00162 else
00163
00164 sd->pieces[iam][seq].end =
00165 sd->starts[seq + 1] - sd->starts[seq];
00166 }
00167
00168 # pragma omp barrier
00169
00170 for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
00171 {
00172
00173 if (iam > 0)
00174 sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
00175 else
00176
00177 sd->pieces[iam][seq].begin = 0;
00178 }
00179 }
00180 };
00181
00182
00183 template<typename RandomAccessIterator, typename Comparator,
00184 typename SortingPlacesIterator>
00185 struct split_consistently<false, RandomAccessIterator, Comparator,
00186 SortingPlacesIterator>
00187 {
00188 void operator()(
00189 const thread_index_t iam,
00190 PMWMSSortingData<RandomAccessIterator>* sd,
00191 Comparator& comp,
00192 const typename
00193 std::iterator_traits<RandomAccessIterator>::difference_type
00194 num_samples)
00195 const
00196 {
00197 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00198 typedef typename traits_type::value_type value_type;
00199 typedef typename traits_type::difference_type difference_type;
00200
00201 determine_samples(sd, num_samples);
00202
00203 # pragma omp barrier
00204
00205 # pragma omp single
00206 __gnu_sequential::sort(sd->samples,
00207 sd->samples + (num_samples * sd->num_threads),
00208 comp);
00209
00210 # pragma omp barrier
00211
00212 for (thread_index_t s = 0; s < sd->num_threads; ++s)
00213 {
00214
00215 if (num_samples * iam > 0)
00216 sd->pieces[iam][s].begin =
00217 std::lower_bound(sd->temporary[s],
00218 sd->temporary[s]
00219 + (sd->starts[s + 1] - sd->starts[s]),
00220 sd->samples[num_samples * iam],
00221 comp)
00222 - sd->temporary[s];
00223 else
00224
00225 sd->pieces[iam][s].begin = 0;
00226
00227 if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
00228 sd->pieces[iam][s].end =
00229 std::lower_bound(sd->temporary[s],
00230 sd->temporary[s]
00231 + (sd->starts[s + 1] - sd->starts[s]),
00232 sd->samples[num_samples * (iam + 1)],
00233 comp)
00234 - sd->temporary[s];
00235 else
00236
00237 sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
00238 }
00239 }
00240 };
00241
00242 template<bool stable, typename RandomAccessIterator, typename Comparator>
00243 struct possibly_stable_sort
00244 {
00245 };
00246
00247 template<typename RandomAccessIterator, typename Comparator>
00248 struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
00249 {
00250 void operator()(const RandomAccessIterator& begin,
00251 const RandomAccessIterator& end, Comparator& comp) const
00252 {
00253 __gnu_sequential::stable_sort(begin, end, comp);
00254 }
00255 };
00256
00257 template<typename RandomAccessIterator, typename Comparator>
00258 struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
00259 {
00260 void operator()(const RandomAccessIterator begin,
00261 const RandomAccessIterator end, Comparator& comp) const
00262 {
00263 __gnu_sequential::sort(begin, end, comp);
00264 }
00265 };
00266
00267 template<bool stable, typename SeqRandomAccessIterator,
00268 typename RandomAccessIterator, typename Comparator,
00269 typename DiffType>
00270 struct possibly_stable_multiway_merge
00271 {
00272 };
00273
00274 template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
00275 typename Comparator, typename DiffType>
00276 struct possibly_stable_multiway_merge
00277 <true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
00278 DiffType>
00279 {
00280 void operator()(const SeqRandomAccessIterator& seqs_begin,
00281 const SeqRandomAccessIterator& seqs_end,
00282 const RandomAccessIterator& target,
00283 Comparator& comp,
00284 DiffType length_am) const
00285 {
00286 stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
00287 sequential_tag());
00288 }
00289 };
00290
00291 template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
00292 typename Comparator, typename DiffType>
00293 struct possibly_stable_multiway_merge
00294 <false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
00295 DiffType>
00296 {
00297 void operator()(const SeqRandomAccessIterator& seqs_begin,
00298 const SeqRandomAccessIterator& seqs_end,
00299 const RandomAccessIterator& target,
00300 Comparator& comp,
00301 DiffType length_am) const
00302 {
00303 multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
00304 sequential_tag());
00305 }
00306 };
00307
00308
00309
00310
00311
00312 template<bool stable, bool exact, typename RandomAccessIterator,
00313 typename Comparator>
00314 void
00315 parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
00316 Comparator& comp)
00317 {
00318 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00319 typedef typename traits_type::value_type value_type;
00320 typedef typename traits_type::difference_type difference_type;
00321
00322 thread_index_t iam = omp_get_thread_num();
00323
00324
00325 difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
00326
00327
00328
00329 typedef value_type* SortingPlacesIterator;
00330
00331 sd->temporary[iam] =
00332 static_cast<value_type*>(
00333 ::operator new(sizeof(value_type) * (length_local + 1)));
00334
00335
00336 std::uninitialized_copy(sd->source + sd->starts[iam],
00337 sd->source + sd->starts[iam] + length_local,
00338 sd->temporary[iam]);
00339
00340 possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
00341 (sd->temporary[iam], sd->temporary[iam] + length_local, comp);
00342
00343
00344
00345
00346
00347
00348 difference_type num_samples =
00349 _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
00350 split_consistently
00351 <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
00352 (iam, sd, comp, num_samples);
00353
00354
00355 difference_type offset = 0, length_am = 0;
00356 for (thread_index_t s = 0; s < sd->num_threads; s++)
00357 {
00358 length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
00359 offset += sd->pieces[iam][s].begin;
00360 }
00361
00362 typedef std::vector<
00363 std::pair<SortingPlacesIterator, SortingPlacesIterator> >
00364 seq_vector_type;
00365 seq_vector_type seqs(sd->num_threads);
00366
00367 for (int s = 0; s < sd->num_threads; ++s)
00368 {
00369 seqs[s] =
00370 std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
00371 sd->temporary[s] + sd->pieces[iam][s].end);
00372 }
00373
00374 possibly_stable_multiway_merge<
00375 stable,
00376 typename seq_vector_type::iterator,
00377 RandomAccessIterator,
00378 Comparator, difference_type>()
00379 (seqs.begin(), seqs.end(),
00380 sd->source + offset, comp,
00381 length_am);
00382
00383 # pragma omp barrier
00384
00385 ::operator delete(sd->temporary[iam]);
00386 }
00387
00388
00389
00390
00391
00392
00393
00394
00395 template<bool stable, bool exact, typename RandomAccessIterator,
00396 typename Comparator>
00397 void
00398 parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
00399 Comparator comp,
00400 thread_index_t num_threads)
00401 {
00402 _GLIBCXX_CALL(end - begin)
00403
00404 typedef std::iterator_traits<RandomAccessIterator> traits_type;
00405 typedef typename traits_type::value_type value_type;
00406 typedef typename traits_type::difference_type difference_type;
00407
00408 difference_type n = end - begin;
00409
00410 if (n <= 1)
00411 return;
00412
00413
00414 if (num_threads > n)
00415 num_threads = static_cast<thread_index_t>(n);
00416
00417
00418 PMWMSSortingData<RandomAccessIterator> sd;
00419 difference_type* starts;
00420
00421 # pragma omp parallel num_threads(num_threads)
00422 {
00423 num_threads = omp_get_num_threads();
00424
00425 # pragma omp single
00426 {
00427 sd.num_threads = num_threads;
00428 sd.source = begin;
00429
00430 sd.temporary = new value_type*[num_threads];
00431
00432 if (!exact)
00433 {
00434 difference_type size =
00435 (_Settings::get().sort_mwms_oversampling * num_threads - 1)
00436 * num_threads;
00437 sd.samples = static_cast<value_type*>(
00438 ::operator new(size * sizeof(value_type)));
00439 }
00440 else
00441 sd.samples = NULL;
00442
00443 sd.offsets = new difference_type[num_threads - 1];
00444 sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
00445 for (int s = 0; s < num_threads; ++s)
00446 sd.pieces[s].resize(num_threads);
00447 starts = sd.starts = new difference_type[num_threads + 1];
00448
00449 difference_type chunk_length = n / num_threads;
00450 difference_type split = n % num_threads;
00451 difference_type pos = 0;
00452 for (int i = 0; i < num_threads; ++i)
00453 {
00454 starts[i] = pos;
00455 pos += (i < split) ? (chunk_length + 1) : chunk_length;
00456 }
00457 starts[num_threads] = pos;
00458 }
00459
00460
00461 parallel_sort_mwms_pu<stable, exact>(&sd, comp);
00462 }
00463
00464 delete[] starts;
00465 delete[] sd.temporary;
00466
00467 if (!exact)
00468 ::operator delete(sd.samples);
00469
00470 delete[] sd.offsets;
00471 delete[] sd.pieces;
00472 }
00473 }
00474
00475 #endif