00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042 #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
00043 #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
00044
00045 #include <parallel/basic_iterator.h>
00046 #include <bits/stl_algo.h>
00047 #include <bits/stl_function.h>
00048
00049 #include <parallel/settings.h>
00050 #include <parallel/partition.h>
00051 #include <parallel/random_number.h>
00052 #include <parallel/queue.h>
00053
00054 #if _GLIBCXX_ASSERTIONS
00055 #include <parallel/checkers.h>
00056 #endif
00057
00058 namespace __gnu_parallel
00059 {
00060
00061 template<typename _RAIter>
00062 struct _QSBThreadLocal
00063 {
00064 typedef std::iterator_traits<_RAIter> _TraitsType;
00065 typedef typename _TraitsType::difference_type _DifferenceType;
00066
00067
00068
00069 typedef std::pair<_RAIter, _RAIter> _Piece;
00070
00071
00072 _Piece _M_initial;
00073
00074
00075 _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
00076
00077
00078 _ThreadIndex _M_num_threads;
00079
00080
00081 volatile _DifferenceType* _M_elements_leftover;
00082
00083
00084 _Piece _M_global;
00085
00086
00087
00088 _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
00089 };
00090
00091
00092
00093
00094
00095
00096
00097
00098 template<typename _RAIter, typename _Compare>
00099 typename std::iterator_traits<_RAIter>::difference_type
00100 __qsb_divide(_RAIter __begin, _RAIter __end,
00101 _Compare __comp, _ThreadIndex __num_threads)
00102 {
00103 _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
00104
00105 typedef std::iterator_traits<_RAIter> _TraitsType;
00106 typedef typename _TraitsType::value_type _ValueType;
00107 typedef typename _TraitsType::difference_type _DifferenceType;
00108
00109 _RAIter __pivot_pos =
00110 __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
00111 __end - 1, __comp);
00112
00113 #if defined(_GLIBCXX_ASSERTIONS)
00114
00115 _DifferenceType __n = __end - __begin;
00116
00117 _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
00118 && !__comp(*(__begin + __n / 2),
00119 *__pivot_pos))
00120 || (!__comp(*__pivot_pos, *__begin)
00121 && !__comp(*(__end - 1), *__pivot_pos))
00122 || (!__comp(*__pivot_pos, *(__begin + __n / 2))
00123 && !__comp(*__begin, *__pivot_pos))
00124 || (!__comp(*__pivot_pos, *(__begin + __n / 2))
00125 && !__comp(*(__end - 1), *__pivot_pos))
00126 || (!__comp(*__pivot_pos, *(__end - 1))
00127 && !__comp(*__begin, *__pivot_pos))
00128 || (!__comp(*__pivot_pos, *(__end - 1))
00129 && !__comp(*(__begin + __n / 2),
00130 *__pivot_pos)));
00131 #endif
00132
00133
00134 if (__pivot_pos != (__end - 1))
00135 std::iter_swap(__pivot_pos, __end - 1);
00136 __pivot_pos = __end - 1;
00137
00138 __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool>
00139 __pred(__comp, *__pivot_pos);
00140
00141
00142 _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
00143 __pred,
00144 __num_threads);
00145
00146
00147 std::iter_swap(__begin + __split_pos, __pivot_pos);
00148 __pivot_pos = __begin + __split_pos;
00149
00150 #if _GLIBCXX_ASSERTIONS
00151 _RAIter __r;
00152 for (__r = __begin; __r != __pivot_pos; ++__r)
00153 _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
00154 for (; __r != __end; ++__r)
00155 _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
00156 #endif
00157
00158 return __split_pos;
00159 }
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169 template<typename _RAIter, typename _Compare>
00170 void
00171 __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
00172 _RAIter __begin, _RAIter __end,
00173 _Compare __comp,
00174 _ThreadIndex __iam, _ThreadIndex __num_threads,
00175 bool __parent_wait)
00176 {
00177 typedef std::iterator_traits<_RAIter> _TraitsType;
00178 typedef typename _TraitsType::value_type _ValueType;
00179 typedef typename _TraitsType::difference_type _DifferenceType;
00180
00181 _DifferenceType __n = __end - __begin;
00182
00183 if (__num_threads <= 1 || __n <= 1)
00184 {
00185 __tls[__iam]->_M_initial.first = __begin;
00186 __tls[__iam]->_M_initial.second = __end;
00187
00188 __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
00189
00190 return;
00191 }
00192
00193
00194 _DifferenceType __split_pos =
00195 __qsb_divide(__begin, __end, __comp, __num_threads);
00196
00197 #if _GLIBCXX_ASSERTIONS
00198 _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
00199 __split_pos < (__end - __begin));
00200 #endif
00201
00202 _ThreadIndex
00203 __num_threads_leftside = std::max<_ThreadIndex>
00204 (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
00205 * __num_threads / __n));
00206
00207 # pragma omp atomic
00208 *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
00209
00210
00211 # pragma omp parallel num_threads(2)
00212 {
00213 bool __wait;
00214 if(omp_get_num_threads() < 2)
00215 __wait = false;
00216 else
00217 __wait = __parent_wait;
00218
00219 # pragma omp sections
00220 {
00221 # pragma omp section
00222 {
00223 __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
00224 __iam, __num_threads_leftside, __wait);
00225 __wait = __parent_wait;
00226 }
00227
00228 # pragma omp section
00229 {
00230 __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
00231 __iam + __num_threads_leftside,
00232 __num_threads - __num_threads_leftside, __wait);
00233 __wait = __parent_wait;
00234 }
00235 }
00236 }
00237 }
00238
00239
00240
00241
00242
00243
00244
00245 template<typename _RAIter, typename _Compare>
00246 void
00247 __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
00248 _Compare& __comp, _ThreadIndex __iam,
00249 bool __wait)
00250 {
00251 typedef std::iterator_traits<_RAIter> _TraitsType;
00252 typedef typename _TraitsType::value_type _ValueType;
00253 typedef typename _TraitsType::difference_type _DifferenceType;
00254 typedef std::pair<_RAIter, _RAIter> _Piece;
00255
00256 _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
00257
00258 _DifferenceType
00259 __base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
00260 if (__base_case_n < 2)
00261 __base_case_n = 2;
00262 _ThreadIndex __num_threads = __tl._M_num_threads;
00263
00264
00265 _RandomNumber __rng(__iam + 1);
00266
00267 _Piece __current = __tl._M_initial;
00268
00269 _DifferenceType __elements_done = 0;
00270 #if _GLIBCXX_ASSERTIONS
00271 _DifferenceType __total_elements_done = 0;
00272 #endif
00273
00274 for (;;)
00275 {
00276
00277 _RAIter __begin = __current.first, __end = __current.second;
00278 _DifferenceType __n = __end - __begin;
00279
00280 if (__n > __base_case_n)
00281 {
00282
00283 _RAIter __pivot_pos = __begin + __rng(__n);
00284
00285
00286 if (__pivot_pos != (__end - 1))
00287 std::iter_swap(__pivot_pos, __end - 1);
00288 __pivot_pos = __end - 1;
00289
00290 __gnu_parallel::__binder2nd
00291 <_Compare, _ValueType, _ValueType, bool>
00292 __pred(__comp, *__pivot_pos);
00293
00294
00295 _RAIter __split_pos1, __split_pos2;
00296 __split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
00297 __pred);
00298
00299
00300 #if _GLIBCXX_ASSERTIONS
00301 _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
00302 && __split_pos1 < __end);
00303 #endif
00304
00305 if (__split_pos1 != __pivot_pos)
00306 std::iter_swap(__split_pos1, __pivot_pos);
00307 __pivot_pos = __split_pos1;
00308
00309
00310 if ((__split_pos1 + 1 - __begin) < (__n >> 7)
00311 || (__end - __split_pos1) < (__n >> 7))
00312 {
00313
00314
00315 __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
00316 <_Compare, _ValueType, _ValueType, bool>, _ValueType>
00317 __pred(__gnu_parallel::__binder1st
00318 <_Compare, _ValueType, _ValueType, bool>
00319 (__comp, *__pivot_pos));
00320
00321
00322 __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
00323 __end, __pred);
00324 }
00325 else
00326
00327 __split_pos2 = __split_pos1 + 1;
00328
00329
00330 __elements_done += (__split_pos2 - __split_pos1);
00331 #if _GLIBCXX_ASSERTIONS
00332 __total_elements_done += (__split_pos2 - __split_pos1);
00333 #endif
00334
00335 if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
00336 {
00337
00338 if ((__split_pos2) != __end)
00339 __tl._M_leftover_parts.push_front
00340 (std::make_pair(__split_pos2, __end));
00341
00342
00343 __current.second = __split_pos1;
00344 continue;
00345 }
00346 else
00347 {
00348
00349 if (__begin != __split_pos1)
00350 __tl._M_leftover_parts.push_front(std::make_pair
00351 (__begin, __split_pos1));
00352
00353 __current.first = __split_pos2;
00354
00355 continue;
00356 }
00357 }
00358 else
00359 {
00360 __gnu_sequential::sort(__begin, __end, __comp);
00361 __elements_done += __n;
00362 #if _GLIBCXX_ASSERTIONS
00363 __total_elements_done += __n;
00364 #endif
00365
00366
00367 if (__tl._M_leftover_parts.pop_front(__current))
00368 continue;
00369
00370 # pragma omp atomic
00371 *__tl._M_elements_leftover -= __elements_done;
00372
00373 __elements_done = 0;
00374
00375 #if _GLIBCXX_ASSERTIONS
00376 double __search_start = omp_get_wtime();
00377 #endif
00378
00379
00380 bool __successfully_stolen = false;
00381 while (__wait && *__tl._M_elements_leftover > 0
00382 && !__successfully_stolen
00383 #if _GLIBCXX_ASSERTIONS
00384
00385 && (omp_get_wtime() < (__search_start + 1.0))
00386 #endif
00387 )
00388 {
00389 _ThreadIndex __victim;
00390 __victim = __rng(__num_threads);
00391
00392
00393 __successfully_stolen = (__victim != __iam)
00394 && __tls[__victim]->_M_leftover_parts.pop_back(__current);
00395 if (!__successfully_stolen)
00396 __yield();
00397 #if !defined(__ICC) && !defined(__ECC)
00398 # pragma omp flush
00399 #endif
00400 }
00401
00402 #if _GLIBCXX_ASSERTIONS
00403 if (omp_get_wtime() >= (__search_start + 1.0))
00404 {
00405 sleep(1);
00406 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
00407 < (__search_start + 1.0));
00408 }
00409 #endif
00410 if (!__successfully_stolen)
00411 {
00412 #if _GLIBCXX_ASSERTIONS
00413 _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
00414 #endif
00415 return;
00416 }
00417 }
00418 }
00419 }
00420
00421
00422
00423
00424
00425
00426
00427
00428 template<typename _RAIter, typename _Compare>
00429 void
00430 __parallel_sort_qsb(_RAIter __begin, _RAIter __end,
00431 _Compare __comp, _ThreadIndex __num_threads)
00432 {
00433 _GLIBCXX_CALL(__end - __begin)
00434
00435 typedef std::iterator_traits<_RAIter> _TraitsType;
00436 typedef typename _TraitsType::value_type _ValueType;
00437 typedef typename _TraitsType::difference_type _DifferenceType;
00438 typedef std::pair<_RAIter, _RAIter> _Piece;
00439
00440 typedef _QSBThreadLocal<_RAIter> _TLSType;
00441
00442 _DifferenceType __n = __end - __begin;
00443
00444 if (__n <= 1)
00445 return;
00446
00447
00448 if (__num_threads > __n)
00449 __num_threads = static_cast<_ThreadIndex>(__n);
00450
00451
00452 _TLSType** __tls = new _TLSType*[__num_threads];
00453 _DifferenceType __queue_size = (__num_threads
00454 * (_ThreadIndex)(__rd_log2(__n) + 1));
00455 for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
00456 __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
00457
00458
00459
00460
00461
00462
00463 volatile _DifferenceType __elements_leftover = __n;
00464 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
00465 {
00466 __tls[__i]->_M_elements_leftover = &__elements_leftover;
00467 __tls[__i]->_M_num_threads = __num_threads;
00468 __tls[__i]->_M_global = std::make_pair(__begin, __end);
00469
00470
00471 __tls[__i]->_M_initial = std::make_pair(__end, __end);
00472 }
00473
00474
00475 __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
00476 __num_threads, true);
00477
00478 #if _GLIBCXX_ASSERTIONS
00479
00480 _Piece __dummy;
00481 for (_ThreadIndex __i = 1; __i < __num_threads; ++__i)
00482 _GLIBCXX_PARALLEL_ASSERT(
00483 !__tls[__i]->_M_leftover_parts.pop_back(__dummy));
00484 #endif
00485
00486 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
00487 delete __tls[__i];
00488 delete[] __tls;
00489 }
00490 }
00491
00492 #endif