]> gcc.gnu.org Git - gcc.git/blob - libstdc++-v3/include/parallel/multiway_merge.h
parallel_mode.html: Added reference to MCSTL.
[gcc.git] / libstdc++-v3 / include / parallel / multiway_merge.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/multiway_merge.h
32 * @brief Implementation of sequential and parallel multiway merge.
33 *
34 * Explanations on the high-speed merging routines in the appendix of
35 *
36 * P. Sanders.
37 * Fast priority queues for cached memory.
38 * ACM Journal of Experimental Algorithmics, 5, 2000.
39 *
40 * This file is a GNU parallel extension to the Standard C++ Library.
41 */
42
43 // Written by Johannes Singler.
44
45 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
46 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
47
48 #include <vector>
49
50 #include <bits/stl_algo.h>
51 #include <parallel/features.h>
52 #include <parallel/parallel.h>
53 #include <parallel/merge.h>
54 #include <parallel/losertree.h>
55 #include <parallel/timing.h>
56 #if _GLIBCXX_ASSERTIONS
57 #include <parallel/checkers.h>
58 #endif
59
60 /** @brief Length of a sequence described by a pair of iterators. */
61 #define LENGTH(s) ((s).second - (s).first)
62
63 // XXX need iterator typedefs
64 namespace __gnu_parallel
65 {
66 template<typename RandomAccessIterator, typename Comparator>
67 class guarded_iterator;
68
69 template<typename RandomAccessIterator, typename Comparator>
70 inline bool
71 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
72 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
73
74 template<typename RandomAccessIterator, typename Comparator>
75 inline bool
76 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
77 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
78
79 /** @brief Iterator wrapper supporting an implicit supremum at the end
80 of the sequence, dominating all comparisons.
81 * Deriving from RandomAccessIterator is not possible since
82 * RandomAccessIterator need not be a class.
83 */
84 template<typename RandomAccessIterator, typename Comparator>
85 class guarded_iterator
86 {
87 private:
88 /** @brief Current iterator position. */
89 RandomAccessIterator current;
90
91 /** @brief End iterator of the sequence. */
92 RandomAccessIterator end;
93
94 /** @brief Comparator. */
95 Comparator& comp;
96
97 public:
98 /** @brief Constructor. Sets iterator to beginning of sequence.
99 * @param begin Begin iterator of sequence.
100 * @param end End iterator of sequence.
101 * @param comp Comparator provided for associated overloaded
102 * compare operators. */
103 inline guarded_iterator(RandomAccessIterator begin,
104 RandomAccessIterator end, Comparator& comp)
105 : current(begin), end(end), comp(comp)
106 { }
107
108 /** @brief Pre-increment operator.
109 * @return This. */
110 inline guarded_iterator<RandomAccessIterator, Comparator>&
111 operator++()
112 {
113 ++current;
114 return *this;
115 }
116
117 /** @brief Dereference operator.
118 * @return Referenced element. */
119 inline typename std::iterator_traits<RandomAccessIterator>::value_type
120 operator*()
121 { return *current; }
122
123 /** @brief Convert to wrapped iterator.
124 * @return Wrapped iterator. */
125 inline operator RandomAccessIterator()
126 { return current; }
127
128 friend bool
129 operator< <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
130
131 friend bool
132 operator<= <RandomAccessIterator, Comparator>(guarded_iterator<RandomAccessIterator, Comparator>& bi1, guarded_iterator<RandomAccessIterator, Comparator>& bi2);
133 };
134
135 /** @brief Compare two elements referenced by guarded iterators.
136 * @param bi1 First iterator.
137 * @param bi2 Second iterator.
138 * @return @c True if less. */
139 template<typename RandomAccessIterator, typename Comparator>
140 inline bool
141 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
142 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
143 {
144 if (bi1.current == bi1.end) //bi1 is sup
145 return bi2.current == bi2.end; //bi2 is not sup
146 if (bi2.current == bi2.end) //bi2 is sup
147 return true;
148 return (bi1.comp)(*bi1, *bi2); //normal compare
149 }
150
151 /** @brief Compare two elements referenced by guarded iterators.
152 * @param bi1 First iterator.
153 * @param bi2 Second iterator.
154 * @return @c True if less equal. */
155 template<typename RandomAccessIterator, typename Comparator>
156 inline bool
157 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
158 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
159 {
160 if (bi2.current == bi2.end) //bi1 is sup
161 return bi1.current != bi1.end; //bi2 is not sup
162 if (bi1.current == bi1.end) //bi2 is sup
163 return false;
164 return !(bi1.comp)(*bi2, *bi1); //normal compare
165 }
166
167 template<typename RandomAccessIterator, typename Comparator>
168 class unguarded_iterator;
169
170 template<typename RandomAccessIterator, typename Comparator>
171 inline bool
172 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
173 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
174
175 template<typename RandomAccessIterator, typename Comparator>
176 inline bool
177 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
178 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
179
180 template<typename RandomAccessIterator, typename Comparator>
181 class unguarded_iterator
182 {
183 private:
184 /** @brief Current iterator position. */
185 RandomAccessIterator& current;
186 /** @brief Comparator. */
187 mutable Comparator& comp;
188
189 public:
190 /** @brief Constructor. Sets iterator to beginning of sequence.
191 * @param begin Begin iterator of sequence.
192 * @param end Unused, only for compatibility.
193 * @param comp Unused, only for compatibility. */
194 inline unguarded_iterator(RandomAccessIterator begin,
195 RandomAccessIterator end, Comparator& comp)
196 : current(begin), comp(comp)
197 { }
198
199 /** @brief Pre-increment operator.
200 * @return This. */
201 inline unguarded_iterator<RandomAccessIterator, Comparator>&
202 operator++()
203 {
204 current++;
205 return *this;
206 }
207
208 /** @brief Dereference operator.
209 * @return Referenced element. */
210 inline typename std::iterator_traits<RandomAccessIterator>::value_type
211 operator*()
212 { return *current; }
213
214 /** @brief Convert to wrapped iterator.
215 * @return Wrapped iterator. */
216 inline
217 operator RandomAccessIterator()
218 { return current; }
219
220 friend bool
221 operator< <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
222
223 friend bool
224 operator<= <RandomAccessIterator, Comparator>(unguarded_iterator<RandomAccessIterator, Comparator>& bi1, unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
225 };
226
227 /** @brief Compare two elements referenced by unguarded iterators.
228 * @param bi1 First iterator.
229 * @param bi2 Second iterator.
230 * @return @c True if less. */
231 template<typename RandomAccessIterator, typename Comparator>
232 inline bool
233 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
234 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
235 {
236 // Normal compare.
237 return (bi1.comp)(*bi1, *bi2);
238 }
239
240 /** @brief Compare two elements referenced by unguarded iterators.
241 * @param bi1 First iterator.
242 * @param bi2 Second iterator.
243 * @return @c True if less equal. */
244 template<typename RandomAccessIterator, typename Comparator>
245 inline bool
246 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
247 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
248 {
249 // Normal compare.
250 return !(bi1.comp)(*bi2, *bi1);
251 }
252
253 /** Prepare a set of sequences to be merged without a (end) guard
254 * @param seqs_begin
255 * @param seqs_end
256 * @param comp
257 * @param min_sequence
258 * @param stable
259 * @pre (seqs_end - seqs_begin > 0) */
260 template<typename RandomAccessIteratorIterator, typename Comparator>
261 typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
262 prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
263 RandomAccessIteratorIterator seqs_end, Comparator comp,
264 int& min_sequence, bool stable)
265 {
266 _GLIBCXX_CALL(seqs_end - seqs_begin)
267
268 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
269 RandomAccessIterator1;
270 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
271 value_type;
272 typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
273 difference_type;
274
275 if ((*seqs_begin).first == (*seqs_begin).second)
276 {
277 // Empty sequence found, it's the first one.
278 min_sequence = 0;
279 return -1;
280 }
281
282 // Last element in sequence.
283 value_type min = *((*seqs_begin).second - 1);
284 min_sequence = 0;
285 for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; s++)
286 {
287 if ((*s).first == (*s).second)
288 {
289 // Empty sequence found.
290 min_sequence = static_cast<int>(s - seqs_begin);
291 return -1;
292 }
293
294 // Last element in sequence.
295 const value_type& v = *((*s).second - 1);
296 if (comp(v, min)) //strictly smaller
297 {
298 min = v;
299 min_sequence = static_cast<int>(s - seqs_begin);
300 }
301 }
302
303 difference_type overhang_size = 0;
304
305 int s = 0;
306 for (s = 0; s <= min_sequence; s++)
307 {
308 RandomAccessIterator1 split;
309 if (stable)
310 split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
311 min, comp);
312 else
313 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
314 min, comp);
315
316 overhang_size += seqs_begin[s].second - split;
317 }
318
319 for (; s < (seqs_end - seqs_begin); s++)
320 {
321 RandomAccessIterator1 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second, min, comp);
322 overhang_size += seqs_begin[s].second - split;
323 }
324
325 // So many elements will be left over afterwards.
326 return overhang_size;
327 }
328
329 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
330 * @param seqs_begin
331 * @param seqs_end
332 * @param comp */
333 template<typename RandomAccessIteratorIterator, typename Comparator>
334 typename std::iterator_traits<typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type>::difference_type
335 prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
336 RandomAccessIteratorIterator seqs_end,
337 Comparator comp)
338 {
339 _GLIBCXX_CALL(seqs_end - seqs_begin)
340
341 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
342 RandomAccessIterator1;
343 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
344 value_type;
345 typedef typename std::iterator_traits<RandomAccessIterator1>::difference_type
346 difference_type;
347
348 // Last element in sequence.
349 value_type max;
350 bool max_found = false;
351 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
352 {
353 if ((*s).first == (*s).second)
354 continue;
355
356 // Last element in sequence.
357 value_type& v = *((*s).second - 1);
358
359 // Strictly greater.
360 if (!max_found || comp(max, v))
361 max = v;
362 max_found = true;
363 }
364
365 difference_type overhang_size = 0;
366
367 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
368 {
369 RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp);
370 overhang_size += (*s).second - split;
371
372 // Set sentinel.
373 *((*s).second) = max;
374 }
375
376 // So many elements will be left over afterwards.
377 return overhang_size;
378 }
379
380 /** @brief Highly efficient 3-way merging procedure.
381 * @param seqs_begin Begin iterator of iterator pair input sequence.
382 * @param seqs_end End iterator of iterator pair input sequence.
383 * @param target Begin iterator out output sequence.
384 * @param comp Comparator.
385 * @param length Maximum length to merge.
386 * @param stable Unused, stable anyway.
387 * @return End iterator of output sequence. */
388 template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
389 RandomAccessIterator3
390 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
391 {
392 _GLIBCXX_CALL(length);
393
394 typedef _DifferenceTp difference_type;
395
396 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
397 RandomAccessIterator1;
398 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
399 value_type;
400
401 if (length == 0)
402 return target;
403
404 iterator<RandomAccessIterator1, Comparator>
405 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
406 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
407 seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
408
409 if (seq0 <= seq1)
410 {
411 if (seq1 <= seq2)
412 goto s012;
413 else
414 if (seq2 < seq0)
415 goto s201;
416 else
417 goto s021;
418 }
419 else
420 {
421 if (seq1 <= seq2)
422 {
423 if (seq0 <= seq2)
424 goto s102;
425 else
426 goto s120;
427 }
428 else
429 goto s210;
430 }
431
432 #define Merge3Case(a,b,c,c0,c1) \
433 s ## a ## b ## c : \
434 *target = *seq ## a; \
435 ++target; \
436 length--; \
437 ++seq ## a; \
438 if (length == 0) goto finish; \
439 if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
440 if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
441 goto s ## b ## c ## a;
442
443 Merge3Case(0, 1, 2, <=, <=);
444 Merge3Case(1, 2, 0, <=, < );
445 Merge3Case(2, 0, 1, < , < );
446 Merge3Case(1, 0, 2, < , <=);
447 Merge3Case(0, 2, 1, <=, <=);
448 Merge3Case(2, 1, 0, < , < );
449
450 #undef Merge3Case
451
452 finish:
453 ;
454
455 seqs_begin[0].first = seq0;
456 seqs_begin[1].first = seq1;
457 seqs_begin[2].first = seq2;
458
459 return target;
460 }
461
462 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
463 RandomAccessIterator3
464 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
465 {
466 _GLIBCXX_CALL(length);
467
468 typedef _DifferenceTp difference_type;
469 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
470 RandomAccessIterator1;
471 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
472 value_type;
473
474 int min_seq;
475 RandomAccessIterator3 target_end;
476
477 // Stable anyway.
478 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
479
480 difference_type total_length = 0;
481 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
482 total_length += LENGTH(*s);
483
484 if (overhang != -1)
485 {
486 difference_type unguarded_length = std::min(length, total_length - overhang);
487 target_end = multiway_merge_3_variant<unguarded_iterator>
488 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
489 overhang = length - unguarded_length;
490 }
491 else
492 {
493 // Empty sequence found.
494 overhang = length;
495 target_end = target;
496 }
497
498 #if _GLIBCXX_ASSERTIONS
499 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
500 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
501 #endif
502
503 switch (min_seq)
504 {
505 case 0:
506 // Iterators will be advanced accordingly.
507 target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
508 seqs_begin[2].first, seqs_begin[2].second,
509 target_end, overhang, comp);
510 break;
511 case 1:
512 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
513 seqs_begin[2].first, seqs_begin[2].second,
514 target_end, overhang, comp);
515 break;
516 case 2:
517 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
518 seqs_begin[1].first, seqs_begin[1].second,
519 target_end, overhang, comp);
520 break;
521 default:
522 _GLIBCXX_PARALLEL_ASSERT(false);
523 }
524
525 #if _GLIBCXX_ASSERTIONS
526 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
527 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
528 #endif
529
530 return target_end;
531 }
532
533 /** @brief Highly efficient 4-way merging procedure.
534 * @param seqs_begin Begin iterator of iterator pair input sequence.
535 * @param seqs_end End iterator of iterator pair input sequence.
536 * @param target Begin iterator out output sequence.
537 * @param comp Comparator.
538 * @param length Maximum length to merge.
539 * @param stable Unused, stable anyway.
540 * @return End iterator of output sequence. */
541 template<template<typename RAI, typename C> class iterator, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
542 RandomAccessIterator3
543 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
544 {
545 _GLIBCXX_CALL(length);
546 typedef _DifferenceTp difference_type;
547
548 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
549 RandomAccessIterator1;
550 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
551 value_type;
552
553 iterator<RandomAccessIterator1, Comparator>
554 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
555 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
556 seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
557 seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
558
559 #define Decision(a,b,c,d) { \
560 if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
561 if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
562 if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
563 goto s ## a ## b ## c ## d; }
564
565 if (seq0 <= seq1)
566 {
567 if (seq1 <= seq2)
568 Decision(0,1,2,3)
569 else
570 if (seq2 < seq0)
571 Decision(2,0,1,3)
572 else
573 Decision(0,2,1,3)
574 }
575 else
576 {
577 if (seq1 <= seq2)
578 {
579 if (seq0 <= seq2)
580 Decision(1,0,2,3)
581 else
582 Decision(1,2,0,3)
583 }
584 else
585 Decision(2,1,0,3)
586 }
587
588 #define Merge4Case(a,b,c,d,c0,c1,c2) \
589 s ## a ## b ## c ## d: \
590 if (length == 0) goto finish; \
591 *target = *seq ## a; \
592 ++target; \
593 length--; \
594 ++seq ## a; \
595 if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
596 if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
597 if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
598 goto s ## b ## c ## d ## a;
599
600 Merge4Case(0, 1, 2, 3, <=, <=, <=);
601 Merge4Case(0, 1, 3, 2, <=, <=, <=);
602 Merge4Case(0, 2, 1, 3, <=, <=, <=);
603 Merge4Case(0, 2, 3, 1, <=, <=, <=);
604 Merge4Case(0, 3, 1, 2, <=, <=, <=);
605 Merge4Case(0, 3, 2, 1, <=, <=, <=);
606 Merge4Case(1, 0, 2, 3, < , <=, <=);
607 Merge4Case(1, 0, 3, 2, < , <=, <=);
608 Merge4Case(1, 2, 0, 3, <=, < , <=);
609 Merge4Case(1, 2, 3, 0, <=, <=, < );
610 Merge4Case(1, 3, 0, 2, <=, < , <=);
611 Merge4Case(1, 3, 2, 0, <=, <=, < );
612 Merge4Case(2, 0, 1, 3, < , < , <=);
613 Merge4Case(2, 0, 3, 1, < , <=, < );
614 Merge4Case(2, 1, 0, 3, < , < , <=);
615 Merge4Case(2, 1, 3, 0, < , <=, < );
616 Merge4Case(2, 3, 0, 1, <=, < , < );
617 Merge4Case(2, 3, 1, 0, <=, < , < );
618 Merge4Case(3, 0, 1, 2, < , < , < );
619 Merge4Case(3, 0, 2, 1, < , < , < );
620 Merge4Case(3, 1, 0, 2, < , < , < );
621 Merge4Case(3, 1, 2, 0, < , < , < );
622 Merge4Case(3, 2, 0, 1, < , < , < );
623 Merge4Case(3, 2, 1, 0, < , < , < );
624
625 #undef Merge4Case
626 #undef Decision
627
628 finish:
629 ;
630
631 seqs_begin[0].first = seq0;
632 seqs_begin[1].first = seq1;
633 seqs_begin[2].first = seq2;
634 seqs_begin[3].first = seq3;
635
636 return target;
637 }
638
639 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
640 RandomAccessIterator3
641 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
642 {
643 _GLIBCXX_CALL(length);
644 typedef _DifferenceTp difference_type;
645
646 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
647 RandomAccessIterator1;
648 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
649 value_type;
650
651 int min_seq;
652 RandomAccessIterator3 target_end;
653
654 // Stable anyway.
655 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
656
657 difference_type total_length = 0;
658 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
659 total_length += LENGTH(*s);
660
661 if (overhang != -1)
662 {
663 difference_type unguarded_length = std::min(length, total_length - overhang);
664 target_end = multiway_merge_4_variant<unguarded_iterator>
665 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
666 overhang = length - unguarded_length;
667 }
668 else
669 {
670 // Empty sequence found.
671 overhang = length;
672 target_end = target;
673 }
674
675 #if _GLIBCXX_ASSERTIONS
676 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
677 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
678 #endif
679
680 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > one_missing(seqs_begin, seqs_end);
681 one_missing.erase(one_missing.begin() + min_seq); //remove
682
683 target_end = multiway_merge_3_variant<guarded_iterator>(one_missing.begin(), one_missing.end(), target_end, comp, overhang, stable);
684
685 // Insert back again.
686 one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
687 // Write back modified iterators.
688 copy(one_missing.begin(), one_missing.end(), seqs_begin);
689
690 #if _GLIBCXX_ASSERTIONS
691 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
692 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
693 #endif
694
695 return target_end;
696 }
697
698 /** @brief Basic multi-way merging procedure.
699 *
700 * The head elements are kept in a sorted array, new heads are
701 * inserted linearly.
702 * @param seqs_begin Begin iterator of iterator pair input sequence.
703 * @param seqs_end End iterator of iterator pair input sequence.
704 * @param target Begin iterator out output sequence.
705 * @param comp Comparator.
706 * @param length Maximum length to merge.
707 * @param stable Stable merging incurs a performance penalty.
708 * @return End iterator of output sequence.
709 */
710 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
711 RandomAccessIterator3
712 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
713 {
714 _GLIBCXX_CALL(length)
715
716 typedef _DifferenceTp difference_type;
717 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
718 RandomAccessIterator1;
719 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
720 value_type;
721
722 // Num remaining pieces.
723 int k = static_cast<int>(seqs_end - seqs_begin), nrp;
724
725 value_type* pl = new value_type[k];
726 int* source = new int[k];
727 difference_type total_length = 0;
728
729 #define POS(i) seqs_begin[(i)].first
730 #define STOPS(i) seqs_begin[(i)].second
731
732 // Write entries into queue.
733 nrp = 0;
734 for (int pi = 0; pi < k; pi++)
735 {
736 if (STOPS(pi) != POS(pi))
737 {
738 pl[nrp] = *(POS(pi));
739 source[nrp] = pi;
740 nrp++;
741 total_length += LENGTH(seqs_begin[pi]);
742 }
743 }
744
745 if (stable)
746 {
747 for (int k = 0; k < nrp - 1; k++)
748 for (int pi = nrp - 1; pi > k; pi--)
749 if (comp(pl[pi], pl[pi - 1]) ||
750 (!comp(pl[pi - 1], pl[pi]) && source[pi] < source[pi - 1]))
751 {
752 std::swap(pl[pi - 1], pl[pi]);
753 std::swap(source[pi - 1], source[pi]);
754 }
755 }
756 else
757 {
758 for (int k = 0; k < nrp - 1; k++)
759 for (int pi = nrp - 1; pi > k; pi--)
760 if (comp(pl[pi], pl[pi-1]))
761 {
762 std::swap(pl[pi-1], pl[pi]);
763 std::swap(source[pi-1], source[pi]);
764 }
765 }
766
767 // Iterate.
768 if (stable)
769 {
770 int j;
771 while (nrp > 0 && length > 0)
772 {
773 if (source[0] < source[1])
774 {
775 // pl[0] <= pl[1]
776 while ((nrp == 1 || !(comp(pl[1], pl[0]))) && length > 0)
777 {
778 *target = pl[0];
779 ++target;
780 ++POS(source[0]);
781 length--;
782 if (POS(source[0]) == STOPS(source[0]))
783 {
784 // Move everything to the left.
785 for (int s = 0; s < nrp - 1; s++)
786 {
787 pl[s] = pl[s + 1];
788 source[s] = source[s + 1];
789 }
790 nrp--;
791 break;
792 }
793 else
794 pl[0] = *(POS(source[0]));
795 }
796 }
797 else
798 {
799 // pl[0] < pl[1]
800 while ((nrp == 1 || comp(pl[0], pl[1])) && length > 0)
801 {
802 *target = pl[0];
803 ++target;
804 ++POS(source[0]);
805 length--;
806 if (POS(source[0]) == STOPS(source[0]))
807 {
808 for (int s = 0; s < nrp - 1; s++)
809 {
810 pl[s] = pl[s + 1];
811 source[s] = source[s + 1];
812 }
813 nrp--;
814 break;
815 }
816 else
817 pl[0] = *(POS(source[0]));
818 }
819 }
820
821 // Sink down.
822 j = 1;
823 while ((j < nrp) && (comp(pl[j], pl[j - 1]) ||
824 (!comp(pl[j - 1], pl[j]) && (source[j] < source[j - 1]))))
825 {
826 std::swap(pl[j - 1], pl[j]);
827 std::swap(source[j - 1], source[j]);
828 j++;
829 }
830 }
831 }
832 else
833 {
834 int j;
835 while (nrp > 0 && length > 0)
836 {
837 // pl[0] <= pl[1]
838 while (nrp == 1 || (!comp(pl[1], pl[0])) && length > 0)
839 {
840 *target = pl[0];
841 ++target;
842 ++POS(source[0]);
843 length--;
844 if (POS(source[0]) == STOPS(source[0]))
845 {
846 for (int s = 0; s < (nrp - 1); s++)
847 {
848 pl[s] = pl[s + 1];
849 source[s] = source[s + 1];
850 }
851 nrp--;
852 break;
853 }
854 else
855 pl[0] = *(POS(source[0]));
856 }
857
858 // Sink down.
859 j = 1;
860 while ((j < nrp) && comp(pl[j], pl[j - 1]))
861 {
862 std::swap(pl[j - 1], pl[j]);
863 std::swap(source[j - 1], source[j]);
864 j++;
865 }
866 }
867 }
868
869 delete[] pl;
870 delete[] source;
871
872 return target;
873 }
874
875 /** @brief Multi-way merging procedure for a high branching factor,
876 * guarded case.
877 *
878 * The head elements are kept in a loser tree.
879 * @param seqs_begin Begin iterator of iterator pair input sequence.
880 * @param seqs_end End iterator of iterator pair input sequence.
881 * @param target Begin iterator out output sequence.
882 * @param comp Comparator.
883 * @param length Maximum length to merge.
884 * @param stable Stable merging incurs a performance penalty.
885 * @return End iterator of output sequence.
886 */
887 template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
888 RandomAccessIterator3
889 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
890 {
891 _GLIBCXX_CALL(length)
892
893 typedef _DifferenceTp difference_type;
894 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
895 RandomAccessIterator1;
896 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
897 value_type;
898
899 int k = static_cast<int>(seqs_end - seqs_begin);
900
901 LT lt(k, comp);
902
903 difference_type total_length = 0;
904
905 for (int t = 0; t < k; t++)
906 {
907 if (stable)
908 {
909 if (seqs_begin[t].first == seqs_begin[t].second)
910 lt.insert_start_stable(value_type(), t, true);
911 else
912 lt.insert_start_stable(*seqs_begin[t].first, t, false);
913 }
914 else
915 {
916 if (seqs_begin[t].first == seqs_begin[t].second)
917 lt.insert_start(value_type(), t, true);
918 else
919 lt.insert_start(*seqs_begin[t].first, t, false);
920 }
921
922 total_length += LENGTH(seqs_begin[t]);
923 }
924
925 if (stable)
926 lt.init_stable();
927 else
928 lt.init();
929
930 total_length = std::min(total_length, length);
931
932 int source;
933
934 if (stable)
935 {
936 for (difference_type i = 0; i < total_length; i++)
937 {
938 // Take out.
939 source = lt.get_min_source();
940
941 *(target++) = *(seqs_begin[source].first++);
942
943 // Feed.
944 if (seqs_begin[source].first == seqs_begin[source].second)
945 lt.delete_min_insert_stable(value_type(), true);
946 else
947 // Replace from same source.
948 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
949
950 }
951 }
952 else
953 {
954 for (difference_type i = 0; i < total_length; i++)
955 {
956 //take out
957 source = lt.get_min_source();
958
959 *(target++) = *(seqs_begin[source].first++);
960
961 // Feed.
962 if (seqs_begin[source].first == seqs_begin[source].second)
963 lt.delete_min_insert(value_type(), true);
964 else
965 // Replace from same source.
966 lt.delete_min_insert(*seqs_begin[source].first, false);
967 }
968 }
969
970 return target;
971 }
972
973 /** @brief Multi-way merging procedure for a high branching factor,
974 * unguarded case.
975 *
976 * The head elements are kept in a loser tree.
977 * @param seqs_begin Begin iterator of iterator pair input sequence.
978 * @param seqs_end End iterator of iterator pair input sequence.
979 * @param target Begin iterator out output sequence.
980 * @param comp Comparator.
981 * @param length Maximum length to merge.
982 * @param stable Stable merging incurs a performance penalty.
983 * @return End iterator of output sequence.
984 * @pre No input will run out of elements during the merge.
985 */
986 template<typename LT, typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
987 RandomAccessIterator3
988 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
989 {
990 _GLIBCXX_CALL(length)
991 typedef _DifferenceTp difference_type;
992
993 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
994 RandomAccessIterator1;
995 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
996 value_type;
997
998 int k = seqs_end - seqs_begin;
999
1000 LT lt(k, comp);
1001
1002 difference_type total_length = 0;
1003
1004 for (int t = 0; t < k; t++)
1005 {
1006 #if _GLIBCXX_ASSERTIONS
1007 _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
1008 #endif
1009 if (stable)
1010 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1011 else
1012 lt.insert_start(*seqs_begin[t].first, t, false);
1013
1014 total_length += LENGTH(seqs_begin[t]);
1015 }
1016
1017 if (stable)
1018 lt.init_stable();
1019 else
1020 lt.init();
1021
1022 // Do not go past end.
1023 length = std::min(total_length, length);
1024
1025 int source;
1026
1027 #if _GLIBCXX_ASSERTIONS
1028 difference_type i = 0;
1029 #endif
1030
1031 if (stable)
1032 {
1033 RandomAccessIterator3 target_end = target + length;
1034 while (target < target_end)
1035 {
1036 // Take out.
1037 source = lt.get_min_source();
1038
1039 #if _GLIBCXX_ASSERTIONS
1040 _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1041 #endif
1042
1043 *(target++) = *(seqs_begin[source].first++);
1044
1045 #if _GLIBCXX_ASSERTIONS
1046 _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i == length - 1));
1047 i++;
1048 #endif
1049 // Feed.
1050 // Replace from same source.
1051 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1052
1053 }
1054 }
1055 else
1056 {
1057 RandomAccessIterator3 target_end = target + length;
1058 while (target < target_end)
1059 {
1060 // Take out.
1061 source = lt.get_min_source();
1062
1063 #if _GLIBCXX_ASSERTIONS
1064 if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
1065 printf(" %i %i %i\n", length, i, source);
1066 _GLIBCXX_PARALLEL_ASSERT(i == 0 || !comp(*(seqs_begin[source].first), *(target - 1)));
1067 #endif
1068
1069 *(target++) = *(seqs_begin[source].first++);
1070
1071 #if _GLIBCXX_ASSERTIONS
1072 if (!((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1)))
1073 printf(" %i %i %i\n", length, i, source);
1074 _GLIBCXX_PARALLEL_ASSERT((seqs_begin[source].first != seqs_begin[source].second) || (i >= length - 1));
1075 i++;
1076 #endif
1077 // Feed.
1078 // Replace from same source.
1079 lt.delete_min_insert(*seqs_begin[source].first, false);
1080 }
1081 }
1082
1083 return target;
1084 }
1085
1086 template<typename _ValueTp, class Comparator>
1087 struct loser_tree_traits
1088 {
1089 typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT;
1090 };
1091
1092
1093 /*#define NO_POINTER(T) \
1094 template<typename Comparator> \
1095 struct loser_tree_traits<T, Comparator> \
1096 { \
1097 typedef LoserTreePointer<T, Comparator> LT; \
1098 };*/
1099 //
1100 // NO_POINTER(unsigned char)
1101 // NO_POINTER(char)
1102 // NO_POINTER(unsigned short)
1103 // NO_POINTER(short)
1104 // NO_POINTER(unsigned int)
1105 // NO_POINTER(int)
1106 // NO_POINTER(unsigned long)
1107 // NO_POINTER(long)
1108 // NO_POINTER(unsigned long long)
1109 // NO_POINTER(long long)
1110 //
1111 // #undef NO_POINTER
1112
1113 template<typename _ValueTp, class Comparator>
1114 struct loser_tree_traits_unguarded
1115 {
1116 typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
1117 };
1118
1119 /*#define NO_POINTER_UNGUARDED(T) \
1120 template<typename Comparator> \
1121 struct loser_tree_traits_unguarded<T, Comparator> \
1122 { \
1123 typedef LoserTreePointerUnguarded<T, Comparator> LT; \
1124 };*/
1125 //
1126 // NO_POINTER_UNGUARDED(unsigned char)
1127 // NO_POINTER_UNGUARDED(char)
1128 // NO_POINTER_UNGUARDED(unsigned short)
1129 // NO_POINTER_UNGUARDED(short)
1130 // NO_POINTER_UNGUARDED(unsigned int)
1131 // NO_POINTER_UNGUARDED(int)
1132 // NO_POINTER_UNGUARDED(unsigned long)
1133 // NO_POINTER_UNGUARDED(long)
1134 // NO_POINTER_UNGUARDED(unsigned long long)
1135 // NO_POINTER_UNGUARDED(long long)
1136 //
1137 // #undef NO_POINTER_UNGUARDED
1138
1139 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1140 RandomAccessIterator3
1141 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
1142 {
1143 _GLIBCXX_CALL(length)
1144
1145 typedef _DifferenceTp difference_type;
1146
1147 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1148 RandomAccessIterator1;
1149 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1150 value_type;
1151
1152 int min_seq;
1153 RandomAccessIterator3 target_end;
1154 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
1155 comp, min_seq, stable);
1156
1157 difference_type total_length = 0;
1158 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1159 total_length += LENGTH(*s);
1160
1161 if (overhang != -1)
1162 {
1163 difference_type unguarded_length = std::min(length, total_length - overhang);
1164 target_end = multiway_merge_loser_tree_unguarded
1165 <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
1166 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1167 overhang = length - unguarded_length;
1168 }
1169 else
1170 {
1171 // Empty sequence found.
1172 overhang = length;
1173 target_end = target;
1174 }
1175
1176 #if _GLIBCXX_ASSERTIONS
1177 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1178 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1179 #endif
1180
1181 target_end = multiway_merge_loser_tree
1182 <typename loser_tree_traits<value_type, Comparator>::LT>
1183 (seqs_begin, seqs_end, target_end, comp, overhang, stable);
1184
1185 #if _GLIBCXX_ASSERTIONS
1186 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1187 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1188 #endif
1189
1190 return target_end;
1191 }
1192
1193 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1194 RandomAccessIterator3
1195 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
1196 {
1197 _GLIBCXX_CALL(length)
1198
1199 typedef _DifferenceTp difference_type;
1200 typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
1201 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1202 RandomAccessIterator1;
1203 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1204 value_type;
1205 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1206 RandomAccessIterator1;
1207
1208 RandomAccessIterator3 target_end;
1209 difference_type overhang = prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
1210
1211 difference_type total_length = 0;
1212 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1213 {
1214 total_length += LENGTH(*s);
1215
1216 // Sentinel spot.
1217 (*s).second++;
1218 }
1219
1220 difference_type unguarded_length = std::min(length, total_length - overhang);
1221 target_end = multiway_merge_loser_tree_unguarded
1222 <typename loser_tree_traits_unguarded<value_type, Comparator>::LT>
1223 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1224 overhang = length - unguarded_length;
1225
1226 #if _GLIBCXX_ASSERTIONS
1227 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1228 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1229 #endif
1230
1231 // Copy rest stable.
1232 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end && overhang > 0; s++)
1233 {
1234 // Restore.
1235 (*s).second--;
1236 difference_type local_length = std::min((difference_type)overhang, (difference_type)LENGTH(*s));
1237 target_end = std::copy((*s).first, (*s).first + local_length, target_end);
1238 (*s).first += local_length;
1239 overhang -= local_length;
1240 }
1241
1242 #if _GLIBCXX_ASSERTIONS
1243 _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
1244 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1245 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1246 #endif
1247
1248 return target_end;
1249 }
1250
1251 /** @brief Sequential multi-way merging switch.
1252 *
1253 * The decision if based on the branching factor and runtime settings.
1254 * @param seqs_begin Begin iterator of iterator pair input sequence.
1255 * @param seqs_end End iterator of iterator pair input sequence.
1256 * @param target Begin iterator out output sequence.
1257 * @param comp Comparator.
1258 * @param length Maximum length to merge.
1259 * @param stable Stable merging incurs a performance penalty.
1260 * @param sentinel The sequences have a sentinel element.
1261 * @return End iterator of output sequence. */
1262 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1263 RandomAccessIterator3
1264 multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel, sequential_tag)
1265 {
1266 _GLIBCXX_CALL(length)
1267
1268 typedef _DifferenceTp difference_type;
1269 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1270 RandomAccessIterator1;
1271 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1272 value_type;
1273
1274 #if _GLIBCXX_ASSERTIONS
1275 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
1276 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
1277 #endif
1278
1279 RandomAccessIterator3 return_target = target;
1280 int k = static_cast<int>(seqs_end - seqs_begin);
1281
1282 Settings::MultiwayMergeAlgorithm mwma = Settings::multiway_merge_algorithm;
1283
1284 if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
1285 mwma = Settings::LOSER_TREE_COMBINED;
1286
1287 switch (k)
1288 {
1289 case 0:
1290 break;
1291 case 1:
1292 return_target = std::copy(seqs_begin[0].first, seqs_begin[0].first + length, target);
1293 seqs_begin[0].first += length;
1294 break;
1295 case 2:
1296 return_target = merge_advance(seqs_begin[0].first, seqs_begin[0].second, seqs_begin[1].first, seqs_begin[1].second, target, length, comp);
1297 break;
1298 case 3:
1299 switch (mwma)
1300 {
1301 case Settings::LOSER_TREE_COMBINED:
1302 return_target = multiway_merge_3_combined(seqs_begin, seqs_end, target, comp, length, stable);
1303 break;
1304 case Settings::LOSER_TREE_SENTINEL:
1305 return_target = multiway_merge_3_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1306 break;
1307 default:
1308 return_target = multiway_merge_3_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1309 break;
1310 }
1311 break;
1312 case 4:
1313 switch (mwma)
1314 {
1315 case Settings::LOSER_TREE_COMBINED:
1316 return_target = multiway_merge_4_combined(seqs_begin, seqs_end, target, comp, length, stable);
1317 break;
1318 case Settings::LOSER_TREE_SENTINEL:
1319 return_target = multiway_merge_4_variant<unguarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1320 break;
1321 default:
1322 return_target = multiway_merge_4_variant<guarded_iterator>(seqs_begin, seqs_end, target, comp, length, stable);
1323 break;
1324 }
1325 break;
1326 default:
1327 {
1328 switch (mwma)
1329 {
1330 case Settings::BUBBLE:
1331 return_target = multiway_merge_bubble(seqs_begin, seqs_end, target, comp, length, stable);
1332 break;
1333 #if _GLIBCXX_LOSER_TREE_EXPLICIT
1334 case Settings::LOSER_TREE_EXPLICIT:
1335 return_target = multiway_merge_loser_tree<LoserTreeExplicit<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
1336 break;
1337 #endif
1338 #if _GLIBCXX_LOSER_TREE
1339 case Settings::LOSER_TREE:
1340 return_target = multiway_merge_loser_tree<LoserTree<value_type, Comparator> >(seqs_begin, seqs_end, target, comp, length, stable);
1341 break;
1342 #endif
1343 #if _GLIBCXX_LOSER_TREE_COMBINED
1344 case Settings::LOSER_TREE_COMBINED:
1345 return_target = multiway_merge_loser_tree_combined(seqs_begin, seqs_end, target, comp, length, stable);
1346 break;
1347 #endif
1348 #if _GLIBCXX_LOSER_TREE_SENTINEL
1349 case Settings::LOSER_TREE_SENTINEL:
1350 return_target = multiway_merge_loser_tree_sentinel(seqs_begin, seqs_end, target, comp, length, stable);
1351 break;
1352 #endif
1353 default:
1354 // multiway_merge algorithm not implemented.
1355 _GLIBCXX_PARALLEL_ASSERT(0);
1356 break;
1357 }
1358 }
1359 }
1360 #if _GLIBCXX_ASSERTIONS
1361 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1362 #endif
1363
1364 return return_target;
1365 }
1366
1367 /** @brief Parallel multi-way merge routine.
1368 *
1369 * The decision if based on the branching factor and runtime settings.
1370 * @param seqs_begin Begin iterator of iterator pair input sequence.
1371 * @param seqs_end End iterator of iterator pair input sequence.
1372 * @param target Begin iterator out output sequence.
1373 * @param comp Comparator.
1374 * @param length Maximum length to merge.
1375 * @param stable Stable merging incurs a performance penalty.
1376 * @param sentinel Ignored.
1377 * @return End iterator of output sequence.
1378 */
1379 template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1380 RandomAccessIterator3
1381 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable, bool sentinel)
1382 {
1383 _GLIBCXX_CALL(length)
1384
1385 typedef _DifferenceTp difference_type;
1386 typedef typename std::iterator_traits<RandomAccessIteratorIterator>::value_type::first_type
1387 RandomAccessIterator1;
1388 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1389 value_type;
1390
1391 #if _GLIBCXX_ASSERTIONS
1392 for (RandomAccessIteratorIterator rii = seqs_begin; rii != seqs_end; rii++)
1393 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*rii).first, (*rii).second, comp));
1394 #endif
1395
1396 // k sequences.
1397 int k = static_cast<int>(seqs_end - seqs_begin);
1398
1399 difference_type total_length = 0;
1400 for (RandomAccessIteratorIterator raii = seqs_begin; raii != seqs_end; raii++)
1401 total_length += LENGTH(*raii);
1402
1403 _GLIBCXX_CALL(total_length)
1404
1405 if (total_length == 0 || k == 0)
1406 return target;
1407
1408 thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
1409
1410 Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
1411
1412 for (int pr = 0; pr < num_threads; pr++)
1413 t[pr].tic();
1414
1415 bool tight = (total_length == length);
1416
1417 // Thread t will have to merge pieces[iam][0..k - 1]
1418 std::vector<std::pair<difference_type, difference_type> >* pieces = new std::vector<std::pair<difference_type, difference_type> >[num_threads];
1419 for (int s = 0; s < num_threads; s++)
1420 pieces[s].resize(k);
1421
1422 difference_type num_samples = Settings::merge_oversampling * num_threads;
1423
1424 if (Settings::multiway_merge_splitting == Settings::SAMPLING)
1425 {
1426 value_type* samples = new value_type[k * num_samples];
1427 // Sample.
1428 for (int s = 0; s < k; s++)
1429 for (int i = 0; (difference_type)i < num_samples; i++)
1430 {
1431 difference_type sample_index = static_cast<difference_type>(LENGTH(seqs_begin[s]) * (double(i + 1) / (num_samples + 1)) * (double(length) / total_length));
1432 samples[s * num_samples + i] = seqs_begin[s].first[sample_index];
1433 }
1434
1435 if (stable)
1436 __gnu_sequential::stable_sort(samples, samples + (num_samples * k), comp);
1437 else
1438 __gnu_sequential::sort(samples, samples + (num_samples * k), comp);
1439
1440 for (int slab = 0; slab < num_threads; slab++)
1441 // For each slab / processor.
1442 for (int seq = 0; seq < k; seq++)
1443 {
1444 // For each sequence.
1445 if (slab > 0)
1446 pieces[slab][seq].first = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * slab / num_threads], comp) - seqs_begin[seq].first;
1447 else
1448 {
1449 // Absolute beginning.
1450 pieces[slab][seq].first = 0;
1451 }
1452 if ((slab + 1) < num_threads)
1453 pieces[slab][seq].second = std::upper_bound(seqs_begin[seq].first, seqs_begin[seq].second, samples[num_samples * k * (slab + 1) / num_threads], comp) - seqs_begin[seq].first;
1454 else
1455 pieces[slab][seq].second = LENGTH(seqs_begin[seq]); //absolute ending
1456 }
1457 delete[] samples;
1458 }
1459 else
1460 {
1461 // (Settings::multiway_merge_splitting == Settings::EXACT).
1462 std::vector<RandomAccessIterator1>* offsets = new std::vector<RandomAccessIterator1>[num_threads];
1463 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > se(k);
1464
1465 copy(seqs_begin, seqs_end, se.begin());
1466
1467 difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
1468 equally_split(length, num_threads, borders);
1469
1470 for (int s = 0; s < (num_threads - 1); s++)
1471 {
1472 offsets[s].resize(k);
1473 multiseq_partition(se.begin(), se.end(), borders[s + 1],
1474 offsets[s].begin(), comp);
1475
1476 // Last one also needed and available.
1477 if (!tight)
1478 {
1479 offsets[num_threads - 1].resize(k);
1480 multiseq_partition(se.begin(), se.end(),
1481 difference_type(length),
1482 offsets[num_threads - 1].begin(), comp);
1483 }
1484 }
1485
1486
1487 for (int slab = 0; slab < num_threads; slab++)
1488 {
1489 // For each slab / processor.
1490 for (int seq = 0; seq < k; seq++)
1491 {
1492 // For each sequence.
1493 if (slab == 0)
1494 {
1495 // Absolute beginning.
1496 pieces[slab][seq].first = 0;
1497 }
1498 else
1499 pieces[slab][seq].first = pieces[slab - 1][seq].second;
1500 if (!tight || slab < (num_threads - 1))
1501 pieces[slab][seq].second = offsets[slab][seq] - seqs_begin[seq].first;
1502 else
1503 {
1504 // slab == num_threads - 1
1505 pieces[slab][seq].second = LENGTH(seqs_begin[seq]);
1506 }
1507 }
1508 }
1509 delete[] offsets;
1510 }
1511
1512 for (int pr = 0; pr < num_threads; pr++)
1513 t[pr].tic();
1514
1515 # pragma omp parallel num_threads(num_threads)
1516 {
1517 thread_index_t iam = omp_get_thread_num();
1518
1519 t[iam].tic();
1520
1521 difference_type target_position = 0;
1522
1523 for (int c = 0; c < k; c++)
1524 target_position += pieces[iam][c].first;
1525
1526 if (k > 2)
1527 {
1528 std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks = new std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
1529
1530 difference_type local_length = 0;
1531 for (int s = 0; s < k; s++)
1532 {
1533 chunks[s] = std::make_pair(seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].second);
1534 local_length += LENGTH(chunks[s]);
1535 }
1536
1537 multiway_merge(chunks, chunks + k, target + target_position, comp,
1538 std::min(local_length, length - target_position),
1539 stable, false, sequential_tag());
1540
1541 delete[] chunks;
1542 }
1543 else if (k == 2)
1544 {
1545 RandomAccessIterator1 begin0 = seqs_begin[0].first + pieces[iam][0].first, begin1 = seqs_begin[1].first + pieces[iam][1].first;
1546 merge_advance(begin0,
1547 seqs_begin[0].first + pieces[iam][0].second,
1548 begin1,
1549 seqs_begin[1].first + pieces[iam][1].second,
1550 target + target_position,
1551 (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
1552 comp);
1553 }
1554
1555 t[iam].tic();
1556
1557 }
1558
1559 for (int pr = 0; pr < num_threads; pr++)
1560 t[pr].tic();
1561
1562 #if _GLIBCXX_ASSERTIONS
1563 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1564 #endif
1565
1566 // Update ends of sequences.
1567 for (int s = 0; s < k; s++)
1568 seqs_begin[s].first += pieces[num_threads - 1][s].second;
1569
1570 delete[] pieces;
1571
1572 for (int pr = 0; pr < num_threads; pr++)
1573 t[pr].tic();
1574 for (int pr = 0; pr < num_threads; pr++)
1575 t[pr].print();
1576 delete[] t;
1577
1578 return target + length;
1579 }
1580
1581 /**
1582 * @brief Multi-way merging front-end.
1583 * @param seqs_begin Begin iterator of iterator pair input sequence.
1584 * @param seqs_end End iterator of iterator pair input sequence.
1585 * @param target Begin iterator out output sequence.
1586 * @param comp Comparator.
1587 * @param length Maximum length to merge.
1588 * @param stable Stable merging incurs a performance penalty.
1589 * @return End iterator of output sequence.
1590 */
1591 template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1592 RandomAccessIterator3
1593 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1594 RandomAccessIteratorPairIterator seqs_end,
1595 RandomAccessIterator3 target, Comparator comp,
1596 _DifferenceTp length, bool stable)
1597 {
1598 typedef _DifferenceTp difference_type;
1599 _GLIBCXX_CALL(seqs_end - seqs_begin)
1600
1601 if (seqs_begin == seqs_end)
1602 return target;
1603
1604 RandomAccessIterator3 target_end;
1605 if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1606 target_end = parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (difference_type)length, stable, false);
1607 else
1608 target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, false, sequential_tag());
1609
1610 return target_end;
1611 }
1612
1613 /** @brief Multi-way merging front-end.
1614 * @param seqs_begin Begin iterator of iterator pair input sequence.
1615 * @param seqs_end End iterator of iterator pair input sequence.
1616 * @param target Begin iterator out output sequence.
1617 * @param comp Comparator.
1618 * @param length Maximum length to merge.
1619 * @param stable Stable merging incurs a performance penalty.
1620 * @return End iterator of output sequence.
1621 * @pre For each @c i, @c seqs_begin[i].second must be the end
1622 * marker of the sequence, but also reference the one more sentinel
1623 * element. */
1624 template<typename RandomAccessIteratorPairIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
1625 RandomAccessIterator3
1626 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
1627 RandomAccessIteratorPairIterator seqs_end,
1628 RandomAccessIterator3 target,
1629 Comparator comp,
1630 _DifferenceTp length,
1631 bool stable)
1632 {
1633 typedef _DifferenceTp difference_type;
1634
1635 if (seqs_begin == seqs_end)
1636 return target;
1637
1638 _GLIBCXX_CALL(seqs_end - seqs_begin)
1639
1640 if (_GLIBCXX_PARALLEL_CONDITION(((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1641 return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, (typename std::iterator_traits<RandomAccessIterator3>::difference_type)length, stable, true);
1642 else
1643 return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, true, sequential_tag());
1644 }
1645 }
1646
1647 #endif
This page took 0.12008 seconds and 6 git commands to generate.