From: François Dumont Date: Wed, 22 Jan 2020 16:55:54 +0000 (+0100) Subject: libstdc++: Limit memory allocation in stable_sort/inplace_merge (PR 83938) X-Git-Tag: basepoints/gcc-12~3045 X-Git-Url: https://gcc.gnu.org/git/?a=commitdiff_plain;h=ba23e045fcb820e8d32dee361c4d048604d8d599;p=gcc.git libstdc++: Limit memory allocation in stable_sort/inplace_merge (PR 83938) Reduce memory allocation in stable_sort/inplace_merge algorithms to what is needed by the implementation. Co-authored-by: John Chang libstdc++-v3/ChangeLog: PR libstdc++/83938 * include/bits/stl_tempbuf.h (get_temporary_buffer): Change __len computation in the loop to avoid truncation. * include/bits/stl_algo.h (__inplace_merge): Take temporary buffer length from the smaller range. (__stable_sort): Limit temporary buffer length. * testsuite/25_algorithms/inplace_merge/1.cc (test4): New. * testsuite/performance/25_algorithms/stable_sort.cc: Test stable_sort under different heap memory conditions. * testsuite/performance/25_algorithms/inplace_merge.cc: New test. --- diff --git a/libstdc++-v3/include/bits/stl_algo.h b/libstdc++-v3/include/bits/stl_algo.h index 6efc99035b7d..cb748418e0e7 100644 --- a/libstdc++-v3/include/bits/stl_algo.h +++ b/libstdc++-v3/include/bits/stl_algo.h @@ -2523,6 +2523,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _ValueType; typedef typename iterator_traits<_BidirectionalIterator>::difference_type _DistanceType; + typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf; if (__first == __middle || __middle == __last) return; @@ -2530,8 +2531,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _DistanceType __len1 = std::distance(__first, __middle); const _DistanceType __len2 = std::distance(__middle, __last); - typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf; - _TmpBuf __buf(__first, __len1 + __len2); + // __merge_adaptive will use a buffer for the smaller of + // [first,middle) and [middle,last). 
+ _TmpBuf __buf(__first, std::min(__len1, __len2)); if (__buf.begin() == 0) std::__merge_without_buffer @@ -2740,6 +2742,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp); std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp); } + std::__merge_adaptive(__first, __middle, __last, _Distance(__middle - __first), _Distance(__last - __middle), @@ -5005,9 +5008,14 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO _ValueType; typedef typename iterator_traits<_RandomAccessIterator>::difference_type _DistanceType; - typedef _Temporary_buffer<_RandomAccessIterator, _ValueType> _TmpBuf; - _TmpBuf __buf(__first, std::distance(__first, __last)); + + if (__first == __last) + return; + + // __stable_sort_adaptive sorts the range in two halves, + // so the buffer only needs to fit half the range at once. + _TmpBuf __buf(__first, (__last - __first + 1) / 2); if (__buf.begin() == 0) std::__inplace_stable_sort(__first, __last, __comp); diff --git a/libstdc++-v3/include/bits/stl_tempbuf.h b/libstdc++-v3/include/bits/stl_tempbuf.h index f6f179604723..d76ed7f7ea65 100644 --- a/libstdc++-v3/include/bits/stl_tempbuf.h +++ b/libstdc++-v3/include/bits/stl_tempbuf.h @@ -110,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::nothrow)); if (__tmp != 0) return std::pair<_Tp*, ptrdiff_t>(__tmp, __len); - __len /= 2; + __len = __len == 1 ? 
0 : ((__len + 1) / 2); } return std::pair<_Tp*, ptrdiff_t>(static_cast<_Tp*>(0), 0); } diff --git a/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc b/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc index 5859f0363d57..bfdff77e0a73 100644 --- a/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc +++ b/libstdc++-v3/testsuite/25_algorithms/inplace_merge/1.cc @@ -28,7 +28,7 @@ using std::inplace_merge; typedef test_container container; -void +void test1() { int array[] = { 1 }; @@ -39,7 +39,7 @@ test1() inplace_merge(con2.begin(), con2.begin(), con2.end()); } -void +void test2() { int array[] = { 0, 2, 4, 1, 3, 5 }; @@ -86,6 +86,36 @@ test3() VERIFY( s[0].b == 0 && s[1].b == 4 && s[2].b == 1 && s[3].b == 5 ); } +void +test4() +{ + S s[8]; + for (int pivot_idx = 0; pivot_idx < 8; ++pivot_idx) + { + int bval = 0; + for (int i = 0; i != pivot_idx; ++i) + { + s[i].a = i; + s[i].b = bval++; + } + + for (int i = pivot_idx; i != 8; ++i) + { + s[i].a = i - pivot_idx; + s[i].b = bval++; + } + + inplace_merge(s, s + pivot_idx, s + 8); + + for (int i = 1; i < 8; ++i) + { + VERIFY( !(s[i] < s[i - 1]) ); + if (s[i - 1].a == s[i].a) + VERIFY( s[i - 1].b < s[i].b ); + } + } +} + int main() { @@ -95,12 +125,15 @@ main() __gnu_test::set_new_limit(sizeof(S) * 4); test3(); + test4(); __gnu_test::set_new_limit(sizeof(S)); test3(); + test4(); __gnu_test::set_new_limit(0); test3(); + test4(); return 0; } diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc new file mode 100644 index 000000000000..780c21912c7b --- /dev/null +++ b/libstdc++-v3/testsuite/performance/25_algorithms/inplace_merge.cc @@ -0,0 +1,290 @@ +// Copyright (C) 2020 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. 
This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +#include +#include +#include + +#include +#include + +const int max_size = 10000000; +const int small_size = 200000; +const int front_pivot_idx = 10000; +int middle_pivot_idx = max_size / 2; +int back_pivot_idx = max_size - front_pivot_idx; + +void bench(int mem_threshold, int pivot_index, + std::vector revv, + std::vector fwdv, + std::vector wstv, + std::vector rndv) +{ + using namespace __gnu_test; + + time_counter time; + resource_counter resource; + + set_new_limit(mem_threshold); + + start_counters(time, resource); + std::inplace_merge(revv.begin(), revv.begin() + pivot_index, revv.end()); + stop_counters(time, resource); + + set_new_limit(~size_t(0)); + + report_performance(__FILE__, "reverse", time, resource); + clear_counters(time, resource); + + set_new_limit(mem_threshold); + + start_counters(time, resource); + std::inplace_merge(fwdv.begin(), fwdv.begin() + pivot_index, fwdv.end()); + stop_counters(time, resource); + + set_new_limit(~size_t(0)); + + report_performance(__FILE__, "forward", time, resource); + clear_counters(time, resource); + + set_new_limit(mem_threshold); + + start_counters(time, resource); + std::inplace_merge(wstv.begin(), wstv.begin() + pivot_index, wstv.end()); + stop_counters(time, resource); + + set_new_limit(~size_t(0)); + + report_performance(__FILE__, "worst", time, resource); + clear_counters(time, 
resource); + + set_new_limit(mem_threshold); + + start_counters(time, resource); + std::inplace_merge(rndv.begin(), rndv.begin() + pivot_index, rndv.end()); + stop_counters(time, resource); + + set_new_limit(~size_t(0)); + report_performance(__FILE__, "random", time, resource); +} + +void mem_bench(double mem_ratio, + const std::vector& front_revv, + const std::vector& middle_revv, + const std::vector& back_revv, + const std::vector& fwdv, + const std::vector& front_wstv, + const std::vector& middle_wstv, + const std::vector& back_wstv, + const std::vector& front_rndv, + const std::vector& middle_rndv, + const std::vector& back_rndv) +{ + using namespace __gnu_test; + + time_counter time; + resource_counter resource; + + int max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int); + start_counters(time, resource); + bench(max_mem, front_pivot_idx, front_revv, fwdv, front_wstv, front_rndv); + stop_counters(time, resource); + report_performance(__FILE__, "front pivot", time, resource); + clear_counters(time, resource); + + max_mem = (int)std::ceil(middle_pivot_idx * mem_ratio) * sizeof(int); + start_counters(time, resource); + bench(max_mem, middle_pivot_idx, middle_revv, fwdv, middle_wstv, middle_rndv); + stop_counters(time, resource); + report_performance(__FILE__, "middle pivot", time, resource); + clear_counters(time, resource); + + max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int); + start_counters(time, resource); + bench(max_mem, back_pivot_idx, back_revv, fwdv, back_wstv, back_rndv); + stop_counters(time, resource); + report_performance(__FILE__, "back pivot", time, resource); +} + +void init_reverse(std::vector& v, size_t pivot_index) +{ + int val = 0; + for (size_t i = pivot_index; i != v.size(); ++i) + v[i] = val++; + for (size_t i = 0; i != pivot_index; ++i) + v[i] = val++; +} + +void init_forward(std::vector& v) +{ + int val = 0; + for (size_t i = 0; i != v.size(); ++i) + v[i] = val++; +} + +void init_worst(std::vector& v, 
size_t pivot_index) +{ + int val = 0; + if (pivot_index + 1 > v.size() / 2) + { + for (size_t i = 0; i != pivot_index; val += 2, ++i) + v[i] = val; + val = 1; + } + else + { + for (size_t i = pivot_index; i != v.size(); val += 2, ++i) + v[i] = val; + val -= pivot_index * 2 + 1; + } + + if (pivot_index + 1 > v.size() / 2) + for (size_t i = pivot_index; i != v.size(); val += 2, ++i) + v[i] = val; + else + for (size_t i = 0; i != pivot_index; val += 2, ++i) + v[i] = val; +} + +void init_random(std::vector& v) +{ + // a simple pseudo-random series which does not rely on rand() and friends + v[0] = 0; + for (size_t i = 1; i != v.size(); ++i) + v[i] = (v[i-1] + 110211473) * 745988807; +} + +void reduce_size(std::vector& front_v, + std::vector& middle_v, + std::vector& back_v) +{ + front_v.erase(front_v.begin() + front_pivot_idx, + front_v.end() - back_pivot_idx); + middle_v.erase(middle_v.begin() + small_size / 2, + middle_v.end() - small_size / 2); + back_v.erase(back_v.begin() + back_pivot_idx, + back_v.end() - front_pivot_idx); +} + +int main() +{ + using namespace __gnu_test; + + // No constraint to build vectors. 
+ set_new_limit(~size_t(0)); + + std::vector front_revv(max_size); + init_reverse(front_revv, front_pivot_idx); + + std::vector middle_revv(max_size); + init_reverse(middle_revv, middle_pivot_idx); + + std::vector back_revv(max_size); + init_reverse(back_revv, back_pivot_idx); + + std::vector fwdv(max_size); + init_forward(fwdv); + + std::vector front_wstv(max_size); + init_worst(front_wstv, front_pivot_idx); + + std::vector middle_wstv(max_size); + init_worst(middle_wstv, middle_pivot_idx); + + std::vector back_wstv(max_size); + init_worst(back_wstv, back_pivot_idx); + + std::vector front_rndv(max_size); + init_random(front_rndv); + std::vector middle_rndv(front_rndv); + std::vector back_rndv(front_rndv); + + sort(front_rndv.begin(), front_rndv.begin() + front_pivot_idx); + sort(front_rndv.begin() + front_pivot_idx, front_rndv.end()); + + sort(middle_rndv.begin(), middle_rndv.begin() + middle_pivot_idx); + sort(middle_rndv.begin() + middle_pivot_idx, middle_rndv.end()); + + sort(back_rndv.begin(), back_rndv.begin() + back_pivot_idx); + sort(back_rndv.begin() + back_pivot_idx, back_rndv.end()); + + time_counter time; + resource_counter resource; + + start_counters(time, resource); + + // No limit. + mem_bench(1.0, + front_revv, middle_revv, back_revv, + fwdv, + front_wstv, middle_wstv, back_wstv, + front_rndv, middle_rndv, back_rndv); + + stop_counters(time, resource); + + report_performance(__FILE__, "bench 1 / 1 memory", time, resource); + clear_counters(time, resource); + + start_counters(time, resource); + + // Limit to the fourth. + mem_bench(1.0 / 4, + front_revv, middle_revv, back_revv, + fwdv, + front_wstv, middle_wstv, back_wstv, + front_rndv, middle_rndv, back_rndv); + + stop_counters(time, resource); + + report_performance(__FILE__, "bench 1 / 4 memory", time, resource); + clear_counters(time, resource); + + start_counters(time, resource); + + // Really limit allocation. 
+ mem_bench(1.0 / 64, + front_revv, middle_revv, back_revv, + fwdv, + front_wstv, middle_wstv, back_wstv, + front_rndv, middle_rndv, back_rndv); + + stop_counters(time, resource); + + report_performance(__FILE__, "bench 1 /64 memory", time, resource); + clear_counters(time, resource); + + middle_pivot_idx = small_size / 2; + back_pivot_idx = small_size - front_pivot_idx; + reduce_size(front_revv, middle_revv, back_revv); + fwdv.resize(small_size); + reduce_size(front_wstv, middle_wstv, back_wstv); + reduce_size(front_rndv, middle_rndv, back_rndv); + + start_counters(time, resource); + + // No memory. + mem_bench(0.0, + front_revv, middle_revv, back_revv, + fwdv, + front_wstv, middle_wstv, back_wstv, + front_rndv, middle_rndv, back_rndv); + + stop_counters(time, resource); + + report_performance(__FILE__, "bench 0 / 1 memory", time, resource); + return 0; +} diff --git a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc index 02b869dd1952..fe526638aaf4 100644 --- a/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc +++ b/libstdc++-v3/testsuite/performance/25_algorithms/stable_sort.cc @@ -17,49 +17,107 @@ #include #include + +#include #include -int main() +const int max_size = 10000000; +const int small_size = 200000; + +void bench(size_t mem_threshold, + std::vector revv, + std::vector fwdv, + std::vector rndv) { using namespace __gnu_test; time_counter time; resource_counter resource; - const int max_size = 10000000; - - std::vector v(max_size); - - for (int i = 0; i < max_size; ++i) - v[i] = -i; + set_new_limit(mem_threshold); start_counters(time, resource); - std::stable_sort(v.begin(), v.end()); + std::stable_sort(revv.begin(), revv.end()); stop_counters(time, resource); + set_new_limit(~size_t(0)); report_performance(__FILE__, "reverse", time, resource); clear_counters(time, resource); - for (int i = 0; i < max_size; ++i) - v[i] = i; + set_new_limit(mem_threshold); 
start_counters(time, resource); - std::stable_sort(v.begin(), v.end()); + std::stable_sort(fwdv.begin(), fwdv.end()); stop_counters(time, resource); + set_new_limit(~size_t(0)); report_performance(__FILE__, "forwards", time, resource); clear_counters(time, resource); - // a simple psuedo-random series which does not rely on rand() and friends - v[0] = 0; + start_counters(time, resource); + std::stable_sort(rndv.begin(), rndv.end()); + stop_counters(time, resource); + + set_new_limit(~size_t(0)); + report_performance(__FILE__, "random", time, resource); +} + +int main() +{ + using namespace __gnu_test; + + // No memory constraint. + set_new_limit(~size_t(0)); + + std::vector revv(max_size); + for (int i = 0; i < max_size; ++i) + revv[i] = -i; + + std::vector fwdv(max_size); + for (int i = 0; i < max_size; ++i) + fwdv[i] = i; + + // a simple pseudo-random series which does not rely on rand() and friends + std::vector rndv(max_size); + rndv[0] = 0; for (int i = 1; i < max_size; ++i) - v[i] = (v[i-1] + 110211473) * 745988807; + rndv[i] = (rndv[i-1] + 110211473) * 745988807; + + time_counter time; + resource_counter resource; start_counters(time, resource); - std::stable_sort(v.begin(), v.end()); + bench(~size_t(0), revv, fwdv, rndv); stop_counters(time, resource); - report_performance(__FILE__, "random", time, resource); + report_performance(__FILE__, "bench 1 / 1 memory", time, resource); + clear_counters(time, resource); + + start_counters(time, resource); + // Limit to fourth the expected size of the sorted array. + bench(max_size * sizeof(int) / 4, revv, fwdv, rndv); + stop_counters(time, resource); + + report_performance(__FILE__, "bench 1 / 4 memory", time, resource); + clear_counters(time, resource); + + start_counters(time, resource); + // Limit to 1/64 of range size. 
+ bench(max_size * sizeof(int) / 64, revv, fwdv, rndv); + stop_counters(time, resource); + + report_performance(__FILE__, "bench 1 /64 memory", time, resource); + clear_counters(time, resource); + + revv.resize(small_size); + fwdv.resize(small_size); + rndv.resize(small_size); + + start_counters(time, resource); + // Forbid any allocation. + bench(0, revv, fwdv, rndv); + stop_counters(time, resource); + report_performance(__FILE__, "bench 0 / 1 memory", time, resource); return 0; }