This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[PATCH][libstdc++-v3 parallel mode] Improve find algorithm
- From: Johannes Singler <singler at kit dot edu>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>, gcc-patches at gcc dot gnu dot org
- Date: Tue, 08 Jun 2010 16:07:32 +0200
- Subject: [PATCH][libstdc++-v3 parallel mode] Improve find algorithm
This patch improves the parallel find algorithm by making the block size
proportional to the current start position, which gives nice theoretical
performance guarantees and better performance in practice.
Tested x86_64-unknown-linux-gnu: No regressions
Please approve for mainline.
2010-06-08 Johannes Singler <singler@kit.edu>
* include/parallel/find.h
(__find_template(.., growing_blocks_tag)): Make block size
proportional to current position.
* include/parallel/settings.h (_Settings): Introduce new tuning
parameter find_scale_factor to the end of the struct, default to
0.01.
Johannes
Index: include/parallel/settings.h
===================================================================
--- include/parallel/settings.h (revision 160424)
+++ include/parallel/settings.h (working copy)
@@ -272,6 +272,9 @@
/// Minimal input size for search and search_n.
_SequenceIndex search_minimal_n;
+ /// Block size scale-down factor with respect to current position.
+ float find_scale_factor;
+
/// Get the global settings.
_GLIBCXX_CONST static const _Settings&
get() throw();
@@ -331,7 +334,8 @@
TLB_size(128),
cache_line_size(64),
qsb_steals(0),
- search_minimal_n(1000)
+ search_minimal_n(1000),
+ find_scale_factor(0.01f)
{ }
};
}
Index: include/parallel/find.h
===================================================================
--- include/parallel/find.h (revision 160424)
+++ include/parallel/find.h (working copy)
@@ -168,9 +168,7 @@
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size
- * @see __gnu_parallel::_Settings::find_initial_block_size
- * @see __gnu_parallel::_Settings::find_maximum_block_size
- * @see __gnu_parallel::_Settings::find_increasing_factor
+ * @see __gnu_parallel::_Settings::find_scale_factor
*
* There are two main differences between the growing blocks and
* the constant-size blocks variants.
@@ -218,6 +216,8 @@
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
+ const float __scale_factor = __s.find_scale_factor;
+
_ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel shared(__result) num_threads(__num_threads)
{
@@ -227,7 +227,8 @@
// Not within first __k elements -> start parallel.
_ThreadIndex __iam = omp_get_thread_num();
- _DifferenceType __block_size = __s.find_initial_block_size;
+ _DifferenceType __block_size =
+ std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
_DifferenceType __start = __fetch_and_add<_DifferenceType>
(&__next_block_start, __block_size);
@@ -265,15 +266,14 @@
omp_unset_lock(&__result_lock);
}
- __block_size = std::min<_DifferenceType>
- (__block_size * __s.find_increasing_factor,
- __s.find_maximum_block_size);
+ _DifferenceType __block_size =
+ std::max<_DifferenceType>(1, __scale_factor * __next_block_start);
// Get new block, update pointer to next block.
__start = __fetch_and_add<_DifferenceType>(&__next_block_start,
__block_size);
- __stop = (__length < (__start + __block_size)
- ? __length : (__start + __block_size));
+ __stop =
+ std::min<_DifferenceType>(__length, __start + __block_size);
}
} //parallel