This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[PATCH][libstdc++-v3 parallel mode] PR 33892
- From: Johannes Singler <singler at ira dot uka dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>, gcc-patches at gcc dot gnu dot org
- Date: Thu, 25 Oct 2007 19:03:20 +0200
- Subject: [PATCH][libstdc++-v3 parallel mode] PR 33892
Replaced #pragma omp critical with omp_lock_* calls for more local
synchronization (PR 33892).
Tested x86_64-unknown-linux-gnu: no regressions.
Please approve.
2007-10-25 Johannes Singler <singler@ira.uka.de>
* include/parallel/workstealing.h: Replaced critical pragma with
omp_lock_* function calls.
* include/parallel/search.h: Same.
* include/parallel/partition.h: Same.
* include/parallel/find.h: Same.
Johannes
Index: include/parallel/workstealing.h
===================================================================
--- include/parallel/workstealing.h (revision 129379)
+++ include/parallel/workstealing.h (working copy)
@@ -123,6 +123,9 @@
thread_index_t num_threads = get_max_threads();
difference_type num_threads_min = num_threads < end - begin ? num_threads : end - begin;
+ omp_lock_t output_lock;
+ omp_init_lock(&output_lock);
+
// No more threads than jobs, at least one thread.
difference_type num_threads_max = num_threads_min > 1 ? num_threads_min : 1;
num_threads = static_cast<thread_index_t>(num_threads_max);
@@ -276,9 +279,10 @@
}
#pragma omp flush(busy)
} // end while busy > 0
-#pragma omp critical(writeOutput)
// Add accumulated result to output.
+ omp_set_lock(&output_lock);
output = r(output, result);
+ omp_unset_lock(&output_lock);
//omp_destroy_lock(&(my_job.lock));
}
@@ -289,6 +293,8 @@
// some algorithms like transform)
f.finish_iterator = begin + length;
+ omp_destroy_lock(&output_lock);
+
return op;
}
} // end namespace
Index: include/parallel/search.h
===================================================================
--- include/parallel/search.h (revision 129379)
+++ include/parallel/search.h (working copy)
@@ -102,7 +102,7 @@
difference_type input_length = (end1 - begin1) - pattern_length;
// Where is first occurrence of pattern? defaults to end.
- difference_type res = (end1 - begin1);
+ difference_type result = (end1 - begin1);
// Pattern too long.
if (input_length < 0)
@@ -110,6 +110,9 @@
thread_index_t num_threads = std::max<difference_type>(1, std::min<difference_type>(input_length, __gnu_parallel::get_max_threads()));
+ omp_lock_t result_lock;
+ omp_init_lock(&result_lock);
+
difference_type borders[num_threads + 1];
__gnu_parallel::equally_split(input_length, num_threads, borders);
@@ -127,19 +130,21 @@
while (start <= stop && !found_pattern)
{
- // Get new value of res.
-#pragma omp flush(res)
+ // Get new value of result.
+#pragma omp flush(result)
// No chance for this thread to find first occurrence.
- if (res < start)
+ if (result < start)
break;
while (pred(begin1[start + pos_in_pattern], begin2[pos_in_pattern]))
{
++pos_in_pattern;
if (pos_in_pattern == pattern_length)
{
- // Found new candidate for res.
-#pragma omp critical (res)
- res = std::min(res, start);
+ // Found new candidate for result.
+ omp_set_lock(&result_lock);
+ result = std::min(result, start);
+ omp_unset_lock(&result_lock);
+
found_pattern = true;
break;
}
@@ -150,8 +155,10 @@
}
}
+ omp_destroy_lock(&result_lock);
+
// Return iterator on found element.
- return (begin1 + res);
+ return (begin1 + result);
}
} // end namespace
Index: include/parallel/partition.h
===================================================================
--- include/parallel/partition.h (revision 129379)
+++ include/parallel/partition.h (working copy)
@@ -84,6 +84,9 @@
else
chunk_size = Settings::partition_chunk_size;
+ omp_lock_t result_lock;
+ omp_init_lock(&result_lock);
+
// At least good for two processors.
while (right - left + 1 >= 2 * max_num_threads * chunk_size)
{
@@ -113,8 +116,8 @@
while (!iam_finished)
{
if (thread_left > thread_left_border)
-#pragma omp critical
{
+ omp_set_lock(&result_lock);
if (left + (chunk_size - 1) > right)
iam_finished = true;
else
@@ -123,11 +126,12 @@
thread_left_border = left + (chunk_size - 1);
left += chunk_size;
}
+ omp_unset_lock(&result_lock);
}
if (thread_right < thread_right_border)
-#pragma omp critical
{
+ omp_set_lock(&result_lock);
if (left > right - (chunk_size - 1))
iam_finished = true;
else
@@ -136,6 +140,7 @@
thread_right_border = right - (chunk_size - 1);
right -= chunk_size;
}
+ omp_unset_lock(&result_lock);
}
if (iam_finished)
@@ -199,16 +204,16 @@
{
// Find spot and swap.
difference_type swapstart = -1;
-#pragma omp critical
- {
- for (int r = 0; r < leftover_left; r++)
- if (!reserved_left[r])
- {
- reserved_left[r] = true;
- swapstart = left - (r + 1) * chunk_size;
- break;
- }
- }
+ omp_set_lock(&result_lock);
+ for (int r = 0; r < leftover_left; r++)
+ if (!reserved_left[r])
+ {
+ reserved_left[r] = true;
+ swapstart = left - (r + 1) * chunk_size;
+ break;
+ }
+ }
+ omp_unset_lock(&result_lock);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
@@ -222,9 +227,8 @@
{
// Find spot and swap
difference_type swapstart = -1;
-#pragma omp critical
- {
- for (int r = 0; r < leftover_right; r++)
+ omp_set_lock(&result_lock);
+ for (int r = 0; r < leftover_right; r++)
if (!reserved_right[r])
{
reserved_right[r] = true;
@@ -232,6 +236,7 @@
break;
}
}
+ omp_unset_lock(&result_lock);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
@@ -283,6 +288,8 @@
delete[] reserved_left;
delete[] reserved_right;
+ omp_destroy_lock(&result_lock);
+
// Element "between" final_left and final_right might not have
// been regarded yet
if (final_left < n && !pred(begin[final_left]))
Index: include/parallel/find.h
===================================================================
--- include/parallel/find.h (revision 129379)
+++ include/parallel/find.h (working copy)
@@ -104,6 +104,8 @@
difference_type result = length;
const thread_index_t num_threads = get_max_threads();
+ omp_lock_t result_lock;
+ omp_init_lock(&result_lock);
difference_type* borders = static_cast<difference_type*>(__builtin_alloca(sizeof(difference_type) * (num_threads + 1)));
@@ -119,21 +121,24 @@
for (; pos < limit; pos++)
{
#pragma omp flush(result)
- // Result has been set to something lower.
- if (result < pos)
- break;
+ // Result has been set to something lower.
+ if (result < pos)
+ break;
- if (selector(i1, i2, pred))
- {
-#pragma omp critical (result)
- if (result > pos)
- result = pos;
- break;
- }
- i1++;
- i2++;
- }
+ if (selector(i1, i2, pred))
+ {
+ omp_set_lock(&result_lock);
+ if (result > pos)
+ result = pos;
+ break;
+ omp_unset_lock(&result_lock);
+ }
+ i1++;
+ i2++;
+ }
}
+
+ omp_destroy_lock(&result_lock);
return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
}
@@ -191,6 +196,9 @@
difference_type result = length;
const thread_index_t num_threads = get_max_threads();
+ omp_lock_t result_lock;
+ omp_init_lock(&result_lock);
+
#pragma omp parallel shared(result) num_threads(num_threads)
{
// Not within first k elements -> start parallel.
@@ -217,7 +225,7 @@
local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred);
if (local_result.first != (begin1 + stop))
{
-#pragma omp critical(result)
+ omp_set_lock(&result_lock);
if ((local_result.first - begin1) < result)
{
result = local_result.first - begin1;
@@ -225,6 +233,7 @@
// Result cannot be in future blocks, stop algorithm.
fetch_and_add<difference_type>(&next_block_pos, length);
}
+ omp_unset_lock(&result_lock);
}
block_size = std::min<difference_type>(block_size * Settings::find_increasing_factor, Settings::find_maximum_block_size);
@@ -235,6 +244,8 @@
}
}
+ omp_destroy_lock(&result_lock);
+
// Return iterator on found element.
return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
}
@@ -286,6 +297,9 @@
difference_type result = length;
const thread_index_t num_threads = get_max_threads();
+ omp_lock_t result_lock;
+ omp_init_lock(&result_lock);
+
// Not within first sequential_search_size elements -> start parallel.
#pragma omp parallel shared(result) num_threads(num_threads)
{
@@ -314,10 +328,10 @@
local_result = selector.sequential_algorithm(begin1 + start, begin1 + stop, begin2 + start, pred);
if (local_result.first != (begin1 + stop))
{
-#pragma omp critical(result)
+ omp_set_lock(&result_lock);
if ((local_result.first - begin1) < result)
result = local_result.first - begin1;
-
+ omp_unset_lock(&result_lock);
// Will not find better value in its interval.
break;
}
@@ -330,6 +344,8 @@
}
}
+ omp_destroy_lock(&result_lock);
+
// Return iterator on found element.
return std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, begin2 + result);
}