This is the mail archive of the
libstdc++@gcc.gnu.org
mailing list for the libstdc++ project.
[PATCH][libstdc++-v3 parallel mode] Throw out running time measurement
- From: Johannes Singler <singler at ira dot uka dot de>
- To: libstdc++ <libstdc++ at gcc dot gnu dot org>, gcc-patches at gcc dot gnu dot org
- Date: Thu, 25 Oct 2007 19:10:08 +0200
- Subject: [PATCH][libstdc++-v3 parallel mode] Throw out running time measurement
This patch removes obsolete running time measurement code (that was
already deactivated).
Committed as approved.
2007-10-25 Johannes Singler <singler@ira.uka.de>
* include/parallel/multiway_merge.h: Removed Timing<inactive_tag>
* include/parallel/random_shuffle.h: Same
* include/parallel/set_operations.h: Same
* include/parallel/tree.h: Same
* include/parallel/multiway_mergesort.h: Same
* include/parallel/timing.h: Removed completely
Johannes
Index: include/parallel/multiway_merge.h
===================================================================
--- include/parallel/multiway_merge.h (revision 129315)
+++ include/parallel/multiway_merge.h (working copy)
@@ -52,7 +52,6 @@
#include <parallel/parallel.h>
#include <parallel/merge.h>
#include <parallel/losertree.h>
-#include <parallel/timing.h>
#if _GLIBCXX_ASSERTIONS
#include <parallel/checkers.h>
#endif
@@ -1354,11 +1353,6 @@
thread_index_t num_threads = static_cast<thread_index_t>(std::min(static_cast<difference_type>(get_max_threads()), total_length));
- Timing<sequential_tag>* t = new Timing<sequential_tag>[num_threads];
-
- for (int pr = 0; pr < num_threads; pr++)
- t[pr].tic();
-
bool tight = (total_length == length);
// Thread t will have to merge pieces[iam][0..k - 1]
@@ -1456,15 +1450,10 @@
delete[] offsets;
}
- for (int pr = 0; pr < num_threads; pr++)
- t[pr].tic();
-
# pragma omp parallel num_threads(num_threads)
{
thread_index_t iam = omp_get_thread_num();
- t[iam].tic();
-
difference_type target_position = 0;
for (int c = 0; c < k; c++)
@@ -1498,14 +1487,8 @@
(pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first),
comp);
}
-
- t[iam].tic();
-
}
- for (int pr = 0; pr < num_threads; pr++)
- t[pr].tic();
-
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
#endif
@@ -1516,12 +1499,6 @@
delete[] pieces;
- for (int pr = 0; pr < num_threads; pr++)
- t[pr].tic();
- for (int pr = 0; pr < num_threads; pr++)
- t[pr].print();
- delete[] t;
-
return target + length;
}
Index: include/parallel/random_shuffle.h
===================================================================
--- include/parallel/random_shuffle.h (revision 129315)
+++ include/parallel/random_shuffle.h (working copy)
@@ -42,7 +42,6 @@
#include <bits/stl_numeric.h>
#include <parallel/parallel.h>
#include <parallel/random_number.h>
-#include <parallel/timing.h>
namespace __gnu_parallel
{
@@ -136,9 +135,6 @@
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
- Timing<sequential_tag> t;
- t.tic();
-
DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[omp_get_thread_num()];
DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
thread_index_t iam = d->iam;
@@ -170,12 +166,8 @@
for (bin_index b = 0; b < sd->num_bins + 1; b++)
sd->dist[b][iam + 1] = dist[b];
- t.tic();
-
#pragma omp barrier
- t.tic();
-
#pragma omp single
{
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
@@ -188,8 +180,6 @@
#pragma omp barrier
- t.tic();
-
sequence_index_t offset = 0, global_offset = 0;
for (bin_index s = 0; s < d->bins_begin; s++)
global_offset += sd->dist[s + 1][d->num_threads];
@@ -205,12 +195,8 @@
sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * offset));
- t.tic();
-
#pragma omp barrier
- t.tic();
-
// Draw local copies to avoid false sharing.
for (bin_index b = 0; b < sd->num_bins + 1; b++)
dist[b] = sd->dist[b][iam];
@@ -237,12 +223,8 @@
delete[] bin_proc;
delete[] temporaries;
- t.tic();
-
#pragma omp barrier
- t.tic();
-
// Shuffle bins internally.
for (bin_index b = d->bins_begin; b < d->bins_end; b++)
{
@@ -253,10 +235,6 @@
}
delete[] sd->temporaries[iam];
-
- t.tic();
-
- t.print();
}
/** @brief Round up to the next greater power of 2.
@@ -453,9 +431,6 @@
for (int b = 0; b < num_bins + 1; b++)
dist0[b] = 0;
- Timing<sequential_tag> t;
- t.tic();
-
random_number bitrng(rng(0xFFFFFFFF));
for (difference_type i = 0; i < n; i++)
@@ -467,16 +442,12 @@
dist0[oracle + 1]++;
}
- t.tic();
-
// Sum up bins.
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
for (int b = 0; b < num_bins + 1; b++)
dist1[b] = dist0[b];
- t.tic();
-
// Distribute according to oracles.
for (difference_type i = 0; i < n; i++)
target[(dist0[oracles[i]])++] = *(begin + i);
@@ -485,9 +456,7 @@
{
sequential_random_shuffle(target + dist1[b], target + dist1[b + 1],
rng);
- t.tic();
}
- t.print();
delete[] dist0;
delete[] dist1;
Index: include/parallel/set_operations.h
===================================================================
--- include/parallel/set_operations.h (revision 129315)
+++ include/parallel/set_operations.h (working copy)
@@ -381,10 +381,6 @@
#pragma omp parallel num_threads(num_threads)
{
- Timing<sequential_tag> t;
-
- t.tic();
-
// Result from multiseq_partition.
InputIterator offset[2];
const int iam = omp_get_thread_num();
@@ -407,13 +403,9 @@
iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]);
- t.tic();
-
// Make sure all threads have their block_begin result written out.
#pragma omp barrier
- t.tic();
-
iterator_pair block_begin = block_begins[ iam ];
// Begin working for the first block, while the others except
@@ -429,12 +421,9 @@
block_begin.second, block_end.second);
}
- t.tic();
-
// Make sure everyone wrote their lengths.
#pragma omp barrier
- t.tic();
OutputIterator r = result;
if (iam == 0)
@@ -458,9 +447,6 @@
op.invoke(block_begin.first, block_end.first,
block_begin.second, block_end.second, r);
}
-
- t.tic();
- t.print();
}
return return_value;
}
Index: include/parallel/tree.h
===================================================================
--- include/parallel/tree.h (revision 129315)
+++ include/parallel/tree.h (working copy)
@@ -57,13 +57,6 @@
#include <parallel/list_partition.h>
-//#define _GLIBCXX_TIMING
-#ifdef _GLIBCXX_TIMING
-#define _timing_tag parallel_tag
-#else
-#define _timing_tag sequential_tag
-#endif
-
namespace std
{
// XXX Declaration should go to stl_tree.h.
@@ -1217,10 +1210,6 @@
void
_M_bulk_insertion_construction(const _InputIterator __first, const _InputIterator __last, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
- Timing<_timing_tag> t;
-
- t.tic();
-
thread_index_t num_threads = get_max_threads();
size_type n;
size_type beg_partition[num_threads+1];
@@ -1228,8 +1217,6 @@
beg_partition[0] = 0;
bool is_sorted= is_sorted_distance_accessors(__first, __last, access, beg_partition,n, num_threads, std::__iterator_category(__first));
- t.tic("is_sorted");
-
if (not is_sorted)
{
_M_not_sorted_bulk_insertion_construction(access, beg_partition, n, num_threads, is_construction, strictly_less_or_less_equal);
@@ -1260,10 +1247,6 @@
_M_sorted_bulk_insertion(access, beg_partition, n, num_threads,
strictly_less_or_less_equal);
}
-
- t.tic("main work");
-
- t.print();
}
/** @brief Bulk construction and insertion helper method on an
@@ -1349,31 +1332,19 @@
_M_not_sorted_bulk_insertion_construction(size_type* beg_partition, ElementsToSort* v, Comparator comp, const size_type n, thread_index_t num_threads, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
// The accessors have been calculated for the non sorted.
- Timing<_timing_tag> t;
-
- t.tic();
-
num_threads = static_cast<thread_index_t>(std::min<size_type>(num_threads, n));
std::stable_sort(v, v+n, comp);
- t.tic("sort");
-
IteratorSortedElements sorted_access[num_threads+1];
range_accessors(IteratorSortedElements(v), IteratorSortedElements(v+n), sorted_access, beg_partition, n, num_threads, std::__iterator_category(v));
- t.tic("range_accessors");
-
// Partial template specialization not available.
if (is_construction)
_M_sorted_bulk_construction(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
else
_M_sorted_bulk_insertion(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal);
delete v;
-
- t.tic("actual construction or insertion");
-
- t.print();
}
/** @brief Construct a tree sequentially using the parallel routine
@@ -1753,17 +1724,11 @@
void
_M_sorted_bulk_construction(_Iterator* access, size_type* beg_partition, const size_type n, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
- Timing<_timing_tag> t;
-
// Dealing with repetitions (EFFICIENCY ISSUE).
size_type rank_shift[num_threads+1];
- t.tic();
-
_Rb_tree_node_ptr* r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, n, num_threads, strictly_less_or_less_equal);
- t.tic("bulk allocation and initialization");
-
// Link the tree appropriately.
// Dealing with repetitions (EFFICIENCY ISSUE).
ranker_gaps rank(beg_partition, rank_shift, num_threads);
@@ -1818,11 +1783,7 @@
base_type::_M_impl._M_header._M_parent = nodes_init.get_root();
nodes_init.get_root()->_M_parent= &base_type::_M_impl._M_header;
- t.tic("linking nodes");
::operator delete(r);
-
- t.tic("delete array of pointers");
- t.print();
}
@@ -1850,10 +1811,6 @@
_M_sorted_bulk_insertion(_Iterator* access, size_type* beg_partition, size_type k, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal)
{
_GLIBCXX_PARALLEL_ASSERT((size_type)num_threads <= k);
- Timing<_timing_tag> t;
-
- t.tic();
-
// num_thr-1 problems in the upper part of the tree
// num_thr problems to further parallelize
std::vector<size_type> existing(num_threads,0);
@@ -1873,7 +1830,6 @@
// 1. Construct the nodes with their corresponding data
#if _GLIBCXX_TREE_INITIAL_SPLITTING
r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, k, num_threads, strictly_less_or_less_equal);
- t.tic("bulk allocation and initialization");
#else
r = _M_sorted_no_gapped_bulk_allocation_and_initialization(access, beg_partition, k, num_threads, strictly_less_or_less_equal);
#endif
@@ -1896,8 +1852,6 @@
repetitions (EFFICIENCY ISSUE) *****/
size_type last = beg_partition[num_threads] - (rank_shift[num_threads] - rank_shift[num_threads - 1]);
- t.tic("last element to be inserted");
-
//2. Split the tree according to access in num_threads parts
//Initialize upper concat_problems
//Allocate them dynamically because they are afterwards so erased
@@ -1960,8 +1914,6 @@
size_type last = k;
#endif
- t.tic("sorted_no_gapped...");
-
// 3. Split the range according to tree and create
// 3. insertion/concatenation problems to be solved in parallel
#if _GLIBCXX_TREE_DYNAMIC_BALANCING
@@ -2018,8 +1970,6 @@
} while (change);
}
- t.tic("merging");
-
// Update root and sizes.
base_type::_M_root() = root_problem->t;
root_problem->t->_M_parent = &(base_type::_M_impl._M_header);
@@ -2069,9 +2019,6 @@
// Delete array of pointers
::operator delete(r);
-
- t.tic();
- t.print();
}
Index: include/parallel/multiway_mergesort.h
===================================================================
--- include/parallel/multiway_mergesort.h (revision 129315)
+++ include/parallel/multiway_mergesort.h (working copy)
@@ -44,7 +44,6 @@
#include <bits/stl_algo.h>
#include <parallel/parallel.h>
#include <parallel/multiway_merge.h>
-#include <parallel/timing.h>
namespace __gnu_parallel
{
@@ -160,9 +159,6 @@
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
- Timing<sequential_tag> t;
- t.tic();
-
PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
thread_index_t iam = d->iam;
@@ -196,7 +192,6 @@
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local.
- t.tic("local sort");
if (Settings::sort_splitting == Settings::SAMPLING)
{
@@ -205,8 +200,6 @@
#pragma omp barrier
- t.tic("sample/wait");
-
#pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * d->num_threads),
@@ -241,8 +234,6 @@
{
#pragma omp barrier
- t.tic("wait");
-
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > seqs(d->num_threads);
for (int s = 0; s < d->num_threads; s++)
seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
@@ -276,8 +267,6 @@
}
}
- t.tic("split");
-
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
for (int s = 0; s < d->num_threads; s++)
@@ -308,8 +297,6 @@
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag());
- t.tic("merge");
-
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp));
#endif
@@ -323,10 +310,6 @@
#endif
delete[] sd->temporaries[iam];
-
- t.tic("copy back");
-
- t.print();
}
/** @brief PMWMS main call.
Index: include/parallel/timing.h
===================================================================
--- include/parallel/timing.h (revision 129315)
+++ include/parallel/timing.h (working copy)
@@ -1,217 +0,0 @@
-// -*- C++ -*-
-
-// Copyright (C) 2007 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the terms
-// of the GNU General Public License as published by the Free Software
-// Foundation; either version 2, or (at your option) any later
-// version.
-
-// This library is distributed in the hope that it will be useful, but
-// WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-
-// You should have received a copy of the GNU General Public License
-// along with this library; see the file COPYING. If not, write to
-// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
-// MA 02111-1307, USA.
-
-// As a special exception, you may use this file as part of a free
-// software library without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to
-// produce an executable, this file does not by itself cause the
-// resulting executable to be covered by the GNU General Public
-// License. This exception does not however invalidate any other
-// reasons why the executable file might be covered by the GNU General
-// Public License.
-
-/** @file parallel/timing.h
- * @brief Provides a simple tool to do performance debugging, also in
- * parallel code.
- * This file is a GNU parallel extension to the Standard C++ Library.
- */
-
-// Written by Johannes Singler.
-
-#ifndef _GLIBCXX_PARALLEL_TIMING_H
-#define _GLIBCXX_PARALLEL_TIMING_H 1
-
-#include <omp.h>
-#include <cstdio>
-#include <cstring>
-#include <parallel/tags.h>
-
-namespace __gnu_parallel
-{
- // XXX integrate with existing performance testing infrastructure.
- /** @brief Type of of point in time, used for the Timing classes. */
- typedef double point_in_time;
-
- template<typename tag, typename must_be_int = int>
- class Timing;
-
- /** @brief A class that provides simple run time measurements, also
- for parallel code.
- * @param tag If parallel_tag, then the measurements are actually done.
- * Otherwise, no code at all is emitted by the compiler. */
- template<typename must_be_int>
- class Timing<parallel_tag, must_be_int>
- {
- private:
- static const int max_points_in_time = 100;
- point_in_time points_in_time[max_points_in_time];
- point_in_time active, last_start;
- int pos;
- char* str;
- const char* tags[max_points_in_time];
-
- public:
- Timing()
- {
- str = NULL;
- pos = 0;
- active = 0.0;
- last_start = -1.0;
- }
-
- ~Timing()
- {
- delete[] str;
- }
-
- /** @brief Take a running time measurement.
- * @param tag Optional description that will be output again with
- * the timings.
- * It should describe the operation before the tic(). To time a
- * series of @c n operations, there should be @c n+1 calls to
- * tic(), and one call to print(). */
- inline void
- tic(const char* tag = NULL)
- {
- points_in_time[pos] = omp_get_wtime();
- tags[pos] = tag;
- pos++;
- }
-
- /** @brief Start the running time measurement.
- *
- * Should be paired with stop(). */
- inline void
- start()
- {
- _GLIBCXX_PARALLEL_ASSERT(last_start == -1.0);
- last_start = omp_get_wtime();
- }
-
- /** @brief Stop the running time measurement.
- *
- * Should be paired with start(). */
- inline void
- stop()
- {
- _GLIBCXX_PARALLEL_ASSERT(last_start != -1.0);
- active += (omp_get_wtime() - last_start);
- last_start = -1.0;
- }
-
- /** @brief Reset running time accumulation. */
- inline void
- reset()
- {
- active = 0.0;
- last_start = -1.0;
- }
-
- /** @brief Accumulate the time between all pairs of start() and
- stop() so far */
- inline point_in_time
- active_time()
- { return active; }
-
- /** @brief Total time between first and last tic() */
- inline point_in_time
- total_time()
- { return (points_in_time[pos - 1] - points_in_time[0]) * 1000.0; }
-
- private:
- /** @brief Construct string to print out, presenting the timings. */
- const char*
- c_str()
- {
- // Avoid stream library here, to avoid cyclic dependencies in
- // header files.
- char tmp[1000];
-
- if (!str)
- str = new char[pos * 200];
- else
- str[0] = '\0';
-
- sprintf(str, "t %2d T[ms]", omp_get_thread_num());
- strcat(str, "\n");
-
- for (int i = 0; i < pos; )
- {
- point_in_time last = points_in_time[i];
- i++;
- if (i == pos)
- break;
- if (tags[i] == NULL)
- sprintf(tmp, "%2d: ", i - 1);
- else
- sprintf(tmp, "%20s: ", tags[i]);
- strcat(str, tmp);
-
- sprintf(tmp, "%7.2f ", (points_in_time[i] - last) * 1000.0);
- strcat(str, tmp);
- strcat(str, "\n");
- }
-
- return str;
- }
-
- public:
- /** @brief Print the running times between the tic()s. */
- void
- print()
- {
- printf("print\n");
-#pragma omp barrier
-#pragma omp master
- printf("\n\n");
-#pragma omp critical
- printf("%s\n", c_str());
- }
- };
-
- /** @brief A class that provides simple run time measurements, also
- for parallel code.
- * @param tag If parallel_tag, then the measurements are actually done,
- * otherwise, no code at all is emitted by the compiler.
- */
- template<typename must_be_int>
- class Timing<sequential_tag, must_be_int>
- {
- private:
- static const char* empty_string;
-
- public:
- inline void tic(const char* /*tag*/ = NULL) { }
- inline void start() { }
- inline void stop() { }
- inline void reset() { }
- inline point_in_time active_time() { return -1.0; }
- inline point_in_time total_time() { return -1.0; }
- inline const char* c_str() { return empty_string; }
- inline void print() { }
- };
-
- template<typename must_be_int>
- const char* Timing<sequential_tag, must_be_int>::empty_string = "";
-
-}
-
-#endif