This is the mail archive of the libstdc++@gcc.gnu.org mailing list for the libstdc++ project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

optimize std::vector move assignment


Hello,

I talked about this last year (https://gcc.gnu.org/ml/gcc-patches/2017-04/msg01063.html and thread), this tweaks std::vector move assignment to help gcc generate better code for it.

For this code

#include <vector>
#include <utility>
typedef std::vector<int> V;
void f(V&a,V&b){a=std::move(b);}

with -O2 -fdump-tree-optimized on powerpc64le-unknown-linux-gnu, we currently have

  _5 = MEM[(int * &)a_3(D)];
  MEM[(int * &)a_3(D)] = 0B;
  MEM[(int * &)a_3(D) + 8] = 0B;
  MEM[(int * &)a_3(D) + 16] = 0B;
  _6 = MEM[(int * &)b_2(D)];
  MEM[(int * &)a_3(D)] = _6;
  MEM[(int * &)b_2(D)] = 0B;
  _7 = MEM[(int * &)a_3(D) + 8];
  _8 = MEM[(int * &)b_2(D) + 8];
  MEM[(int * &)a_3(D) + 8] = _8;
  MEM[(int * &)b_2(D) + 8] = _7;
  _9 = MEM[(int * &)a_3(D) + 16];
  _10 = MEM[(int * &)b_2(D) + 16];
  MEM[(int * &)a_3(D) + 16] = _10;
  MEM[(int * &)b_2(D) + 16] = _9;
  if (_5 != 0B)

with the patch, we go down to

  _3 = MEM[(const struct _Vector_impl_data &)a_4(D)]._M_start;
  _5 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_start;
  MEM[(struct _Vector_impl_data *)a_4(D)]._M_start = _5;
  _6 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_finish;
  MEM[(struct _Vector_impl_data *)a_4(D)]._M_finish = _6;
  _7 = MEM[(const struct _Vector_impl_data &)b_2(D)]._M_end_of_storage;
  MEM[(struct _Vector_impl_data *)a_4(D)]._M_end_of_storage = _7;
  MEM[(struct _Vector_impl_data *)b_2(D)]._M_start = 0B;
  MEM[(struct _Vector_impl_data *)b_2(D)]._M_finish = 0B;
  MEM[(struct _Vector_impl_data *)b_2(D)]._M_end_of_storage = 0B;
  if (_3 != 0B)

removing 2 reads and 3 writes. At -O3 we also get more vectorization.

The main idea is to give the compiler more type information. Currently, the only type information the compiler cares about is the type used for a memory read/write. Using std::swap as before, the reads/writes are done on int&. Doing it directly, they are done on _Vector_impl_data::_M_start, a more precise information. Maybe some day the compiler will get stricter and be able to deduce the same information, but not yet.

The second point is to rotate { new, old, tmp } in an order that's simpler for the compiler. I was going to remove the swaps and use _M_copy_data directly, but since doing the swaps in a different order works...

_M_copy_data is not really needed, we could add a defaulted assignment operator, or remove the move constructor (and call a new _M_clear() from the 2 users), etc. However, it seemed the least intrusive, the least likely to have weird consequences.

I didn't add a testcase because I don't see any dg-final scan-tree-dump in the libstdc++ testsuite. The closest would be g++.dg/pr83239.C, g++.dg/vect/pr64410.cc, g++.dg/vect/pr33426-ivdep-4.cc that include <vector>, but from previous experience (already with vector), adding libstdc++ testcases to the C++ testsuite is not very popular.

Bootstrap+regtest on powerpc64le-unknown-linux-gnu.

2018-07-25  Marc Glisse  <marc.glisse@inria.fr>

	* include/bits/stl_vector.h (_Vector_impl_data::_M_copy_data): New.
	(_Vector_impl_data::_M_swap_data): Use _M_copy_data.
	(vector::_M_move_assign): Reorder the swaps.

--
Marc Glisse
Index: libstdc++-v3/include/bits/stl_vector.h
===================================================================
--- libstdc++-v3/include/bits/stl_vector.h	(revision 262948)
+++ libstdc++-v3/include/bits/stl_vector.h	(working copy)
@@ -96,25 +96,36 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 	{ }
 
 #if __cplusplus >= 201103L
 	_Vector_impl_data(_Vector_impl_data&& __x) noexcept
 	: _M_start(__x._M_start), _M_finish(__x._M_finish),
 	  _M_end_of_storage(__x._M_end_of_storage)
 	{ __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); }
 #endif
 
 	void
+	_M_copy_data(_Vector_impl_data const& __x) _GLIBCXX_NOEXCEPT
+	{
+	  _M_start = __x._M_start;
+	  _M_finish = __x._M_finish;
+	  _M_end_of_storage = __x._M_end_of_storage;
+	}
+
+	void
 	_M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT
 	{
-	  std::swap(_M_start, __x._M_start);
-	  std::swap(_M_finish, __x._M_finish);
-	  std::swap(_M_end_of_storage, __x._M_end_of_storage);
+	  // Do not use std::swap(_M_start, __x._M_start), etc as it looses
+	  // information used by TBAA.
+	  _Vector_impl_data __tmp;
+	  __tmp._M_copy_data(*this);
+	  _M_copy_data(__x);
+	  __x._M_copy_data(__tmp);
 	}
       };
 
       struct _Vector_impl
 	: public _Tp_alloc_type, public _Vector_impl_data
       {
 	_Vector_impl() _GLIBCXX_NOEXCEPT_IF( noexcept(_Tp_alloc_type()) )
 	: _Tp_alloc_type()
 	{ }
 
@@ -1724,22 +1735,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
 #if __cplusplus >= 201103L
     private:
       // Constant-time move assignment when source object's memory can be
       // moved, either because the source's allocator will move too
       // or because the allocators are equal.
       void
       _M_move_assign(vector&& __x, true_type) noexcept
       {
 	vector __tmp(get_allocator());
-	this->_M_impl._M_swap_data(__tmp._M_impl);
 	this->_M_impl._M_swap_data(__x._M_impl);
+	__tmp._M_impl._M_swap_data(__x._M_impl);
 	std::__alloc_on_move(_M_get_Tp_allocator(), __x._M_get_Tp_allocator());
       }
 
       // Do move assignment when it might not be possible to move source
       // object's memory, resulting in a linear-time operation.
       void
       _M_move_assign(vector&& __x, false_type)
       {
 	if (__x._M_get_Tp_allocator() == this->_M_get_Tp_allocator())
 	  _M_move_assign(std::move(__x), true_type());

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]