This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[C++ PATCH] Emit array initialization from ctor as loop if possible (PR c++/59659)


Hi!

The following patch attempts to improve both compile time and generated
code for array initializers if there are many consecutive same elements.

Without the patch even T10000 instead of T100000 takes a minute to compile
and generates hundreds of kilobytes large routine.

Without the patch, we generate (from gimple dump):
              A::A (D.2035, 3);
              D.2035 = D.2035 + 1;
              D.2036 = D.2036 + -1;
times 100000, with the patch we generate a simple loop that does the same
thing 100000 times.  Of course the compiler can then decide how many times
it wants to unroll it etc.

Bootstrapped/regtested on x86_64-linux and i686-linux.

Not sure if cp_tree_equal (elt, elt2) is the best check, perhaps
operand_equal_p instead, though wasn't sure if it wouldn't be upset about
C++ FE trees.  Or perhaps just limit it to the case where both elt and elt2
are same constants rather than arbitrary trees?

And, the patch hardcodes threshold 10 - 10 consecutive same ctor values are
initialized already with a loop, 9 still with straightforward code.  Wonder
if it is ok to use a constant, or do we want a param, or another constant?

2014-01-10  Jakub Jelinek  <jakub@redhat.com>

	PR c++/59659
	* init.c (build_vec_init): If there are 10+ elements with the
	same value in the CONSTRUCTOR, construct them using a runtime loop
	rather than one by one.

	* g++.dg/opt/pr59659.C: New test.

--- gcc/cp/init.c.jj	2014-01-09 19:09:42.086613999 +0100
+++ gcc/cp/init.c	2014-01-10 12:14:11.434068915 +0100
@@ -3565,6 +3565,9 @@ build_vec_init (tree base, tree maxindex
 	{
 	  tree baseref = build1 (INDIRECT_REF, type, base);
 	  tree one_init;
+	  tree value = NULL_TREE;
+	  tree for_stmt = NULL_TREE;
+	  unsigned int cnt = 0;
 
 	  num_initialized_elts++;
 
@@ -3589,7 +3592,7 @@ build_vec_init (tree base, tree maxindex
 	      e = maybe_constant_init (e);
 	      if (reduced_constant_expression_p (e))
 		{
-		  CONSTRUCTOR_APPEND_ELT (new_vec, field, e);
+		  value = e;
 		  if (do_static_init)
 		    one_init = NULL_TREE;
 		  else
@@ -3599,16 +3602,44 @@ build_vec_init (tree base, tree maxindex
 	      else
 		{
 		  if (do_static_init)
-		    {
-		      tree value = build_zero_init (TREE_TYPE (e), NULL_TREE,
-						    true);
-		      if (value)
-			CONSTRUCTOR_APPEND_ELT (new_vec, field, value);
-		    }
+		    value = build_zero_init (TREE_TYPE (e), NULL_TREE, true);
 		  saw_non_const = true;
 		}
 	    }
 
+	  cnt = 1;
+	  if (one_init)
+	    {
+	      unsigned len = CONSTRUCTOR_NELTS (init);
+
+	      /* See if we don't have 10 or more elts with the same value
+		 in a row.  In that case create a loop rather than straight
+		 line code.  */
+	      for (cnt = 1; idx + cnt < len; cnt++)
+		{
+		  tree elt2 = CONSTRUCTOR_ELT (init, idx + cnt)->value;
+		  if (!cp_tree_equal (elt, elt2)
+		      || !same_type_p (TREE_TYPE (elt), TREE_TYPE (elt2)))
+		    break;
+		}
+
+	      if (cnt < 10)
+		cnt = 1;
+	      else
+		{
+		  current_stmt_tree ()->stmts_are_full_exprs_p = 0;
+		  tree lim = build_int_cst (TREE_TYPE (iterator), cnt);
+		  lim = cp_build_binary_op (UNKNOWN_LOCATION, MINUS_EXPR,
+					    iterator, lim, complain);
+		  lim = get_temp_regvar (ptrdiff_type_node, lim);
+		  for_stmt = begin_for_stmt (NULL_TREE, NULL_TREE);
+		  finish_for_init_stmt (for_stmt);
+		  finish_for_cond (build2 (NE_EXPR, boolean_type_node,
+					   iterator, lim), for_stmt, false);
+		  current_stmt_tree ()->stmts_are_full_exprs_p = 1;
+		}
+	    }
+
 	  if (one_init)
 	    finish_expr_stmt (one_init);
 	  current_stmt_tree ()->stmts_are_full_exprs_p = 0;
@@ -3625,6 +3656,22 @@ build_vec_init (tree base, tree maxindex
 	    errors = true;
 	  else
 	    finish_expr_stmt (one_init);
+
+	  if (value)
+	    CONSTRUCTOR_APPEND_ELT (new_vec, field, value);
+
+	  if (cnt > 1)
+	    {
+	      if (value)
+		for (unsigned int i = 1; i < cnt; i++)
+		  CONSTRUCTOR_APPEND_ELT (new_vec,
+					  CONSTRUCTOR_ELT (init,
+							   idx + i)->index,
+					  value);
+	      num_initialized_elts += cnt - 1;
+	      idx += cnt - 1;
+	      finish_for_stmt (for_stmt);
+	    }
 	}
 
       if (try_const)
--- gcc/testsuite/g++.dg/opt/pr59659.C.jj	2014-01-10 12:24:55.633712582 +0100
+++ gcc/testsuite/g++.dg/opt/pr59659.C	2014-01-10 12:25:48.468432311 +0100
@@ -0,0 +1,60 @@
+// PR c++/59659
+// { dg-do run }
+// { dg-options "-O2" }
+
+struct A { A (); A (int); ~A (); };
+
+int cnt;
+
+__attribute__((noinline))
+A::A ()
+{
+  cnt++;
+}
+
+__attribute__((noinline))
+A::A (int)
+{
+  cnt++;
+}
+
+__attribute__((noinline))
+A::~A ()
+{
+  cnt--;
+}
+
+__attribute__((noinline, noclone)) void
+bar (A *a)
+{
+  __asm volatile ("" : : "r" (a) : "memory");
+  if (cnt != 102004)
+    __builtin_abort ();
+}
+
+#define T10(N) N, N, N, N, N, N, N, N, N, N
+#define T100(N) T10(N), T10(N), T10(N), T10(N), T10(N), \
+		T10(N), T10(N), T10(N), T10(N), T10(N)
+#define T1000(N) T100(N), T100(N), T100(N), T100(N), T100(N), \
+		 T100(N), T100(N), T100(N), T100(N), T100(N)
+#define T10000(N) T1000(N), T1000(N), T1000(N), T1000(N), T1000(N), \
+		  T1000(N), T1000(N), T1000(N), T1000(N), T1000(N)
+#define T100000(N) T10000(N), T10000(N), T10000(N), T10000(N), T10000(N), \
+		   T10000(N), T10000(N), T10000(N), T10000(N), T10000(N)
+
+__attribute__((noinline, noclone)) void
+foo ()
+{
+  A a[] = { 1, 2, T1000 (3), T100000 (4), T1000 (3), 2, 1 };
+  bar (a);
+}
+
+int
+main ()
+{
+  if (cnt != 0)
+    __builtin_abort ();
+  foo ();
+  if (cnt != 0)
+    __builtin_abort ();
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]