This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Improve gimplification of constructors with RANGE_EXPRs (PR c++/82294, PR c++/87436)


On Fri, Dec 14, 2018 at 10:40:19AM +0100, Richard Biener wrote:
> This looks OK to me - the only comment I have is on the two magic
> constants (64 and 8) which are used twice in the patch.  Can you
> either see to hoist the common condition into sth like
> 
>  bool prefer_loop_initializer_p = ...
> 
> or add #defines for those constants?  I suppose the hoisting
> might be tricky as int_size_in_bytes can return -1 and the
> workarounds are different in both places right now (maybe that's
> a bug as well...).  OTOH using (unsigned)int_size_in_bytes
> looks reasonable in general.

So like this?
Still need to wait for the FE patch if I want to commit the testcases, those
depend on both patches.
I've added size32plus effective target to the larger test, as 384MB is too
much for 16 or 20 bit address targets.
And, I'll gather statistics on how often this makes a difference during
gimplification during my next bootstraps/regtests.

2018-12-14  Jakub Jelinek  <jakub@redhat.com>

	PR c++/82294
	PR c++/87436
	* expr.h (categorize_ctor_elements): Add p_unique_nz_elts argument.
	* expr.c (categorize_ctor_elements_1): Likewise.  Compute it like
	p_nz_elts, except don't multiply it by mult.  Adjust recursive call.
	Fix up COMPLEX_CST handling.
	(categorize_ctor_elements): Add p_unique_nz_elts argument, initialize
	it and pass it through to categorize_ctor_elements_1.
	(mostly_zeros_p, all_zeros_p): Adjust categorize_ctor_elements callers.
	* gimplify.c (gimplify_init_constructor): Likewise.  Don't force
	ctor into readonly data section if num_unique_nonzero_elements is
	smaller or equal to 1/8 of num_nonzero_elements and size is >= 64
	bytes.

	* g++.dg/tree-ssa/pr82294.C: New test.
	* g++.dg/tree-ssa/pr87436.C: New test.

--- gcc/expr.h.jj	2018-12-13 18:00:10.527301479 +0100
+++ gcc/expr.h	2018-12-14 11:52:31.941071185 +0100
@@ -309,7 +309,8 @@ extern bool can_move_by_pieces (unsigned
 extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
-				      HOST_WIDE_INT *, bool *);
+				      HOST_WIDE_INT *, HOST_WIDE_INT *,
+				      bool *);
 
 extern void expand_operands (tree, tree, rtx, rtx*, rtx*,
 			     enum expand_modifier);
--- gcc/expr.c.jj	2018-12-13 18:00:10.426303121 +0100
+++ gcc/expr.c	2018-12-14 11:52:31.945071118 +0100
@@ -5945,10 +5945,11 @@ count_type_elements (const_tree type, bo
 
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
+			    HOST_WIDE_INT *p_unique_nz_elts,
 			    HOST_WIDE_INT *p_init_elts, bool *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
-  HOST_WIDE_INT nz_elts, init_elts, num_fields;
+  HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
   tree value, purpose, elt_type;
 
   /* Whether CTOR is a valid constant initializer, in accordance with what
@@ -5958,6 +5959,7 @@ categorize_ctor_elements_1 (const_tree c
   bool const_p = const_from_elts_p ? true : TREE_STATIC (ctor);
 
   nz_elts = 0;
+  unique_nz_elts = 0;
   init_elts = 0;
   num_fields = 0;
   elt_type = NULL_TREE;
@@ -5982,12 +5984,13 @@ categorize_ctor_elements_1 (const_tree c
 	{
 	case CONSTRUCTOR:
 	  {
-	    HOST_WIDE_INT nz = 0, ic = 0;
+	    HOST_WIDE_INT nz = 0, unz = 0, ic = 0;
 
-	    bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &ic,
-							   p_complete);
+	    bool const_elt_p = categorize_ctor_elements_1 (value, &nz, &unz,
+							   &ic, p_complete);
 
 	    nz_elts += mult * nz;
+	    unique_nz_elts += unz;
  	    init_elts += mult * ic;
 
 	    if (const_from_elts_p && const_p)
@@ -5999,21 +6002,31 @@ categorize_ctor_elements_1 (const_tree c
 	case REAL_CST:
 	case FIXED_CST:
 	  if (!initializer_zerop (value))
-	    nz_elts += mult;
+	    {
+	      nz_elts += mult;
+	      unique_nz_elts++;
+	    }
 	  init_elts += mult;
 	  break;
 
 	case STRING_CST:
 	  nz_elts += mult * TREE_STRING_LENGTH (value);
+	  unique_nz_elts += TREE_STRING_LENGTH (value);
 	  init_elts += mult * TREE_STRING_LENGTH (value);
 	  break;
 
 	case COMPLEX_CST:
 	  if (!initializer_zerop (TREE_REALPART (value)))
-	    nz_elts += mult;
+	    {
+	      nz_elts += mult;
+	      unique_nz_elts++;
+	    }
 	  if (!initializer_zerop (TREE_IMAGPART (value)))
-	    nz_elts += mult;
-	  init_elts += mult;
+	    {
+	      nz_elts += mult;
+	      unique_nz_elts++;
+	    }
+	  init_elts += 2 * mult;
 	  break;
 
 	case VECTOR_CST:
@@ -6025,7 +6038,10 @@ categorize_ctor_elements_1 (const_tree c
 	      {
 		tree v = VECTOR_CST_ELT (value, i);
 		if (!initializer_zerop (v))
-		  nz_elts += mult;
+		  {
+		    nz_elts += mult;
+		    unique_nz_elts++;
+		  }
 		init_elts += mult;
 	      }
 	  }
@@ -6035,6 +6051,7 @@ categorize_ctor_elements_1 (const_tree c
 	  {
 	    HOST_WIDE_INT tc = count_type_elements (elt_type, false);
 	    nz_elts += mult * tc;
+	    unique_nz_elts += tc;
 	    init_elts += mult * tc;
 
 	    if (const_from_elts_p && const_p)
@@ -6054,6 +6071,7 @@ categorize_ctor_elements_1 (const_tree c
     *p_complete = false;
 
   *p_nz_elts += nz_elts;
+  *p_unique_nz_elts += unique_nz_elts;
   *p_init_elts += init_elts;
 
   return const_p;
@@ -6062,6 +6080,11 @@ categorize_ctor_elements_1 (const_tree c
 /* Examine CTOR to discover:
    * how many scalar fields are set to nonzero values,
      and place it in *P_NZ_ELTS;
+   * the same, but counting RANGE_EXPRs as multiplier of 1 instead of
+     high - low + 1 (this can be useful for callers to determine ctors
+     that could be cheaply initialized with - perhaps nested - loops
+     compared to copied from huge read-only data),
+     and place it in *P_UNIQUE_NZ_ELTS;
    * how many scalar fields in total are in CTOR,
      and place it in *P_ELT_COUNT.
    * whether the constructor is complete -- in the sense that every
@@ -6073,13 +6096,16 @@ categorize_ctor_elements_1 (const_tree c
 
 bool
 categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
+			  HOST_WIDE_INT *p_unique_nz_elts,
 			  HOST_WIDE_INT *p_init_elts, bool *p_complete)
 {
   *p_nz_elts = 0;
+  *p_unique_nz_elts = 0;
   *p_init_elts = 0;
   *p_complete = true;
 
-  return categorize_ctor_elements_1 (ctor, p_nz_elts, p_init_elts, p_complete);
+  return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
+				     p_init_elts, p_complete);
 }
 
 /* TYPE is initialized by a constructor with NUM_ELTS elements, the last
@@ -6110,17 +6136,18 @@ complete_ctor_at_level_p (const_tree typ
   return count_type_elements (type, true) == num_elts;
 }
 
-/* Return 1 if EXP contains mostly (3/4)  zeros.  */
+/* Return 1 if EXP contains mostly (3/4) zeros.  */
 
 static int
 mostly_zeros_p (const_tree exp)
 {
   if (TREE_CODE (exp) == CONSTRUCTOR)
     {
-      HOST_WIDE_INT nz_elts, init_elts;
+      HOST_WIDE_INT nz_elts, unz_elts, init_elts;
       bool complete_p;
 
-      categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
+      categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
+				&complete_p);
       return !complete_p || nz_elts < init_elts / 4;
     }
 
@@ -6134,10 +6161,11 @@ all_zeros_p (const_tree exp)
 {
   if (TREE_CODE (exp) == CONSTRUCTOR)
     {
-      HOST_WIDE_INT nz_elts, init_elts;
+      HOST_WIDE_INT nz_elts, unz_elts, init_elts;
       bool complete_p;
 
-      categorize_ctor_elements (exp, &nz_elts, &init_elts, &complete_p);
+      categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
+				&complete_p);
       return nz_elts == 0;
     }
 
--- gcc/gimplify.c.jj	2018-12-13 18:00:10.597300341 +0100
+++ gcc/gimplify.c	2018-12-14 12:01:17.491402703 +0100
@@ -4778,7 +4778,15 @@ gimplify_init_constructor (tree *expr_p,
       {
 	struct gimplify_init_ctor_preeval_data preeval_data;
 	HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
+	HOST_WIDE_INT num_unique_nonzero_elements;
 	bool cleared, complete_p, valid_const_initializer;
+	/* Use readonly data for initializers of this or smaller size
+	   regardless of the num_nonzero_elements / num_unique_nonzero_elements
+	   ratio.  */
+	const HOST_WIDE_INT min_unique_size = 64;
+	/* If num_nonzero_elements / num_unique_nonzero_elements ratio
+	   is smaller than this, use readonly data.  */
+	const int unique_nonzero_ratio = 8;
 
 	/* Aggregate types must lower constructors to initialization of
 	   individual elements.  The exception is that a CONSTRUCTOR node
@@ -4795,6 +4803,7 @@ gimplify_init_constructor (tree *expr_p,
 	   can only do so if it known to be a valid constant initializer.  */
 	valid_const_initializer
 	  = categorize_ctor_elements (ctor, &num_nonzero_elements,
+				      &num_unique_nonzero_elements,
 				      &num_ctor_elements, &complete_p);
 
 	/* If a const aggregate variable is being initialized, then it
@@ -4803,7 +4812,15 @@ gimplify_init_constructor (tree *expr_p,
 	    && num_nonzero_elements > 1
 	    && TREE_READONLY (object)
 	    && VAR_P (object)
-	    && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object)))
+	    && (flag_merge_constants >= 2 || !TREE_ADDRESSABLE (object))
+	    /* For ctors that have many repeated nonzero elements
+	       represented through RANGE_EXPRs, prefer initializing
+	       those through runtime loops over copies of large amounts
+	       of data from readonly data section.  */
+	    && (num_unique_nonzero_elements
+		> num_nonzero_elements / unique_nonzero_ratio
+		|| ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)
+		    <= (unsigned HOST_WIDE_INT) min_unique_size)))
 	  {
 	    if (notify_temp_creation)
 	      return GS_ERROR;
@@ -4896,6 +4913,13 @@ gimplify_init_constructor (tree *expr_p,
 	       is so large as to make individual moves inefficient.  */
 	    if (size > 0
 		&& num_nonzero_elements > 1
+		/* For ctors that have many repeated nonzero elements
+		   represented through RANGE_EXPRs, prefer initializing
+		   those through runtime loops over copies of large amounts
+		   of data from readonly data section.  */
+		&& (num_unique_nonzero_elements
+		    > num_nonzero_elements / unique_nonzero_ratio
+		    || size < min_unique_size)
 		&& (size < num_nonzero_elements
 		    || !can_move_by_pieces (size, align)))
 	      {
--- gcc/testsuite/g++.dg/tree-ssa/pr82294.C.jj	2018-12-14 11:52:31.949071052 +0100
+++ gcc/testsuite/g++.dg/tree-ssa/pr82294.C	2018-12-14 11:52:31.949071052 +0100
@@ -0,0 +1,13 @@
+// PR c++/82294
+// { dg-do compile { target c++11 } }
+// { dg-options "-O2 -fdump-tree-gimple" }
+
+// Verify we don't "optimize" the ctor as copying a 1KB .rodata
+// object into the variable.  It is better to initialize it through
+// a loop.
+// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } }
+
+struct S { int x; explicit constexpr S (); };
+constexpr S::S () : x{7} {}
+struct T { S arr[256]; explicit T (); };
+T::T () {}
--- gcc/testsuite/g++.dg/tree-ssa/pr87436.C.jj	2018-12-14 11:52:31.949071052 +0100
+++ gcc/testsuite/g++.dg/tree-ssa/pr87436.C	2018-12-14 11:52:31.949071052 +0100
@@ -0,0 +1,25 @@
+// PR c++/87436
+// { dg-do compile { target { c++11 && size32plus } } }
+// { dg-options "-O2 -fdump-tree-gimple" }
+
+// Verify we don't "optimize" the ctor as copying a 384MB .rodata
+// object into the variable.  It is better to initialize it through
+// two nested loops.
+// { dg-final { scan-tree-dump-not "this->arr = " "gimple" } }
+
+struct S {
+  int a = -1;
+  short b = 3;
+  int x = 0;
+  int y = 1;
+  int z = 42;
+  float f = 0.123f;
+};
+
+struct T { S arr[4096][4096]; };
+
+T *
+foo ()
+{
+  return new T;
+}


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]