This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Optimize in VRP loads from constant arrays (take 2)


On Fri, Apr 21, 2017 at 04:42:03PM +0200, Richard Biener wrote:
> On April 21, 2017 4:06:59 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
> >This patch attempts to implement the optimization mentioned in
> >http://gcc.gnu.org/ml/gcc-patches/2017-02/msg00945.html
> >If we know a (relatively small) value range for ARRAY_REF index
> >in constant array load and the values in the array for all the indexes
> >in the array are the same (and constant), we can just replace the load
> >with the constant.
> 
> But this should be done during propagation (and thus can even produce a range rather than just a constant).

True, I've changed the implementation to do that.
Except that it is only useful during propagation for integral or pointer
types, for anything else (and for pointers when not just doing NULL vs.
non-NULL vr) it is also performed during simplification (in that case
it requires operand_equal_p values).

The patch also newly computes range for the index and range from the array
bounds (taking into account arrays at struct end) and intersects them,
plus takes into account that some iterators might have a couple of zero LBS
bits (as computed by ccp).

> Also much of the fold_const_aggregate_ref work can be shared for all indices.

Maybe.  Is that required for the patch, or can it be done incrementally?

> >Shall I introduce a param for the size of the range to consider?
> 
> I think so.  Eventually we can pre-compute/cache a "range tree" for a
> Constant initializer?

param introduced.

> That said I'm happy with moving it to propagation time and considering ranges
> And leave compile-time optimization for future work (with possibly increasing the number of elements to consider)

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Note, the patch doesn't handle the case when the constant load is aggregate,
where fold_const_aggregate_ref_1 returns a CONSTRUCTOR.  CONSTRUCTOR is not
gimple min invariant, and when not empty can't be in the IL anyway, so I was
thinking we could perhaps in that case just modify the rhs to use a constant
index (e.g. vr0.min) instead of the rhs with variable index.  It didn't work
because operand_equal_p doesn't handle non-empty CONSTRUCTORs (they compare
unequal even if they have the same elements).

2017-04-29  Jakub Jelinek  <jakub@redhat.com>

	* params.def (PARAM_MAX_VRP_CONSTANT_ARRAY_LOADS): New.
	* tree.c (array_at_struct_end_p): Return false if ref is STRING_CST.
	* tree-vrp.c: Include gimplify.h.
	(load_index): New variable.
	(vrp_load_valueize, extract_range_from_load): New functions.
	(extract_range_from_assignment, simplify_stmt_using_ranges): Use
	extract_range_from_load.
	(stmt_interesting_for_vrp): Return true for loads with handled
	component rhs.

	* gcc.dg/tree-ssa/vrp113.c: New test.

--- gcc/params.def.jj	2017-03-19 11:57:24.000000000 +0100
+++ gcc/params.def	2017-04-28 13:24:04.670084868 +0200
@@ -1275,6 +1275,12 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTION
 	  "edge of a switch statement during VRP",
 	  10, 0, 0)
 
+DEFPARAM (PARAM_MAX_VRP_CONSTANT_ARRAY_LOADS,
+	  "max-vrp-constant-array-loads",
+	  "Maximum number of constant array load indexes to check for "
+	  "VRP optimizations",
+	  256, 0, 0)
+
 DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
 	  "vect-epilogues-nomask",
 	  "Enable loop epilogue vectorization using smaller vector size.",
--- gcc/tree.c.jj	2017-04-27 15:41:53.000000000 +0200
+++ gcc/tree.c	2017-04-28 15:26:18.005275533 +0200
@@ -13271,6 +13271,10 @@ array_at_struct_end_p (tree ref, bool al
       && !(flag_unconstrained_commons
 	   && VAR_P (ref) && DECL_COMMON (ref)))
     return false;
+  /* Similarly for string literals, we are constrained by their given
+     domain.  */
+  else if (TREE_CODE (ref) == STRING_CST)
+    return false;
 
   return true;
 }
--- gcc/tree-vrp.c.jj	2017-04-25 18:35:35.606504460 +0200
+++ gcc/tree-vrp.c	2017-04-28 17:12:29.310298865 +0200
@@ -62,6 +62,7 @@ along with GCC; see the file COPYING3.
 #include "alloc-pool.h"
 #include "domwalk.h"
 #include "tree-cfgcleanup.h"
+#include "gimplify.h"
 
 #define VR_INITIALIZER { VR_UNDEFINED, NULL_TREE, NULL_TREE, NULL }
 
@@ -3779,6 +3780,211 @@ extract_range_from_comparison (value_ran
     set_value_range_to_truthvalue (vr, type);
 }
 
+/* Variables used to communicate index and its current value
+   between extract_range_from_load and its valueize function.  */
+static tree load_index[2];
+
+/* Valueize callback for extract_range_from_load.  */
+
+static tree
+vrp_load_valueize (tree name)
+{
+  if (name == load_index[0])
+    return load_index[1];
+  return name;
+}
+
+/* Extract a range from a constant load or simplify it to a constant,
+   if there is a single ARRAY_REF among handled components, we have
+   a narrow range for the index or the array bounds imply a narrow
+   range and constant loads with all the indexes in the range yield
+   the same constant or a range can be derived from them.
+   RHS is the gimple_assign_rhs1 of the load.
+   If VR is non-NULL, the function extracts a value range from the
+   constant load values.
+   If VR is NULL, all constants must be the same and we return that
+   constant.  */
+
+static tree
+extract_range_from_load (value_range *vr, tree rhs)
+{
+  tree t, *p = NULL;
+  tree index = NULL_TREE;
+  value_range vr0 = VR_INITIALIZER;
+  int step = 1;
+  if (vr)
+    set_value_range_to_varying (vr);
+  for (t = rhs; handled_component_p (t); t = TREE_OPERAND (t, 0))
+    switch (TREE_CODE (t))
+      {
+      case ARRAY_REF:
+	if (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST)
+	  break;
+	if (TREE_CODE (TREE_OPERAND (t, 1)) == SSA_NAME)
+	  {
+	    if (index)
+	      return NULL_TREE;
+	    index = TREE_OPERAND (t, 1);
+	    if (!INTEGRAL_TYPE_P (TREE_TYPE (index)))
+	      return NULL_TREE;
+	    tree lb = array_ref_low_bound (t);
+	    if (lb == NULL_TREE || TREE_CODE (lb) != INTEGER_CST)
+	      return NULL_TREE;
+	    value_range vr1 = VR_INITIALIZER;
+	    extract_range_from_unary_expr (&vr0, NOP_EXPR, TREE_TYPE (lb),
+					   index);
+	    tree ub = NULL_TREE;
+	    if (!array_at_struct_end_p (t))
+	      {
+		ub = array_ref_up_bound (t);
+		if (ub == NULL_TREE
+		    || TREE_CODE (ub) != INTEGER_CST
+		    || tree_int_cst_lt (ub, lb))
+		  ub = NULL_TREE;
+	      }
+	    set_value_range (&vr1, VR_RANGE, lb,
+			     ub ? ub : vrp_val_max (TREE_TYPE (lb)), NULL);
+	    vrp_intersect_ranges (&vr0, &vr1);
+	    if (vr0.type != VR_RANGE)
+	      return NULL_TREE;
+	    step = wi::ffs (get_nonzero_bits (index));
+	    if (step <= 1)
+	      step = 1;
+	    else
+	      {
+		step = MIN (step - 1, 30);
+		step = MIN (step, TYPE_PRECISION (TREE_TYPE (index))
+				  + TYPE_UNSIGNED (TREE_TYPE (index)) - 2);
+		wide_int w = vr0.min;
+		w += wi::mask (step, false, w.get_precision ());
+		w &= wi::mask (step, true, w.get_precision ());
+		if (wi::gt_p (w, vr0.max, TYPE_SIGN (TREE_TYPE (vr0.min)))
+		    || wi::lt_p (w, vr0.min, TYPE_SIGN (TREE_TYPE (vr0.min))))
+		  return NULL_TREE;
+		vr0.min = wide_int_to_tree (TREE_TYPE (vr0.min), w);
+		step = 1 << step;
+	      }
+	    wide_int diff = vr0.max;
+	    diff -= vr0.min;
+	    diff = wi::udiv_trunc (diff, step);
+
+	    /* As we have to check every index in the range, avoid
+	       doing it too many times.  */
+	    if (!wi::ltu_p (diff,
+			    PARAM_VALUE (PARAM_MAX_VRP_CONSTANT_ARRAY_LOADS)))
+	      return NULL_TREE;
+	    if (tree_int_cst_lt (vr0.min, vrp_val_min (TREE_TYPE (index)))
+		|| tree_int_cst_lt (vrp_val_max (TREE_TYPE (index)), vr0.max))
+	      return NULL_TREE;
+	  }
+	break;
+      case ARRAY_RANGE_REF:
+	return NULL_TREE;
+      default:
+	break;
+      }
+  if (index == NULL_TREE)
+    return NULL_TREE;
+  load_index[0] = index;
+  load_index[1] = vr0.min;
+  tree c = fold_const_aggregate_ref_1 (rhs, vrp_load_valueize);
+  /* fold_const_aggregate_ref_1 can unfortunately only valueize a very
+     small subset of expressions so far.  */
+  if (c == NULL_TREE)
+    {
+      rhs = unshare_expr (rhs);
+      for (t = rhs;
+	   TREE_CODE (t) != ARRAY_REF || TREE_OPERAND (t, 1) != index;
+	   t = TREE_OPERAND (t, 0))
+	;
+      p = &TREE_OPERAND (t, 1);
+      *p = load_index[1];
+      c = fold_const_aggregate_ref_1 (rhs, vrp_load_valueize);
+    }
+  if (c == NULL_TREE || !is_gimple_min_invariant (c))
+    return NULL_TREE;
+  if (vr)
+    {
+      if (INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
+	{
+	  if (TREE_CODE (c) != INTEGER_CST)
+	    return NULL_TREE;
+	  set_value_range_to_value (vr, c, NULL);
+	}
+      else if (POINTER_TYPE_P (TREE_TYPE (rhs)))
+	{
+	  bool strict_overflow_p;
+	  if (integer_zerop (c))
+	    set_value_range_to_null (vr, TREE_TYPE (rhs));
+	  else if (tree_single_nonzero_warnv_p (c, &strict_overflow_p))
+	    set_value_range_to_nonnull (vr, TREE_TYPE (rhs));
+	  else
+	    return NULL_TREE;
+	}
+      else
+	return NULL_TREE;
+    }
+  wide_int w = vr0.min;
+  while (1)
+    {
+      w += step;
+      if (wi::gt_p (w, vr0.max, TYPE_SIGN (TREE_TYPE (vr0.min)))
+	  || wi::le_p (w, vr0.min, TYPE_SIGN (TREE_TYPE (vr0.min))))
+	break;
+      load_index[1] = wide_int_to_tree (TREE_TYPE (vr0.min), w);
+      if (p)
+	*p = load_index[1];
+      tree c2 = fold_const_aggregate_ref_1 (rhs, vrp_load_valueize);
+      if (c2 == NULL_TREE || !is_gimple_min_invariant (c2))
+	{
+	  c = NULL_TREE;
+	  break;
+	}
+      if (vr && INTEGRAL_TYPE_P (TREE_TYPE (rhs)))
+	{
+	  if (TREE_CODE (c2) != INTEGER_CST)
+	    {
+	      c = NULL_TREE;
+	      break;
+	    }
+	  if (tree_int_cst_lt (c2, vr->min))
+	    {
+	      if (TREE_OVERFLOW_P (c2))
+		c2 = drop_tree_overflow (c2);
+	      vr->min = c2;
+	    }
+	  else if (tree_int_cst_lt (vr->max, c2))
+	    {
+	      if (TREE_OVERFLOW_P (c2))
+		c2 = drop_tree_overflow (c2);
+	      vr->max = c2;
+	    }
+	}
+      else if (vr && integer_zerop (c))
+	{
+	  if (!integer_zerop (c2))
+	    {
+	      c = NULL_TREE;
+	      break;
+	    }
+	}
+      else if (vr)
+	{
+	  bool strict_overflow_p;
+	  if (!tree_single_nonzero_warnv_p (c2, &strict_overflow_p))
+	    {
+	      c = NULL_TREE;
+	      break;
+	    }
+	}
+      else if (!operand_equal_p (c, c2, 0))
+	return NULL_TREE;
+    }
+  if (c == NULL_TREE && vr)
+    set_value_range_to_varying (vr);
+  return c;
+}
+
 /* Helper function for simplify_internal_call_using_ranges and
    extract_range_basic.  Return true if OP0 SUBCODE OP1 for
    SUBCODE {PLUS,MINUS,MULT}_EXPR is known to never overflow or
@@ -4280,6 +4486,9 @@ extract_range_from_assignment (value_ran
   else if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
 	   && is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
     set_value_range_to_value (vr, gimple_assign_rhs1 (stmt), NULL);
+  else if (gimple_assign_single_p (stmt)
+	   && handled_component_p (gimple_assign_rhs1 (stmt)))
+    extract_range_from_load (vr, gimple_assign_rhs1 (stmt));
   else
     set_value_range_to_varying (vr);
 
@@ -7339,12 +7548,14 @@ stmt_interesting_for_vrp (gimple *stmt)
 
       /* In general, assignments with virtual operands are not useful
 	 for deriving ranges, with the obvious exception of calls to
-	 builtin functions.  */
+	 builtin functions and for handled_component_p loads.  */
       if (lhs && TREE_CODE (lhs) == SSA_NAME
 	  && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
 	      || POINTER_TYPE_P (TREE_TYPE (lhs)))
-	  && (is_gimple_call (stmt)
-	      || !gimple_vuse (stmt)))
+	  && (!gimple_vuse (stmt)
+	      || is_gimple_call (stmt)
+	      || (gimple_assign_single_p (stmt)
+		  && handled_component_p (gimple_assign_rhs1 (stmt)))))
 	return true;
       else if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
 	switch (gimple_call_internal_fn (stmt))
@@ -10742,6 +10953,23 @@ simplify_stmt_using_ranges (gimple_stmt_
 	  return simplify_min_or_max_using_ranges (gsi, stmt);
 
 	default:
+	  if (gimple_assign_single_p (stmt)
+	      && handled_component_p (rhs1)
+	      && !INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
+	    {
+	      /* For integral loads this is handled by computing
+		 value ranges and if they are singleton, replacing.
+		 For pointers we only compute NULL or non-NULL ranges,
+		 and can here actually simplify to a particular
+		 ADDR_EXPR if identical everywhere.  For other types
+		 this is the only possibility to optimize.  */
+	      tree c = extract_range_from_load (NULL, rhs1);
+	      if (c)
+		{
+		  gimple_assign_set_rhs_from_tree (gsi, c);
+		  return true;
+		}
+	    }
 	  break;
 	}
     }
--- gcc/testsuite/gcc.dg/tree-ssa/vrp113.c.jj	2017-04-27 16:34:42.170486063 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/vrp113.c	2017-04-28 17:23:43.431690269 +0200
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp -fdump-tree-vrp1" } */
+
+#define NULL (void *) 0
+struct S { int a, b; };
+const struct S var1[16] = { { 1, 2 }, { 1, 2 }, { 1, 2 },
+			    { 1, 2 }, { 2, 3 }, { 4, 5 } };
+const float var2[32] = { 7.0f, -64.0f, 12.0f, 24.0f,
+			 7.0f, -65.0f, 13.0f, 25.0f,
+			 7.0f, -66.0f, 14.0f, 26.0f,
+			 7.0f, -67.0f, 15.0f, 27.0f,
+			 7.0f, -66.0f, 14.0f, 26.0f,
+			 7.0f, -67.0f, 15.0f, 27.0f,
+			 7.0f, -68.0f, 16.0f, 28.0f,
+			 7.0f, -69.0f, 17.0f, 27.0f };
+const signed char var3[] = { -12, 8, 0, 9, 21, 2, 22, 3, 20, 4, 21, 5,
+			     20, 6, 21, 7, 22, 8, 21, 9, 10, 11, 12, 13 };
+const char str[] = "abc";
+const char *const var4[] = { str, str, str, NULL, NULL, NULL };
+void link_error (void);
+
+int
+f1 (int x)
+{
+  return var1[x & 3].a + var1[(x & 1) + 2].b + "abcd\4\4\4\4\4\4\4\4efgh"[(x & 7) + 4];
+}
+
+float
+f2 (int x)
+{
+  return var2[x & -4];
+}
+
+void
+f3 (int x)
+{
+  if (x >= 2 && x <= 9)
+    {
+      if (var3[x * 2] < 20 || var3[x * 2] > 22)
+	link_error ();
+    }
+  if (x > 0)
+    {
+      if (var3[x] < 0 || var3[x] > 22)
+	link_error ();
+    }
+  if (var3[x] < -12)
+    link_error ();
+  if (x < 3)
+    {
+      if (var4[x] == NULL)
+	link_error ();
+    }
+  else
+    {
+      if (var4[x] != NULL)
+	link_error ();
+    }
+}
+
+const char *
+f4 (int x)
+{
+  if (x >= 3)
+    return "def";
+  return var4[x];
+}
+
+/* { dg-final { scan-tree-dump "return 7;" "evrp" } } */
+/* { dg-final { scan-tree-dump "(_\[0-9]* =|return) 7\.0e\\+0;" "vrp1" } } */
+/* { dg-final { scan-tree-dump-not "link_error" "vrp1" } } */
+/* { dg-final { scan-tree-dump-not "var4" "vrp1" } } */
+/* { dg-final { scan-tree-dump "&str" "vrp1" } } */
+


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]