This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: ivopts improvement


On 03/14/2011 10:58 AM, Zdenek Dvorak wrote:
> Hi,
> 
>> diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c
>> --- gcc/tree-ssa-loop-ivopts.c	(working copy)
>> +++ gcc/tree-ssa-loop-ivopts.c	(working copy)
>> @@ -1194,6 +1300,7 @@ record_use (struct ivopts_data *data, tr
>>  	    gimple stmt, enum use_type use_type)
>>  {
>>    struct iv_use *use = XCNEW (struct iv_use);
>> +  tree tmp;
>>  
>>    use->id = n_iv_uses (data);
>>    use->type = use_type;
>> @@ -1204,11 +1311,14 @@ record_use (struct ivopts_data *data, tr
>>  
>>    /* To avoid showing ssa name in the dumps, if it was not reset by the
>>       caller.  */
>> +  tmp = iv->ssa_name;
>>    iv->ssa_name = NULL_TREE;
>>  
>>    if (dump_file && (dump_flags & TDF_DETAILS))
>>      dump_use (dump_file, use);
>>  
>> +  iv->ssa_name = tmp;
>> +
>>    VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
>>  
>>    return use;
> 
> this is not ok.  You can get the ssa name from extract_cond_operands.
> 

Ok, I did that now.

>> +  /* Determine if the exit test is formulated in terms of the phi or the
>> +     increment of the use iv.  */
>> +  use_uses_inced_iv
>> +    = gimple_code (SSA_NAME_DEF_STMT (use->iv->ssa_name)) != GIMPLE_PHI;
>> +
>> +  /* Determine if the exit test is before or after the increment of the
>> +     cand.  */
>> +  use_after_cand_inc
>> +    = stmt_after_increment (data->current_loop, cand, use->stmt);
>> +
>> +  /* For now, we only handle these cases.  */
>> +  if (use_after_cand_inc != use_uses_inced_iv)
>> +    return false;
> 
> what is this trying to achieve?  USE_USES_INCED_IV is pretty much meaningless --
> the value of USE does not depend in any way on whether it comes
> directly from
> a PHI node or from some other calculation.

it is trying to allow for

do
  {
    *p = '\0';
    i++; /* use_uses_inced_iv == true */
    p++; /* use_after_cand_inc == true */
    if (!(i < n))
      break;
  }
while (1);

and for

do
  {
    *p = '\0';
    if (!(i < n))
      break;
    i++; /* use_uses_inced_iv == false */
    p++; /* use_after_cand_inc == false */
  }
while (1);

but not for

do
  {
    *p = '\0';
    i++; /* use_uses_inced_iv == true */
    if (!(i < n))
      break;
    p++; /* use_after_cand_inc == false */
  }
while (1);

and not for

do
  {
    *p = '\0';
    p++; /* use_after_cand_inc == true */
    if (!(i < n))
      break;
    i++; /* use_uses_inced_iv == false */
  }
while (1);

In the latter 2 cases, I cannot simply replace i < n with p < base + n.

>> +  /* Calculate bound.  */
>> +  bound = fold_build_plus (PLUS_EXPR, base_ptr,
>> +			   fold_convert (sizetype, use_lt_bound));
>> +  if (bound == NULL_TREE)
>> +    return false;
> 
> How could the result be NULL_TREE?

It can't, indeed. I removed that now.

- Tom
diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c
--- gcc/tree-ssa-loop-ivopts.c	(working copy)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -419,6 +419,67 @@
   return fold_build2 (code, TREE_TYPE (a), a, b);
 }
 
+/* Folds (TREE_TYPE (A))(A CODE B), where CODE is either PLUS_EXPR or
+   MINUS_EXPR.  Returns the folded expression if folding is successful.
+   Otherwise, return NULL_TREE.  Handles the case that A is a pointer
+   robustly.  */
+
+static inline tree
+fold_plus (enum tree_code code, tree a, tree b)
+{
+  tree a_type = TREE_TYPE (a);
+  tree res;
+
+  STRIP_NOPS (a);
+  robust_plus (&code, a, &b);
+
+  res = fold_binary (code, TREE_TYPE (a), a, b);
+  if (res == NULL_TREE)
+    return NULL_TREE;
+
+  return fold_convert (a_type, res);
+}
+
+/* Folds (TREE_TYPE (A))(A CODE B), where CODE is either PLUS_EXPR or
+   MINUS_EXPR, possibly using the defining stmt of A.  Returns the folded
+   expression if folding is successful.  Otherwise, return NULL_TREE.  */
+
+static inline tree
+fold_walk_def_plus (enum tree_code code, tree a, tree b)
+{
+  tree a_type = TREE_TYPE (a);
+  tree res, a0, a1;
+  gimple def_stmt;
+
+  res = fold_plus (code, a, b);
+  if (res != NULL_TREE)
+    return res;
+
+  STRIP_NOPS (a);
+
+  if (TREE_CODE (a) != SSA_NAME)
+    return NULL_TREE;
+
+  def_stmt = SSA_NAME_DEF_STMT (a);
+  if (!is_gimple_assign (def_stmt)
+      || (gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
+	  && gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR))
+    return NULL_TREE;
+  a0 = gimple_assign_rhs1 (def_stmt);
+  a1 = gimple_assign_rhs2 (def_stmt);
+
+  def_stmt = SSA_NAME_DEF_STMT (a1);
+  if (is_gimple_assign (def_stmt)
+      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
+    a1 = fold_convert (TREE_TYPE (a1), gimple_assign_rhs1 (def_stmt));
+
+  res = fold_plus (code, fold_build_plus (PLUS_EXPR, a0, a1), b);
+  if (res == NULL_TREE)
+    return NULL_TREE;
+
+  return fold_convert (a_type, res);
+}
+
 /* Number of uses recorded in DATA.  */
 
 static inline unsigned
@@ -825,17 +886,25 @@
 
   if (!slot)
     {
-      /* Try to determine number of iterations.  We must know it
-	 unconditionally (i.e., without possibility of # of iterations
-	 being zero).  Also, we cannot safely work with ssa names that
-	 appear in phi nodes on abnormal edges, so that we do not create
-	 overlapping life ranges for them (PR 27283).  */
+      /* Try to determine number of iterations.  We cannot safely work with ssa
+         names that appear in phi nodes on abnormal edges, so that we do not
+         create overlapping life ranges for them (PR 27283).  */
       desc = XNEW (struct tree_niter_desc);
       if (number_of_iterations_exit (data->current_loop,
 				     exit, desc, true)
-	  && integer_zerop (desc->may_be_zero)
      	  && !contains_abnormal_ssa_name_p (desc->niter))
-	niter = desc->niter;
+	{
+	  if (!integer_zerop (desc->may_be_zero))
+            /* Construct COND_EXPR that describes the number of iterations.
+               Either the COND_EXPR is not too expensive, and we can use it as
+               loop bound, or we can deduce a LT_EXPR bound from it.  */
+	    niter
+	      = build3 (COND_EXPR, TREE_TYPE (desc->niter), desc->may_be_zero,
+			build_int_cst_type (TREE_TYPE (desc->niter), 0),
+			desc->niter);
+	  else
+	    niter = desc->niter;
+	}
       else
 	niter = NULL_TREE;
 
@@ -4353,6 +4422,155 @@
   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
 }
 
+/* Get the loop bound and comparison operator of USE->iv, and store them in
+   BOUND_P and COMP_P.  Returns false if unsuccessful.  */
+
+static bool
+get_use_lt_bound (struct iv_use *use, tree use_iv, tree *bound_p,
+		  enum tree_code *comp_p)
+{
+  gimple stmt = use->stmt;
+
+  if (gimple_code (stmt) != GIMPLE_COND
+      || gimple_cond_lhs (stmt) != use_iv)
+    return false;
+
+  *comp_p = gimple_cond_code (stmt);
+  *bound_p = gimple_cond_rhs (stmt);
+
+  return true;
+}
+
+/* Tries to replace loop exit test USE, by one formulated in terms of a LT_EXPR
+   comparison with CAND.  Stores the resulting comparison in COMP_P and bound in
+   BOUND_P.  */
+
+static bool
+iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use,
+                           struct iv_cand *cand, tree *bound_p,
+			   enum tree_code *comp_p)
+{
+  enum tree_code use_comp, canon_comp;
+  tree base_ptr, use_iv_real_base, use_lt_bound, bound, *use_iv;
+  bool ok, use_uses_inced_iv, use_after_cand_inc;
+  tree use_type, cand_type, cand_iv_base = cand->iv->base;
+
+  /* Initialize cand_type, use_iv and use_type.  */
+  STRIP_NOPS (cand_iv_base);
+  cand_type = TREE_TYPE (cand_iv_base);
+  ok = extract_cond_operands (data, use->stmt, &use_iv, NULL, NULL, NULL);
+  gcc_assert (ok);
+  use_type = TREE_TYPE (*use_iv);
+      
+  /* We're trying to replace 'i < n' with 'p < base + n' in
+
+     void
+     f1 (char *base, unsigned long int s, unsigned long int n)
+     {
+       unsigned long int i = s;
+       char *p = base + s;
+       do
+         {
+	   *p = '\0';
+	   p++;
+	   i++;
+	 }
+       while (i < n);
+     }
+
+     Overflow of base + n can't happen because either:
+     - s < n, and i will step to n, and p will step to base + n, or
+     - s >= n, so base + n < base + s, and assuming pointer arithmetic
+       doesn't overflow, base + s doesn't overflow, so base + n won't.
+
+     This transformation is not valid if i and n are signed, because
+     base + n might underflow.
+  */
+
+  /* Use should be an unsigned integral.  */
+  if (!INTEGRAL_TYPE_P (use_type) || !TYPE_UNSIGNED (use_type))
+    return false;
+
+  /* Cand should be a pointer, and pointer overflow should be undefined.  */
+  if (!POINTER_TYPE_P (cand_type) || !POINTER_TYPE_OVERFLOW_UNDEFINED)
+    return false;
+
+  /* Make sure that the loop iterates till the loop bound is hit.  */
+  if (!data->loop_single_exit_p)
+    return false;
+
+  /* We only handle this case for the moment.  */
+  if (!tree_int_cst_equal (use->iv->step, cand->iv->step))
+    return false;
+
+  /* For now, we only handle the case that cand is a source level cand.  It
+     is possible to also allow other cands, provided we can prove there is
+     pointer arithmetic in the loop body reaching base + n.  */
+  if (cand->pos != IP_ORIGINAL)
+    return false;
+
+  /* Determine if the exit test is formulated in terms of the phi or the
+     increment of the use iv.  */
+  use_uses_inced_iv
+    = gimple_code (SSA_NAME_DEF_STMT (*use_iv)) != GIMPLE_PHI;
+
+  /* Determine if the exit test is before or after the increment of the
+     cand.  */
+  use_after_cand_inc
+    = stmt_after_increment (data->current_loop, cand, use->stmt);
+
+  /* For now, we only handle these cases.  */
+  if (use_after_cand_inc != use_uses_inced_iv)
+    return false;
+
+  /* Get the base of the non-incremented loop var.  */
+  if (use_uses_inced_iv)
+    {
+      use_iv_real_base = fold_plus (MINUS_EXPR, use->iv->base, use->iv->step);
+      if (use_iv_real_base == NULL_TREE)
+	return false;
+    }
+  else
+    use_iv_real_base = use->iv->base;
+
+  /* Detect p = base + s.  */
+  base_ptr = fold_walk_def_plus (MINUS_EXPR, cand->iv->base,
+				 fold_convert (sizetype, use_iv_real_base));
+  if (base_ptr == NULL_TREE)
+    return false;
+  STRIP_NOPS (base_ptr);
+  if (TREE_CODE (base_ptr) != SSA_NAME)
+    return false;
+
+  /* Get the bound of the iv of the use.  */
+  if (!get_use_lt_bound (use, *use_iv, &use_lt_bound, &use_comp))
+    return false;
+
+  /* Determine canon_comp.  */
+  if (*comp_p == NE_EXPR)
+    canon_comp = use_comp;
+  else if (*comp_p == EQ_EXPR)
+    canon_comp = invert_tree_comparison (use_comp, false);
+  else
+    gcc_unreachable ();
+
+  /* Allow positive and negative step, and inclusive and exclusive bound.
+     To trigger inclusive bound, we need -funsafe-loop-optimizations.  */
+  if (canon_comp != LT_EXPR && canon_comp != GT_EXPR
+      && canon_comp != LE_EXPR && canon_comp != GE_EXPR)
+    return false;
+
+  /* Calculate bound.  */
+  bound = fold_build_plus (PLUS_EXPR, base_ptr,
+			   fold_convert (sizetype, use_lt_bound));
+  if (expression_expensive_p (bound))
+    return false;
+
+  *comp_p = use_comp;
+  *bound_p = bound;
+  return true;
+}
+
 /* Check whether it is possible to express the condition in USE by comparison
    of candidate CAND.  If so, store the value compared with to BOUND, and the
    comparison operator to COMP.  */
@@ -4431,6 +4649,21 @@
   *bound = aff_combination_to_tree (&bnd);
   *comp = iv_elimination_compare (data, use);
 
+  /* Try to implement nit using a '<' instead.  */
+  if (TREE_CODE (nit) == COND_EXPR)
+    {
+      if (iv_elimination_compare_lt (data, use, cand, bound, comp))
+        return true;
+
+      /* We could try to see if the non-lt bound is not too expensive, but the
+         cost infrastructure needs tuning for that first.  Even though
+         expression_expensive_p always returns true for COND_EXPRs, it happens
+         that the bound is folded into a MAX_EXPR, which is approved by
+         expression_expensive_p, but attributed a too low cost by force_var_cost
+         in case the MAX_EXPR would expand into control flow.  */
+      return false;
+    }
+
   /* It is unlikely that computing the number of iterations using division
      would be more profitable than keeping the original induction variable.  */
   if (expression_expensive_p (*bound))

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]