This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

[PATCH] Fix PR66313


So I've come back to PR66313 and found a solution to the tail-recursion
missed optimization that shows up when the factoring folding is fixed to
use an unsigned type whenever we cannot rule out overflow.
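
To illustrate (a source-level sketch of the fold's effect, not code from
the patch): factoring a*b + a*c into a*(b+c) in a signed type can
introduce overflow the original expression did not have, for instance
for a == 0 or a == -1, so the inner operations are now carried out in
the corresponding unsigned type, which wraps:

  /* Sketch only: what the fixed fold conceptually produces for signed
     int operands when the common factor may be zero or minus one.  */
  int f_before (int a, int b, int c)
  {
    return a * b + a * c;  /* assumed free of overflow */
  }

  int f_after (int a, int b, int c)
  {
    /* The addition (and, unless the sum folds to a safe constant, the
       multiplication too) wraps harmlessly in unsigned arithmetic.  */
    return (int) (((unsigned) b + (unsigned) c) * (unsigned) a);
  }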

The folding part is identical to my last try from 2015.  The
tail-recursion part makes us handle intervening stmts introduced by
foldings that would otherwise "clobber" our walk of the single-use chain
of stmts between the call and the return (we used to fail at any stmt
that is not part of said chain).  A simple solution is to move the stmts
that are not part of the chain, and that are safe to move, before the
call.  That handles the leaf conversions that now appear for
tree-ssa/tailrecursion-6.c.
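
A sketch (assumed shape, not the actual testcase) of the kind of code
affected: after the fold above, the GIMPLE between the recursive call
and the return contains conversions that do not use the call's result,
and those can be moved ahead of the call:

  int foo (int n, int b, int c)
  {
    if (n == 0)
      return 0;
    /* Gimplified roughly as  t_1 = foo (n - 1, b, c);
       _2 = (unsigned) b;  _3 = (unsigned) c;  ...  return t_1 + _7;
       where the _2, _3, ... defs are independent of t_1 and now get
       moved before the call so the accumulator walk can continue.  */
    return foo (n - 1, b, c) + n * b + n * c;
  }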

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2017-05-31  Richard Biener  <rguenther@suse.de>

	PR middle-end/66313
	* fold-const.c (fold_plusminus_mult_expr): If the factored-out
	factor may be zero or minus one, use a wrapping type for the
	inner operations.
	* tree-tailcall.c (independent_of_stmt_p): Pass in to_move bitmap
	and handle moved defs.
	(process_assignment): Properly guard the unary op case.  Return a
	tri-state indicating that moving the stmt before the call may allow
	the walk to continue.  Pass through to_move.
	(find_tail_calls): Handle moving unrelated defs before
	the call.

	* c-c++-common/ubsan/pr66313.c: New testcase.
	* gcc.dg/tree-ssa/loop-15.c: Adjust.

Index: gcc/fold-const.c
===================================================================
*** gcc/fold-const.c.orig	2015-10-29 12:32:33.302782318 +0100
--- gcc/fold-const.c	2015-10-29 14:08:39.936497739 +0100
*************** fold_plusminus_mult_expr (location_t loc
*** 6916,6925 ****
      }
    same = NULL_TREE;
  
!   if (operand_equal_p (arg01, arg11, 0))
!     same = arg01, alt0 = arg00, alt1 = arg10;
!   else if (operand_equal_p (arg00, arg10, 0))
      same = arg00, alt0 = arg01, alt1 = arg11;
    else if (operand_equal_p (arg00, arg11, 0))
      same = arg00, alt0 = arg01, alt1 = arg10;
    else if (operand_equal_p (arg01, arg10, 0))
--- 6916,6926 ----
      }
    same = NULL_TREE;
  
!   /* Prefer factoring a common non-constant.  */
!   if (operand_equal_p (arg00, arg10, 0))
      same = arg00, alt0 = arg01, alt1 = arg11;
+   else if (operand_equal_p (arg01, arg11, 0))
+     same = arg01, alt0 = arg00, alt1 = arg10;
    else if (operand_equal_p (arg00, arg11, 0))
      same = arg00, alt0 = arg01, alt1 = arg10;
    else if (operand_equal_p (arg01, arg10, 0))
*************** fold_plusminus_mult_expr (location_t loc
*** 6974,6987 ****
  	}
      }
  
!   if (same)
      return fold_build2_loc (loc, MULT_EXPR, type,
  			fold_build2_loc (loc, code, type,
  				     fold_convert_loc (loc, type, alt0),
  				     fold_convert_loc (loc, type, alt1)),
  			fold_convert_loc (loc, type, same));
  
!   return NULL_TREE;
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
--- 6975,7010 ----
  	}
      }
  
!   if (!same)
!     return NULL_TREE;
! 
!   if (! INTEGRAL_TYPE_P (type)
!       || TYPE_OVERFLOW_WRAPS (type)
!       /* We are neither factoring zero nor minus one.  */
!       || TREE_CODE (same) == INTEGER_CST)
      return fold_build2_loc (loc, MULT_EXPR, type,
  			fold_build2_loc (loc, code, type,
  				     fold_convert_loc (loc, type, alt0),
  				     fold_convert_loc (loc, type, alt1)),
  			fold_convert_loc (loc, type, same));
  
!   /* Same may be zero and thus the operation 'code' may overflow.  Likewise
!      same may be minus one and thus the multiplication may overflow.  Perform
!      the operations in an unsigned type.  */
!   tree utype = unsigned_type_for (type);
!   tree tem = fold_build2_loc (loc, code, utype,
! 			      fold_convert_loc (loc, utype, alt0),
! 			      fold_convert_loc (loc, utype, alt1));
!   /* If the sum evaluated to a constant that is not -INF the multiplication
!      cannot overflow.  */
!   if (TREE_CODE (tem) == INTEGER_CST
!       && ! wi::eq_p (tem, wi::min_value (TYPE_PRECISION (utype), SIGNED)))
!     return fold_build2_loc (loc, MULT_EXPR, type,
! 			    fold_convert (type, tem), same);
! 
!   return fold_convert_loc (loc, type,
! 			   fold_build2_loc (loc, MULT_EXPR, utype, tem,
! 					    fold_convert_loc (loc, utype, same)));
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
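
For context (an illustration, not part of the patch) of why a constant
sum equal to the minimum value is excluded above, with values mirroring
g() in the new testcase:

  int g (int a)
  {
    int b = __INT_MAX__ / 2;      /* 2^30 - 1 */
    int c = __INT_MAX__ / 2 + 2;  /* 2^30 + 1 */
    /* The unsigned sum b + c is 0x80000000; as int that is INT_MIN,
       and a * INT_MIN overflows for a == -1 even though the original
       a*b + a*c == INT_MIN is representable.  So in this case the
       multiplication is kept in the unsigned type as well.  */
    return (int) (((unsigned) b + (unsigned) c) * (unsigned) a);
  }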
Index: gcc/testsuite/c-c++-common/ubsan/pr66313.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/c-c++-common/ubsan/pr66313.c	2015-10-29 14:08:39.969498105 +0100
***************
*** 0 ****
--- 1,26 ----
+ /* { dg-do run } */
+ /* { dg-options "-fsanitize=undefined -fsanitize-undefined-trap-on-error" } */
+ 
+ int __attribute__((noinline,noclone))
+ f(int a, int b, int c)
+ {
+   return a * b + a * c;
+ }
+ int __attribute__((noinline,noclone))
+ g(int a)
+ {
+   return a * (__INT_MAX__/2) + a * (__INT_MAX__/2 + 2);
+ }
+ int __attribute__((noinline,noclone))
+ h(int a, int b)
+ {
+   return a * (__INT_MAX__/2 + 1) + b * (__INT_MAX__/2 + 1);
+ }
+ int main()
+ {
+   volatile int tem = f(0, __INT_MAX__, __INT_MAX__);
+   tem = f(-1, __INT_MAX__/2 + 1, __INT_MAX__/2 + 1);
+   tem = g(-1);
+   tem = h(-1, -1);
+   return 0;
+ }
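
The calls appear to exercise the corner cases in turn: f() with a common
factor of zero and then of minus one, g() with a constant sum that wraps
to INT_MIN, and h() with a constant common factor for which the fold can
stay in the signed type.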
Index: gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
===================================================================
*** gcc/testsuite/gcc.dg/tree-ssa/loop-15.c.orig	2015-10-29 12:32:33.302782318 +0100
--- gcc/testsuite/gcc.dg/tree-ssa/loop-15.c	2015-10-29 14:08:39.989498327 +0100
*************** int bla(void)
*** 19,26 ****
  }
  
  /* Since the loop is removed, there should be no addition.  */
! /* { dg-final { scan-tree-dump-times "\\+" 0 "optimized" } } */
! /* { dg-final { scan-tree-dump-times "n_. \\* n_." 1 "optimized" } } */
  
  /* The if from the loop header copying remains in the code.  */
  /* { dg-final { scan-tree-dump-times "if " 1 "optimized" } } */
--- 19,26 ----
  }
  
  /* Since the loop is removed, there should be no addition.  */
! /* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
! /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
  
  /* The if from the loop header copying remains in the code.  */
  /* { dg-final { scan-tree-dump-times "if " 1 "optimized" } } */
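
(The relaxed patterns presumably account for the factored multiplication
now being performed on unsigned temporaries, whose operands no longer
match the n_. SSA names.)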


Index: gcc/tree-tailcall.c
===================================================================
--- gcc/tree-tailcall.c	(revision 248722)
+++ gcc/tree-tailcall.c	(working copy)
@@ -184,7 +184,8 @@ suitable_for_tail_call_opt_p (void)
    containing the value of EXPR at GSI.  */
 
 static tree
-independent_of_stmt_p (tree expr, gimple *at, gimple_stmt_iterator gsi)
+independent_of_stmt_p (tree expr, gimple *at, gimple_stmt_iterator gsi,
+		       bitmap to_move)
 {
   basic_block bb, call_bb, at_bb;
   edge e;
@@ -196,6 +197,9 @@ independent_of_stmt_p (tree expr, gimple
   if (TREE_CODE (expr) != SSA_NAME)
     return NULL_TREE;
 
+  if (bitmap_bit_p (to_move, SSA_NAME_VERSION (expr)))
+    return expr;
+
   /* Mark the blocks in the chain leading to the end.  */
   at_bb = gimple_bb (at);
   call_bb = gimple_bb (gsi_stmt (gsi));
@@ -250,13 +254,16 @@ independent_of_stmt_p (tree expr, gimple
   return expr;
 }
 
+enum par { FAIL, OK, TRY_MOVE };
+
 /* Simulates the effect of an assignment STMT on the return value of the tail
    recursive CALL passed in ASS_VAR.  M and A are the multiplicative and the
    additive factor for the real return value.  */
 
-static bool
-process_assignment (gassign *stmt, gimple_stmt_iterator call, tree *m,
-		    tree *a, tree *ass_var)
+static par
+process_assignment (gassign *stmt,
+		    gimple_stmt_iterator call, tree *m,
+		    tree *a, tree *ass_var, bitmap to_move)
 {
   tree op0, op1 = NULL_TREE, non_ass_var = NULL_TREE;
   tree dest = gimple_assign_lhs (stmt);
@@ -276,7 +283,7 @@ process_assignment (gassign *stmt, gimpl
       if (gimple_assign_cast_p (stmt))
 	{
 	  if (TYPE_MODE (TREE_TYPE (dest)) != TYPE_MODE (TREE_TYPE (src_var)))
-	    return false;
+	    return FAIL;
 
 	  /* Even if the type modes are the same, if the precision of the
 	     type is smaller than mode's precision,
@@ -284,11 +291,11 @@ process_assignment (gassign *stmt, gimpl
 	  if (INTEGRAL_TYPE_P (TREE_TYPE (dest))
 	      && (GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (dest)))
 		  > TYPE_PRECISION (TREE_TYPE (dest))))
-	    return false;
+	    return FAIL;
 	}
 
       *ass_var = dest;
-      return true;
+      return OK;
     }
 
   switch (rhs_class)
@@ -303,7 +310,7 @@ process_assignment (gassign *stmt, gimpl
       break;
 
     default:
-      return false;
+      return FAIL;
     }
 
   /* Accumulator optimizations will reverse the order of operations.
@@ -311,42 +318,45 @@ process_assignment (gassign *stmt, gimpl
      that addition and multiplication are associative.  */
   if (!flag_associative_math)
     if (FLOAT_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
-      return false;
+      return FAIL;
 
-  if (rhs_class == GIMPLE_UNARY_RHS)
+  if (rhs_class == GIMPLE_UNARY_RHS
+      && op0 == *ass_var)
     ;
   else if (op0 == *ass_var
-	   && (non_ass_var = independent_of_stmt_p (op1, stmt, call)))
+	   && (non_ass_var = independent_of_stmt_p (op1, stmt, call,
+						    to_move)))
     ;
   else if (op1 == *ass_var
-	   && (non_ass_var = independent_of_stmt_p (op0, stmt, call)))
+	   && (non_ass_var = independent_of_stmt_p (op0, stmt, call,
+						    to_move)))
     ;
   else
-    return false;
+    return TRY_MOVE;
 
   switch (code)
     {
     case PLUS_EXPR:
       *a = non_ass_var;
       *ass_var = dest;
-      return true;
+      return OK;
 
     case POINTER_PLUS_EXPR:
       if (op0 != *ass_var)
-	return false;
+	return FAIL;
       *a = non_ass_var;
       *ass_var = dest;
-      return true;
+      return OK;
 
     case MULT_EXPR:
       *m = non_ass_var;
       *ass_var = dest;
-      return true;
+      return OK;
 
     case NEGATE_EXPR:
       *m = build_minus_one_cst (TREE_TYPE (op0));
       *ass_var = dest;
-      return true;
+      return OK;
 
     case MINUS_EXPR:
       if (*ass_var == op0)
@@ -358,12 +368,10 @@ process_assignment (gassign *stmt, gimpl
         }
 
       *ass_var = dest;
-      return true;
-
-      /* TODO -- Handle POINTER_PLUS_EXPR.  */
+      return OK;
 
     default:
-      return false;
+      return FAIL;
     }
 }
 
@@ -523,6 +531,7 @@ find_tail_calls (basic_block bb, struct
      since we are running after dce.  */
   m = NULL_TREE;
   a = NULL_TREE;
+  auto_bitmap to_move;
 
   abb = bb;
   agsi = gsi;
@@ -540,27 +549,36 @@ find_tail_calls (basic_block bb, struct
 	}
 
       stmt = gsi_stmt (agsi);
-
-      if (gimple_code (stmt) == GIMPLE_LABEL
-	  || gimple_code (stmt) == GIMPLE_NOP)
-	continue;
-
       if (gimple_code (stmt) == GIMPLE_RETURN)
 	break;
 
-      if (gimple_clobber_p (stmt))
-	continue;
-
-      if (is_gimple_debug (stmt))
+      if (gimple_code (stmt) == GIMPLE_LABEL
+	  || gimple_code (stmt) == GIMPLE_NOP
+	  || gimple_clobber_p (stmt)
+	  || is_gimple_debug (stmt))
 	continue;
 
       if (gimple_code (stmt) != GIMPLE_ASSIGN)
 	return;
 
       /* This is a gimple assign. */
-      if (! process_assignment (as_a <gassign *> (stmt), gsi, &tmp_m,
-				&tmp_a, &ass_var))
+      par ret = process_assignment (as_a <gassign *> (stmt), gsi,
+				    &tmp_m, &tmp_a, &ass_var, to_move);
+      if (ret == FAIL)
 	return;
+      else if (ret == TRY_MOVE)
+	{
+	  if (! tail_recursion)
+	    return;
+	  for (unsigned opno = 1; opno < gimple_num_ops (stmt); ++opno)
+	    {
+	      tree op = gimple_op (stmt, opno);
+	      if (independent_of_stmt_p (op, stmt, gsi, to_move) != op)
+		return;
+	    }
+	  bitmap_set_bit (to_move, SSA_NAME_VERSION (gimple_assign_lhs (stmt)));
+	  continue;
+	}
 
       if (tmp_a)
 	{
@@ -601,6 +619,19 @@ find_tail_calls (basic_block bb, struct
   if (m && POINTER_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl))))
     return;
 
+  /* Move queued defs.  */
+  if (tail_recursion)
+    {
+      bitmap_iterator bi;
+      unsigned i;
+      EXECUTE_IF_SET_IN_BITMAP (to_move, 0, i, bi)
+	{
+	  stmt = SSA_NAME_DEF_STMT (ssa_name (i));
+	  gimple_stmt_iterator mgsi = gsi_for_stmt (stmt);
+	  gsi_move_before (&mgsi, &gsi);
+	}
+    }
+
   nw = XNEW (struct tailcall);
 
   nw->call_gsi = gsi;
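
Tying the pieces together for the sketch given above: process_assignment
returns TRY_MOVE for a stmt like _2 = (unsigned) b that does not feed the
accumulator chain; find_tail_calls then checks via independent_of_stmt_p
that every operand is independent of the call, records the def in the
to_move bitmap, and finally gsi_move_before relocates the queued defs
ahead of the call.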

