[PATCH] Slightly improve phiopt value_replacement optimization (PR middle-end/62263, PR middle-end/82498)

Richard Biener rguenther@suse.de
Sun Oct 15 12:14:00 GMT 2017


On October 13, 2017 9:43:10 PM GMT+02:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>First of all, there was a typo, we are optimizing
>(x != 0) ? x + y : y
>into x + y rather than y.
>
>And, as the comment mentions, the condition that there is just a single
>stmt is too restrictive and in the various patterns posted in the
>various
>rotate related PRs there are cases where in addition to the last
>assign stmt there are some preparation statements (sometimes
>conversion,
>sometimes bit masking with constant, sometimes both).
>I think it is undesirable to turn the conditional code into always
>executed
>one if it is too large, so this patch allows just very few very cheap
>preparation statements which feed each other and it is easily possible
>to
>propagate the cond_rhs constant through those statements to compute
>what
>the result from those would be (and make sure no UB).
>
>With this patch on top of the previous one, on rotate-8.c testcase
>on x86_64-linux and i686-linux we get the most efficient code in all
>cases
>- just a rol/ror instruction with perhaps some moves into registers
>needed
>to perform that, but without any conditionals, masking etc.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2017-10-13  Jakub Jelinek  <jakub@redhat.com>
>
>	PR middle-end/62263
>	PR middle-end/82498
>	* tree-ssa-phiopt.c (value_replacement): Comment fix.  Handle
>	up to 2 preparation statements for ASSIGN in MIDDLE_BB.
>
>	* c-c++-common/rotate-8.c: Expect no PHIs in optimized dump.
>
>--- gcc/tree-ssa-phiopt.c.jj	2017-10-10 22:04:08.000000000 +0200
>+++ gcc/tree-ssa-phiopt.c	2017-10-13 17:55:01.578450763 +0200
>@@ -995,11 +995,13 @@ value_replacement (basic_block cond_bb,
> 
>     }
> 
>-  /* Now optimize (x != 0) ? x + y : y to just y.
>-     The following condition is too restrictive, there can easily be
>another
>-     stmt in middle_bb, for instance a CONVERT_EXPR for the second
>argument.  */
>-  gimple *assign = last_and_only_stmt (middle_bb);
>-  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
>+  /* Now optimize (x != 0) ? x + y : y to just x + y.  */
>+  gsi = gsi_last_nondebug_bb (middle_bb);
>+  if (gsi_end_p (gsi))
>+    return 0;
>+
>+  gimple *assign = gsi_stmt (gsi);
>+  if (!is_gimple_assign (assign)
>       || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
>       || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
> 	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
>@@ -1009,6 +1011,71 @@ value_replacement (basic_block cond_bb,
>   if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
>     return 0;
> 
>+  /* Allow up to 2 cheap preparation statements that prepare argument
>+     for assign, e.g.:
>+      if (y_4 != 0)
>+	goto <bb 3>;
>+      else
>+	goto <bb 4>;
>+     <bb 3>:
>+      _1 = (int) y_4;
>+      iftmp.0_6 = x_5(D) r<< _1;
>+     <bb 4>:
>+      # iftmp.0_2 = PHI <iftmp.0_6(3), x_5(D)(2)>
>+     or:
>+      if (y_3(D) == 0)
>+	goto <bb 4>;
>+      else
>+	goto <bb 3>;
>+     <bb 3>:
>+      y_4 = y_3(D) & 31;
>+      _1 = (int) y_4;
>+      _6 = x_5(D) r<< _1;
>+     <bb 4>:
>+      # _2 = PHI <x_5(D)(2), _6(3)>  */
>+  gimple *prep_stmt[2] = { NULL, NULL };
>+  int prep_cnt;
>+  for (prep_cnt = 0; ; prep_cnt++)
>+    {
>+      gsi_prev_nondebug (&gsi);
>+      if (gsi_end_p (gsi))
>+	break;
>+
>+      gimple *g = gsi_stmt (gsi);
>+      if (gimple_code (g) == GIMPLE_LABEL)
>+	break;
>+
>+      if (prep_cnt == 2 || !is_gimple_assign (g))
>+	return 0;
>+
>+      tree lhs = gimple_assign_lhs (g);
>+      tree rhs1 = gimple_assign_rhs1 (g);
>+      use_operand_p use_p;
>+      gimple *use_stmt;
>+      if (TREE_CODE (lhs) != SSA_NAME
>+	  || TREE_CODE (rhs1) != SSA_NAME
>+	  || !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>+	  || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
>+	  || !single_imm_use (lhs, &use_p, &use_stmt)
>+	  || use_stmt != (prep_cnt ? prep_stmt[prep_cnt - 1] : assign))
>+	return 0;
>+      switch (gimple_assign_rhs_code (g))
>+	{
>+	CASE_CONVERT:
>+	  break;
>+	case PLUS_EXPR:
>+	case BIT_AND_EXPR:
>+	case BIT_IOR_EXPR:
>+	case BIT_XOR_EXPR:
>+	  if (TREE_CODE (gimple_assign_rhs2 (g)) != INTEGER_CST)
>+	    return 0;
>+	  break;
>+	default:
>+	  return 0;
>+	}
>+      prep_stmt[prep_cnt] = g;
>+    }
>+
>   /* Only transform if it removes the condition.  */
>if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
>e0, e1))
>     return 0;
>@@ -1019,7 +1086,7 @@ value_replacement (basic_block cond_bb,
>       && profile_status_for_fn (cfun) != PROFILE_ABSENT
>&& EDGE_PRED (middle_bb, 0)->probability < profile_probability::even ()
>       /* If assign is cheap, there is no point avoiding it.  */
>-      && estimate_num_insns (assign, &eni_time_weights)
>+      && estimate_num_insns (bb_seq (middle_bb), &eni_time_weights)
> 	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
>     return 0;
> 
>@@ -1030,6 +1097,32 @@ value_replacement (basic_block cond_bb,
>   tree cond_lhs = gimple_cond_lhs (cond);
>   tree cond_rhs = gimple_cond_rhs (cond);
> 
>+  /* Propagate the cond_rhs constant through preparation stmts,
>+     make sure UB isn't invoked while doing that.  */
>+  for (int i = prep_cnt - 1; i >= 0; --i)
>+    {
>+      gimple *g = prep_stmt[i];
>+      tree grhs1 = gimple_assign_rhs1 (g);
>+      if (!operand_equal_for_phi_arg_p (cond_lhs, grhs1))
>+	return 0;
>+      cond_lhs = gimple_assign_lhs (g);
>+      cond_rhs = fold_convert (TREE_TYPE (grhs1), cond_rhs);
>+      if (TREE_CODE (cond_rhs) != INTEGER_CST
>+	  || TREE_OVERFLOW (cond_rhs))
>+	return 0;
>+      if (gimple_assign_rhs_class (g) == GIMPLE_BINARY_RHS)
>+	{
>+	  cond_rhs = int_const_binop (gimple_assign_rhs_code (g), cond_rhs,
>+				      gimple_assign_rhs2 (g));
>+	  if (TREE_OVERFLOW (cond_rhs))
>+	    return 0;
>+	}
>+      cond_rhs = fold_convert (TREE_TYPE (cond_lhs), cond_rhs);
>+      if (TREE_CODE (cond_rhs) != INTEGER_CST
>+	  || TREE_OVERFLOW (cond_rhs))
>+	return 0;
>+    }
>+
>   if (((code == NE_EXPR && e1 == false_edge)
> 	|| (code == EQ_EXPR && e1 == true_edge))
>       && arg0 == lhs
>@@ -1071,7 +1164,15 @@ value_replacement (basic_block cond_bb,
> 	    duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
> 					   phires_range_info);
> 	}
>-      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
>+      gimple_stmt_iterator gsi_from;
>+      for (int i = prep_cnt - 1; i >= 0; --i)
>+	{
>+	  tree plhs = gimple_assign_lhs (prep_stmt[i]);
>+	  SSA_NAME_RANGE_INFO (plhs) = NULL;
>+	  gsi_from = gsi_for_stmt (prep_stmt[i]);
>+	  gsi_move_before (&gsi_from, &gsi);
>+	}
>+      gsi_from = gsi_for_stmt (assign);
>       gsi_move_before (&gsi_from, &gsi);
>       replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
>       return 2;
>--- gcc/testsuite/c-c++-common/rotate-8.c.jj	2017-10-13
>15:55:32.000000000 +0200
>+++ gcc/testsuite/c-c++-common/rotate-8.c	2017-10-13 17:57:32.861627651
>+0200
>@@ -3,6 +3,7 @@
> /* { dg-do compile } */
> /* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
>/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } }
>*/
>+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
> 
> unsigned int
> f1 (unsigned int x, unsigned char y)
>
>	Jakub



More information about the Gcc-patches mailing list