
[PATCH] Improve TRUTH_{AND,OR}IF_EXPR expansion (PR rtl-optimization/78200)


Hi!

The following patch attempts to improve expansion when we have code like:
  <bb 16> [local count: 102513059]:
  if_conversion_var_52 = MEM[base: st1_22, offset: 0B];
  if (if_conversion_var_52 < 0)
    goto <bb 17>; [41.00%]
  else
    goto <bb 18>; [59.00%]

...

  <bb 18> [local count: 60482706]:
  _81 = _11 == 2;
  _82 = if_conversion_var_52 > 0;
  _79 = _81 & _82;
  if (_79 != 0)
    goto <bb 19>; [29.26%]
  else
    goto <bb 20>; [70.74%]

Here, the single predecessor of the bb performs a comparison similar to the
one computed by one of the & (or |) operands, and since & (or |) does not
impose an evaluation order, we can choose which operand to expand first.
If we expand if_conversion_var_52 > 0 first, there is a chance we can reuse
the flags from the previous comparison.  The patch does this only if there
are no non-virtual PHIs in the current bb and all stmts before the current
condition are TERable, so that nothing can clobber the flags in between.
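
For illustration, source of roughly the following shape (a made-up
reduction, not the testcase from the PR) can produce GIMPLE like the
above once the && is if-converted into a non-short-circuit &:

  extern void handle_negative (long), handle_positive (long);

  void
  example (long *p, int mode)
  {
    long v = *p;                       /* the if_conversion_var above */
    if (v < 0)
      handle_negative (v);
    else if (mode == 2 && v > 0)       /* becomes (mode == 2) & (v > 0) */
      handle_positive (v);
  }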

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk if it
helps for SPEC?  Venkat, do you think you could benchmark it in the setup
where you've measured the slowdown to see if it helps?  I see the patch
changes the loop:
 	.p2align 4,,10
 	.p2align 3
 .L11:
+	jle	.L10
 	cmpl	$2, %eax
-	jne	.L10
-	testq	%r8, %r8
-	jg	.L12
+	je	.L12
 	.p2align 4,,10
 	.p2align 3
 .L10:
 	addq	%r9, %rsi
 	cmpq	%rsi, %rdx
 	jbe	.L35
 .L13:
 	movl	24(%rsi), %eax
 	testl	%eax, %eax
 	jle	.L10
 	movq	(%rsi), %r8
 	testq	%r8, %r8
 	jns	.L11
 	cmpl	$1, %eax
 	jne	.L10
 .L12:
 	addq	$1, %r10
 	movl	$1, %r11d
 	movq	st5(,%r10,8), %rax
 	movq	%rsi, (%rax)
 	addq	%r9, %rsi
 	movq	%r8, 8(%rax)
 	movq	st5(,%rdi,8), %rax
 	movq	%r8, 16(%rax)
 	cmpq	%rsi, %rdx
 	ja	.L13
which I assume should be an improvement, since we save one extra
comparison.

2018-03-27  Jakub Jelinek  <jakub@redhat.com>
	    Richard Biener  <rguenth@suse.de>

	PR rtl-optimization/78200
	* cfgexpand.c (gimple_bb_info_for_bb): New variable.
	(expand_bb_seq, expand_phi_nodes): New functions.
	(expand_gimple_cond): Use heuristics if it is desirable to
	swap TRUTH_{AND,OR}IF_EXPR operands.
	(expand_gimple_basic_block): Remember GIMPLE il for bbs
	being expanded or already expanded.
	(pass_expand::execute): Initialize and then free the
	gimple_bb_info_for_bb vector.

--- gcc/cfgexpand.c.jj	2018-02-09 06:44:36.413805556 +0100
+++ gcc/cfgexpand.c	2018-03-26 13:35:57.536509844 +0200
@@ -2357,6 +2357,34 @@ label_rtx_for_bb (basic_block bb ATTRIBU
   return l;
 }
 
+/* Maps blocks to their GIMPLE IL.  */
+static vec<gimple_bb_info, va_heap, vl_embed> *gimple_bb_info_for_bb;
+
+/* Like bb_seq, except that during expansion returns the GIMPLE seq
+   even for the blocks that have been already expanded or are being
+   currently expanded.  */
+
+static gimple_seq
+expand_bb_seq (basic_block bb)
+{
+  if ((bb->flags & BB_RTL)
+      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
+    return (*gimple_bb_info_for_bb)[bb->index].seq;
+  return bb_seq (bb);
+}
+
+/* Like phi_nodes, except that during expansion returns the GIMPLE PHIs
+   even for the blocks that have been already expanded or are being
+   currently expanded.  */
+
+static gimple_seq
+expand_phi_nodes (basic_block bb)
+{
+  if ((bb->flags & BB_RTL)
+      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
+    return (*gimple_bb_info_for_bb)[bb->index].phi_nodes;
+  return phi_nodes (bb);
+}
 
 /* A subroutine of expand_gimple_cond.  Given E, a fallthrough edge
    of a basic block where we just expanded the conditional at the end,
@@ -2475,6 +2503,65 @@ expand_gimple_cond (basic_block bb, gcon
 		  op0 = gimple_assign_rhs1 (second);
 		  op1 = gimple_assign_rhs2 (second);
 		}
+
+	      /* We'll expand RTL for op0 first, see if we'd better expand RTL
+		 for op1 first.  Do that if the previous bb ends with
+		 if (x op cst), op1's def_stmt rhs is x op2 cst and there are
+		 no non-virtual PHIs nor non-TERed stmts in BB before STMT.  */
+	      while (TREE_CODE (op1) == SSA_NAME
+		     && (code == TRUTH_ANDIF_EXPR || code == TRUTH_ORIF_EXPR)
+		     && single_pred_p (bb))
+		{
+		  gimple *def1 = SSA_NAME_DEF_STMT (op1);
+		  if (!is_gimple_assign (def1)
+		      || (TREE_CODE_CLASS (gimple_assign_rhs_code (def1))
+			  != tcc_comparison))
+		    break;
+
+		  basic_block pred = single_pred (bb);
+		  gimple_seq pred_seq = expand_bb_seq (pred);
+		  gimple_stmt_iterator i = gsi_last (pred_seq);
+		  if (!gsi_end_p (i) && is_gimple_debug (gsi_stmt (i)))
+		    gsi_prev_nondebug (&i);
+
+		  gimple *last = gsi_stmt (i);
+		  if (!last
+		      || gimple_code (last) != GIMPLE_COND
+		      || gimple_assign_rhs1 (def1) != gimple_cond_lhs (last)
+		      || !operand_equal_p (gimple_assign_rhs2 (def1),
+					   gimple_cond_rhs (last), 0))
+		    break;
+
+		  gimple_seq phi_seq = expand_phi_nodes (bb);
+		  i = gsi_start (phi_seq);
+		  if (!gsi_end_p (i)
+		      && !virtual_operand_p (gimple_phi_result (gsi_stmt (i))))
+		    break;
+
+		  /* Check if all stmts before STMT are TERed.  */
+		  gimple_seq cur_seq = expand_bb_seq (bb);
+		  i = gsi_start_nondebug (cur_seq);
+		  int cnt = 100;
+		  while (!gsi_end_p (i) && gsi_stmt (i) != stmt)
+		    {
+		      gimple *g = gsi_stmt (i);
+		      if (!is_gimple_assign (g))
+			break;
+		      if (--cnt == 0)
+			break;
+		      tree lhs = gimple_assign_lhs (g);
+		      if (TREE_CODE (lhs) != SSA_NAME
+			  || !get_gimple_for_ssa_name (lhs))
+			break;
+		      gsi_next_nondebug (&i);
+		    }
+
+		  if (gsi_stmt (i) != stmt)
+		    break;
+
+		  std::swap (op0, op1);
+		  break;
+		}
 	    }
 	}
     }
@@ -5501,6 +5588,10 @@ expand_gimple_basic_block (basic_block b
   if (optimize)
     reorder_operands (bb);
   stmts = bb_seq (bb);
+  if ((unsigned) bb->index >= vec_safe_length (gimple_bb_info_for_bb))
+    vec_safe_grow_cleared (gimple_bb_info_for_bb, bb->index + 1);
+  (*gimple_bb_info_for_bb)[bb->index].seq = stmts;
+  (*gimple_bb_info_for_bb)[bb->index].phi_nodes = phi_nodes (bb);
   bb->il.gimple.seq = NULL;
   bb->il.gimple.phi_nodes = NULL;
   rtl_profile_for_bb (bb);
@@ -6419,6 +6510,8 @@ pass_expand::execute (function *fun)
       >= PARAM_VALUE (PARAM_MAX_DEBUG_MARKER_COUNT))
     cfun->debug_nonbind_markers = false;
 
+  vec_safe_grow_cleared (gimple_bb_info_for_bb, n_basic_blocks_for_fn (cfun));
+
   lab_rtx_for_bb = new hash_map<basic_block, rtx_code_label *>;
   FOR_BB_BETWEEN (bb, init_block->next_bb, EXIT_BLOCK_PTR_FOR_FN (fun),
 		  next_bb)
@@ -6433,6 +6526,8 @@ pass_expand::execute (function *fun)
       deep_ter_debug_map = NULL;
     }
 
+  vec_free (gimple_bb_info_for_bb);
+
   /* Free stuff we no longer need after GIMPLE optimizations.  */
   free_dominance_info (CDI_DOMINATORS);
   free_dominance_info (CDI_POST_DOMINATORS);

	Jakub

