RE: [PATCH] Improve TRUTH_{AND,OR}IF_EXPR expansion (PR rtl-optimization/78200)


Hi Jakub, 

> -----Original Message-----
> From: Jakub Jelinek <jakub@redhat.com>
> Sent: Tuesday, March 27, 2018 2:40 PM
> To: Richard Biener <rguenther@suse.de>
> Cc: gcc-patches@gcc.gnu.org; Kumar, Venkataramanan <Venkataramanan.Kumar@amd.com>
> Subject: [PATCH] Improve TRUTH_{AND,OR}IF_EXPR expansion (PR rtl-optimization/78200)
> 
> Hi!
> 
> The following patch attempts to improve expansion, if we have code like:
>   <bb 16> [local count: 102513059]:
>   if_conversion_var_52 = MEM[base: st1_22, offset: 0B];
>   if (if_conversion_var_52 < 0)
>     goto <bb 17>; [41.00%]
>   else
>     goto <bb 18>; [59.00%]
> 
> ...
> 
>   <bb 18> [local count: 60482706]:
>   _81 = _11 == 2;
>   _82 = if_conversion_var_52 > 0;
>   _79 = _81 & _82;
>   if (_79 != 0)
>     goto <bb 19>; [29.26%]
>   else
>     goto <bb 20>; [70.74%]
> 
> Here, the single pred of the bb performs a comparison similar to what one
> of the & (or |) operands does, and as & (or |) is unordered, we can choose
> which operand to expand first.  If we expand if_conversion_var_52 > 0
> first, there is a chance that we can reuse the flags from the previous
> comparison.  The patch does this only if there are no (non-virtual) PHIs in
> the current bb and all stmts before the current condition are TERable, so
> there is nothing that would clobber the flags in between.
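
To make the flag-reuse idea concrete, here is a hypothetical C reduction of
the shape above (my own illustration, not the testcase from PR 78200):

	/* Hypothetical: both conditions compare V against zero.  On the path
	   where v < 0 is false, the flags from that comparison are still
	   live, so expanding v > 0 first lets the conditional jump reuse
	   them (e.g. jle) instead of re-testing V.  */
	long
	f (long v, int x)
	{
	  if (v < 0)
	    return -1;
	  if ((x == 2) & (v > 0))	/* non-short-circuit &, as in the GIMPLE */
	    return 1;
	  return 0;
	}

Since & is commutative, evaluating v > 0 first is legal, and that is exactly
the swap the heuristic performs.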
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk if it
> helps for SPEC?  Venkat, do you think you could benchmark it in the setup
> where you've measured the slowdown to see if it helps?  I see the patch
> changes the loop:

Sure, I will benchmark it and get back to you.

>  	.p2align 4,,10
>  	.p2align 3
>  .L11:
> +	jle	.L10
>  	cmpl	$2, %eax
> -	jne	.L10
> -	testq	%r8, %r8
> -	jg	.L12
> +	je	.L12
>  	.p2align 4,,10
>  	.p2align 3
>  .L10:
>  	addq	%r9, %rsi
>  	cmpq	%rsi, %rdx
>  	jbe	.L35
>  .L13:
>  	movl	24(%rsi), %eax
>  	testl	%eax, %eax
>  	jle	.L10
>  	movq	(%rsi), %r8
>  	testq	%r8, %r8
>  	jns	.L11
>  	cmpl	$1, %eax
>  	jne	.L10
>  .L12:
>  	addq	$1, %r10
>  	movl	$1, %r11d
>  	movq	st5(,%r10,8), %rax
>  	movq	%rsi, (%rax)
>  	addq	%r9, %rsi
>  	movq	%r8, 8(%rax)
>  	movq	st5(,%rdi,8), %rax
>  	movq	%r8, 16(%rax)
>  	cmpq	%rsi, %rdx
>  	ja	.L13
> which I assume should be an improvement, since we save one extra
> comparison.
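
For what it's worth, my reading of the new sequence: control reaches .L11 via
the jns after testq %r8, %r8, so those flags are still live, and the new jle
at .L11 branches on them directly, where the old code had to redo
testq %r8, %r8 / jg after the cmpl.  The hot path to .L12 thus drops from
four instructions to three.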
> 
> 2018-03-27  Jakub Jelinek  <jakub@redhat.com>
> 	    Richard Biener  <rguenth@suse.de>
> 
> 	PR rtl-optimization/78200
> 	* cfgexpand.c (gimple_bb_info_for_bb): New variable.
> 	(expand_bb_seq, expand_phi_nodes): New functions.
> 	(expand_gimple_cond): Use heuristics to decide whether it is
> 	desirable to swap TRUTH_{AND,OR}IF_EXPR operands.
> 	(expand_gimple_basic_block): Remember the GIMPLE IL for bbs
> 	being expanded or already expanded.
> 	(pass_expand::execute): Initialize and then free the
> 	gimple_bb_info_for_bb vector.
> 
> --- gcc/cfgexpand.c.jj	2018-02-09 06:44:36.413805556 +0100
> +++ gcc/cfgexpand.c	2018-03-26 13:35:57.536509844 +0200
> @@ -2357,6 +2357,34 @@ label_rtx_for_bb (basic_block bb ATTRIBU
>    return l;
>  }
> 
> +/* Maps blocks to their GIMPLE IL.  */
> +static vec<gimple_bb_info, va_heap, vl_embed> *gimple_bb_info_for_bb;
> +
> +/* Like bb_seq, except that during expansion returns the GIMPLE seq
> +   even for the blocks that have been already expanded or are being
> +   currently expanded.  */
> +
> +static gimple_seq
> +expand_bb_seq (basic_block bb)
> +{
> +  if ((bb->flags & BB_RTL)
> +      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
> +    return (*gimple_bb_info_for_bb)[bb->index].seq;
> +  return bb_seq (bb);
> +}
> +
> +/* Like phi_nodes, except that during expansion returns the GIMPLE PHIs
> +   even for the blocks that have been already expanded or are being
> +   currently expanded.  */
> +
> +static gimple_seq
> +expand_phi_nodes (basic_block bb)
> +{
> +  if ((bb->flags & BB_RTL)
> +      && (unsigned) bb->index < vec_safe_length (gimple_bb_info_for_bb))
> +    return (*gimple_bb_info_for_bb)[bb->index].phi_nodes;
> +  return phi_nodes (bb);
> +}
> 
>  /* A subroutine of expand_gimple_cond.  Given E, a fallthrough edge
>     of a basic block where we just expanded the conditional at the end,
> @@ -2475,6 +2503,65 @@ expand_gimple_cond (basic_block bb, gcon
>  		  op0 = gimple_assign_rhs1 (second);
>  		  op1 = gimple_assign_rhs2 (second);
>  		}
> +
> +	      /* We'll expand RTL for op0 first, see if we'd better expand RTL
> +		 for op1 first.  Do that if the previous bb ends with
> +		 if (x op cst), op1's def_stmt rhs is x op2 cst and there are
> +		 no non-virtual PHIs nor non-TERed stmts in BB before STMT.  */
> +	      while (TREE_CODE (op1) == SSA_NAME
> +		     && (code == TRUTH_ANDIF_EXPR || code == TRUTH_ORIF_EXPR)
> +		     && single_pred_p (bb))
> +		{
> +		  gimple *def1 = SSA_NAME_DEF_STMT (op1);
> +		  if (!is_gimple_assign (def1)
> +		      || (TREE_CODE_CLASS (gimple_assign_rhs_code (def1))
> +			  != tcc_comparison))
> +		    break;
> +
> +		  basic_block pred = single_pred (bb);
> +		  gimple_seq pred_seq = expand_bb_seq (pred);
> +		  gimple_stmt_iterator i = gsi_last (pred_seq);
> +		  if (!gsi_end_p (i) && is_gimple_debug (gsi_stmt (i)))
> +		    gsi_prev_nondebug (&i);
> +
> +		  gimple *last = gsi_stmt (i);
> +		  if (!last
> +		      || gimple_code (last) != GIMPLE_COND
> +		      || gimple_assign_rhs1 (def1) != gimple_cond_lhs (last)
> +		      || !operand_equal_p (gimple_assign_rhs2 (def1),
> +					   gimple_cond_rhs (last), 0))
> +		    break;
> +
> +		  gimple_seq phi_seq = expand_phi_nodes (bb);
> +		  i = gsi_start (phi_seq);
> +		  if (!gsi_end_p (i)
> +		      && !virtual_operand_p (gimple_phi_result (gsi_stmt (i))))
> +		    break;
> +
> +		  /* Check if all stmts before STMT are TERed.  */
> +		  gimple_seq cur_seq = expand_bb_seq (bb);
> +		  i = gsi_start_nondebug (cur_seq);
> +		  int cnt = 100;
> +		  while (!gsi_end_p (i) && gsi_stmt (i) != stmt)
> +		    {
> +		      gimple *g = gsi_stmt (i);
> +		      if (!is_gimple_assign (g))
> +			break;
> +		      if (--cnt == 0)
> +			break;
> +		      tree lhs = gimple_assign_lhs (g);
> +		      if (TREE_CODE (lhs) != SSA_NAME
> +			  || !get_gimple_for_ssa_name (lhs))
> +			break;
> +		      gsi_next_nondebug (&i);
> +		    }
> +
> +		  if (gsi_stmt (i) != stmt)
> +		    break;
> +
> +		  std::swap (op0, op1);
> +		  break;
> +		}
>  	    }
>  	}
>      }
> @@ -5501,6 +5588,10 @@ expand_gimple_basic_block (basic_block b
>    if (optimize)
>      reorder_operands (bb);
>    stmts = bb_seq (bb);
> +  if ((unsigned) bb->index >= vec_safe_length (gimple_bb_info_for_bb))
> +    vec_safe_grow_cleared (gimple_bb_info_for_bb, bb->index + 1);
> +  (*gimple_bb_info_for_bb)[bb->index].seq = stmts;
> +  (*gimple_bb_info_for_bb)[bb->index].phi_nodes = phi_nodes (bb);
>    bb->il.gimple.seq = NULL;
>    bb->il.gimple.phi_nodes = NULL;
>    rtl_profile_for_bb (bb);
> @@ -6419,6 +6510,8 @@ pass_expand::execute (function *fun)
>        >= PARAM_VALUE (PARAM_MAX_DEBUG_MARKER_COUNT))
>      cfun->debug_nonbind_markers = false;
> 
> +  vec_safe_grow_cleared (gimple_bb_info_for_bb, n_basic_blocks_for_fn (cfun));
> +
>    lab_rtx_for_bb = new hash_map<basic_block, rtx_code_label *>;
>  FOR_BB_BETWEEN (bb, init_block->next_bb, EXIT_BLOCK_PTR_FOR_FN (fun),
>  		  next_bb)
> @@ -6433,6 +6526,8 @@ pass_expand::execute (function *fun)
>        deep_ter_debug_map = NULL;
>      }
> 
> +  vec_free (gimple_bb_info_for_bb);
> +
>    /* Free stuff we no longer need after GIMPLE optimizations.  */
>    free_dominance_info (CDI_DOMINATORS);
>    free_dominance_info (CDI_POST_DOMINATORS);
> 
> 	Jakub
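
As a side note, if I understand the bookkeeping right, gimple_bb_info_for_bb
is a lazily grown, zero-initialized table indexed by bb->index that keeps the
GIMPLE IL visible after bb->il has been handed over to RTL.  A minimal
standalone C sketch of that pattern (illustration only; none of these names
exist in GCC):

	#include <stdlib.h>
	#include <string.h>

	/* saved[i] shadows object i once it has been expanded; lookups fall
	   back to the live object otherwise.  */
	static void **saved;
	static size_t saved_len;

	/* Like vec_safe_grow_cleared: grow the table, zeroing new slots.  */
	static void
	saved_grow_cleared (size_t n)
	{
	  if (n <= saved_len)
	    return;
	  saved = (void **) realloc (saved, n * sizeof *saved);
	  memset (saved + saved_len, 0, (n - saved_len) * sizeof *saved);
	  saved_len = n;
	}

	/* Like expand_bb_seq: prefer the saved copy, else the live one.  */
	static void *
	saved_lookup (size_t i, void *live)
	{
	  if (i < saved_len && saved[i] != NULL)
	    return saved[i];
	  return live;
	}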

Regards,
Venkat.

