[PATCH 1/7] ifcvt: Check if cmovs are needed.

Richard Sandiford richard.sandiford@arm.com
Fri Nov 5 15:33:48 GMT 2021


Robin Dapp <rdapp@linux.ibm.com> writes:
> Hi Richard,
>
> after giving it a second thought, and seeing that most of the changes to 
> existing code are not strictly necessary anymore, I figured it could be 
> easier not changing the current control flow too much like in the 
> attached patch.
>
> The changes remaining are to "outsource" the maybe_expand_insn part and 
> making the emit_conditional_move with full comparison and rev_comparsion 
> externally available.
>
> I suppose straightening of the arguably somewhat baroque parts, we can 
> defer to a separate patch.
>
> On s390 this works nicely but I haven't yet done a bootstrap on other archs.
>
> Regards
>   Robin
>
> commit eb50384ee0cdeeefa61ae89bdbb2875500b7ce60
> Author: Robin Dapp <rdapp@linux.ibm.com>
> Date:   Wed Nov 27 13:53:40 2019 +0100
>
>     ifcvt/optabs: Allow using a CC comparison for emit_conditional_move.
>     
>     Currently we only ever call emit_conditional_move with the comparison
>     (as well as its comparands) we got from the jump.  Thus, backends are
>     going to emit a CC comparison for every conditional move that is being
>     generated instead of re-using the existing CC.
>     This, combined with emitting temporaries for each conditional move,
>     causes sky-high costs for conditional moves.
>     
>     This patch allows to re-use a CC so the costing situation is improved a
>     bit.

Sorry for the slow reply.

> diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
> index 6ae883cbdd4..f7765e60548 100644
> --- a/gcc/ifcvt.c
> +++ b/gcc/ifcvt.c
> @@ -772,7 +772,7 @@ static int noce_try_addcc (struct noce_if_info *);
>  static int noce_try_store_flag_constants (struct noce_if_info *);
>  static int noce_try_store_flag_mask (struct noce_if_info *);
>  static rtx noce_emit_cmove (struct noce_if_info *, rtx, enum rtx_code, rtx,
> -			    rtx, rtx, rtx);
> +			    rtx, rtx, rtx, rtx = NULL, rtx = NULL);
>  static int noce_try_cmove (struct noce_if_info *);
>  static int noce_try_cmove_arith (struct noce_if_info *);
>  static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
> @@ -1711,7 +1711,8 @@ noce_try_store_flag_mask (struct noce_if_info *if_info)
>  
>  static rtx
>  noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
> -		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue)
> +		 rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue, rtx cc_cmp,
> +		 rtx rev_cc_cmp)
>  {
>    rtx target ATTRIBUTE_UNUSED;
>    int unsignedp ATTRIBUTE_UNUSED;
> @@ -1743,23 +1744,30 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
>        end_sequence ();
>      }
>  
> -  /* Don't even try if the comparison operands are weird
> -     except that the target supports cbranchcc4.  */
> -  if (! general_operand (cmp_a, GET_MODE (cmp_a))
> -      || ! general_operand (cmp_b, GET_MODE (cmp_b)))
> -    {
> -      if (!have_cbranchcc4
> -	  || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
> -	  || cmp_b != const0_rtx)
> -	return NULL_RTX;
> -    }
> -
>    unsignedp = (code == LTU || code == GEU
>  	       || code == LEU || code == GTU);
>  
> -  target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
> -				  vtrue, vfalse, GET_MODE (x),
> -				  unsignedp);
> +  if (cc_cmp != NULL_RTX && rev_cc_cmp != NULL_RTX)
> +    target = emit_conditional_move (x, cc_cmp, rev_cc_cmp,
> +				    vtrue, vfalse, GET_MODE (x));
> +  else
> +    {
> +      /* Don't even try if the comparison operands are weird
> +	 except that the target supports cbranchcc4.  */
> +      if (! general_operand (cmp_a, GET_MODE (cmp_a))
> +	  || ! general_operand (cmp_b, GET_MODE (cmp_b)))
> +	{
> +	  if (!have_cbranchcc4
> +	      || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
> +	      || cmp_b != const0_rtx)
> +	    return NULL_RTX;
> +	}
> +
> +      target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
> +				      vtrue, vfalse, GET_MODE (x),
> +				      unsignedp);
> +    }
> +

It's hard to judge this in isolation because it's not clear when
and how the new arguments are going to be used, but it seems OK
in principle.  Do you still want:

  /* If earliest == jump, try to build the cmove insn directly.
     This is helpful when combine has created some complex condition
     (like for alpha's cmovlbs) that we can't hope to regenerate
     through the normal interface.  */

  if (if_info->cond_earliest == if_info->jump)
    {

to be used when cc_cmp and rev_cc_cmp are nonnull?

>    if (target)
>      return target;
>  
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 019bbb62882..25eecf29ed8 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -52,6 +52,9 @@ static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>  static rtx expand_unop_direct (machine_mode, optab, rtx, rtx, int);
>  static void emit_libcall_block_1 (rtx_insn *, rtx, rtx, rtx, bool);
>  
> +static rtx emit_conditional_move (rtx, rtx, rtx, rtx, machine_mode);
> +rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);

This is redundant with the header file declaration.

> +
>  /* Debug facility for use in GDB.  */
>  void debug_optab_libfuncs (void);
>  
> @@ -4875,6 +4878,7 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
>    /* get_condition will prefer to generate LT and GT even if the old
>       comparison was against zero, so undo that canonicalization here since
>       comparisons against zero are cheaper.  */
> +
>    if (code == LT && op1 == const1_rtx)
>      code = LE, op1 = const0_rtx;
>    else if (code == GT && op1 == constm1_rtx)
> @@ -4925,18 +4929,10 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
>  			    OPTAB_WIDEN, &comparison, &cmpmode);
>  	  if (comparison)
>  	    {
> -	      class expand_operand ops[4];
> -
> -	      create_output_operand (&ops[0], target, mode);
> -	      create_fixed_operand (&ops[1], comparison);
> -	      create_input_operand (&ops[2], op2, mode);
> -	      create_input_operand (&ops[3], op3, mode);
> -	      if (maybe_expand_insn (icode, 4, ops))
> -		{
> -		  if (ops[0].value != target)
> -		    convert_move (target, ops[0].value, false);
> -		  return target;
> -		}
> +	       rtx res = emit_conditional_move (target, comparison,
> +						op2, op3, mode);
> +	       if (res != NULL_RTX)
> +		 return res;
>  	    }
>  	  delete_insns_since (last);
>  	  restore_pending_stack_adjust (&save);
> @@ -4959,6 +4955,73 @@ emit_conditional_move (rtx target, enum rtx_code code, rtx op0, rtx op1,
>      }
>  }
>  
> +/* Helper function that, in addition to COMPARISON, also tries
> +   the reversed REV_COMPARISON with swapped OP2 and OP3.  As opposed
> +   to when we pass the specific constituents of a comparison, no
> +   additional insns are emitted for it.  It might still be necessary
> +   to emit more than one insn for the final conditional move, though.  */
> +
> +rtx
> +emit_conditional_move (rtx target, rtx comparison, rtx rev_comparison,
> +		       rtx op2, rtx op3, machine_mode mode)
> +{
> +  rtx res = emit_conditional_move (target, comparison, op2, op3, mode);
> +
> +  if (res != NULL_RTX)
> +    return res;
> +
> +  return emit_conditional_move (target, rev_comparison, op3, op2, mode);
> +}
> +
> +/* Helper for emitting a conditional move.  */
> +
> +static rtx
> +emit_conditional_move (rtx target, rtx comparison,
> +		       rtx op2, rtx op3, machine_mode mode)

I think it'd be better to call one of these functions something else,
rather than make the interpretation of the third parameter depend on
the total number of parameters.  In the second overload, the comparison
rtx effectively replaces four parameters of the existing
emit_conditional_move, so perhaps that's the one that should remain
emit_conditional_move.  Maybe the first one should be called
emit_conditional_move_with_rev or something.

Part of me wonders if this would be simpler if we created a structure
to describe a comparison and passed that around instead of individual
fields, but I guess it could become a rat hole.

Thanks,
Richard

> +{
> +  enum insn_code icode;
> +
> +  if (comparison == NULL_RTX || !COMPARISON_P (comparison))
> +    return NULL_RTX;
> +
> +  /* If the two source operands are identical, that's just a move.  */
> +  if (rtx_equal_p (op2, op3))
> +    {
> +      if (!target)
> +	target = gen_reg_rtx (mode);
> +
> +      emit_move_insn (target, op3);
> +      return target;
> +    }
> +
> +  if (mode == VOIDmode)
> +    mode = GET_MODE (op2);
> +
> +  icode = direct_optab_handler (movcc_optab, mode);
> +
> +  if (icode == CODE_FOR_nothing)
> +    return NULL_RTX;
> +
> +  if (!target)
> +    target = gen_reg_rtx (mode);
> +
> +  class expand_operand ops[4];
> +
> +  create_output_operand (&ops[0], target, mode);
> +  create_fixed_operand (&ops[1], comparison);
> +  create_input_operand (&ops[2], op2, mode);
> +  create_input_operand (&ops[3], op3, mode);
> +
> +  if (maybe_expand_insn (icode, 4, ops))
> +    {
> +      if (ops[0].value != target)
> +	convert_move (target, ops[0].value, false);
> +      return target;
> +    }
> +
> +  return NULL_RTX;
> +}
> +
>  
>  /* Emit a conditional negate or bitwise complement using the
>     negcc or notcc optabs if available.  Return NULL_RTX if such operations
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index 3bbceff92d9..f853b93f37f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -281,6 +281,7 @@ extern void emit_indirect_jump (rtx);
>  /* Emit a conditional move operation.  */
>  rtx emit_conditional_move (rtx, enum rtx_code, rtx, rtx, machine_mode,
>  			   rtx, rtx, machine_mode, int);
> +rtx emit_conditional_move (rtx, rtx, rtx, rtx, rtx, machine_mode);
>  
>  /* Emit a conditional negate or bitwise complement operation.  */
>  rtx emit_conditional_neg_or_complement (rtx, rtx_code, machine_mode, rtx,


More information about the Gcc-patches mailing list