This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][GCC][AArch64] optimize float immediate moves (1 /4) - infrastructure.


On Mon, Jun 26, 2017 at 11:49:42AM +0100, Tamar Christina wrote:
> Hi All,
> 
> I've updated patch accordingly.
> 
> This mostly involves removing the loop to create the ival
> and removing the *2 code and instead defaulting to 64bit
> and switching to 128 when needed.
> 
> Regression tested on aarch64-none-linux-gnu and no regressions.
> 
> OK for trunk?

Almost, with a couple of small changes and clarifications.

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 04417dcd609f6e8ff594a9c5853b3143696d3208..efb027f7fa9b9750b019c529bbcfc8b73dbaf804 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4668,6 +4670,62 @@ aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
>    return true;
>  }
>  
> +/* Return the binary representation of floating point constant VALUE in INTVAL.
> +   If the value cannot be converted, return false without setting INTVAL.
> +   The conversion is done in the given MODE.  */
> +bool
> +aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
> +{
> +  machine_mode mode = GET_MODE (value);
> +  if (GET_CODE (value) != CONST_DOUBLE
> +      || !SCALAR_FLOAT_MODE_P (mode)
> +      || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
> +    return false;
> +
> +  unsigned HOST_WIDE_INT ival = 0;
> +
> +  /* Only support up to DF mode.  */
> +  gcc_assert (GET_MODE_BITSIZE (mode) <= 64);

GET_MODE_BITSIZE (DFmode) would be more self-documenting.

> +
> +  long res[2];
> +  real_to_target (res,
> +		  CONST_DOUBLE_REAL_VALUE (value),
> +		  REAL_MODE_FORMAT (mode));
> +
> +  ival = zext_hwi (res[0], 32);
> +  if (GET_MODE_BITSIZE (mode) == 64)

Likewise here.

> +    ival |= (zext_hwi (res[1], 32) << 32);
> +
> +  *intval = ival;
> +  return true;
> +}
> +

> @@ -4680,6 +4738,46 @@ aarch64_float_const_zero_rtx_p (rtx x)
>    return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
>  }
>  
> +/* Return TRUE if rtx X is immediate constant that fits in a single
> +   MOVI immediate operation.  */
> +bool
> +aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
> +{
> +  if (!TARGET_SIMD)
> +     return false;
> +
> +  machine_mode vmode, imode;
> +  unsigned HOST_WIDE_INT ival;
> +
> +  /* Don't write float constants out to memory.  */

This comment seems (only a little) out of line with the code below - "Don't
write float constants out to memory if we can represent them as integers."

> +  if (GET_CODE (x) == CONST_DOUBLE
> +      && SCALAR_FLOAT_MODE_P (mode))
> +    {
> +      if (!aarch64_reinterpret_float_as_int (x, &ival))
> +	return false;
> +
> +      imode = int_mode_for_mode (mode);
> +    }
> +  else if (GET_CODE (x) == CONST_INT
> +	   && SCALAR_INT_MODE_P (mode))
> +    {
> +       imode = mode;
> +       ival = INTVAL (x);
> +    }
> +  else
> +    return false;
> +
> +   /* use a 64 bit mode for everything except for DI/DF mode, where we use
> +     a 128 bit vector mode.  */
> +  int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
> +
> +  vmode = aarch64_simd_container_mode (imode, width);
> +  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
> +
> +  return aarch64_simd_valid_immediate (v_op, vmode, false, NULL);

I still wonder whether we could rewrite aarch64_simd_valid)_immediate to
avoid the need for a 128-bit vector here - 64-bits are good enough.

This doesn't need fixed for this patch, but it would make for a small
optimisation.

> +}
> +
> +
>  /* Return the fixed registers used for condition codes.  */
>  
>  static bool
> @@ -5857,12 +5955,6 @@ aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
>        return NO_REGS;
>      }
>  
> -  /* If it's an integer immediate that MOVI can't handle, then
> -     FP_REGS is not an option, so we return NO_REGS instead.  */
> -  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
> -      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
> -    return NO_REGS;
> -

I don't understand this relaxation could you explain what this achieves
and why it is safe in this patch?

>    /* Register eliminiation can result in a request for
>       SP+constant->FP_REGS.  We cannot support such operations which
>       use SP as source and an FP_REG as destination, so reject out
> @@ -6773,26 +6865,44 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
>        return true;
>  
>      case CONST_DOUBLE:
> +
> +	 /* First determine number of instructions to do the move
> +	    as an integer constant.  */
> +	if (!aarch64_float_const_representable_p (x)
> +	    && !aarch64_can_const_movi_rtx_p (x, mode)
> +	    && aarch64_float_const_rtx_p (x))
> +	  {
> +	    unsigned HOST_WIDE_INT ival;
> +	    bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
> +	    gcc_assert (succeed);
> +
> +	    machine_mode imode = mode == HFmode ? SImode : int_mode_for_mode (mode);
> +	    int ncost = aarch64_internal_mov_immediate
> +		(NULL_RTX, gen_int_mode (ival, imode), false, imode);
> +	    *cost += COSTS_N_INSNS (ncost);
> +	    return true;
> +	  }
> +
>        if (speed)
>  	{
> -	  /* mov[df,sf]_aarch64.  */
> -	  if (aarch64_float_const_representable_p (x))
> -	    /* FMOV (scalar immediate).  */
> -	    *cost += extra_cost->fp[mode == DFmode].fpconst;
> -	  else if (!aarch64_float_const_zero_rtx_p (x))
> -	    {
> -	      /* This will be a load from memory.  */
> -	      if (mode == DFmode)
> +	/* mov[df,sf]_aarch64.  */
> +	if (aarch64_float_const_representable_p (x))
> +	  /* FMOV (scalar immediate).  */
> +	  *cost += extra_cost->fp[mode == DFmode].fpconst;
> +	else if (!aarch64_float_const_zero_rtx_p (x))
> +	  {
> +	    /* This will be a load from memory.  */
> +	    if (mode == DFmode)
>  		*cost += extra_cost->ldst.loadd;
> -	      else
> +	    else
>  		*cost += extra_cost->ldst.loadf;
> -	    }
> -	  else
> -	    /* Otherwise this is +0.0.  We get this using MOVI d0, #0
> -	       or MOV v0.s[0], wzr - neither of which are modeled by the
> -	       cost tables.  Just use the default cost.  */
> -	    {
> -	    }
> +	  }
> +	else
> +	  /* Otherwise this is +0.0.  We get this using MOVI d0, #0
> +	     or MOV v0.s[0], wzr - neither of which are modeled by the
> +	     cost tables.  Just use the default cost.  */
> +	  {
> +	  }


Why reindent this code? From what I can tell this makes the code less
conformant to GNU style.

>  	}
>  
>        return true;
> @@ -6974,7 +7084,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
>  	  if (speed)
>  	    *cost += extra_cost->fp[mode == DFmode].compare;
>  
> -          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
> +	  if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))

Likewise here?

>              {
>  	      *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
>                /* FCMP supports constant 0.0 for no extra cost. */

> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index cd7ded986630c14ed6d42618b2a1f9baa0cbd192..6992c82fa790eac34669fcc5b030e395ad332201 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -53,6 +53,11 @@
>         (ior (match_operand 0 "register_operand")
>  	    (match_test "op == const0_rtx"))))
>  
> +(define_predicate "aarch64_reg_or_fp_float"
> +  (ior (match_operand 0 "register_operand")
> +	(and (match_code "const_double")
> +	     (match_test "aarch64_float_const_rtx_p (op)"))))
> +

Unused?

>  (define_predicate "aarch64_reg_or_fp_zero"
>    (ior (match_operand 0 "register_operand")
>  	(and (match_code "const_double")

Thanks,
James


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]