[PATCH 4/5] Add Power10 XXSPLTIDP for vector constants

will schmidt will_schmidt@vnet.ibm.com
Fri Nov 5 19:24:36 GMT 2021


On Fri, 2021-11-05 at 00:10 -0400, Michael Meissner wrote:
> Generate XXSPLTIDP for vectors on power10.
> 
> This patch implements XXSPLTIDP support for all vector constants.  The
> XXSPLTIDP instruction is given a 32-bit immediate that is converted to a vector
> of two DFmode constants.  The immediate is in SFmode format, so only constants
> that fit as SFmode values can be loaded with XXSPLTIDP.
> 
> The constraint (eP) added in the previous patch for XXSPLTIW is also used
> for XXSPLTIDP.
> 

ok


> DImode scalar constants are not handled.  This is due to the majority of DImode
> constants will be in the GPR registers.  With vector registers, you have the
> problem that XXSPLTIDP splats the double word into both elements of the
> vector.  However, if TImode is loaded with an integer constant, it wants a full
> 128-bit constant.

This may be worth as adding to a todo somewhere in the code.

> 
> SFmode and DFmode scalar constants are not handled in this patch.  The
> support for for those constants will be in the next patch.

ok

> 
> I have added a temporary switch (-msplat-float-constant) to control whether or
> not the XXSPLTIDP instruction is generated.
> 
> I added 2 new tests to test loading up V2DI and V2DF vector constants.




> 
> 2021-11-05  Michael Meissner  <meissner@the-meissners.org>
> 
> gcc/
> 
> 	* config/rs6000/predicates.md (easy_fp_constant): Add support for
> 	generating XXSPLTIDP.
> 	(vsx_prefixed_constant): Likewise.
> 	(easy_vector_constant): Likewise.
> 	* config/rs6000/rs6000-protos.h (constant_generates_xxspltidp):
> 	New declaration.
> 	* config/rs6000/rs6000.c (output_vec_const_move): Add support for
> 	generating XXSPLTIDP.
> 	(prefixed_xxsplti_p): Likewise.
> 	(constant_generates_xxspltidp): New function.
> 	* config/rs6000/rs6000.opt (-msplat-float-constant): New debug option.
> 
> gcc/testsuite/
> 
> 	* gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
> 	regex for power10.
> 	* gcc.target/powerpc/vec-splat-constant-v2df.c: New test.
> 	* gcc.target/powerpc/vec-splat-constant-v2di.c: New test.
> ---


ok

>  gcc/config/rs6000/predicates.md               |   9 ++
>  gcc/config/rs6000/rs6000-protos.h             |   1 +
>  gcc/config/rs6000/rs6000.c                    | 108 ++++++++++++++++++
>  gcc/config/rs6000/rs6000.opt                  |   4 +
>  .../powerpc/pr86731-fwrapv-longlong.c         |   9 +-
>  .../powerpc/vec-splat-constant-v2df.c         |  64 +++++++++++
>  .../powerpc/vec-splat-constant-v2di.c         |  50 ++++++++
>  7 files changed, 241 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c
> 
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index ed6252bd0c4..d748b11857c 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -610,6 +610,9 @@ (define_predicate "easy_fp_constant"
> 
>        if (constant_generates_xxspltiw (&vsx_const))
>  	return true;
> +
> +      if (constant_generates_xxspltidp (&vsx_const))
> +	return true;
>      }
> 
>    /* Otherwise consider floating point constants hard, so that the
> @@ -653,6 +656,9 @@ (define_predicate "vsx_prefixed_constant"
>    if (constant_generates_xxspltiw (&vsx_const))
>      return true;
> 
> +  if (constant_generates_xxspltidp (&vsx_const))
> +    return true;
> +
>    return false;
>  })
> 
> @@ -727,6 +733,9 @@ (define_predicate "easy_vector_constant"
> 
>  	  if (constant_generates_xxspltiw (&vsx_const))
>  	    return true;
> +
> +	  if (constant_generates_xxspltidp (&vsx_const))
> +	    return true;
>  	}


ok

> 
>        if (TARGET_P9_VECTOR
> diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
> index 99c6a671289..2d28df7442d 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -253,6 +253,7 @@ extern bool vec_const_128bit_to_bytes (rtx, machine_mode,
>  				       vec_const_128bit_type *);
>  extern unsigned constant_generates_lxvkq (vec_const_128bit_type *);
>  extern unsigned constant_generates_xxspltiw (vec_const_128bit_type *);
> +extern unsigned constant_generates_xxspltidp (vec_const_128bit_type *);
>  #endif /* RTX_CODE */
> 
>  #ifdef TREE_CODE
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index be24f56eb31..8fde48cf2b3 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -7012,6 +7012,13 @@ output_vec_const_move (rtx *operands)
>  	      operands[2] = GEN_INT (imm);
>  	      return "xxspltiw %x0,%2";
>  	    }
> +
> +	  imm = constant_generates_xxspltidp (&vsx_const);
> +	  if (imm)


Just a nit that the two lines could be combined into a similar form
as used elsewhere as ...
	if (constant_generates_xxspltidp(&vsx_const))


> +	    {
> +	      operands[2] = GEN_INT (imm);
> +	      return "xxspltidp %x0,%2";
> +	    }

>  	}
> 
>        if (TARGET_P9_VECTOR
> @@ -26809,6 +26816,9 @@ prefixed_xxsplti_p (rtx_insn *insn)
>      {
>        if (constant_generates_xxspltiw (&vsx_const))
>  	return true;
> +
> +      if (constant_generates_xxspltidp (&vsx_const))
> +	return true;
>      }
> 
>    return false;
> @@ -29014,6 +29024,104 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
>    return vsx_const->words[0];
>  }
> 
> +/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero if
> +   the XXSPLTIDP instruction cannot be used.  Otherwise return the immediate
> +   value to be used with the XXSPLTIDP instruction.  */
> +
> +unsigned
> +constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
> +{
> +  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
> +    return 0;
> +
> +  /* Make sure that the two 64-bit segments are the same.  */
> +  if (!vsx_const->all_double_words_same)
> +    return 0;

Perhaps more like "Reject if the two 64-bit segments are (not?) the
same."


> +
> +  /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
> +     Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW).  */
> +  if (vsx_const->all_bytes_same
> +      || vsx_const->all_half_words_same
> +      || vsx_const->all_words_same)
> +    return 0;
> +
> +  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
> +
> +  /* Avoid values that look like DFmode NaN's, except for the normal NaN bit
> +     pattern and the signalling NaN bit pattern.  Recognize infinity and
> +     negative infinity.  */
> +
> +  /* Bit representation of DFmode normal quiet NaN.  */
> +#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)
> +
> +  /* Bit representation of DFmode normal signaling NaN.  */
> +#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)
> +
> +  /* Bit representation of DFmode positive infinity.  */
> +#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)
> +
> +  /* Bit representation of DFmode negative infinity.  */
> +#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

Defines may be more useful in a header file?  

> +
> +  if (value != RS6000_CONST_DF_NAN
> +      && value != RS6000_CONST_DF_NANS
> +      && value != RS6000_CONST_DF_INF
> +      && value != RS6000_CONST_DF_NEG_INF)
> +    {
> +      /* The IEEE 754 64-bit floating format has 1 bit for sign, 11 bits for
> +	 the exponent, and 52 bits for the mantissa (not counting the hidden
> +	 bit used for normal numbers).  NaN values have the exponent set to all
> +	 1 bits, and the mantissa non-zero (mantissa == 0 is infinity).  */
> +
> +      int df_exponent = (value >> 52) & 0x7ff;
> +      unsigned HOST_WIDE_INT df_mantissa
> +	= value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);


Should the "=" be on the end of the previous line? 


> +
> +      if (df_exponent == 0x7ff && df_mantissa != 0)	/* other NaNs.  */
> +	return 0;
> +
> +      /* Avoid values that are DFmode subnormal values.  Subnormal numbers have
> +	 the exponent all 0 bits, and the mantissa non-zero.  If the value is
> +	 subnormal, then the hidden bit in the mantissa is not set.  */
> +      if (df_exponent == 0 && df_mantissa != 0)		/* subnormal.  */
> +	return 0;
> +    }
> +
> +  /* Change the representation to DFmode constant.  */
> +  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
> +
> +  /* real_from_target takes the target words in  target order.  */

Extra space before target order.

> +  if (!BYTES_BIG_ENDIAN)
> +    std::swap (df_words[0], df_words[1]);
> +
> +  REAL_VALUE_TYPE rv_type;
> +  real_from_target (&rv_type, df_words, DFmode);
> +
> +  const REAL_VALUE_TYPE *rv = &rv_type;
> +
> +  /* Validate that the number can be stored as a SFmode value.  */
> +  if (!exact_real_truncate (SFmode, rv))
> +    return 0;
> +
> +  /* Validate that the number is not a SFmode subnormal value (exponent is 0,
> +     mantissa field is non-zero) which is undefined for the XXSPLTIDP
> +     instruction.  */
> +  long sf_value;
> +  real_to_target (&sf_value, rv, SFmode);
> +
> +  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
> +     and 23 bits for the mantissa.  Subnormal numbers have the exponent all
> +     0 bits, and the mantissa non-zero.  */
> +  long sf_exponent = (sf_value >> 23) & 0xFF;
> +  long sf_mantissa = sf_value & 0x7FFFFF;
> +
> +  if (sf_exponent == 0 && sf_mantissa != 0)
> +    return 0;
> +
> +  /* Return the immediate to be used.  */
> +  return sf_value;
> +}

ok

> +
>  
>  struct gcc_target targetm = TARGET_INITIALIZER;
> 
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index ec7b106fddb..c1d661d7e6b 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -644,6 +644,10 @@ msplat-word-constant
>  Target Var(TARGET_SPLAT_WORD_CONSTANT) Init(1) Save
>  Generate (do not generate) code that uses the XXSPLTIW instruction.
> 
> +msplat-float-constant
> +Target Var(TARGET_SPLAT_FLOAT_CONSTANT) Init(1) Save
> +Generate (do not generate) code that uses the XXSPLTIDP instruction.
> +
>  mieee128-constant
>  Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
>  Generate (do not generate) code that uses the LXVKQ instruction.

ok


> diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
> index bd1502bb30a..dcb30e1d886 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
> @@ -24,11 +24,12 @@ vector signed long long splats4(void)
>          return (vector signed long long) vec_sl(mzero, mzero);
>  }
> 
> -/* Codegen will consist of splat and shift instructions for most types.
> -   If folding is enabled, the vec_sl tests using vector long long type will
> -   generate a lvx instead of a vspltisw+vsld pair.  */
> +/* Codegen will consist of splat and shift instructions for most types.  If
> +   folding is enabled, the vec_sl tests using vector long long type will
> +   generate a lvx instead of a vspltisw+vsld pair.  On power10, it will
> +   generate a xxspltidp instruction instead of the lvx.  */
> 
>  /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
>  /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
> -/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxspltidp\M} 2 } } */


ok

No further comments, 
Thanks
-Will


> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
> new file mode 100644
> index 00000000000..82ffc86f8aa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
> @@ -0,0 +1,64 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <math.h>
> +
> +/* Test generating V2DFmode constants with the ISA 3.1 (power10) XXSPLTIDP
> +   instruction.  */
> +
> +vector double
> +v2df_double_0 (void)
> +{
> +  return (vector double) { 0.0, 0.0 };			/* XXSPLTIB or XXLXOR.  */
> +}
> +
> +vector double
> +v2df_double_1 (void)
> +{
> +  return (vector double) { 1.0, 1.0 };			/* XXSPLTIDP.  */
> +}
> +
> +#ifndef __FAST_MATH__
> +vector double
> +v2df_double_m0 (void)
> +{
> +  return (vector double) { -0.0, -0.0 };		/* XXSPLTIDP.  */
> +}
> +
> +vector double
> +v2df_double_nan (void)
> +{
> +  return (vector double) { __builtin_nan (""),
> +			   __builtin_nan ("") };	/* XXSPLTIDP.  */
> +}
> +
> +vector double
> +v2df_double_inf (void)
> +{
> +  return (vector double) { __builtin_inf (),
> +			   __builtin_inf () };		/* XXSPLTIDP.  */
> +}
> +
> +vector double
> +v2df_double_m_inf (void)
> +{
> +  return (vector double) { - __builtin_inf (),
> +			   - __builtin_inf () };	/* XXSPLTIDP.  */
> +}
> +#endif
> +
> +vector double
> +v2df_double_pi (void)
> +{
> +  return (vector double) { M_PI, M_PI };		/* PLVX.  */
> +}
> +
> +vector double
> +v2df_double_denorm (void)
> +{
> +  return (vector double) { (double)0x1p-149f,
> +			   (double)0x1p-149f };		/* PLVX.  */
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c
> new file mode 100644
> index 00000000000..4d44f943d26
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2di.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +/* Test generating V2DImode constants that have the same bit pattern as
> +   V2DFmode constants that can be loaded with the XXSPLTIDP instruction with
> +   the ISA 3.1 (power10).  */
> +
> +vector long long
> +vector_0 (void)
> +{
> +  /* XXSPLTIB or XXLXOR.  */
> +  return (vector long long) { 0LL, 0LL };
> +}
> +
> +vector long long
> +vector_1 (void)
> +{
> +  /* XXSPLTIB and VEXTSB2D.  */
> +  return (vector long long) { 1LL, 1LL };
> +}
> +
> +/* 0x8000000000000000LL is the bit pattern for -0.0, which can be generated
> +   with XXSPLTISDP.  */
> +vector long long
> +vector_float_neg_0 (void)
> +{
> +  /* XXSPLTIDP.  */
> +  return (vector long long) { 0x8000000000000000LL, 0x8000000000000000LL };
> +}
> +
> +/* 0x3ff0000000000000LL is the bit pattern for 1.0 which can be generated with
> +   XXSPLTISDP.  */
> +vector long long
> +vector_float_1_0 (void)
> +{
> +  /* XXSPLTIDP.  */
> +  return (vector long long) { 0x3ff0000000000000LL, 0x3ff0000000000000LL };
> +}
> +
> +/* 0x400921fb54442d18LL is the bit pattern for PI, which cannot be generated
> +   with XXSPLTIDP.  */
> +vector long long
> +scalar_pi (void)
> +{
> +  /* PLXV.  */
> +  return (vector long long) { 0x400921fb54442d18LL, 0x400921fb54442d18LL };
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxspltidp\M} 2 } } */
> -- 
> 2.31.1
> 
> 



More information about the Gcc-patches mailing list