PR84033, powerpc64le -moptimize-swaps bad code with vec_vbpermq

Bill Schmidt wschmidt@linux.vnet.ibm.com
Thu Jan 25 15:06:00 GMT 2018


On Jan 25, 2018, at 4:09 AM, Alan Modra <amodra@gmail.com> wrote:
> 
> vbpermq produces its output in bits 48..63 of the target vector reg,
> so the output cannot be lane swapped.  Bootstrapped and regression
> tested powerpc64le-linux.  OK to apply mainline, and backport to the
> branches?

I can't approve, but FWIW, I agree with this patch.

Bill

> 
> gcc/
> 	PR target/84033
> 	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Exclude
> 	UNSPEC_VBPERMQ.  Sort other unspecs.
> gcc/testsuite/
> 	PR target/84033
> 	* gcc.target/powerpc/swaps-p8-46.c: New.
> 
> diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
> index 1f95290..ffcbba9 100644
> --- a/gcc/config/rs6000/rs6000-p8swap.c
> +++ b/gcc/config/rs6000/rs6000-p8swap.c
> @@ -741,6 +741,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
> 	  {
> 	  default:
> 	    break;
> +	  case UNSPEC_VBPERMQ:
> 	  case UNSPEC_VMRGH_DIRECT:
> 	  case UNSPEC_VMRGL_DIRECT:
> 	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
> @@ -762,8 +763,14 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
> 	  case UNSPEC_VSUMSWS:
> 	  case UNSPEC_VSUMSWS_DIRECT:
> 	  case UNSPEC_VSX_CONCAT:
> +	  case UNSPEC_VSX_CVDPSPN:
> +	  case UNSPEC_VSX_CVSPDP:
> +	  case UNSPEC_VSX_CVSPDPN:
> +	  case UNSPEC_VSX_EXTRACT:
> 	  case UNSPEC_VSX_SET:
> 	  case UNSPEC_VSX_SLDWI:
> +	  case UNSPEC_VSX_VEC_INIT:
> +	  case UNSPEC_VSX_VSLO:
> 	  case UNSPEC_VUNPACK_HI_SIGN:
> 	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
> 	  case UNSPEC_VUNPACK_LO_SIGN:
> @@ -774,12 +781,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
> 	  case UNSPEC_VUPKLPX:
> 	  case UNSPEC_VUPKLS_V4SF:
> 	  case UNSPEC_VUPKLU_V4SF:
> -	  case UNSPEC_VSX_CVDPSPN:
> -	  case UNSPEC_VSX_CVSPDP:
> -	  case UNSPEC_VSX_CVSPDPN:
> -	  case UNSPEC_VSX_EXTRACT:
> -	  case UNSPEC_VSX_VSLO:
> -	  case UNSPEC_VSX_VEC_INIT:
> 	    return 0;
> 	  case UNSPEC_VSPLT_DIRECT:
> 	  case UNSPEC_VSX_XXSPLTD:
> diff --git a/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c
> new file mode 100644
> index 0000000..23494b6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/swaps-p8-46.c
> @@ -0,0 +1,34 @@
> +/* { dg-do run { target { powerpc64le-*-* } } } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
> +/* { dg-options "-mcpu=power8 -O2 " } */
> +
> +typedef __attribute__ ((__aligned__ (8))) unsigned long long __m64;
> +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
> +
> +/* PR84033.  Extracted from xmmintrin.h but with a pointer param to
> +   allow swaps to happen when not inline.  */
> +int __attribute__ ((__noinline__))
> +_mm_movemask_ps (__m128 *__A)
> +{
> +  __vector __m64 result;
> +  static const __vector unsigned int perm_mask =
> +    {
> +      0x00204060, 0x80808080, 0x80808080, 0x80808080
> +    };
> +
> +  result = (__vector __m64)
> +    __builtin_vec_vbpermq ((__vector unsigned char) (*__A),
> +			   (__vector unsigned char) perm_mask);
> +  return result[1];
> +}
> +
> +int
> +main (void)
> +{
> +  union { unsigned int i[4]; __m128 m; } x
> +    = { 0x80000000, 0x80000000, 0x7fffffff, 0x7fffffff };
> +  if (_mm_movemask_ps (&x.m) != 3)
> +    __builtin_abort ();
> +  return 0;
> +}
> 
> -- 
> Alan Modra
> Australia Development Lab, IBM
> 



More information about the Gcc-patches mailing list