[PING PATCH] rs6000: Add Power10 optimization for most _mm_movemask*

Paul A. Clarke pc@us.ibm.com
Mon Nov 8 17:42:56 GMT 2021


Gentle ping...

On Thu, Oct 21, 2021 at 12:22:12PM -0500, Paul A. Clarke via Gcc-patches wrote:
> Power10 ISA added `vextract*` instructions which are realized in the
> `vec_extractm` intrinsic.
> 
> Use `vec_extractm` for `_mm_movemask_ps`, `_mm_movemask_pd`, and
> `_mm_movemask_epi8` compatibility intrinsics, when `_ARCH_PWR10` is defined.
> 
> 2021-10-21  Paul A. Clarke  <pc@us.ibm.com>
> 
> gcc
> 	* config/rs6000/xmmintrin.h (_mm_movemask_ps): Use vec_extractm
> 	when _ARCH_PWR10.
> 	* config/rs6000/emmintrin.h (_mm_movemask_pd): Likewise.
> 	(_mm_movemask_epi8): Likewise.
> ---
> Tested on Power10 powerpc64le-linux (compiled with and without
> `-mcpu=power10`).
> 
> OK for trunk?
> 
>  gcc/config/rs6000/emmintrin.h | 8 ++++++++
>  gcc/config/rs6000/xmmintrin.h | 4 ++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h
> index 32ad72b4cc35..ab16c13c379e 100644
> --- a/gcc/config/rs6000/emmintrin.h
> +++ b/gcc/config/rs6000/emmintrin.h
> @@ -1233,6 +1233,9 @@ _mm_loadl_pd (__m128d __A, double const *__B)
>  extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_movemask_pd (__m128d  __A)
>  {
> +#ifdef _ARCH_PWR10
> +  return vec_extractm ((__v2du) __A);
> +#else
>    __vector unsigned long long result;
>    static const __vector unsigned int perm_mask =
>      {
> @@ -1252,6 +1255,7 @@ _mm_movemask_pd (__m128d  __A)
>  #else
>    return result[0];
>  #endif
> +#endif /* !_ARCH_PWR10 */
>  }
>  #endif /* _ARCH_PWR8 */
>  
> @@ -2030,6 +2034,9 @@ _mm_min_epu8 (__m128i __A, __m128i __B)
>  extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_movemask_epi8 (__m128i __A)
>  {
> +#ifdef _ARCH_PWR10
> +  return vec_extractm ((__v16qu) __A);
> +#else
>    __vector unsigned long long result;
>    static const __vector unsigned char perm_mask =
>      {
> @@ -2046,6 +2053,7 @@ _mm_movemask_epi8 (__m128i __A)
>  #else
>    return result[0];
>  #endif
> +#endif /* !_ARCH_PWR10 */
>  }
>  #endif /* _ARCH_PWR8 */
>  
> diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
> index ae1a33e8d95b..4c093fd1d5ae 100644
> --- a/gcc/config/rs6000/xmmintrin.h
> +++ b/gcc/config/rs6000/xmmintrin.h
> @@ -1352,6 +1352,9 @@ _mm_storel_pi (__m64 *__P, __m128 __A)
>  extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_movemask_ps (__m128  __A)
>  {
> +#ifdef _ARCH_PWR10
> +  return vec_extractm ((vector unsigned int) __A);
> +#else
>    __vector unsigned long long result;
>    static const __vector unsigned int perm_mask =
>      {
> @@ -1371,6 +1374,7 @@ _mm_movemask_ps (__m128  __A)
>  #else
>    return result[0];
>  #endif
> +#endif /* !_ARCH_PWR10 */
>  }
>  #endif /* _ARCH_PWR8 */
>  
> -- 
> 2.27.0
> 


More information about the Gcc-patches mailing list