This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 1/2, x86] Add palignr support for AVX2.


On Wed, Oct 01, 2014 at 04:12:15PM +0200, Jakub Jelinek wrote:
> For PR62128, IMHO the right fix is attached.  Note, this is again covered
> in vshuf-*.c tests (test 22 in both vshuf-v32*.c and vshuf-v16*.c).
> With that attached patch, pr62128.c (aka test_22 in vshuf-v32qi.c), changes:
> -       vpshufb .LC0(%rip), %ymm0, %ymm1
> -       vpshufb .LC1(%rip), %ymm0, %ymm0
> -       vpermq  $78, %ymm1, %ymm1
> -       vpor    %ymm1, %ymm0, %ymm0
> +       vpermq  $78, %ymm0, %ymm1
> +       vpalignr        $1, %ymm0, %ymm1, %ymm0
>         ret
> 
> 2014-10-01  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR target/62128
> 	* config/i386/i386.c (expand_vec_perm_1): Try expand_vec_perm_palignr
> 	if it expands to a single insn only.
> 	(expand_vec_perm_palignr): Add SINGLE_INSN_ONLY_P argument.  If true,
> 	fail unless in_order is true.  Add forward declaration.
> 	(expand_vec_perm_vperm2f128): Fix up comment about which permutation
> 	is useful for one_operand_p.
> 	(ix86_expand_vec_perm_const_1): Adjust expand_vec_perm_palignr caller.

Now bootstrapped/regtested on x86_64-linux and i686-linux (and additionally
tested also with --target_board=unix/-mavx2), ok for trunk?

> --- gcc/config/i386/i386.c.jj	2014-10-01 15:42:28.000000000 +0200
> +++ gcc/config/i386/i386.c	2014-10-01 15:50:06.234079887 +0200
> @@ -39636,6 +39636,7 @@ struct expand_vec_perm_d
>  static bool canonicalize_perm (struct expand_vec_perm_d *d);
>  static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
>  static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
> +static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
>  
>  /* Get a vector mode of the same size as the original but with elements
>     twice as wide.  This is only guaranteed to apply to integral vectors.  */
> @@ -43225,6 +43226,10 @@ expand_vec_perm_1 (struct expand_vec_per
>    if (expand_vec_perm_pshufb (d))
>      return true;
>  
> +  /* Try the AVX2 vpalignr instruction.  */
> +  if (expand_vec_perm_palignr (d, true))
> +    return true;
> +
>    /* Try the AVX512F vpermi2 instructions.  */
>    rtx vec[64];
>    enum machine_mode mode = d->vmode;
> @@ -43286,10 +43291,11 @@ expand_vec_perm_pshuflw_pshufhw (struct
>     the permutation using the SSSE3 palignr instruction.  This succeeds
>     when all of the elements in PERM fit within one vector and we merely
>     need to shift them down so that a single vector permutation has a
> -   chance to succeed.  */
> +   chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
> +   the vpalignr instruction itself can perform the requested permutation.  */
>  
>  static bool
> -expand_vec_perm_palignr (struct expand_vec_perm_d *d)
> +expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
>  {
>    unsigned i, nelt = d->nelt;
>    unsigned min, max;
> @@ -43320,8 +43326,9 @@ expand_vec_perm_palignr (struct expand_v
>  
>    /* Given that we have SSSE3, we know we'll be able to implement the
>       single operand permutation after the palignr with pshufb for
> -     128-bit vectors.  */
> -  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16)
> +     128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
> +     first.  */
> +  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
>      return true;
>  
>    dcopy = *d;
> @@ -43342,6 +43349,9 @@ expand_vec_perm_palignr (struct expand_v
>      }
>    dcopy.one_operand_p = true;
>  
> +  if (single_insn_only_p && !in_order)
> +    return false;
> +
>    /* For AVX2, test whether we can permute the result in one instruction.  */
>    if (d->testing_p)
>      {
> @@ -43922,7 +43932,8 @@ expand_vec_perm_vperm2f128 (struct expan
>  	  return true;
>  	}
>  
> -      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
> +      /* For one operand, the only useful vperm2f128 permutation is 0x01
> +	 aka lanes swap.  */
>        if (d->one_operand_p)
>  	return false;
>      }
> @@ -44811,7 +44822,7 @@ ix86_expand_vec_perm_const_1 (struct exp
>    if (expand_vec_perm_pshuflw_pshufhw (d))
>      return true;
>  
> -  if (expand_vec_perm_palignr (d))
> +  if (expand_vec_perm_palignr (d, false))
>      return true;
>  
>    if (expand_vec_perm_interleave2 (d))


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]