This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH i386 AVX512] [63.1/n] Add vpshufb, perm autogen (except for v64qi).


On Thu, Oct 09, 2014 at 04:15:23PM +0400, Ilya Tocar wrote:
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -21358,32 +21358,169 @@ ix86_expand_int_vcond (rtx operands[])
>    return true;
>  }
>  
> +/* AVX512F does support 64-byte integer vector operations,
> +   thus the longest vector we are faced with is V64QImode.  */
> +#define MAX_VECT_LEN	64
> +
> +struct expand_vec_perm_d
> +{
> +  rtx target, op0, op1;
> +  unsigned char perm[MAX_VECT_LEN];
> +  enum machine_mode vmode;
> +  unsigned char nelt;
> +  bool one_operand_p;
> +  bool testing_p;
> +};
> +
>  static bool
> -ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
> +ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1, struct expand_vec_perm_d *d)

Too long line, please wrap it.

>  {
> -  enum machine_mode mode = GET_MODE (op0);
> +  enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
> +
>    switch (mode)
>      {
> +    case V8HImode:
> +      if (!TARGET_AVX512VL || !TARGET_AVX512BW)
> +	return false;
> +      break;
> +    case V16HImode:
> +      if (!TARGET_AVX512VL || !TARGET_AVX512BW)
> +	return false;
> +    case V32HImode:
> +      if (!TARGET_AVX512BW)
> +	return false;
> +      break;
> +    case V4SImode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V8SImode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V16SImode:
> +      if (!TARGET_AVX512F)
> +	return false;
> +      break;
> +    case V4SFmode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V8SFmode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V16SFmode:
> +      if (!TARGET_AVX512F)
> +	return false;
> +      break;
> +    case V2DImode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V4DImode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V8DImode:
> +      if (!TARGET_AVX512F)
> +	return false;
> +      break;
> +    case V2DFmode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V4DFmode:
> +      if (!TARGET_AVX512VL)
> +	return false;
> +      break;
> +    case V8DFmode:
> +      if (!TARGET_AVX512F)
> +	return false;
> +      break;
> +    default:
> +      return false;
> +    }
> +
> +  /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const expander,
> +     so args are either in d, or in op0, op1 etc.  */
> +  if (d)
> +    {
> +      rtx vec[64];
> +      target = d->target;
> +      op0 = d->op0;
> +      op1 = d->op1;
> +      for (int i = 0; i < d->nelt; ++i)
> +	vec[i] = GEN_INT (d->perm[i]);
> +      mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (d->nelt, vec));

Shouldn't the mask use integral vector mode rather than floating?

My strong preference would be:
  enum machine_mode maskmode = mode;
  rtx (*gen) (rtx, rtx, rtx, rtx);
right below the enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
line and then inside of the first switch just do:
...
    case V16SImode:
      if (!TARGET_AVX512F)
	return false;
      gen = gen_avx512f_vpermi2varv16si3;
      break;
    case V4SFmode:
      if (!TARGET_AVX512VL)
	return false;
      gen = gen_avx512vl_vpermi2varv4sf3;
      maskmode = V4SImode;
      break;
...
etc., then in the mask = line use:
    mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
and finally instead of the second switch do:
  emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
  return true;

Otherwise, the patch LGTM, but will leave the final approval to Uros.

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]