This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH i386 1/8] [AVX512] Adjust register classes.


On 08/07/2013 01:07 PM, Kirill Yukhin wrote:
> +  -1, -1, -1, -1, -1, -1, -1, -1,       /* new SSE registers 16-23*/
> +  -1, -1, -1, -1, -1, -1, -1, -1,       /* new SSE registers 24-31*/

Don't say "new", say "avx512" -- that comment will be there for 10 years.

> @@ -4080,6 +4111,8 @@ ix86_conditional_register_usage (void)
>  	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
>        for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
>  	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
> +      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
> +	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
>      }
>  
>    /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
> @@ -4120,6 +4153,12 @@ ix86_conditional_register_usage (void)
>      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>        if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
>  	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
> +
> +  /* If AVX512F is disabled, squash the registers.  */
> +  if (! TARGET_AVX512F)
> +    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> +      if (TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
> +	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

Using TEST_HARD_REG_BIT is silly when FIRST/LAST_EXT_REX_SSE_REG is available.

> @@ -34285,14 +34346,20 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
>      {
>        /* We implement the move patterns for all vector modes into and
>  	 out of SSE registers, even when no operation instructions
> -	 are available.  OImode move is available only when AVX is
> -	 enabled.  */
> -      return ((TARGET_AVX && mode == OImode)
> -	      || VALID_AVX256_REG_MODE (mode)
> -	      || VALID_SSE_REG_MODE (mode)
> -	      || VALID_SSE2_REG_MODE (mode)
> -	      || VALID_MMX_REG_MODE (mode)
> -	      || VALID_MMX_REG_MODE_3DNOW (mode));
> +	 are available.  In xmm16-xmm31 we can store only 512 bit
> +	 modes.  OImode move is available only when AVX is enabled.
> +	 XImode is available only when AVX512F is enabled.  */
> +      return ((TARGET_AVX512F
> +	       && (mode == XImode
> +		   || VALID_AVX512F_REG_MODE (mode)
> +		   || VALID_AVX512F_SCALAR_MODE (mode)))
> +	      || (!EXT_REX_SSE_REGNO_P (regno)
> +		  && ((TARGET_AVX && mode == OImode)
> +		      || VALID_AVX256_REG_MODE (mode)
> +		      || VALID_SSE_REG_MODE (mode)
> +		      || VALID_SSE2_REG_MODE (mode)
> +		      || VALID_MMX_REG_MODE (mode)
> +		      || VALID_MMX_REG_MODE_3DNOW (mode))));

Break up this condition.  It's too big.  It'll probably be best to
split it up by regno first.

>  
>  (define_insn "*mov<mode>_internal"
> -  [(set (match_operand:V16 0 "nonimmediate_operand"               "=x,x ,m")
> -	(match_operand:V16 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
> +  [(set (match_operand:V16 0 "nonimmediate_operand"               "=v,v ,m")
> +	(match_operand:V16 1 "nonimmediate_or_sse_const_operand"  "C ,vm,v"))]
>    "TARGET_SSE

We really need to change the mode iterator names here.  "V16" is not a
good description of 16, 32, and 64 byte vectors.

>     && (register_operand (operands[0], <MODE>mode)
>         || register_operand (operands[1], <MODE>mode))"
>  {
> +  int mode = get_attr_mode (insn);
>    switch (which_alternative)
>      {
>      case 0:
>        return standard_sse_constant_opcode (insn, operands[1]);
>      case 1:
>      case 2:
> -      switch (get_attr_mode (insn))
> +      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
> +	 in avx512f, so we need to use workarounds, to access sse registers
> +	 16-31, which are evex-only.  */
> +      if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
> +	  && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
> +	      || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
> +	{
> +	  if (memory_operand (operands[0], <MODE>mode))
> +	    {
> +	      if (GET_MODE_SIZE (<MODE>mode) == 32)
> +		return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
> +	      else if (GET_MODE_SIZE (<MODE>mode) == 16)
> +		return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
> +	      else
> +		gcc_unreachable ();
> +	    }
> +	  else if (memory_operand (operands[1], <MODE>mode))
> +	    {
> +	      if (GET_MODE_SIZE (<MODE>mode) == 32)
> +		return "vinsert<shuffletype>64x4\t{$0x0, %1, %g0, %g0|%g0, %g0, %1, $0x0}";
> +	      else if (GET_MODE_SIZE (<MODE>mode) == 16)
> +		return "vinsert<shuffletype>32x4\t{$0x0, %1, %g0, %g0|%g0, %g0, %1, $0x0}";
> +	      else
> +		gcc_unreachable ();
> +	    }

This creates a false dependency on the old %g0.  Wouldn't vbroadcast be better?

> @@ -603,9 +663,9 @@
>  })
>  
>  (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
> -  [(set (match_operand:VF 0 "register_operand" "=x")
> +  [(set (match_operand:VF 0 "register_operand" "=v")
>  	(unspec:VF
> -	  [(match_operand:VF 1 "memory_operand" "m")]
> +	  [(match_operand:VF 1 "memory_operand" "vm")]

memory_operand; v will not match.

> @@ -2154,13 +2214,13 @@
>     (set_attr "mode" "<MODE>")])
>  
>  (define_insn "*fma_fnmsub_<mode>"
> -  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
> +  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
>  	(fma:FMAMODE
>  	  (neg:FMAMODE
> -	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
> -	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm,x,m")
> +	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
> +	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
>  	  (neg:FMAMODE
> -	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
> +	    (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
>    "TARGET_FMA || TARGET_FMA4"
>    "@
>     vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}

There are lots of changes to fma patterns that don't look right.
E.g. are you missing changes to the isa attribute, controlling
the enabled attribute here?

But since you don't change FMAMODE, I don't see how 'v' ever applies.
You'd need to mask the operation on the high elements, which is not
done here either.


r~a


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]