This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH i386 1/8] [AVX512] Adjust register classes.
- From: Richard Henderson <rth at redhat dot com>
- To: Kirill Yukhin <kirill dot yukhin at gmail dot com>
- Cc: ubizjak at gmail dot com, jakub at redhat dot com, vmakarov at redhat dot com, gcc-patches at gcc dot gnu dot org
- Date: Mon, 19 Aug 2013 13:58:28 -0700
- Subject: Re: [PATCH i386 1/8] [AVX512] Adjust register classes.
- References: <CAGs3RfuFxPd-Q-q_MK_YtbWd6=oXdzFN8MHngt3u5m9_GjFGXA at mail dot gmail dot com> <20130807200725 dot GB42201 at msticlxl57 dot ims dot intel dot com>
On 08/07/2013 01:07 PM, Kirill Yukhin wrote:
> + -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 16-23*/
> + -1, -1, -1, -1, -1, -1, -1, -1, /* new SSE registers 24-31*/
Don't say "new", say "avx512" -- that comment will be there for 10 years.
> @@ -4080,6 +4111,8 @@ ix86_conditional_register_usage (void)
> fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
> for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
> fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
> + for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
> + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
> }
>
> /* See the definition of CALL_USED_REGISTERS in i386.h. */
> @@ -4120,6 +4153,12 @@ ix86_conditional_register_usage (void)
> for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
> fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
> +
> + /* If AVX512F is disabled, squash the registers. */
> + if (! TARGET_AVX512F)
> + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
> + if (TEST_HARD_REG_BIT (reg_class_contents[(int)EVEX_SSE_REGS], i))
> + fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
Using TEST_HARD_REG_BIT is silly when FIRST/LAST_EXT_REX_SSE_REG is available.
> @@ -34285,14 +34346,20 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
> {
> /* We implement the move patterns for all vector modes into and
> out of SSE registers, even when no operation instructions
> - are available. OImode move is available only when AVX is
> - enabled. */
> - return ((TARGET_AVX && mode == OImode)
> - || VALID_AVX256_REG_MODE (mode)
> - || VALID_SSE_REG_MODE (mode)
> - || VALID_SSE2_REG_MODE (mode)
> - || VALID_MMX_REG_MODE (mode)
> - || VALID_MMX_REG_MODE_3DNOW (mode));
> + are available. In xmm16-xmm31 we can store only 512 bit
> + modes. OImode move is available only when AVX is enabled.
> + XImode is available only when AVX512F is enabled. */
> + return ((TARGET_AVX512F
> + && (mode == XImode
> + || VALID_AVX512F_REG_MODE (mode)
> + || VALID_AVX512F_SCALAR_MODE (mode)))
> + || (!EXT_REX_SSE_REGNO_P (regno)
> + && ((TARGET_AVX && mode == OImode)
> + || VALID_AVX256_REG_MODE (mode)
> + || VALID_SSE_REG_MODE (mode)
> + || VALID_SSE2_REG_MODE (mode)
> + || VALID_MMX_REG_MODE (mode)
> + || VALID_MMX_REG_MODE_3DNOW (mode))));
Break up this condition. It's too big. It'll probably be best to
split it up by regno first.
>
> (define_insn "*mov<mode>_internal"
> - [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m")
> - (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
> + [(set (match_operand:V16 0 "nonimmediate_operand" "=v,v ,m")
> + (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
> "TARGET_SSE
We really need to change the mode iterator names here. "V16" is not a
good description of 16, 32, and 64 byte vectors.
> && (register_operand (operands[0], <MODE>mode)
> || register_operand (operands[1], <MODE>mode))"
> {
> + int mode = get_attr_mode (insn);
> switch (which_alternative)
> {
> case 0:
> return standard_sse_constant_opcode (insn, operands[1]);
> case 1:
> case 2:
> - switch (get_attr_mode (insn))
> + /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
> + in avx512f, so we need to use workarounds, to access sse registers
> + 16-31, which are evex-only. */
> + if (TARGET_AVX512F && GET_MODE_SIZE (<MODE>mode) < 64
> + && (EXT_REX_SSE_REGNO_P (REGNO (operands[0]))
> + || EXT_REX_SSE_REGNO_P (REGNO (operands[1]))))
> + {
> + if (memory_operand (operands[0], <MODE>mode))
> + {
> + if (GET_MODE_SIZE (<MODE>mode) == 32)
> + return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
> + else if (GET_MODE_SIZE (<MODE>mode) == 16)
> + return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
> + else
> + gcc_unreachable ();
> + }
> + else if (memory_operand (operands[1], <MODE>mode))
> + {
> + if (GET_MODE_SIZE (<MODE>mode) == 32)
> + return "vinsert<shuffletype>64x4\t{$0x0, %1, %g0, %g0|%g0, %g0, %1, $0x0}";
> + else if (GET_MODE_SIZE (<MODE>mode) == 16)
> + return "vinsert<shuffletype>32x4\t{$0x0, %1, %g0, %g0|%g0, %g0, %1, $0x0}";
> + else
> + gcc_unreachable ();
> + }
This creates a false dependency on the old %g0. Wouldn't vbroadcast be better?
> @@ -603,9 +663,9 @@
> })
>
> (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
> - [(set (match_operand:VF 0 "register_operand" "=x")
> + [(set (match_operand:VF 0 "register_operand" "=v")
> (unspec:VF
> - [(match_operand:VF 1 "memory_operand" "m")]
> + [(match_operand:VF 1 "memory_operand" "vm")]
memory_operand; v will not match.
> @@ -2154,13 +2214,13 @@
> (set_attr "mode" "<MODE>")])
>
> (define_insn "*fma_fnmsub_<mode>"
> - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x")
> + [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
> (fma:FMAMODE
> (neg:FMAMODE
> - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x"))
> - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m")
> + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
> + (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
> (neg:FMAMODE
> - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))]
> + (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
> "TARGET_FMA || TARGET_FMA4"
> "@
> vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
There are lots of changes to fma patterns that don't look right.
E.g. are you missing changes to the isa attribute, controlling
the enabled attribute here?
But since you don't change FMAMODE, I don't see how 'v' ever applies.
You'd need to mask the operation on the high elements, which is not
done here either.
r~a