This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix ICE with AVX512F and AMD tuning (PR target/70300)


Hi!

vec_interleave_lowv4sf only supports the =x, x, x alternative, not =v, v, v
(which should be supportable for AVX512VL only anyway, but that is probably
stage1 material).  Without AVX512VL and with an ext sse reg input operand,
we have to either do the interleaving or broadcast in the destination,
or disable the splitter.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-21  Jakub Jelinek  <jakub@redhat.com>

	PR target/70300
	* config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
	instead of source if operands[1] is xmm16 and above and
	!TARGET_AVX512VL.  Use avx512f_vec_dupv16sf_1 instead of
	vec_interleave_lowv4sf if we need to unpack xmm16 and above.

	* gcc.target/i386/pr70300.c: New test.

--- gcc/config/i386/i386.md.jj	2016-03-17 12:53:25.000000000 +0100
+++ gcc/config/i386/i386.md	2016-03-21 11:50:54.539001151 +0100
@@ -4229,17 +4229,28 @@ (define_split
     {
       /* If it is unsafe to overwrite upper half of source, we need
 	 to move to destination and unpack there.  */
-      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
-	   || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
-	  && true_regnum (operands[0]) != true_regnum (operands[1]))
+      if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+	    || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+	   && true_regnum (operands[0]) != true_regnum (operands[1]))
+	  || (EXT_REX_SSE_REG_P (operands[1])
+	      && !TARGET_AVX512VL))
 	{
 	  rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
 	  emit_move_insn (tmp, operands[1]);
 	}
       else
 	operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
-      		 			     operands[3]));
+      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
+	 =v, v, then vbroadcastss will be only needed for AVX512F without
+	 AVX512VL.  */
+      if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
+	emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+					       operands[3]));
+      else
+	{
+	  rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0);
+	  emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
+	}
     }
   else
     emit_insn (gen_vec_setv4sf_0 (operands[3],
--- gcc/testsuite/gcc.target/i386/pr70300.c.jj	2016-03-21 11:56:16.455580855 +0100
+++ gcc/testsuite/gcc.target/i386/pr70300.c	2016-03-21 11:55:55.000000000 +0100
@@ -0,0 +1,25 @@
+/* PR target/70300 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */
+
+typedef _Complex A __attribute__ ((mode (SC)));
+typedef _Complex B __attribute__ ((mode (DC)));
+typedef _Complex C __attribute__ ((mode (TC)));
+
+C
+foo (A a, B b, C c, A d, B e, C f)
+{
+  b -= a;
+  d += a;
+  a += f;
+  return a + b + d + e;
+}
+
+__attribute__((target ("avx512vl"))) C
+bar (A a, B b, C c, A d, B e, C f)
+{
+  b -= a;
+  d += a;
+  a += f;
+  return a + b + d + e;
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]