This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix vec_set_hi* patterns (PR target/70059)


Hi!

Unlike the older vec_set_hi 256-bit patterns, which are correctly
using the VEC_SELECT as the first operand of VEC_CONCAT and
match_operand as second, because that is what the instruction does,
picks up low part from operand 1 and puts the operand 2 as the high part
of the result, the 512-bit vec_set_hi patterns (both the avx512f ones
and avx512dq ones) use the same order of VEC_CONCAT operands as vec_set_lo
(and differ just by different second operand of VEC_SELECT).
This leads to wrong-code on the following testcases, simplify-rtx.c during
combine simply looks at the RTL patterns and simplifies stuff according
to how the RTL patterns look like.

Fixed thusly, unfortunately I don't have access to avx512f (and not even to
avx512dq) hw, so while I will bootstrap/regtest it on Haswell-E, can't test
the tests if they now work at runtime (they link and the assembly of the foo
routine has changed and looks good to me).  Can somebody test this please
on real hw or emulator?
Ok for trunk if it passes?

2016-03-03  Jakub Jelinek  <jakub@redhat.com>

	PR target/70059
	* config/i386/sse.md (vec_set_lo_<mode><mask_name>,
	<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask): Formatting
	fixes.
	(vec_set_hi_<mode><mask_name>): Likewise.  Swap VEC_CONCAT operands.

	* gcc.target/i386/avx512f-pr70059.c: New test.
	* gcc.target/i386/avx512dq-pr70059.c: New test.

--- gcc/config/i386/sse.md.jj	2016-03-02 20:14:11.000000000 +0100
+++ gcc/config/i386/sse.md	2016-03-03 10:40:58.325680697 +0100
@@ -12426,13 +12426,13 @@ (define_expand "<extract_type_2>_vinsert
 {
   int mask = INTVAL (operands[3]);
   if (mask == 0)
-    emit_insn (gen_vec_set_lo_<mode>_mask
-      (operands[0], operands[1], operands[2],
-       operands[4], operands[5]));
+    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
+					   operands[2], operands[4],
+					   operands[5]));
   else
-    emit_insn (gen_vec_set_hi_<mode>_mask
-      (operands[0], operands[1], operands[2],
-       operands[4], operands[5]));
+    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
+					   operands[2], operands[4],
+					   operands[5]));
   DONE;
 })
 
@@ -12443,9 +12443,9 @@ (define_insn "vec_set_lo_<mode><mask_nam
 	  (vec_select:<ssehalfvecmode>
 	    (match_operand:V16FI 1 "register_operand" "v")
 	    (parallel [(const_int 8) (const_int 9)
-	      (const_int 10) (const_int 11)
-	      (const_int 12) (const_int 13)
-              (const_int 14) (const_int 15)]))))]
+		       (const_int 10) (const_int 11)
+		       (const_int 12) (const_int 13)
+		       (const_int 14) (const_int 15)]))))]
   "TARGET_AVX512DQ"
   "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
   [(set_attr "type" "sselog")
@@ -12456,13 +12456,13 @@ (define_insn "vec_set_lo_<mode><mask_nam
 (define_insn "vec_set_hi_<mode><mask_name>"
   [(set (match_operand:V16FI 0 "register_operand" "=v")
 	(vec_concat:V16FI
-	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
 	  (vec_select:<ssehalfvecmode>
 	    (match_operand:V16FI 1 "register_operand" "v")
 	    (parallel [(const_int 0) (const_int 1)
-	      (const_int 2) (const_int 3)
-	      (const_int 4) (const_int 5)
-              (const_int 6) (const_int 7)]))))]
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))
+	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512DQ"
   "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
   [(set_attr "type" "sselog")
@@ -12477,7 +12477,7 @@ (define_insn "vec_set_lo_<mode><mask_nam
 	  (vec_select:<ssehalfvecmode>
 	    (match_operand:V8FI 1 "register_operand" "v")
 	    (parallel [(const_int 4) (const_int 5)
-              (const_int 6) (const_int 7)]))))]
+		       (const_int 6) (const_int 7)]))))]
   "TARGET_AVX512F"
   "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
   [(set_attr "type" "sselog")
@@ -12488,11 +12488,11 @@ (define_insn "vec_set_lo_<mode><mask_nam
 (define_insn "vec_set_hi_<mode><mask_name>"
   [(set (match_operand:V8FI 0 "register_operand" "=v")
 	(vec_concat:V8FI
-	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
 	  (vec_select:<ssehalfvecmode>
 	    (match_operand:V8FI 1 "register_operand" "v")
 	    (parallel [(const_int 0) (const_int 1)
-              (const_int 2) (const_int 3)]))))]
+		       (const_int 2) (const_int 3)]))
+	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX512F"
   "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
   [(set_attr "type" "sselog")
--- gcc/testsuite/gcc.target/i386/avx512f-pr70059.c.jj	2016-03-03 11:06:00.949063626 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-pr70059.c	2016-03-03 11:10:42.772195215 +0100
@@ -0,0 +1,33 @@
+/* PR target/70059 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include "avx512f-check.h"
+
+__attribute__((noinline, noclone)) __m512i
+foo (__m256i a, __m256i b)
+{
+  __m512i r = _mm512_undefined_si512 ();
+  r = _mm512_inserti64x4 (r, a, 0);
+  r = _mm512_inserti64x4 (r, b, 1);
+  return r;
+}
+
+static void
+avx512f_test (void)
+{
+  union256i_q a, b;
+  union512i_q r;
+  long long r_ref[8];
+  int i;
+  for (i = 0; i < 4; i++)
+    {
+      a.a[i] = 0x0101010101010101ULL * i;
+      b.a[i] = 0x1010101010101010ULL * i;
+      r_ref[i] = a.a[i];
+      r_ref[i + 4] = b.a[i];
+    }
+  r.x = foo (a.x, b.x);
+  check_union512i_q (r, r_ref);
+}
--- gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c.jj	2016-03-03 11:21:09.740587014 +0100
+++ gcc/testsuite/gcc.target/i386/avx512dq-pr70059.c	2016-03-03 11:19:57.000000000 +0100
@@ -0,0 +1,33 @@
+/* PR target/70059 */
+/* { dg-do run } */
+/* { dg-require-effective-target avx512dq } */
+/* { dg-options "-O2 -mavx512dq" } */
+
+#include "avx512dq-check.h"
+
+__attribute__((noinline, noclone)) __m512i
+foo (__m256i a, __m256i b)
+{
+  __m512i r = _mm512_undefined_si512 ();
+  r = _mm512_inserti32x8 (r, a, 0);
+  r = _mm512_inserti32x8 (r, b, 1);
+  return r;
+}
+
+static void
+avx512dq_test (void)
+{
+  union256i_q a, b;
+  union512i_q r;
+  long long r_ref[8];
+  int i;
+  for (i = 0; i < 4; i++)
+    {
+      a.a[i] = 0x0101010101010101ULL * i;
+      b.a[i] = 0x1010101010101010ULL * i;
+      r_ref[i] = a.a[i];
+      r_ref[i + 4] = b.a[i];
+    }
+  r.x = foo (a.x, b.x);
+  check_union512i_q (r, r_ref);
+}

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]