This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC PATCH] Improve -mavx{,2} vector extraction


On Fri, Sep 16, 2011 at 11:16:44AM +0200, Jakub Jelinek wrote:
> The avx2_extracti128 pattern looked like wrong RTL, as to extract
> a 2 element vector from 4 element vector it used just one constant
> in the parallel instead of two.  I've changed it into a define_expand.

Actually there were two further issues with avx2_extracti128: one introduced
by my change (a copy-and-paste error in the switch control expression) and one
preexisting (no idea why it didn't fail before).  In vextracti128 the source
operand has to be a register and the destination operand has to be a register
or memory, but the predicates were incorrectly swapped.

So here is a version that has been (together with the smin/smax patch)
bootstrapped/regtested on x86_64-linux and i686-linux and additionally
tested with RUNTESTFLAGS='--target_board=unix\{-m32,-m64\} i386.exp vect.exp'
on AVX-capable hardware.

Sorry for the screw-up.

2011-09-16  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (vec_extract_hi_<mode>,
	vec_extract_hi_v16hi, vec_extract_hi_v32qi): Use
	vextracti128 instead of vextractf128 for -mavx2 and
	integer vectors.  For V4DFmode fix up mode attribute.
	(VEC_EXTRACT_MODE): For TARGET_AVX add 32-byte vectors.
	(vec_set_lo_<mode>, vec_set_hi_<mode>): For VI8F_256 modes use V4DF
	instead of V8SF mode attribute.
	(avx2_extracti128): Change into define_expand.
	* config/i386/i386.c (ix86_expand_vector_extract): Handle
	32-byte vector modes if TARGET_AVX.

	* gcc.target/i386/sse2-extract-1.c: New test.
	* gcc.target/i386/avx-extract-1.c: New test.

--- gcc/config/i386/sse.md.jj	2011-09-15 17:36:20.000000000 +0200
+++ gcc/config/i386/sse.md	2011-09-16 10:51:51.000000000 +0200
@@ -3863,13 +3863,23 @@ (define_insn "vec_extract_hi_<mode>"
 	  (match_operand:VI8F_256 1 "register_operand" "x,x")
 	  (parallel [(const_int 2) (const_int 3)])))]
   "TARGET_AVX"
-  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+  if (get_attr_mode (insn) == MODE_OI)
+    return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+  else
+    return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,store")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
+   (set (attr "mode")
+     (if_then_else
+       (and (match_test "TARGET_AVX2")
+	    (eq (const_string "<MODE>mode") (const_string "V4DImode")))
+     (const_string "OI")
+     (const_string "V4DF")))])
 
 (define_insn_and_split "vec_extract_lo_<mode>"
   [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
@@ -3898,13 +3908,23 @@ (define_insn "vec_extract_hi_<mode>"
 	  (parallel [(const_int 4) (const_int 5)
 		     (const_int 6) (const_int 7)])))]
   "TARGET_AVX"
-  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+  if (get_attr_mode (insn) == MODE_OI)
+    return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+  else
+    return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,store")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
+   (set (attr "mode")
+     (if_then_else
+       (and (match_test "TARGET_AVX2")
+	    (eq (const_string "<MODE>mode") (const_string "V8SImode")))
+     (const_string "OI")
+     (const_string "V8SF")))])
 
 (define_insn_and_split "vec_extract_lo_v16hi"
   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
@@ -3937,13 +3957,21 @@ (define_insn "vec_extract_hi_v16hi"
 		     (const_int 12) (const_int 13)
 		     (const_int 14) (const_int 15)])))]
   "TARGET_AVX"
-  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+  if (get_attr_mode (insn) == MODE_OI)
+    return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+  else
+    return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,store")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
+   (set (attr "mode")
+     (if_then_else (match_test "TARGET_AVX2")
+   (const_string "OI")
+   (const_string "V8SF")))])
 
 (define_insn_and_split "vec_extract_lo_v32qi"
   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
@@ -3984,13 +4012,21 @@ (define_insn "vec_extract_hi_v32qi"
 		     (const_int 28) (const_int 29)
 		     (const_int 30) (const_int 31)])))]
   "TARGET_AVX"
-  "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
+{
+  if (get_attr_mode (insn) == MODE_OI)
+    return "vextracti128\t{$0x1, %1, %0|%0, %1, 0x1}";
+  else
+    return "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}";
+}
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,store")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
+   (set (attr "mode")
+     (if_then_else (match_test "TARGET_AVX2")
+   (const_string "OI")
+   (const_string "V8SF")))])
 
 (define_insn "*sse4_1_extractps"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
@@ -4024,7 +4060,10 @@ (define_insn_and_split "*vec_extract_v4s
 
 ;; Modes handled by vec_extract patterns.
 (define_mode_iterator VEC_EXTRACT_MODE
-  [V16QI V8HI V4SI V2DI
+  [(V32QI "TARGET_AVX") V16QI
+   (V16HI "TARGET_AVX") V8HI
+   (V8SI "TARGET_AVX") V4SI
+   (V4DI "TARGET_AVX") V2DI
    (V8SF "TARGET_AVX") V4SF
    (V4DF "TARGET_AVX") V2DF])
 
@@ -11952,7 +11991,7 @@ (define_insn "vec_set_lo_<mode>"
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
+   (set_attr "mode" "V4DF")])
 
 (define_insn "vec_set_hi_<mode>"
   [(set (match_operand:VI8F_256 0 "register_operand" "=x")
@@ -11967,7 +12006,7 @@ (define_insn "vec_set_hi_<mode>"
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "V8SF")])
+   (set_attr "mode" "V4DF")])
 
 (define_insn "vec_set_lo_<mode>"
   [(set (match_operand:VI4F_256 0 "register_operand" "=x")
@@ -12158,17 +12197,29 @@ (define_expand "vec_init<mode>"
   DONE;
 })
 
-(define_insn "avx2_extracti128"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-	(vec_select:V2DI
-	  (match_operand:V4DI 1 "nonimmediate_operand" "xm")
-	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+(define_expand "avx2_extracti128"
+  [(match_operand:V2DI 0 "nonimmediate_operand" "")
+   (match_operand:V4DI 1 "register_operand" "")
+   (match_operand:SI 2 "const_0_to_1_operand" "")]
   "TARGET_AVX2"
-  "vextracti128\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
+{
+  rtx (*insn)(rtx, rtx);
+
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      insn = gen_vec_extract_lo_v4di;
+      break;
+    case 1:
+      insn = gen_vec_extract_hi_v4di;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (insn (operands[0], operands[1]));
+  DONE;
+})
 
 (define_expand "avx2_inserti128"
   [(match_operand:V4DI 0 "register_operand" "")
--- gcc/config/i386/i386.c.jj	2011-09-15 19:27:03.000000000 +0200
+++ gcc/config/i386/i386.c	2011-09-16 09:37:43.000000000 +0200
@@ -32592,6 +32592,84 @@ ix86_expand_vector_extract (bool mmx_ok,
       use_vec_extr = TARGET_SSE4_1;
       break;
 
+    case V8SFmode:
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V4SFmode);
+	  if (elt < 4)
+	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
+	  return;
+	}
+      break;
+
+    case V4DFmode:
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V2DFmode);
+	  if (elt < 2)
+	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
+	  return;
+	}
+      break;
+
+    case V32QImode:
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V16QImode);
+	  if (elt < 16)
+	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
+	  return;
+	}
+      break;
+
+    case V16HImode:
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V8HImode);
+	  if (elt < 8)
+	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
+	  return;
+	}
+      break;
+
+    case V8SImode:
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V4SImode);
+	  if (elt < 4)
+	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
+	  return;
+	}
+      break;
+
+    case V4DImode:
+      if (TARGET_AVX)
+	{
+	  tmp = gen_reg_rtx (V2DImode);
+	  if (elt < 2)
+	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
+	  else
+	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
+	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
+	  return;
+	}
+      break;
+
     case V8QImode:
       /* ??? Could extract the appropriate HImode element and shift.  */
     default:
--- gcc/testsuite/gcc.target/i386/sse2-extract-1.c.jj	2011-09-16 10:41:45.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/sse2-extract-1.c	2011-09-16 10:41:55.000000000 +0200
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -msse2" } */
+/* { dg-require-effective-target sse2_runtime } */
+
+extern void abort (void);
+typedef unsigned long long uint64_t;
+
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+#define FN(elcount, type, idx) \
+__attribute__((noinline, noclone)) \
+type f##type##elcount##_##idx (vector (elcount, type) x) { return x[idx] + 1; }
+#define T2(elcount, type) \
+  H (elcount, type) \
+  F (elcount, type, 0) \
+  F (elcount, type, 1)
+#define T4(elcount, type) \
+  T2 (elcount, type) \
+  F (elcount, type, 2) \
+  F (elcount, type, 3)
+#define T8(elcount, type) \
+  T4 (elcount, type) \
+  F (elcount, type, 4) \
+  F (elcount, type, 5) \
+  F (elcount, type, 6) \
+  F (elcount, type, 7)
+#define T16(elcount, type) \
+  T8 (elcount, type) \
+  F (elcount, type, 8) \
+  F (elcount, type, 9) \
+  F (elcount, type, 10) \
+  F (elcount, type, 11) \
+  F (elcount, type, 12) \
+  F (elcount, type, 13) \
+  F (elcount, type, 14) \
+  F (elcount, type, 15)
+#define T32(elcount, type) \
+  T16 (elcount, type) \
+  F (elcount, type, 16) \
+  F (elcount, type, 17) \
+  F (elcount, type, 18) \
+  F (elcount, type, 19) \
+  F (elcount, type, 20) \
+  F (elcount, type, 21) \
+  F (elcount, type, 22) \
+  F (elcount, type, 23) \
+  F (elcount, type, 24) \
+  F (elcount, type, 25) \
+  F (elcount, type, 26) \
+  F (elcount, type, 27) \
+  F (elcount, type, 28) \
+  F (elcount, type, 29) \
+  F (elcount, type, 30) \
+  F (elcount, type, 31)
+#define TESTS_SSE2 \
+T2 (2, double) E \
+T2 (2, uint64_t) E \
+T4 (4, float) E \
+T4 (4, int) E \
+T8 (8, short) E \
+T16 (16, char) E
+#define TESTS_AVX \
+T4 (4, double) E \
+T4 (4, uint64_t) E \
+T8 (8, float) E \
+T8 (8, int) E \
+T16 (16, short) E \
+T32 (32, char) E
+#ifdef __AVX__
+#define TESTS TESTS_SSE2 TESTS_AVX
+#else
+#define TESTS TESTS_SSE2
+#endif
+
+#define F FN
+#define H(elcount, type)
+#define E
+TESTS
+
+int
+main ()
+{
+#undef F
+#undef H
+#undef E
+#define H(elcount, type) \
+  vector (elcount, type) v##type##elcount = {
+#define E };
+#define F(elcount, type, idx) idx + 1,
+  TESTS
+#undef F
+#undef H
+#undef E
+#define H(elcount, type)
+#define E
+#define F(elcount, type, idx) \
+  if (f##type##elcount##_##idx (v##type##elcount) != idx + 2) \
+    abort ();
+  TESTS
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/avx-extract-1.c.jj	2011-09-16 10:44:19.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx-extract-1.c	2011-09-16 10:44:58.000000000 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-require-effective-target avx_runtime } */
+
+#include "sse2-extract-1.c"


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]