This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[autovect] i386 widening casts


        * config/i386/i386.c (ix86_expand_sse_unpack): New.
        * config/i386/i386-protos.h: Update.
        * config/i386/sse.md (vec_unpacku_hi_v16qi, vec_unpacks_hi_v16qi,
        vec_unpacku_lo_v16qi, vec_unpacks_lo_v16qi, vec_unpacku_hi_v8hi,
        vec_unpacks_hi_v8hi, vec_unpacku_lo_v8hi, vec_unpacks_lo_v8hi,
        vec_unpacku_hi_v4si, vec_unpacks_hi_v4si, vec_unpacku_lo_v4si,
        vec_unpacks_lo_v4si): New.

        * tree-vectorizer.c (supportable_widening_operation): Adjust for
        little-endian byte ordering.

        * gcc.dg/vect/vect-117.c (main1): Fix follow-on int/size_t
        mismatches.
        * gcc.dg/vect/vect-reduc-dot-u16.c: Update comments.

=== config/i386/i386-protos.h
==================================================================
--- config/i386/i386-protos.h	(revision 107681)
+++ config/i386/i386-protos.h	(local)
@@ -150,6 +150,7 @@
 extern int ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
+extern void ix86_expand_sse_unpack (rtx[], bool, bool);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
=== config/i386/i386.c
==================================================================
--- config/i386/i386.c	(revision 107681)
+++ config/i386/i386.c	(local)
@@ -10902,6 +10902,52 @@
   return true;
 }
 
+/* Unpack OPERANDS[1] into OPERANDS[0], the next wider integer vector type.
+   UNSIGNED_P is true if we should do zero extension, else sign extension.
+   HIGH_P is true if we want the N/2 high elements, else the low elements.  */
+
+void
+ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*unpack)(rtx, rtx, rtx);	/* Interleave generator chosen below.  */
+  rtx se, dest;			/* SE is interleaved with the input.  */
+
+  switch (imode)
+    {
+    case V16QImode:
+      if (high_p)
+	unpack = gen_vec_interleave_highv16qi;
+      else
+	unpack = gen_vec_interleave_lowv16qi;
+      break;
+    case V8HImode:
+      if (high_p)
+	unpack = gen_vec_interleave_highv8hi;
+      else
+	unpack = gen_vec_interleave_lowv8hi;
+      break;
+    case V4SImode:
+      if (high_p)
+	unpack = gen_vec_interleave_highv4si;
+      else
+	unpack = gen_vec_interleave_lowv4si;
+      break;
+    default:
+      gcc_unreachable ();	/* Only V16QI, V8HI and V4SI are handled.  */
+    }
+
+  dest = gen_lowpart (imode, operands[0]);	/* View result in IMODE.  */
+
+  if (unsigned_p)
+    se = force_reg (imode, CONST0_RTX (imode));	/* Interleave with 0.  */
+  else	/* Presumably a 0 > OPERANDS[1] mask (sign copies) -- verify.  */
+    se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+			      operands[1], pc_rtx, pc_rtx);
+
+  emit_insn (unpack (dest, operands[1], se));
+}
+
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
=== config/i386/sse.md
==================================================================
--- config/i386/sse.md	(revision 107681)
+++ config/i386/sse.md	(local)
@@ -3662,6 +3662,114 @@
   DONE;
 })
 
+(define_expand "vec_unpacku_hi_v16qi"
+  [(match_operand:V8HI 0 "register_operand" "")
+   (match_operand:V16QI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, true, true);  /* Zero-extend, high half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+  [(match_operand:V8HI 0 "register_operand" "")
+   (match_operand:V16QI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, false, true);  /* Sign-extend, high half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+  [(match_operand:V8HI 0 "register_operand" "")
+   (match_operand:V16QI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, true, false);  /* Zero-extend, low half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+  [(match_operand:V8HI 0 "register_operand" "")
+   (match_operand:V16QI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, false, false);  /* Sign-extend, low half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacku_hi_v8hi"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V8HI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, true, true);  /* Zero-extend, high half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V8HI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, false, true);  /* Sign-extend, high half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V8HI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, true, false);  /* Zero-extend, low half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+  [(match_operand:V4SI 0 "register_operand" "")
+   (match_operand:V8HI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, false, false);  /* Sign-extend, low half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4si"
+  [(match_operand:V2DI 0 "register_operand" "")
+   (match_operand:V4SI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, true, true);  /* Zero-extend, high half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4si"
+  [(match_operand:V2DI 0 "register_operand" "")
+   (match_operand:V4SI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, false, true);  /* Sign-extend, high half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4si"
+  [(match_operand:V2DI 0 "register_operand" "")
+   (match_operand:V4SI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, true, false);  /* Zero-extend, low half.  */
+  DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4si"
+  [(match_operand:V2DI 0 "register_operand" "")
+   (match_operand:V4SI 1 "register_operand" "")]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_unpack (operands, false, false);  /* Sign-extend, low half.  */
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Miscellaneous
=== testsuite/gcc.dg/vect/vect-117.c
==================================================================
--- testsuite/gcc.dg/vect/vect-117.c	(revision 107681)
+++ testsuite/gcc.dg/vect/vect-117.c	(local)
@@ -16,13 +16,13 @@
   /* Unknown evolution.  */
   for (i = 0; i < N; i++)
     {
-       ia[i] = (int *) *p[i];
+       ia[i] = (size_t *) *p[i];
     }
 
   /* check results: */
   for (i = 0; i < N; i++)
     {
-      if (ia[i] != (int *) *p[i])
+      if (ia[i] != (size_t *) *p[i])
          abort();
     }
   return 0;
=== testsuite/gcc.dg/vect/vect-reduc-dot-u16.c
==================================================================
--- testsuite/gcc.dg/vect/vect-reduc-dot-u16.c	(revision 107681)
+++ testsuite/gcc.dg/vect/vect-reduc-dot-u16.c	(local)
@@ -12,6 +12,7 @@
 unsigned short Y[N] __attribute__ ((__aligned__(16)));
 
 /* short->short->int dot product. Not vectorized. */
+/* ??? Is vectorized on i386.  */
 unsigned int
 foo1(int len) {
   int i;
@@ -28,6 +29,10 @@
 /* short->int->int dot product. Should be vectorized on ppc,
    but for some reason currently the GIMPLE arguments are cast to int 
    instead of 'unsigned int'. */
+/* ??? This is exactly correct.  The multiplication promotes to int,
+   then is promoted to unsigned int for the addition.  Which results
+   in an int->unsigned int cast, which since no bits are modified in
+   the cast should be trivially vectorizable.  */
 unsigned int
 foo2(int len) {
   int i;
=== tree-vectorizer.c
==================================================================
--- tree-vectorizer.c	(revision 107681)
+++ tree-vectorizer.c	(local)
@@ -1775,6 +1775,7 @@
   tree expr = TREE_OPERAND (stmt, 1);
   tree type = TREE_TYPE (expr);
   tree wide_vectype = get_vectype_for_scalar_type (type);
+  enum tree_code c1, c2;
 
   /* The result of a vectorized widening operation usually requires two vectors 
      (because the widened results do not fit int one vector). The generated 
@@ -1816,21 +1817,38 @@
   switch (code)
     {
     case WIDEN_MULT_EXPR:
-      *code1 = VEC_WIDEN_MULT_HI_EXPR;
-      *code2 = VEC_WIDEN_MULT_LO_EXPR;
-      optab1 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, vectype);
-      optab2 = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, vectype);
+      if (BYTES_BIG_ENDIAN)
+	{
+	  c1 = VEC_WIDEN_MULT_HI_EXPR;
+	  c2 = VEC_WIDEN_MULT_LO_EXPR;
+	}
+      else
+	{
+	  c2 = VEC_WIDEN_MULT_HI_EXPR;
+	  c1 = VEC_WIDEN_MULT_LO_EXPR;
+	}
       break;
     case NOP_EXPR:
-      *code1 = VEC_UNPACK_HI_EXPR;
-      *code2 = VEC_UNPACK_LO_EXPR;
-      optab1 = optab_for_tree_code (VEC_UNPACK_HI_EXPR, vectype);
-      optab2 = optab_for_tree_code (VEC_UNPACK_LO_EXPR, vectype);
+      if (BYTES_BIG_ENDIAN)
+	{
+	  c1 = VEC_UNPACK_HI_EXPR;
+	  c2 = VEC_UNPACK_LO_EXPR;
+	}
+      else
+	{
+	  c2 = VEC_UNPACK_HI_EXPR;
+	  c1 = VEC_UNPACK_LO_EXPR;
+	}
       break;
     default:
       gcc_unreachable ();
     }
 
+  *code1 = c1;
+  *code2 = c2;
+  optab1 = optab_for_tree_code (c1, vectype);
+  optab2 = optab_for_tree_code (c2, vectype);
+
   if (!optab1 || !optab2)
     return false;
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]