This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[autovect] ia64 updates


Updates ia64 with all the new named patterns as well.


r~


        * config/ia64/ia64.c (TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN): New.
        (TARGET_VECTORIZE_BUILTIN_EXTRACT_ODD): New.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,
        TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, ia64_builtin_mul_widen_even,
        ia64_builtin_mul_widen_odd, builtin_ia64_pmpy_r, builtin_ia64_pmpy_l,
        IA64_BUILTIN_PMPY_R, IA64_BUILTIN_PMPY_L): New.
        (ia64_init_builtins): Initialize builtin_ia64_pmpy_[rl].
        (ia64_expand_builtin): Expand them.
        (ia64_expand_unpack): New.
        * config/ia64/vect.md (smulv4hi3_highpart, umulv4hi3_highpart): New.
        (vec_pack_ssat_v4hi): Rename from pack2_sss.
        (vec_pack_usat_v4hi): Rename from pack2_uss.
        (vec_pack_ssat_v2si): Rename from pack4_sss.
        (vec_pack_mod_v4hi, vec_pack_mod_v2si): New.
        (vec_interleave_lowv8qi): Rename from unpack1_l.
        (vec_interleave_highv8qi): Rename from unpack1_h.
        (vec_interleave_lowv4hi): Rename from unpack2_l.
        (vec_interleave_highv4hi): Rename from unpack2_h.
        (vec_interleave_lowv2si): Rename from unpack4_l.
        (vec_interleave_highv2si): Rename from unpack4_h.
        (vec_unpacku_hi_v8qi, vec_unpacks_hi_v8qi): New.
        (vec_unpacku_lo_v8qi, vec_unpacks_lo_v8qi): New.
        (vec_unpacku_hi_v4hi, vec_unpacks_hi_v4hi): New.
        (vec_unpacku_lo_v4hi, vec_unpacks_lo_v4hi): New.
        * config/ia64/ia64-protos.h (ia64_expand_unpack): Declare.

        * gcc.dg/vect/vect-noreduc.c: Broaden expected not-vectorized text.

Index: testsuite/gcc.dg/vect/vect-noreduc.c
===================================================================
--- testsuite/gcc.dg/vect/vect-noreduc.c	(revision 107980)
+++ testsuite/gcc.dg/vect/vect-noreduc.c	(working copy)
@@ -25,6 +25,6 @@ init_regset_vector (vector, space, nelts
     }
 }
 
-/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "not vectorized:" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c	(revision 107980)
+++ config/ia64/ia64.c	(working copy)
@@ -267,6 +267,8 @@ static const char *ia64_mangle_fundament
 static const char *ia64_invalid_conversion (tree, tree);
 static const char *ia64_invalid_unary_op (int, tree);
 static const char *ia64_invalid_binary_op (int, tree, tree);
+static tree ia64_builtin_mul_widen_even (tree type);
+static tree ia64_builtin_mul_widen_odd (tree type);
 
 /* Table of valid machine attributes.  */
 static const struct attribute_spec ia64_attribute_table[] =
@@ -444,6 +446,17 @@ static const struct attribute_spec ia64_
 #undef TARGET_INVALID_BINARY_OP
 #define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
 
+#undef TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN
+#define TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN \
+  interleave_vectorize_builtin_extract_even
+#undef TARGET_VECTORIZE_BUILTIN_EXTRACT_ODD
+#define TARGET_VECTORIZE_BUILTIN_EXTRACT_ODD \
+  interleave_vectorize_builtin_extract_odd
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN ia64_builtin_mul_widen_even
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD ia64_builtin_mul_widen_odd
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 typedef enum
@@ -1766,6 +1779,49 @@ ia64_expand_vecint_minmax (enum rtx_code
   return true;
 }
 
+/* Sign or zero extend a vector, producing either high or low results.  */
+
+void
+ia64_expand_unpack (rtx operands[2], bool unsignedp, bool highp)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*unpack)(rtx, rtx, rtx);
+  rtx se, dest;
+
+  switch (imode)
+    {
+    case V8QImode:
+      if (highp)
+	unpack = gen_vec_interleave_highv8qi;
+      else
+	unpack = gen_vec_interleave_lowv8qi;
+      break;
+    case V4HImode:
+      if (highp)
+	unpack = gen_vec_interleave_highv4hi;
+      else
+	unpack = gen_vec_interleave_lowv4hi;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  dest = gen_lowpart (imode, operands[0]);
+  if (unsignedp)
+    se = force_reg (imode, CONST0_RTX (imode));
+  else
+    {
+      bool neg;
+
+      se = gen_reg_rtx (imode);
+      neg = ia64_expand_vecint_compare (LT, imode, se, operands[1],
+					CONST0_RTX (imode));
+      gcc_assert (!neg);
+    }
+
+  emit_insn (unpack (dest, operands[1], se));
+}
+
 /* Emit an integral vector widening sum operations.  */
 
 void
@@ -1783,13 +1839,13 @@ ia64_expand_widen_sum (rtx operands[3], 
   switch (mode)
     {
     case V8QImode:
-      unpack_l = gen_unpack1_l;
-      unpack_h = gen_unpack1_h;
+      unpack_l = gen_vec_interleave_lowv8qi;
+      unpack_h = gen_vec_interleave_highv8qi;
       plus = gen_addv4hi3;
       break;
     case V4HImode:
-      unpack_l = gen_unpack2_l;
-      unpack_h = gen_unpack2_h;
+      unpack_l = gen_vec_interleave_lowv4hi;
+      unpack_h = gen_vec_interleave_highv4hi;
       plus = gen_addv2si3;
       break;
     default:
@@ -1850,10 +1906,14 @@ ia64_expand_dot_prod_v8qi (rtx operands[
   h1 = gen_reg_rtx (V4HImode);
   h2 = gen_reg_rtx (V4HImode);
 
-  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
-  emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
-  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
-  emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
+  emit_insn (gen_vec_interleave_lowv8qi (gen_lowpart (V8QImode, l1),
+					 operands[1], x1));
+  emit_insn (gen_vec_interleave_lowv8qi (gen_lowpart (V8QImode, l2),
+					 operands[2], x2));
+  emit_insn (gen_vec_interleave_highv8qi (gen_lowpart (V8QImode, h1),
+					  operands[1], x1));
+  emit_insn (gen_vec_interleave_highv8qi (gen_lowpart (V8QImode, h2),
+					  operands[2], x2));
 
   p1 = gen_reg_rtx (V2SImode);
   p2 = gen_reg_rtx (V2SImode);
@@ -1873,6 +1933,29 @@ ia64_expand_dot_prod_v8qi (rtx operands[
   emit_insn (gen_addv2si3 (operands[0], s2, s3));
 }
 
+/* Implement the TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN and
+   TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD hooks.  There is no tree code
+   for these operations, so we must create and use builtin functions.  */
+
+static GTY(()) tree builtin_ia64_pmpy_r;
+static GTY(()) tree builtin_ia64_pmpy_l;
+
+static tree
+ia64_builtin_mul_widen_even (tree type)
+{
+  if (TYPE_MODE (type) == V4HImode)
+    return builtin_ia64_pmpy_r;
+  return NULL;
+}
+
+static tree
+ia64_builtin_mul_widen_odd (tree type)
+{
+  if (TYPE_MODE (type) == V4HImode)
+    return builtin_ia64_pmpy_l;
+  return NULL;
+}
+
 /* Emit the appropriate sequence for a call.  */
 
 void
@@ -8273,7 +8356,9 @@ process_for_unwind_directive (FILE *asm_
 enum ia64_builtins
 {
   IA64_BUILTIN_BSP,
-  IA64_BUILTIN_FLUSHRS
+  IA64_BUILTIN_FLUSHRS,
+  IA64_BUILTIN_PMPY_R,
+  IA64_BUILTIN_PMPY_L
 };
 
 void
@@ -8281,6 +8366,7 @@ ia64_init_builtins (void)
 {
   tree fpreg_type;
   tree float80_type;
+  tree v4hi_type_node, pmpy_type_node;
 
   /* The __fpreg type.  */
   fpreg_type = make_node (REAL_TYPE);
@@ -8319,6 +8405,15 @@ ia64_init_builtins (void)
 	       build_function_type (void_type_node, void_list_node),
 	       IA64_BUILTIN_FLUSHRS);
 
+  v4hi_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+  pmpy_type_node = build_function_type_list (v4hi_type_node, v4hi_type_node,
+					     v4hi_type_node, NULL_TREE);
+
+  builtin_ia64_pmpy_r = 
+    def_builtin ("__builtin_ia64_pmpy_r", pmpy_type_node, IA64_BUILTIN_PMPY_R);
+  builtin_ia64_pmpy_l = 
+    def_builtin ("__builtin_ia64_pmpy_l", pmpy_type_node, IA64_BUILTIN_PMPY_L);
+
 #undef def_builtin
 }
 
@@ -8345,6 +8440,30 @@ ia64_expand_builtin (tree exp, rtx targe
       emit_insn (gen_flushrs ());
       return const0_rtx;
 
+    case IA64_BUILTIN_PMPY_R:
+    case IA64_BUILTIN_PMPY_L:
+      {
+	tree arg1, arg2;
+	rtx op1, op2, insn;
+
+	arg1 = TREE_VALUE (TREE_OPERAND (exp, 1));
+	arg2 = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (exp, 1)));
+	op1 = expand_expr (arg1, NULL_RTX, V4HImode, EXPAND_NORMAL);
+	op2 = expand_expr (arg2, NULL_RTX, V4HImode, EXPAND_NORMAL);
+	op1 = force_reg (V4HImode, op1);
+	op2 = force_reg (V4HImode, op2);
+	if (target == 0 || !gr_register_operand (target, V4HImode))
+	  target= gen_reg_rtx (V4HImode);
+  
+	if (fcode == IA64_BUILTIN_PMPY_R)
+	  insn = gen_pmpy2_r (target, op1, op2);
+	else
+	  insn = gen_pmpy2_l (target, op1, op2);
+	emit_insn (insn);
+
+	return target;
+      }
+
     default:
       break;
     }
Index: config/ia64/vect.md
===================================================================
--- config/ia64/vect.md	(revision 107980)
+++ config/ia64/vect.md	(working copy)
@@ -212,6 +212,34 @@
   "pmpyshr2 %0 = %1, %2, 0"
   [(set_attr "itanium_class" "mmmul")])
 
+(define_insn "smulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "gr_register_operand" "")
+	(truncate:V4HI
+	  (lshiftrt:V4SI
+	    (mult:V4SI
+	      (sign_extend:V4SI
+		(match_operand:V4HI 1 "gr_register_operand" ""))
+	      (sign_extend:V4SI
+		(match_operand:V4HI 2 "gr_register_operand" "")))
+	    (const_int 16))))]
+  ""
+  "pmpyshr2 %0 = %1, %2, 16"
+  [(set_attr "itanium_class" "mmmul")])
+
+(define_insn "umulv4hi3_highpart"
+  [(set (match_operand:V4HI 0 "gr_register_operand" "")
+	(truncate:V4HI
+	  (lshiftrt:V4SI
+	    (mult:V4SI
+	      (zero_extend:V4SI
+		(match_operand:V4HI 1 "gr_register_operand" ""))
+	      (zero_extend:V4SI
+		(match_operand:V4HI 2 "gr_register_operand" "")))
+	    (const_int 16))))]
+  ""
+  "pmpyshr2.u %0 = %1, %2, 16"
+  [(set_attr "itanium_class" "mmmul")])
+
 (define_insn "pmpy2_r"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
@@ -487,7 +515,7 @@
   "pcmp<vecsize>.gt %0 = %r1, %r2"
   [(set_attr "itanium_class" "mmalua")])
 
-(define_insn "pack2_sss"
+(define_insn "vec_pack_ssat_v4hi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_concat:V8QI
 	  (ss_truncate:V4QI
@@ -498,7 +526,7 @@
   "pack2.sss %0 = %r1, %r2"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "*pack2_uss"
+(define_insn "vec_pack_usat_v4hi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_concat:V8QI
 	  (us_truncate:V4QI
@@ -509,7 +537,7 @@
   "pack2.uss %0 = %r1, %r2"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "pack4_sss"
+(define_insn "vec_pack_ssat_v2si"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_concat:V4HI
 	  (ss_truncate:V2HI
@@ -520,7 +548,49 @@
   "pack4.sss %0 = %r1, %r2"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "unpack1_l"
+(define_expand "vec_pack_mod_v4hi"
+  [(match_operand:V8QI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")
+   (match_operand:V4HI 2 "register_operand" "")]
+  ""
+{
+  rtx op1, op2, h1, l1, h2, l2;
+
+  op1 = gen_lowpart (V8QImode, operands[1]);
+  op2 = gen_lowpart (V8QImode, operands[2]);
+  h1 = gen_reg_rtx (V8QImode);
+  l1 = gen_reg_rtx (V8QImode);
+  h2 = gen_reg_rtx (V8QImode);
+  l2 = gen_reg_rtx (V8QImode);
+
+  emit_insn (gen_vec_interleave_highv8qi (h1, op1, op2));
+  emit_insn (gen_vec_interleave_lowv8qi (l1, op1, op2));
+  emit_insn (gen_vec_interleave_highv8qi (h2, l1, h1));
+  emit_insn (gen_vec_interleave_lowv8qi (l2, l1, h1));
+  emit_insn (gen_vec_interleave_lowv8qi (operands[0], l2, h2));
+  DONE;
+})
+
+(define_expand "vec_pack_mod_v2si"
+  [(match_operand:V4HI 0 "register_operand" "")
+   (match_operand:V2SI 1 "register_operand" "")
+   (match_operand:V2SI 2 "register_operand" "")]
+  ""
+{
+  rtx op1, op2, h1, l1;
+
+  op1 = gen_lowpart (V4HImode, operands[1]);
+  op2 = gen_lowpart (V4HImode, operands[2]);
+  h1 = gen_reg_rtx (V4HImode);
+  l1 = gen_reg_rtx (V4HImode);
+
+  emit_insn (gen_vec_interleave_highv4hi (h1, op1, op2));
+  emit_insn (gen_vec_interleave_lowv4hi (l1, op1, op2));
+  emit_insn (gen_vec_interleave_lowv4hi (operands[0], l1, h1));
+  DONE;
+})
+
+(define_insn "vec_interleave_lowv8qi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
@@ -538,7 +608,7 @@
   "unpack1.l %0 = %r2, %r1"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "unpack1_h"
+(define_insn "vec_interleave_highv8qi"
   [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
 	(vec_select:V8QI
 	  (vec_concat:V16QI
@@ -680,7 +750,7 @@
   "mux1 %0 = %1, @brcst"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "unpack2_l"
+(define_insn "vec_interleave_lowv4hi"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
@@ -694,7 +764,7 @@
   "unpack2.l %0 = %r2, %r1"
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "unpack2_h"
+(define_insn "vec_interleave_highv4hi"
   [(set (match_operand:V4HI 0 "gr_register_operand" "=r")
 	(vec_select:V4HI
 	  (vec_concat:V8HI
@@ -765,7 +835,7 @@
   [(set_attr "itanium_class" "mmshf")])
 
 ;; Note that mix4.r performs the exact same operation.
-(define_insn "*unpack4_l"
+(define_insn "vec_interleave_lowv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
@@ -778,7 +848,7 @@
   [(set_attr "itanium_class" "mmshf")])
 
 ;; Note that mix4.l performs the exact same operation.
-(define_insn "*unpack4_h"
+(define_insn "vec_interleave_highv2si"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(vec_select:V2SI
 	  (vec_concat:V4SI
@@ -831,6 +901,78 @@
   "unpack4.l %0 = %r2, %r1"
   [(set_attr "itanium_class" "mmshf")])
 
+(define_expand "vec_unpacku_hi_v8qi"
+  [(match_operand:V4HI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, true, true);
+  DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8qi"
+  [(match_operand:V4HI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, false, true);
+  DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8qi"
+  [(match_operand:V4HI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, true, false);
+  DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8qi"
+  [(match_operand:V4HI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, false, false);
+  DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4hi"
+  [(match_operand:V2SI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, true, true);
+  DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4hi"
+  [(match_operand:V2SI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, false, true);
+  DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4hi"
+  [(match_operand:V2SI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, true, false);
+  DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4hi"
+  [(match_operand:V2SI 0 "register_operand" "")
+   (match_operand:V4HI 1 "register_operand" "")]
+  ""
+{
+  ia64_expand_unpack (operands, false, false);
+  DONE;
+})
+
 ;; Missing operations
 ;; padd.uus
 ;; pavg
Index: config/ia64/ia64-protos.h
===================================================================
--- config/ia64/ia64-protos.h	(revision 107980)
+++ config/ia64/ia64-protos.h	(working copy)
@@ -50,6 +50,7 @@ extern bool ia64_expand_movxf_movrf (enu
 extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode);
 extern void ia64_expand_vecint_cmov (rtx[]);
 extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_expand_unpack (rtx [], bool, bool);
 extern void ia64_expand_widen_sum (rtx[], bool);
 extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
 extern void ia64_expand_call (rtx, rtx, rtx, int);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]