This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[autovect] ia64 updates
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 2 Dec 2005 17:10:08 -0800
- Subject: [autovect] ia64 updates
Updates ia64 with all the new named patterns as well.
r~
* config/ia64/ia64.c (TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN): New.
(TARGET_VECTORIZE_BUILTIN_EXTRACT_ODD): New.
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN,
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, ia64_builtin_mul_widen_even,
ia64_builtin_mul_widen_odd, builtin_ia64_pmpy_r, builtin_ia64_pmpy_l,
IA64_BUILTIN_PMPY_R, IA64_BUILTIN_PMPY_L): New.
(ia64_init_builtins): Initialize builtin_ia64_pmpy_[rl].
(ia64_expand_builtin): Expand them.
(ia64_expand_unpack): New.
* config/ia64/vect.md (smulv4hi3_highpart, umulv4hi3_highpart): New.
(vec_pack_ssat_v4hi): Rename from pack2_sss.
(vec_pack_usat_v4hi): Rename from pack2_uss.
(vec_pack_ssat_v2si): Rename from pack4_sss.
(vec_pack_mod_v4hi, vec_pack_mod_v2si): New.
(vec_interleave_lowv8qi): Rename from unpack1_l.
(vec_interleave_highv8qi): Rename from unpack1_h.
(vec_interleave_lowv4hi): Rename from unpack2_l.
(vec_interleave_highv4hi): Rename from unpack2_h.
(vec_interleave_lowv2si): Rename from unpack4_l.
(vec_interleave_highv2si): Rename from unpack4_h.
(vec_unpacku_hi_v8qi, vec_unpacks_hi_v8qi): New.
(vec_unpacku_lo_v8qi, vec_unpacks_lo_v8qi): New.
(vec_unpacku_hi_v4hi, vec_unpacks_hi_v4hi): New.
(vec_unpacku_lo_v4hi, vec_unpacks_lo_v4hi): New.
* config/ia64/ia64-protos.h (ia64_expand_unpack): Declare.
* gcc.dg/vect/vect-noreduc.c: Broaden expected not-vectorized text.
Index: testsuite/gcc.dg/vect/vect-noreduc.c
===================================================================
--- testsuite/gcc.dg/vect/vect-noreduc.c (revision 107980)
+++ testsuite/gcc.dg/vect/vect-noreduc.c (working copy)
@@ -25,6 +25,6 @@ init_regset_vector (vector, space, nelts
}
}
-/* { dg-final { scan-tree-dump-times "not vectorized: unsupported use in stmt" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "not vectorized:" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c (revision 107980)
+++ config/ia64/ia64.c (working copy)
@@ -267,6 +267,8 @@ static const char *ia64_mangle_fundament
static const char *ia64_invalid_conversion (tree, tree);
static const char *ia64_invalid_unary_op (int, tree);
static const char *ia64_invalid_binary_op (int, tree, tree);
+static tree ia64_builtin_mul_widen_even (tree type);
+static tree ia64_builtin_mul_widen_odd (tree type);
/* Table of valid machine attributes. */
static const struct attribute_spec ia64_attribute_table[] =
@@ -444,6 +446,17 @@ static const struct attribute_spec ia64_
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
+#undef TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN
+#define TARGET_VECTORIZE_BUILTIN_EXTRACT_EVEN \
+ interleave_vectorize_builtin_extract_even
+#undef TARGET_VECTORIZE_BUILTIN_EXTRACT_ODD
+#define TARGET_VECTORIZE_BUILTIN_EXTRACT_ODD \
+ interleave_vectorize_builtin_extract_odd
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN ia64_builtin_mul_widen_even
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD ia64_builtin_mul_widen_odd
+
struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
@@ -1766,6 +1779,49 @@ ia64_expand_vecint_minmax (enum rtx_code
return true;
}
+/* Sign or zero extend a vector, producing either high or low results. */
+
+void
+ia64_expand_unpack (rtx operands[2], bool unsignedp, bool highp)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx, rtx);
+ rtx se, dest;
+
+ switch (imode)
+ {
+ case V8QImode:
+ if (highp)
+ unpack = gen_vec_interleave_highv8qi;
+ else
+ unpack = gen_vec_interleave_lowv8qi;
+ break;
+ case V4HImode:
+ if (highp)
+ unpack = gen_vec_interleave_highv4hi;
+ else
+ unpack = gen_vec_interleave_lowv4hi;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ dest = gen_lowpart (imode, operands[0]);
+ if (unsignedp)
+ se = force_reg (imode, CONST0_RTX (imode));
+ else
+ {
+ bool neg;
+
+ se = gen_reg_rtx (imode);
+ neg = ia64_expand_vecint_compare (LT, imode, se, operands[1],
+ CONST0_RTX (imode));
+ gcc_assert (!neg);
+ }
+
+ emit_insn (unpack (dest, operands[1], se));
+}
+
/* Emit an integral vector widening sum operations. */
void
@@ -1783,13 +1839,13 @@ ia64_expand_widen_sum (rtx operands[3],
switch (mode)
{
case V8QImode:
- unpack_l = gen_unpack1_l;
- unpack_h = gen_unpack1_h;
+ unpack_l = gen_vec_interleave_lowv8qi;
+ unpack_h = gen_vec_interleave_highv8qi;
plus = gen_addv4hi3;
break;
case V4HImode:
- unpack_l = gen_unpack2_l;
- unpack_h = gen_unpack2_h;
+ unpack_l = gen_vec_interleave_lowv4hi;
+ unpack_h = gen_vec_interleave_highv4hi;
plus = gen_addv2si3;
break;
default:
@@ -1850,10 +1906,14 @@ ia64_expand_dot_prod_v8qi (rtx operands[
h1 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode);
- emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), operands[1], x1));
- emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), operands[2], x2));
- emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), operands[1], x1));
- emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), operands[2], x2));
+ emit_insn (gen_vec_interleave_lowv8qi (gen_lowpart (V8QImode, l1),
+ operands[1], x1));
+ emit_insn (gen_vec_interleave_lowv8qi (gen_lowpart (V8QImode, l2),
+ operands[2], x2));
+ emit_insn (gen_vec_interleave_highv8qi (gen_lowpart (V8QImode, h1),
+ operands[1], x1));
+ emit_insn (gen_vec_interleave_highv8qi (gen_lowpart (V8QImode, h2),
+ operands[2], x2));
p1 = gen_reg_rtx (V2SImode);
p2 = gen_reg_rtx (V2SImode);
@@ -1873,6 +1933,29 @@ ia64_expand_dot_prod_v8qi (rtx operands[
emit_insn (gen_addv2si3 (operands[0], s2, s3));
}
+/* Implement the TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN and
+ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD hooks. There is no tree code
+ for these operations, so we must create and use builtin functions. */
+
+static GTY(()) tree builtin_ia64_pmpy_r;
+static GTY(()) tree builtin_ia64_pmpy_l;
+
+static tree
+ia64_builtin_mul_widen_even (tree type)
+{
+ if (TYPE_MODE (type) == V4HImode)
+ return builtin_ia64_pmpy_r;
+ return NULL;
+}
+
+static tree
+ia64_builtin_mul_widen_odd (tree type)
+{
+ if (TYPE_MODE (type) == V4HImode)
+ return builtin_ia64_pmpy_l;
+ return NULL;
+}
+
/* Emit the appropriate sequence for a call. */
void
@@ -8273,7 +8356,9 @@ process_for_unwind_directive (FILE *asm_
enum ia64_builtins
{
IA64_BUILTIN_BSP,
- IA64_BUILTIN_FLUSHRS
+ IA64_BUILTIN_FLUSHRS,
+ IA64_BUILTIN_PMPY_R,
+ IA64_BUILTIN_PMPY_L
};
void
@@ -8281,6 +8366,7 @@ ia64_init_builtins (void)
{
tree fpreg_type;
tree float80_type;
+ tree v4hi_type_node, pmpy_type_node;
/* The __fpreg type. */
fpreg_type = make_node (REAL_TYPE);
@@ -8319,6 +8405,15 @@ ia64_init_builtins (void)
build_function_type (void_type_node, void_list_node),
IA64_BUILTIN_FLUSHRS);
+ v4hi_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+ pmpy_type_node = build_function_type_list (v4hi_type_node, v4hi_type_node,
+ v4hi_type_node, NULL_TREE);
+
+ builtin_ia64_pmpy_r =
+ def_builtin ("__builtin_ia64_pmpy_r", pmpy_type_node, IA64_BUILTIN_PMPY_R);
+ builtin_ia64_pmpy_l =
+ def_builtin ("__builtin_ia64_pmpy_l", pmpy_type_node, IA64_BUILTIN_PMPY_L);
+
#undef def_builtin
}
@@ -8345,6 +8440,30 @@ ia64_expand_builtin (tree exp, rtx targe
emit_insn (gen_flushrs ());
return const0_rtx;
+ case IA64_BUILTIN_PMPY_R:
+ case IA64_BUILTIN_PMPY_L:
+ {
+ tree arg1, arg2;
+ rtx op1, op2, insn;
+
+ arg1 = TREE_VALUE (TREE_OPERAND (exp, 1));
+ arg2 = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (exp, 1)));
+ op1 = expand_expr (arg1, NULL_RTX, V4HImode, EXPAND_NORMAL);
+ op2 = expand_expr (arg2, NULL_RTX, V4HImode, EXPAND_NORMAL);
+ op1 = force_reg (V4HImode, op1);
+ op2 = force_reg (V4HImode, op2);
+ if (target == 0 || !gr_register_operand (target, V4HImode))
+ target= gen_reg_rtx (V4HImode);
+
+ if (fcode == IA64_BUILTIN_PMPY_R)
+ insn = gen_pmpy2_r (target, op1, op2);
+ else
+ insn = gen_pmpy2_l (target, op1, op2);
+ emit_insn (insn);
+
+ return target;
+ }
+
default:
break;
}
Index: config/ia64/vect.md
===================================================================
--- config/ia64/vect.md (revision 107980)
+++ config/ia64/vect.md (working copy)
@@ -212,6 +212,34 @@
"pmpyshr2 %0 = %1, %2, 0"
[(set_attr "itanium_class" "mmmul")])
+(define_insn "smulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (match_operand:V4HI 1 "gr_register_operand" ""))
+ (sign_extend:V4SI
+ (match_operand:V4HI 2 "gr_register_operand" "")))
+ (const_int 16))))]
+ ""
+ "pmpyshr2 %0 = %1, %2, 16"
+ [(set_attr "itanium_class" "mmmul")])
+
+(define_insn "umulv4hi3_highpart"
+ [(set (match_operand:V4HI 0 "gr_register_operand" "")
+ (truncate:V4HI
+ (lshiftrt:V4SI
+ (mult:V4SI
+ (zero_extend:V4SI
+ (match_operand:V4HI 1 "gr_register_operand" ""))
+ (zero_extend:V4SI
+ (match_operand:V4HI 2 "gr_register_operand" "")))
+ (const_int 16))))]
+ ""
+ "pmpyshr2.u %0 = %1, %2, 16"
+ [(set_attr "itanium_class" "mmmul")])
+
(define_insn "pmpy2_r"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI
@@ -487,7 +515,7 @@
"pcmp<vecsize>.gt %0 = %r1, %r2"
[(set_attr "itanium_class" "mmalua")])
-(define_insn "pack2_sss"
+(define_insn "vec_pack_ssat_v4hi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_concat:V8QI
(ss_truncate:V4QI
@@ -498,7 +526,7 @@
"pack2.sss %0 = %r1, %r2"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "*pack2_uss"
+(define_insn "vec_pack_usat_v4hi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_concat:V8QI
(us_truncate:V4QI
@@ -509,7 +537,7 @@
"pack2.uss %0 = %r1, %r2"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "pack4_sss"
+(define_insn "vec_pack_ssat_v2si"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_concat:V4HI
(ss_truncate:V2HI
@@ -520,7 +548,49 @@
"pack4.sss %0 = %r1, %r2"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "unpack1_l"
+(define_expand "vec_pack_mod_v4hi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")
+ (match_operand:V4HI 2 "register_operand" "")]
+ ""
+{
+ rtx op1, op2, h1, l1, h2, l2;
+
+ op1 = gen_lowpart (V8QImode, operands[1]);
+ op2 = gen_lowpart (V8QImode, operands[2]);
+ h1 = gen_reg_rtx (V8QImode);
+ l1 = gen_reg_rtx (V8QImode);
+ h2 = gen_reg_rtx (V8QImode);
+ l2 = gen_reg_rtx (V8QImode);
+
+ emit_insn (gen_vec_interleave_highv8qi (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8qi (l1, op1, op2));
+ emit_insn (gen_vec_interleave_highv8qi (h2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv8qi (l2, l1, h1));
+ emit_insn (gen_vec_interleave_lowv8qi (operands[0], l2, h2));
+ DONE;
+})
+
+(define_expand "vec_pack_mod_v2si"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V2SI 1 "register_operand" "")
+ (match_operand:V2SI 2 "register_operand" "")]
+ ""
+{
+ rtx op1, op2, h1, l1;
+
+ op1 = gen_lowpart (V4HImode, operands[1]);
+ op2 = gen_lowpart (V4HImode, operands[2]);
+ h1 = gen_reg_rtx (V4HImode);
+ l1 = gen_reg_rtx (V4HImode);
+
+ emit_insn (gen_vec_interleave_highv4hi (h1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv4hi (l1, op1, op2));
+ emit_insn (gen_vec_interleave_lowv4hi (operands[0], l1, h1));
+ DONE;
+})
+
+(define_insn "vec_interleave_lowv8qi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@@ -538,7 +608,7 @@
"unpack1.l %0 = %r2, %r1"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "unpack1_h"
+(define_insn "vec_interleave_highv8qi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@@ -680,7 +750,7 @@
"mux1 %0 = %1, @brcst"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "unpack2_l"
+(define_insn "vec_interleave_lowv4hi"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
@@ -694,7 +764,7 @@
"unpack2.l %0 = %r2, %r1"
[(set_attr "itanium_class" "mmshf")])
-(define_insn "unpack2_h"
+(define_insn "vec_interleave_highv4hi"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
@@ -765,7 +835,7 @@
[(set_attr "itanium_class" "mmshf")])
;; Note that mix4.r performs the exact same operation.
-(define_insn "*unpack4_l"
+(define_insn "vec_interleave_lowv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
@@ -778,7 +848,7 @@
[(set_attr "itanium_class" "mmshf")])
;; Note that mix4.l performs the exact same operation.
-(define_insn "*unpack4_h"
+(define_insn "vec_interleave_highv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
@@ -831,6 +901,78 @@
"unpack4.l %0 = %r2, %r1"
[(set_attr "itanium_class" "mmshf")])
+(define_expand "vec_unpacku_hi_v8qi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8qi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8qi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8qi"
+ [(match_operand:V4HI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, false, false);
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4hi"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, true, true);
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4hi"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, false, true);
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4hi"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, true, false);
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4hi"
+ [(match_operand:V2SI 0 "register_operand" "")
+ (match_operand:V4HI 1 "register_operand" "")]
+ ""
+{
+ ia64_expand_unpack (operands, false, false);
+ DONE;
+})
+
;; Missing operations
;; padd.uus
;; pavg
Index: config/ia64/ia64-protos.h
===================================================================
--- config/ia64/ia64-protos.h (revision 107980)
+++ config/ia64/ia64-protos.h (working copy)
@@ -50,6 +50,7 @@ extern bool ia64_expand_movxf_movrf (enu
extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode);
extern void ia64_expand_vecint_cmov (rtx[]);
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
+extern void ia64_expand_unpack (rtx [], bool, bool);
extern void ia64_expand_widen_sum (rtx[], bool);
extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
extern void ia64_expand_call (rtx, rtx, rtx, int);