This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[autovect] i386 widening casts
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 29 Nov 2005 13:05:11 -0800
- Subject: [autovect] i386 widening casts
* config/i386/i386.c (ix86_expand_sse_unpack): New.
* config/i386/i386-protos.h: Update.
* config/i386/sse.md (vec_unpacku_hi_v16qi, vec_unpacks_hi_v16qi,
vec_unpacku_lo_v16qi, vec_unpacks_lo_v16qi, vec_unpacku_hi_v8hi,
vec_unpacks_hi_v8hi, vec_unpacku_lo_v8hi, vec_unpacks_lo_v8hi,
vec_unpacku_hi_v4si, vec_unpacks_hi_v4si, vec_unpacku_lo_v4si,
vec_unpacks_lo_v4si): New.
* tree-vectorizer.c (supportable_widening_operation): Adjust for
little-endian byte ordering.
* gcc.dg/vect/vect-117.c (main1): Fix follow-on int/size_t
mismatches.
* gcc.dg/vect/vect-reduc-dot-u16.c: Update comments.
=== config/i386/i386-protos.h
==================================================================
--- config/i386/i386-protos.h (revision 107681)
+++ config/i386/i386-protos.h (local)
@@ -150,6 +150,7 @@
extern int ix86_expand_fp_movcc (rtx[]);
extern bool ix86_expand_fp_vcond (rtx[]);
extern bool ix86_expand_int_vcond (rtx[]);
+extern void ix86_expand_sse_unpack (rtx[], bool, bool);
extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx);
=== config/i386/i386.c
==================================================================
--- config/i386/i386.c (revision 107681)
+++ config/i386/i386.c (local)
@@ -10902,6 +10902,52 @@
return true;
}
+/* Unpack OPERANDS[1] into the next wider integer vector type, writing
+ the result through OPERANDS[0]. UNSIGNED_P is true if we should do
+ zero extension, else sign extension. HIGH_P is true if we want the
+ N/2 high elements, else the low elements.
+
+ Each selected input element is interleaved with an extension element:
+ a zero vector when UNSIGNED_P, otherwise the result of
+ ix86_expand_sse_cmp for GT on (0, OPERANDS[1]) -- presumably an
+ all-ones mask for negative elements; confirm against that helper.
+ Only V16QI, V8HI and V4SI inputs are handled. */
+
+void
+ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+ enum machine_mode imode = GET_MODE (operands[1]);
+ rtx (*unpack)(rtx, rtx, rtx); /* Interleave generator picked below. */
+ rtx se, dest; /* SE is the extension vector paired with the input. */
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable (); /* No other input mode is supported here. */
+ }
+
+ dest = gen_lowpart (imode, operands[0]); /* Destination viewed in the input mode. */
+
+ if (unsigned_p)
+ se = force_reg (imode, CONST0_RTX (imode)); /* Zero extension: pair with zeros. */
+ else
+ se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx); /* Sign extension: pair with the compare result. */
+
+ emit_insn (unpack (dest, operands[1], se));
+}
+
/* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
=== config/i386/sse.md
==================================================================
--- config/i386/sse.md (revision 107681)
+++ config/i386/sse.md (local)
@@ -3662,6 +3662,114 @@
DONE;
})
+(define_expand "vec_unpacku_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, true); /* unsigned_p = true: zero-extend; high_p = true: high elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, true); /* unsigned_p = false: sign-extend; high_p = true: high elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, false); /* unsigned_p = true: zero-extend; high_p = false: low elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v16qi"
+ [(match_operand:V8HI 0 "register_operand" "")
+ (match_operand:V16QI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, false); /* unsigned_p = false: sign-extend; high_p = false: low elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, true); /* unsigned_p = true: zero-extend; high_p = true: high elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, true); /* unsigned_p = false: sign-extend; high_p = true: high elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, false); /* unsigned_p = true: zero-extend; high_p = false: low elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, false); /* unsigned_p = false: sign-extend; high_p = false: low elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacku_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, true); /* unsigned_p = true: zero-extend; high_p = true: high elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacks_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, true); /* unsigned_p = false: sign-extend; high_p = true: high elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacku_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, true, false); /* unsigned_p = true: zero-extend; high_p = false: low elements. */
+ DONE;
+})
+
+(define_expand "vec_unpacks_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ ix86_expand_sse_unpack (operands, false, false); /* unsigned_p = false: sign-extend; high_p = false: low elements. */
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
=== testsuite/gcc.dg/vect/vect-117.c
==================================================================
--- testsuite/gcc.dg/vect/vect-117.c (revision 107681)
+++ testsuite/gcc.dg/vect/vect-117.c (local)
@@ -16,13 +16,13 @@
/* Unknown evolution. */
for (i = 0; i < N; i++)
{
- ia[i] = (int *) *p[i];
+ ia[i] = (size_t *) *p[i];
}
/* check results: */
for (i = 0; i < N; i++)
{
- if (ia[i] != (int *) *p[i])
+ if (ia[i] != (size_t *) *p[i])
abort();
}
return 0;
=== testsuite/gcc.dg/vect/vect-reduc-dot-u16.c
==================================================================
--- testsuite/gcc.dg/vect/vect-reduc-dot-u16.c (revision 107681)
+++ testsuite/gcc.dg/vect/vect-reduc-dot-u16.c (local)
@@ -12,6 +12,7 @@
unsigned short Y[N] __attribute__ ((__aligned__(16)));
/* short->short->int dot product. Not vectorized. */
+/* ??? Is vectorized on i386. */
unsigned int
foo1(int len) {
int i;
@@ -28,6 +29,10 @@
/* short->int->int dot product. Should be vectorized on ppc,
but for some reason currently the GIMPLE arguments are cast to int
instead of 'unsigned int'. */
+/* ??? This is exactly correct. The multiplication promotes to int,
+ then is promoted to unsigned int for the addition. Which results
+ in an int->unsigned int cast, which since no bits are modified in
+ the cast should be trivially vectorizable. */
unsigned int
foo2(int len) {
int i;
=== tree-vectorizer.c
==================================================================
--- tree-vectorizer.c (revision 107681)
+++ tree-vectorizer.c (local)
@@ -1775,6 +1775,7 @@
tree expr = TREE_OPERAND (stmt, 1);
tree type = TREE_TYPE (expr);
tree wide_vectype = get_vectype_for_scalar_type (type);
+ enum tree_code c1, c2;
/* The result of a vectorized widening operation usually requires two vectors
(because the widened results do not fit int one vector). The generated
@@ -1816,21 +1817,38 @@
switch (code)
{
case WIDEN_MULT_EXPR:
- *code1 = VEC_WIDEN_MULT_HI_EXPR;
- *code2 = VEC_WIDEN_MULT_LO_EXPR;
- optab1 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, vectype);
- optab2 = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, vectype);
+ if (BYTES_BIG_ENDIAN)
+ {
+ c1 = VEC_WIDEN_MULT_HI_EXPR;
+ c2 = VEC_WIDEN_MULT_LO_EXPR;
+ }
+ else
+ {
+ c2 = VEC_WIDEN_MULT_HI_EXPR;
+ c1 = VEC_WIDEN_MULT_LO_EXPR;
+ }
break;
case NOP_EXPR:
- *code1 = VEC_UNPACK_HI_EXPR;
- *code2 = VEC_UNPACK_LO_EXPR;
- optab1 = optab_for_tree_code (VEC_UNPACK_HI_EXPR, vectype);
- optab2 = optab_for_tree_code (VEC_UNPACK_LO_EXPR, vectype);
+ if (BYTES_BIG_ENDIAN)
+ {
+ c1 = VEC_UNPACK_HI_EXPR;
+ c2 = VEC_UNPACK_LO_EXPR;
+ }
+ else
+ {
+ c2 = VEC_UNPACK_HI_EXPR;
+ c1 = VEC_UNPACK_LO_EXPR;
+ }
break;
default:
gcc_unreachable ();
}
+ *code1 = c1;
+ *code2 = c2;
+ optab1 = optab_for_tree_code (c1, vectype);
+ optab2 = optab_for_tree_code (c2, vectype);
+
if (!optab1 || !optab2)
return false;