This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[autovect] i386 widening multiply
- From: Richard Henderson <rth at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 29 Nov 2005 15:51:45 -0800
- Subject: [autovect] i386 widening multiply
* config/i386/sse.md (vec_widen_smult_hi_v8hi,
vec_widen_smult_lo_v8hi, vec_widen_umult_hi_v8hi,
vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si,
vec_widen_umult_hi_v4si, vec_widen_umult_lo_v4si): New.
=== gcc/config/i386/sse.md
==================================================================
--- gcc/config/i386/sse.md (revision 107690)
+++ gcc/config/i386/sse.md (local)
@@ -2692,6 +2692,162 @@
DONE;
})
+(define_expand "vec_widen_smult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_sse2_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_sse2_smulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_sse2_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2, dest;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V8HImode);
+ t2 = gen_reg_rtx (V8HImode);
+ dest = gen_lowpart (V8HImode, operands[0]);
+
+ emit_insn (gen_mulv8hi3 (t1, op1, op2));
+ emit_insn (gen_sse2_umulv8hi3_highpart (t2, op1, op2));
+ emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_hi_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
+(define_expand "vec_widen_umult_lo_v4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx op1, op2, t1, t2;
+
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
+ emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
+ emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
+ DONE;
+})
+
(define_insn "ashr<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24