This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Add vec_pack_ufix_trunc_{v4df,v2df} expanders
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>, Uros Bizjak <ubizjak at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Tue, 1 Nov 2011 10:07:17 +0100
- Subject: [PATCH] Add vec_pack_ufix_trunc_{v4df,v2df} expanders
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
Similarly to the V{4,8}SFmode -> unsigned V{4,8}SImode conversion
support for AVX, this patch adds V{2,4}DFmode -> unsigned V{4,8}SImode
conversion (two input vectors are packed into one result vector).
Ok for trunk?
2011-11-01 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md (ssepackfltmode): New mode attr.
(vec_pack_ufix_trunc_<mode>): New expander using VF2 iterator.
--- gcc/config/i386/sse.md.jj 2011-11-01 09:04:37.000000000 +0100
+++ gcc/config/i386/sse.md 2011-11-01 09:37:36.000000000 +0100
@@ -3127,6 +3127,56 @@ (define_expand "vec_pack_sfix_trunc_v2df
DONE;
})
+(define_mode_attr ssepackfltmode /* SI vector mode that holds two packed vectors of the given DF mode.  */
+ [(V4DF "V8SI") (V2DF "V4SI")])
+
+(define_expand "vec_pack_ufix_trunc_<mode>" /* Pack DF vectors 1 and 2 into SI vector 0 (unsigned truncating fix).  */
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "register_operand" "")
+ (match_operand:VF2 2 "register_operand" "")]
+ "TARGET_AVX"
+{
+ REAL_VALUE_TYPE MTWO32r, TWO31r;
+ rtx two31r, mtwo32r, tmp[8];
+ int i;
+
+ for (i = 0; i < 6; i++) /* tmp[0..5]: scratch registers in the input DF vector mode.  */
+ tmp[i] = gen_reg_rtx (<MODE>mode);
+ tmp[6] = gen_reg_rtx (<ssepackfltmode>mode); /* tmp[6..7]: converted halves, in the result mode.  */
+ tmp[7] = gen_reg_rtx (<ssepackfltmode>mode);
+ real_ldexp (&TWO31r, &dconst1, 31); /* TWO31r = 1.0 * 2^31.  */
+ two31r = const_double_from_real_value (TWO31r, DFmode);
+ two31r = ix86_build_const_vector (<MODE>mode, 1, two31r); /* Presumably 2^31 in every lane -- confirm helper's 2nd arg.  */
+ two31r = force_reg (<MODE>mode, two31r);
+ real_ldexp (&MTWO32r, &dconstm1, 32); /* MTWO32r = -1.0 * 2^32.  */
+ mtwo32r = const_double_from_real_value (MTWO32r, DFmode);
+ mtwo32r = ix86_build_const_vector (<MODE>mode, 1, mtwo32r);
+ mtwo32r = force_reg (<MODE>mode, mtwo32r);
+ emit_insn (gen_avx_cmp<mode>3 (tmp[0], operands[1], two31r, GEN_INT (29))); /* 29 (0x1d) is the VCMPPD GE_OQ predicate: lane mask for op1 >= 2^31.  */
+ emit_insn (gen_avx_cmp<mode>3 (tmp[1], operands[2], two31r, GEN_INT (29))); /* Same mask for op2.  */
+ emit_insn (gen_and<mode>3 (tmp[2], tmp[0], mtwo32r)); /* -2^32 in masked lanes, all-zero bits elsewhere.  */
+ emit_insn (gen_and<mode>3 (tmp[3], tmp[1], mtwo32r));
+ emit_insn (gen_add<mode>3 (tmp[4], operands[1], tmp[2])); /* Bias lanes >= 2^31 down by 2^32 before the signed conversion below.  */
+ emit_insn (gen_add<mode>3 (tmp[5], operands[2], tmp[3]));
+ if (<MODE>mode == V4DFmode)
+ {
+ emit_insn (gen_avx_cvttpd2dq256_2 (tmp[6], tmp[4])); /* Truncating DF->SI conversion of each biased input.  */
+ emit_insn (gen_avx_cvttpd2dq256_2 (tmp[7], tmp[5]));
+ emit_insn (gen_avx_vperm2f128v8si3 (operands[0], tmp[6], tmp[7],
+ GEN_INT (0x20))); /* 0x20: presumably low 128 bits of tmp[6], then of tmp[7] -- see VPERM2F128 imm8.  */
+ }
+ else
+ {
+ emit_insn (gen_sse2_cvttpd2dq (tmp[6], tmp[4])); /* Truncating DF->SI conversion of each biased input.  */
+ emit_insn (gen_sse2_cvttpd2dq (tmp[7], tmp[5]));
+ emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode,
+ operands[0]),
+ gen_lowpart (V2DImode, tmp[6]),
+ gen_lowpart (V2DImode, tmp[7]))); /* Viewed as V2DI: merge the low 64-bit elements of tmp[6] and tmp[7].  */
+ }
+ DONE;
+})
+
(define_expand "vec_pack_sfix_v4df"
[(match_operand:V8SI 0 "register_operand" "")
(match_operand:V4DF 1 "nonimmediate_operand" "")
Jakub