[PATCH][i386] Add some obvious missing vectorizer patterns for AVX
Richard Guenther
rguenther@suse.de
Mon May 10 13:02:00 GMT 2010
This adds patterns that do not require much thought. I duplicated
the existing (but odd to me) superfluous vec_concats for example
in vec_unpacks_hi_v8sf (AVX would have vextract for a
highpart vec_select - but there must be a reason to do it the
odd way for SSE).
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
Ok for trunk?
Thanks,
Richard.
2010-05-10 Richard Guenther <rguenther@suse.de>
* config/i386/sse.md (reduc_splus_v8sf): Add.
(reduc_splus_v4df): Likewise.
(vec_unpacks_hi_v8sf): Likewise.
(vec_unpacks_lo_v8sf): Likewise.
(*avx_cvtps2pd256_2): Likewise.
(vec_unpacks_float_hi_v8si): Likewise.
(vec_unpacks_float_lo_v8si): Likewise.
(vec_interleave_highv4df): Likewise.
(vec_interleave_lowv4df): Likewise.
Index: trunk/gcc/config/i386/sse.md
===================================================================
*** trunk.orig/gcc/config/i386/sse.md 2010-05-10 13:27:11.000000000 +0200
--- trunk/gcc/config/i386/sse.md 2010-05-10 14:40:05.000000000 +0200
***************
*** 1377,1382 ****
--- 1377,1395 ----
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
+ (define_expand "reduc_splus_v8sf"
+ [(match_operand:V8SF 0 "register_operand" "")
+ (match_operand:V8SF 1 "register_operand" "")]
+ "TARGET_AVX"
+ {
+ rtx tmp = gen_reg_rtx (V8SFmode);
+ rtx tmp2 = gen_reg_rtx (V8SFmode);
+ emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_avx_haddv8sf3 (tmp2, operands[1], operands[1]));
+ emit_insn (gen_avx_haddv8sf3 (operands[0], tmp2, tmp2));
+ DONE;
+ })
+
(define_expand "reduc_splus_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
***************
*** 1393,1398 ****
--- 1406,1422 ----
DONE;
})
+ (define_expand "reduc_splus_v4df"
+ [(match_operand:V4DF 0 "register_operand" "")
+ (match_operand:V4DF 1 "register_operand" "")]
+ "TARGET_AVX"
+ {
+ rtx tmp = gen_reg_rtx (V4DFmode);
+ emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
+ emit_insn (gen_avx_haddv4df3 (operands[0], tmp, tmp));
+ DONE;
+ })
+
(define_expand "reduc_splus_v2df"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "register_operand" "")]
***************
*** 3035,3040 ****
--- 3070,3087 ----
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+ (define_insn "*avx_cvtps2pd256_2"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "x")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "vcvtps2pd\t{%x1, %0|%0, %x1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(float_extend:V2DF
***************
*** 3069,3074 ****
--- 3116,3145 ----
operands[2] = gen_reg_rtx (V4SFmode);
})
+ (define_expand "vec_unpacks_hi_v8sf"
+ [(set (match_dup 2)
+ (vec_select:V8SF
+ (vec_concat:V16SF
+ (match_dup 2)
+ (match_operand:V8SF 1 "nonimmediate_operand" ""))
+ (parallel [(const_int 12)
+ (const_int 13)
+ (const_int 14)
+ (const_int 15)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ {
+ operands[2] = gen_reg_rtx (V8SFmode);
+ })
+
(define_expand "vec_unpacks_lo_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "")
(float_extend:V2DF
***************
*** 3077,3082 ****
--- 3148,3161 ----
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE2")
+ (define_expand "vec_unpacks_lo_v8sf"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (float_extend:V4DF
+ (vec_select:V4SF
+ (match_operand:V8SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX")
+
(define_expand "vec_unpacks_float_hi_v8hi"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V8HI 1 "register_operand" "")]
***************
*** 3149,3154 ****
--- 3228,3261 ----
(parallel [(const_int 0) (const_int 1)]))))]
"TARGET_SSE2")
+ (define_expand "vec_unpacks_float_hi_v8si"
+ [(set (match_dup 2)
+ (vec_select:V8SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)])))
+ (set (match_operand:V4DF 0 "register_operand" "")
+ (float:V4DF
+ (vec_select:V4SI
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX"
+ "operands[2] = gen_reg_rtx (V8SImode);")
+
+ (define_expand "vec_unpacks_float_lo_v8si"
+ [(set (match_operand:V4DF 0 "register_operand" "")
+ (float:V4DF
+ (vec_select:V4SI
+ (match_operand:V8SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
+ "TARGET_AVX")
+
(define_expand "vec_unpacku_float_hi_v4si"
[(set (match_dup 5)
(vec_select:V4SI
***************
*** 4328,4333 ****
--- 4482,4501 ----
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+ (define_insn "vec_interleave_highv4df"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 2) (const_int 6)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_AVX"
+ "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
+
(define_expand "vec_interleave_highv2df"
[(set (match_operand:V2DF 0 "register_operand" "")
(vec_select:V2DF
***************
*** 4434,4439 ****
--- 4602,4621 ----
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
+
+ (define_insn "vec_interleave_lowv4df"
+ [(set (match_operand:V4DF 0 "register_operand" "=x")
+ (vec_select:V4DF
+ (vec_concat:V8DF
+ (match_operand:V4DF 1 "register_operand" "x")
+ (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 1) (const_int 5)])))]
+ "TARGET_AVX"
+ "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "V4DF")])
(define_expand "vec_interleave_lowv2df"
[(set (match_operand:V2DF 0 "register_operand" "")
More information about the Gcc-patches
mailing list