[WIP PATCH]: Autovectorize V2SF mode
Uros Bizjak
ubizjak@gmail.com
Fri May 8 17:22:39 GMT 2020
Attached WIP patch enables auto-vectorization of basic V2SF operations
(plus, minus, mult, min/max). The compiler takes care that everything
is loaded from memory via movq insn, so top two registers always
remain zero.
We could probably vectorize some more operations (horizontal add,
horizontal sub, addsub, and conversions).
Uros.
-------------- next part --------------
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b40f443ba8a..d1c0e354162 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -21007,9 +21007,11 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
- if ((TARGET_MMX || TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
+ if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode))
return true;
- if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
+ if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE_3DNOW (mode))
return true;
return false;
}
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 472f90f9bc1..2d6fe9d1350 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -175,7 +175,13 @@
]
(const_string "TI"))
- (and (eq_attr "alternative" "13,14")
+ (and (eq_attr "alternative" "13")
+ (ior (and (match_test "<MODE>mode == V2SFmode")
+ (not (match_test "TARGET_MMX_WITH_SSE")))
+ (not (match_test "TARGET_SSE2"))))
+ (const_string "V2SF")
+
+ (and (eq_attr "alternative" "14")
(ior (match_test "<MODE>mode == V2SFmode")
(not (match_test "TARGET_SSE2"))))
(const_string "V2SF")
@@ -240,15 +246,30 @@
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+(define_expand "addv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
+
(define_insn "*mmx_addv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
- (plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
- "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
- "pfadd\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+ (plus:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+ "@
+ pfadd\t{%2, %0|%0, %2}
+ addps\t{%2, %0|%0, %2}
+ vaddps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
(define_expand "mmx_subv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
@@ -262,17 +283,31 @@
(match_operand:V2SF 1 "nonimmediate_operand")))]
"TARGET_3DNOW")
+(define_expand "subv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (minus:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (MINUS, V2SFmode, operands);")
+
(define_insn "*mmx_subv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y,y")
- (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
- "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ [(set (match_operand:V2SF 0 "register_operand" "=y,y,x,Yv")
+ (minus:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "0,ym,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,0,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
pfsub\t{%2, %0|%0, %2}
- pfsubr\t{%1, %0|%0, %1}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ pfsubr\t{%1, %0|%0, %1}
+ subps\t{%2, %0|%0, %2}
+ vsubps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,native,*,*")
+ (set_attr "type" "mmxadd,mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,1,*,*")
+ (set_attr "mode" "V2SF,V2SF,V4SF,V4SF")])
(define_expand "mmx_mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand")
@@ -281,15 +316,30 @@
"TARGET_3DNOW"
"ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+(define_expand "mulv2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (mult:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+ "ix86_fixup_binary_operands_no_copy (MULT, V2SFmode, operands);")
+
(define_insn "*mmx_mulv2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
- (mult:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
- "TARGET_3DNOW && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
- "pfmul\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxmul")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
+ (mult:V2SF
+ (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (MULT, V2SFmode, operands)"
+ "@
+ pfmul\t{%2, %0|%0, %2}
+ mulps\t{%2, %0|%0, %2}
+ vmulps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxmul,ssemul,ssemul")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
(define_expand "mmx_<code>v2sf3"
[(set (match_operand:V2SF 0 "register_operand")
@@ -309,21 +359,43 @@
ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
})
+(define_expand "<code>v2sf3"
+ [(set (match_operand:V2SF 0 "register_operand")
+ (smaxmin:V2SF
+ (match_operand:V2SF 1 "register_operand")
+ (match_operand:V2SF 2 "register_operand")))]
+ "TARGET_MMX_WITH_SSE"
+{
+ if (!flag_finite_math_only || flag_signed_zeros)
+ {
+ emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3
+ (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+ else
+ ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands);
+})
+
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
;; are undefined in this condition, we're certain this is correct.
(define_insn "*mmx_<code>v2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(smaxmin:V2SF
- (match_operand:V2SF 1 "nonimmediate_operand" "%0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
- "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
- "pf<maxmin_float>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ (match_operand:V2SF 1 "register_mmxmem_operand" "%0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")))]
+ "(TARGET_3DNOW || TARGET_MMX_WITH_SSE)
+ && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)"
+ "pf<maxmin_float>\t{%2, %0|%0, %2}
+ <maxmin_float>ps\t{%2, %0|%0, %2}
+ v<maxmin_float>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
@@ -332,16 +404,20 @@
;; presence of -0.0 and NaN.
(define_insn "mmx_ieee_<ieee_maxmin>v2sf3"
- [(set (match_operand:V2SF 0 "register_operand" "=y")
+ [(set (match_operand:V2SF 0 "register_operand" "=y,x,Yv")
(unspec:V2SF
- [(match_operand:V2SF 1 "register_operand" "0")
- (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
+ [(match_operand:V2SF 1 "register_operand" "0,0,Yv")
+ (match_operand:V2SF 2 "register_mmxmem_operand" "ym,x,Yv")]
IEEE_MAXMIN))]
- "TARGET_3DNOW"
- "pf<ieee_maxmin>\t{%2, %0|%0, %2}"
- [(set_attr "type" "mmxadd")
- (set_attr "prefix_extra" "1")
- (set_attr "mode" "V2SF")])
+ "TARGET_3DNOW || TARGET_MMX_WITH_SSE"
+ "pf<ieee_maxmin>\t{%2, %0|%0, %2}
+ <ieee_maxmin>ps\t{%2, %0|%0, %2}
+ v<ieee_maxmin>ps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "isa" "*,sse2_noavx,avx")
+ (set_attr "mmx_isa" "native,*,*")
+ (set_attr "type" "mmxadd,sseadd,sseadd")
+ (set_attr "prefix_extra" "1,*,*")
+ (set_attr "mode" "V2SF,V4SF,V4SF")])
(define_insn "mmx_rcpv2sf2"
[(set (match_operand:V2SF 0 "register_operand" "=y")
More information about the Gcc-patches
mailing list