[PATCH] i386: Cleanup V2QI arithmetic instructions
Uros Bizjak
ubizjak@gmail.com
Thu Jan 13 18:13:58 GMT 2022
2022-01-13 Uroš Bizjak <ubizjak@gmail.com>
gcc/ChangeLog:
* config/i386/mmx.md (negv2qi2): Disparage GPR alternative a bit.
Disable for TARGET_PARTIAL_REG_STALL unless optimizing for size.
(negv2qi2 splitters): Use lowpart_subreg instead of
gen_lowpart to create subreg.  Split the SSE alternative in
V16QImode instead of V4QImode.
(neg<mode>2): Move.
(<plusminus:insn>v2qi3): Disparage GPR alternative a bit.
Disable for TARGET_PARTIAL_REG_STALL unless optimizing for size.
(<plusminus:insn>v2qi3 splitters): Use lowpart_subreg instead of
gen_lowpart to create subreg.  Split the SSE alternatives in
V16QImode instead of V4QImode.
* config/i386/i386.md (*subqi_ext<mode>_2): Move.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Pushed to master.
Uros.
-------------- next part --------------
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9937643a273..bcaaa4993b1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6905,6 +6905,30 @@
[(set_attr "type" "alu")
(set_attr "mode" "SI")])
+(define_insn "*subqi_ext<mode>_2"
+ [(set (zero_extract:SWI248
+ (match_operand:SWI248 0 "register_operand" "+Q")
+ (const_int 8)
+ (const_int 8))
+ (subreg:SWI248
+ (minus:QI
+ (subreg:QI
+ (zero_extract:SWI248
+ (match_operand:SWI248 1 "register_operand" "0")
+ (const_int 8)
+ (const_int 8)) 0)
+ (subreg:QI
+ (zero_extract:SWI248
+ (match_operand:SWI248 2 "register_operand" "Q")
+ (const_int 8)
+ (const_int 8)) 0)) 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])"
+ "sub{b}\t{%h2, %h0|%h0, %h2}"
+ [(set_attr "type" "alu")
+ (set_attr "mode" "QI")])
+
;; Subtract with jump on overflow.
(define_expand "subv<mode>4"
[(parallel [(set (reg:CCO FLAGS_REG)
@@ -6932,30 +6956,6 @@
operands[4] = gen_rtx_SIGN_EXTEND (<DPWI>mode, operands[2]);
})
-(define_insn "*subqi_ext<mode>_2"
- [(set (zero_extract:SWI248
- (match_operand:SWI248 0 "register_operand" "+Q")
- (const_int 8)
- (const_int 8))
- (subreg:SWI248
- (minus:QI
- (subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 1 "register_operand" "0")
- (const_int 8)
- (const_int 8)) 0)
- (subreg:QI
- (zero_extract:SWI248
- (match_operand:SWI248 2 "register_operand" "Q")
- (const_int 8)
- (const_int 8)) 0)) 0))
- (clobber (reg:CC FLAGS_REG))]
- "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
- rtx_equal_p (operands[0], operands[1])"
- "sub{b}\t{%h2, %h0|%h0, %h2}"
- [(set_attr "type" "alu")
- (set_attr "mode" "QI")])
-
(define_insn "*subv<mode>4"
[(set (reg:CCO FLAGS_REG)
(eq:CCO (minus:<DWI>
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 295a132bc46..3d99a5e851b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1633,12 +1633,20 @@
"TARGET_MMX_WITH_SSE"
"operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+(define_expand "neg<mode>2"
+ [(set (match_operand:VI_32 0 "register_operand")
+ (minus:VI_32
+ (match_dup 2)
+ (match_operand:VI_32 1 "register_operand")))]
+ "TARGET_SSE2"
+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
(define_insn "negv2qi2"
[(set (match_operand:V2QI 0 "register_operand" "=?Q,&Yw")
(neg:V2QI
(match_operand:V2QI 1 "register_operand" "0,Yw")))
(clobber (reg:CC FLAGS_REG))]
- ""
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
[(set_attr "isa" "*,sse2")
(set_attr "type" "multi")
@@ -1664,10 +1672,10 @@
(const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
{
- operands[3] = gen_lowpart (HImode, operands[1]);
- operands[2] = gen_lowpart (HImode, operands[0]);
- operands[1] = gen_lowpart (QImode, operands[1]);
- operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[3] = lowpart_subreg (HImode, operands[1], V2QImode);
+ operands[2] = lowpart_subreg (HImode, operands[0], V2QImode);
+ operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+ operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
})
(define_split
@@ -1678,11 +1686,11 @@
"reload_completed"
[(set (match_dup 0) (match_dup 2))
(set (match_dup 0)
- (minus:V4QI (match_dup 0) (match_dup 1)))]
+ (minus:V16QI (match_dup 0) (match_dup 1)))]
{
- operands[2] = CONST0_RTX (V4QImode);
- operands[1] = gen_lowpart (V4QImode, operands[1]);
- operands[0] = gen_lowpart (V4QImode, operands[0]);
+ operands[2] = CONST0_RTX (V16QImode);
+ operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
+ operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
})
(define_expand "mmx_<insn><mode>3"
@@ -1718,14 +1726,6 @@
(set_attr "type" "mmxadd,sseadd,sseadd")
(set_attr "mode" "DI,TI,TI")])
-(define_expand "neg<mode>2"
- [(set (match_operand:VI_32 0 "register_operand")
- (minus:VI_32
- (match_dup 2)
- (match_operand:VI_32 1 "register_operand")))]
- "TARGET_SSE2"
- "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
-
(define_insn "<insn><mode>3"
[(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
(plusminus:VI_32
@@ -1745,7 +1745,7 @@
(match_operand:V2QI 1 "register_operand" "<comm>0,0,Yw")
(match_operand:V2QI 2 "register_operand" "Q,x,Yw")))
(clobber (reg:CC FLAGS_REG))]
- ""
+ "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
[(set_attr "isa" "*,sse2_noavx,avx")
(set_attr "type" "multi,sseadd,sseadd")
@@ -1776,12 +1776,12 @@
(const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
{
- operands[5] = gen_lowpart (HImode, operands[2]);
- operands[4] = gen_lowpart (HImode, operands[1]);
- operands[3] = gen_lowpart (HImode, operands[0]);
- operands[2] = gen_lowpart (QImode, operands[2]);
- operands[1] = gen_lowpart (QImode, operands[1]);
- operands[0] = gen_lowpart (QImode, operands[0]);
+ operands[5] = lowpart_subreg (HImode, operands[2], V2QImode);
+ operands[4] = lowpart_subreg (HImode, operands[1], V2QImode);
+ operands[3] = lowpart_subreg (HImode, operands[0], V2QImode);
+ operands[2] = lowpart_subreg (QImode, operands[2], V2QImode);
+ operands[1] = lowpart_subreg (QImode, operands[1], V2QImode);
+ operands[0] = lowpart_subreg (QImode, operands[0], V2QImode);
})
(define_split
@@ -1792,11 +1792,11 @@
(clobber (reg:CC FLAGS_REG))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0)
- (plusminus:V4QI (match_dup 1) (match_dup 2)))]
+ (plusminus:V16QI (match_dup 1) (match_dup 2)))]
{
- operands[2] = gen_lowpart (V4QImode, operands[2]);
- operands[1] = gen_lowpart (V4QImode, operands[1]);
- operands[0] = gen_lowpart (V4QImode, operands[0]);
+ operands[2] = lowpart_subreg (V16QImode, operands[2], V2QImode);
+ operands[1] = lowpart_subreg (V16QImode, operands[1], V2QImode);
+ operands[0] = lowpart_subreg (V16QImode, operands[0], V2QImode);
})
(define_expand "mmx_<insn><mode>3"
More information about the Gcc-patches
mailing list