[PATCH, i386]: Add zero-extended variants of PLUS and MULT simple LEA peephole2s.
Uros Bizjak
ubizjak@gmail.com
Fri Aug 10 17:24:00 GMT 2012
Hello!
The attached patch adds zero-extended variants of the PLUS and MULT simple LEA
peephole2s. The patch also disables the PLUS peephole2s on TARGET_OPT_AGU
targets (Atom), since on those targets we already split the relevant LEAs
according to LEA vs. ADD priority.
2012-08-10 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (simple LEA peephole2s): Add zero-extend
variants of PLUS and MULT simple LEA patterns. Disable PLUS
patterns for TARGET_OPT_AGU.
Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.
Uros.
-------------- next part --------------
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 190298)
+++ config/i386/i386.md (working copy)
@@ -17317,12 +17317,15 @@
;; Attempt to convert simple lea to add/shift.
;; These can be created by move expanders.
+;; Disable PLUS peepholes on TARGET_OPT_AGU, since all
+;; relevant lea instructions were already split.
(define_peephole2
[(set (match_operand:SWI48 0 "register_operand")
(plus:SWI48 (match_dup 0)
(match_operand:SWI48 1 "<nonmemory_operand>")))]
- "peep2_regno_dead_p (0, FLAGS_REG)"
+ "!TARGET_OPT_AGU
+ && peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
(clobber (reg:CC FLAGS_REG))])])
@@ -17330,7 +17333,8 @@
[(set (match_operand:SWI48 0 "register_operand")
(plus:SWI48 (match_operand:SWI48 1 "<nonmemory_operand>")
(match_dup 0)))]
- "peep2_regno_dead_p (0, FLAGS_REG)"
+ "!TARGET_OPT_AGU
+ && peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (match_dup 1)))
(clobber (reg:CC FLAGS_REG))])])
@@ -17338,9 +17342,9 @@
[(set (match_operand:SI 0 "register_operand")
(subreg:SI (plus:DI (match_operand:DI 1 "register_operand")
(match_operand:DI 2 "nonmemory_operand")) 0))]
- "TARGET_64BIT
- && peep2_regno_dead_p (0, FLAGS_REG)
- && REGNO (operands[0]) == REGNO (operands[1])"
+ "TARGET_64BIT && !TARGET_OPT_AGU
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (SImode, operands[2]);")
@@ -17349,27 +17353,81 @@
[(set (match_operand:SI 0 "register_operand")
(subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
(match_operand:DI 2 "register_operand")) 0))]
- "TARGET_64BIT
- && peep2_regno_dead_p (0, FLAGS_REG)
- && REGNO (operands[0]) == REGNO (operands[2])"
+ "TARGET_64BIT && !TARGET_OPT_AGU
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (SImode, operands[1]);")
(define_peephole2
+ [(set (match_operand:DI 0 "register_operand") ;; zero-extended SImode PLUS; condition requires op0 and op1 to share a REGNO
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand"))))]
+ "TARGET_64BIT && !TARGET_OPT_AGU
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) ;; same set, re-emitted with an explicit FLAGS_REG clobber
+ (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2 ;; zero-extended SImode PLUS; condition requires op0 and the second addend (op2) to share a REGNO
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 1 "nonmemory_operand")
+ (match_operand:SI 2 "register_operand"))))]
+ "TARGET_64BIT && !TARGET_OPT_AGU
+ && REGNO (operands[0]) == REGNO (operands[2])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) ;; addends swapped so the register operand (op2) comes first
+ (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])])
+
+(define_peephole2 ;; zero-extended SImode subreg of a DImode PLUS whose first addend is the destination
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (subreg:SI (plus:DI (match_dup 0)
+ (match_operand:DI 1 "nonmemory_operand")) 0)))]
+ "TARGET_64BIT && !TARGET_OPT_AGU
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) ;; re-emitted as an SImode PLUS with an explicit FLAGS_REG clobber
+ (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[1] = gen_lowpart (SImode, operands[1]); /* other addend, narrowed to SImode */
+ operands[2] = gen_lowpart (SImode, operands[0]); /* SImode low part of the destination */
+})
+
+(define_peephole2 ;; zero-extended SImode subreg of a DImode PLUS whose second addend is the destination
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
+ (match_dup 0)) 0)))]
+ "TARGET_64BIT && !TARGET_OPT_AGU
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) ;; re-emitted as an SImode PLUS, destination low part (op2) first
+ (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[1] = gen_lowpart (SImode, operands[1]); /* other addend, narrowed to SImode */
+ operands[2] = gen_lowpart (SImode, operands[0]); /* SImode low part of the destination */
+})
+
+(define_peephole2 ;; in-place MULT by a constant with exact_log2 >= 0, rewritten as ASHIFT by that log2
[(set (match_operand:SWI48 0 "register_operand")
(mult:SWI48 (match_dup 0)
(match_operand:SWI48 1 "const_int_operand")))]
"exact_log2 (INTVAL (operands[1])) >= 0
&& peep2_regno_dead_p (0, FLAGS_REG)"
- [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 2)))
+ [(parallel [(set (match_dup 0) (ashift:SWI48 (match_dup 0) (match_dup 1))) ;; op1 is reused for the shift count
(clobber (reg:CC FLAGS_REG))])]
- "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
+ "operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
(define_peephole2
[(set (match_operand:SI 0 "register_operand")
(subreg:SI (mult:DI (match_operand:DI 1 "register_operand")
- (match_operand:DI 2 "const_int_operand")) 0))]
+ (match_operand:DI 2 "const_int_operand")) 0))]
"TARGET_64BIT
&& exact_log2 (INTVAL (operands[2])) >= 0
&& REGNO (operands[0]) == REGNO (operands[1])
@@ -17378,6 +17436,36 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+(define_peephole2 ;; zero-extended SImode MULT by a constant with exact_log2 >= 0 -> zero-extended ASHIFT
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (mult:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "const_int_operand"))))]
+ "TARGET_64BIT
+ && exact_log2 (INTVAL (operands[2])) >= 0
+ && REGNO (operands[0]) == REGNO (operands[1])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0) ;; zero_extend carries :DI, matching the DImode destination
+ (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
+
+(define_peephole2 ;; zero-extended SImode subreg of an in-place DImode MULT -> zero-extended ASHIFT
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (subreg:SI (mult:DI (match_dup 0)
+ (match_operand:DI 1 "const_int_operand")) 0)))]
+ "TARGET_64BIT
+ && exact_log2 (INTVAL (operands[1])) >= 0
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel [(set (match_dup 0)
+ (zero_extend:DI (ashift:SI (match_dup 2) (match_dup 1))))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[1] = GEN_INT (exact_log2 (INTVAL (operands[1]))); /* multiplier (op1, tested in the condition) -> shift count */
+ operands[2] = gen_lowpart (SImode, operands[0]); /* SImode low part of the destination; op2 exists only from here on */
+})
+
;; The ESP adjustments can be done by the push and pop instructions. Resulting
;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
;; On many CPUs it is also faster, since special hardware to avoid esp
More information about the Gcc-patches
mailing list