ARMv6 mul improvement
Paul Brook
paul@codesourcery.com
Thu Jan 4 23:14:00 GMT 2007
The ARMv6 architecture removes the restrictions on overlapping operands to
multiply instructions. The patch below relaxes the constraints on the
corresponding insn patterns so gcc can take advantage of this.
Tested with cross to arm-none-eabi.
Applied to trunk.
Paul
2005-10-24 Paul Brook <paul@codesourcery.com>
* config/arm/arm.md (arm_mulsi3, thumb_mulsi3, mulsi3_compare0,
mulsi_compare0_scratch, mulsi3addsi, mulsi3addsi_compare0,
mulsi3addsi_compare0_scratch, mulsidi3adddi, mulsidi3,
umulsidi3, umulsidi3adddi, smulsi3_highpart,
umulsi3_highpart): Make conditional on !arm_arch6.
(arm_mulsi3_v6, thumb_mulsi3_v6, mulsi3_compare0_v6,
mulsi_compare0_scratch_v6, mulsi3addsi_v6, mulsi3addsi_compare0_v6,
mulsi3addsi_compare0_scratch_v6, mulsidi3adddi_v6, mulsidi3_v6,
umulsidi3_v6, umulsidi3adddi_v6, smulsi3_highpart_v6,
umulsi3_highpart_v6): New insns.
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md (revision 159304)
+++ gcc/config/arm/arm.md (working copy)
@@ -1103,12 +1103,22 @@ (define_insn "*arm_mulsi3"
[(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
(mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
(match_operand:SI 1 "s_register_operand" "%?r,0")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && !arm_arch6"
"mul%?\\t%0, %2, %1"
[(set_attr "insn" "mul")
(set_attr "predicable" "yes")]
)
+(define_insn "*arm_mulsi3_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (match_operand:SI 1 "s_register_operand" "r")
+ (match_operand:SI 2 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch6"
+ "mul%?\\t%0, %1, %2"
+ [(set_attr "insn" "mul")
+ (set_attr "predicable" "yes")]
+)
+
; Unfortunately with the Thumb the '&'/'0' trick can fails when operands
; 1 and 2; are the same, because reload will make operand 0 match
; operand 1 without realizing that this conflicts with operand 2. We fix
@@ -1118,7 +1128,7 @@ (define_insn "*thumb_mulsi3"
[(set (match_operand:SI 0 "register_operand" "=&l,&l,&l")
(mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0")
(match_operand:SI 2 "register_operand" "l,l,l")))]
- "TARGET_THUMB1"
+ "TARGET_THUMB1 && !arm_arch6"
"*
if (which_alternative < 2)
return \"mov\\t%0, %1\;mul\\t%0, %2\";
@@ -1129,6 +1139,19 @@ (define_insn "*thumb_mulsi3"
(set_attr "insn" "mul")]
)
+(define_insn "*thumb_mulsi3_v6"
+ [(set (match_operand:SI 0 "register_operand" "=l,l,l")
+ (mult:SI (match_operand:SI 1 "register_operand" "0,l,0")
+ (match_operand:SI 2 "register_operand" "l,0,0")))]
+ "TARGET_THUMB1 && arm_arch6"
+ "@
+ mul\\t%0, %2
+ mul\\t%0, %1
+ mul\\t%0, %1"
+ [(set_attr "length" "2")
+ (set_attr "insn" "mul")]
+)
+
(define_insn "*mulsi3_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV (mult:SI
@@ -1137,7 +1160,21 @@ (define_insn "*mulsi3_compare0"
(const_int 0)))
(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
(mult:SI (match_dup 2) (match_dup 1)))]
- "TARGET_ARM"
+ "TARGET_ARM && !arm_arch6"
+ "mul%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "muls")]
+)
+
+(define_insn "*mulsi3_compare0_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (mult:SI (match_dup 2) (match_dup 1)))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
"mul%.\\t%0, %2, %1"
[(set_attr "conds" "set")
(set_attr "insn" "muls")]
@@ -1150,7 +1187,20 @@ (define_insn "*mulsi_compare0_scratch"
(match_operand:SI 1 "s_register_operand" "%?r,0"))
(const_int 0)))
(clobber (match_scratch:SI 0 "=&r,&r"))]
- "TARGET_ARM"
+ "TARGET_ARM && !arm_arch6"
+ "mul%.\\t%0, %2, %1"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "muls")]
+)
+
+(define_insn "*mulsi_compare0_scratch_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
"mul%.\\t%0, %2, %1"
[(set_attr "conds" "set")
(set_attr "insn" "muls")]
@@ -1164,7 +1214,19 @@ (define_insn "*mulsi3addsi"
(mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r")
(match_operand:SI 1 "s_register_operand" "%r,0,r,0"))
(match_operand:SI 3 "s_register_operand" "?r,r,0,0")))]
- "TARGET_32BIT"
+ "TARGET_32BIT && !arm_arch6"
+ "mla%?\\t%0, %2, %1, %3"
+ [(set_attr "insn" "mla")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsi3addsi_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI
+ (mult:SI (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 3 "s_register_operand" "r")))]
+ "TARGET_32BIT && arm_arch6"
"mla%?\\t%0, %2, %1, %3"
[(set_attr "insn" "mla")
(set_attr "predicable" "yes")]
@@ -1181,7 +1243,24 @@ (define_insn "*mulsi3addsi_compare0"
(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r,&r")
(plus:SI (mult:SI (match_dup 2) (match_dup 1))
(match_dup 3)))]
- "TARGET_ARM"
+ "TARGET_ARM && arm_arch6"
+ "mla%.\\t%0, %2, %1, %3"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mlas")]
+)
+
+(define_insn "*mulsi3addsi_compare0_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 3 "s_register_operand" "r"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "s_register_operand" "=r")
+ (plus:SI (mult:SI (match_dup 2) (match_dup 1))
+ (match_dup 3)))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
"mla%.\\t%0, %2, %1, %3"
[(set_attr "conds" "set")
(set_attr "insn" "mlas")]
@@ -1196,7 +1275,22 @@ (define_insn "*mulsi3addsi_compare0_scra
(match_operand:SI 3 "s_register_operand" "?r,r,0,0"))
(const_int 0)))
(clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
- "TARGET_ARM"
+ "TARGET_ARM && !arm_arch6"
+ "mla%.\\t%0, %2, %1, %3"
+ [(set_attr "conds" "set")
+ (set_attr "insn" "mlas")]
+)
+
+(define_insn "*mulsi3addsi_compare0_scratch_v6"
+ [(set (reg:CC_NOOV CC_REGNUM)
+ (compare:CC_NOOV
+ (plus:SI (mult:SI
+ (match_operand:SI 2 "s_register_operand" "r")
+ (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 3 "s_register_operand" "r"))
+ (const_int 0)))
+ (clobber (match_scratch:SI 0 "=r"))]
+ "TARGET_ARM && arm_arch6 && optimize_size"
"mla%.\\t%0, %2, %1, %3"
[(set_attr "conds" "set")
(set_attr "insn" "mlas")]
@@ -1211,7 +1305,20 @@ (define_insn "*mulsidi3adddi"
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
(sign_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
(match_operand:DI 1 "s_register_operand" "0")))]
- "TARGET_32BIT && arm_arch3m"
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "smlal%?\\t%Q0, %R0, %3, %2"
+ [(set_attr "insn" "smlal")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*mulsidi3adddi_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
+ (match_operand:DI 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && arm_arch6"
"smlal%?\\t%Q0, %R0, %3, %2"
[(set_attr "insn" "smlal")
(set_attr "predicable" "yes")]
@@ -1222,7 +1329,18 @@ (define_insn "mulsidi3"
(mult:DI
(sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
- "TARGET_32BIT && arm_arch3m"
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "smull%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "smull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "mulsidi3_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch6"
"smull%?\\t%Q0, %R0, %1, %2"
[(set_attr "insn" "smull")
(set_attr "predicable" "yes")]
@@ -1233,7 +1351,18 @@ (define_insn "umulsidi3"
(mult:DI
(zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
- "TARGET_32BIT && arm_arch3m"
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "umull%?\\t%Q0, %R0, %1, %2"
+ [(set_attr "insn" "umull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "umulsidi3_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
+ "TARGET_32BIT && arm_arch6"
"umull%?\\t%Q0, %R0, %1, %2"
[(set_attr "insn" "umull")
(set_attr "predicable" "yes")]
@@ -1248,7 +1377,20 @@ (define_insn "*umulsidi3adddi"
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
(zero_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
(match_operand:DI 1 "s_register_operand" "0")))]
- "TARGET_32BIT && arm_arch3m"
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "umlal%?\\t%Q0, %R0, %3, %2"
+ [(set_attr "insn" "umlal")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "*umulsidi3adddi_v6"
+ [(set (match_operand:DI 0 "s_register_operand" "=r")
+ (plus:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
+ (match_operand:DI 1 "s_register_operand" "0")))]
+ "TARGET_32BIT && arm_arch6"
"umlal%?\\t%Q0, %R0, %3, %2"
[(set_attr "insn" "umlal")
(set_attr "predicable" "yes")]
@@ -1263,7 +1405,22 @@ (define_insn "smulsi3_highpart"
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
(const_int 32))))
(clobber (match_scratch:SI 3 "=&r,&r"))]
- "TARGET_32BIT && arm_arch3m"
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "smull%?\\t%3, %0, %2, %1"
+ [(set_attr "insn" "smull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "smulsi3_highpart_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_32BIT && arm_arch6"
"smull%?\\t%3, %0, %2, %1"
[(set_attr "insn" "smull")
(set_attr "predicable" "yes")]
@@ -1278,7 +1435,22 @@ (define_insn "umulsi3_highpart"
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
(const_int 32))))
(clobber (match_scratch:SI 3 "=&r,&r"))]
- "TARGET_32BIT && arm_arch3m"
+ "TARGET_32BIT && arm_arch3m && !arm_arch6"
+ "umull%?\\t%3, %0, %2, %1"
+ [(set_attr "insn" "umull")
+ (set_attr "predicable" "yes")]
+)
+
+(define_insn "umulsi3_highpart_v6"
+ [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "s_register_operand" "r"))
+ (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r")))
+ (const_int 32))))
+ (clobber (match_scratch:SI 3 "=r"))]
+ "TARGET_32BIT && arm_arch6"
"umull%?\\t%3, %0, %2, %1"
[(set_attr "insn" "umull")
(set_attr "predicable" "yes")]
More information about the Gcc-patches
mailing list