This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][ARM] Fix low reg issue in Thumb-2 movsi patterns
- From: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: nd <nd at arm dot com>, Richard Earnshaw <Richard dot Earnshaw at arm dot com>, "Kyrylo Tkachov" <Kyrylo dot Tkachov at arm dot com>
- Date: Wed, 24 Jul 2019 15:16:11 +0000
- Subject: [PATCH][ARM] Fix low reg issue in Thumb-2 movsi patterns
- Arc-authentication-results: i=1; mx.microsoft.com 1;spf=pass smtp.mailfrom=arm.com;dmarc=pass action=none header.from=arm.com;dkim=pass header.d=arm.com;arc=none
- Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector9901; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-SenderADCheck; bh=p3M3evzh7nd0UM5kn7EQQBVzoLU/BADl28kKfwr+ruA=; b=Xk9JhBfNpYELGFmG4w/Wz3wUyuOG9uBHkbADqpNps28aTVXQJ8aNvlXO+X0hEN+KhY+3Aqv3wLOF8gx+qJXnLpsAQL+tf5cN0xsH6t80vlfo1d0PyOsSDg8Gtxe6aaY5z+xMZKJghkZIQL+qUC+CENM5h/mVIFsGYfG7m78+FLaBEvC4aBsS5C0Mh4HuvSQWPozJfYAFtQoNnOsxbCek4Gz85DzsOf3g77idHxSvKd9LjMfoATNE8Stg0mfpgV30eSLIASRxQ9A8jEQv6IZbSCbNkksp2JcLnkoyzXtJ0+p/ds2R0T1Q+UEPy8P6w678nsRidp3swD/UKKRBunLYaQ==
- Arc-seal: i=1; a=rsa-sha256; s=arcselector9901; d=microsoft.com; cv=none; b=iwUb/cctMraLWqlfiFN9gh5JVStgCLVkuvtYGoHM+fIQmocTcTKdyw5JiQrMtYmRiHHC144haxlFdU+s8GoMw85mmF+cunKsWtbpYTRpLsVzMXdRpeUWefL3Ob5voDotvGYH5w/Qlxw/oJoSr9C9O4hiT88P7lCOYp6kPY+n30VY+PIXENa2g9C+kMyrPfniGkIu9VKMYefisSovmJJ96gumGUKuwHO3yB/DItrqFehQekA/FgB/0lFJ65sRHXNfaljjmYduQgnT6UBjHK5hU8DgZ0kmkKZYYxE/L18NvbLag706Dc8dH9thKXXOTKjlWSVPTjd2bHOrA/djOw2BEQ==
- Original-authentication-results: spf=none (sender IP is ) smtp.mailfrom=Wilco dot Dijkstra at arm dot com;
The Thumb-2 movsi patterns try to prefer low registers for loads and stores.
However, this is done incorrectly by using two separate alternatives with 'l' and 'h'
register classes. The register allocator will only use low registers, and
as a result we end up with significantly more spills and moves to high
registers. Fix this by merging the alternatives and using 'l*r' to indicate a
preference for low registers. This saves ~400 instructions in the pr77308
testcase.
Bootstrapped and regression-tested OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57.
ChangeLog:
2019-07-24 Wilco Dijkstra <wdijkstr@arm.com>
* config/arm/thumb2.md (thumb2_movsi_insn): Fix load/store low reg.
* config/arm/vfp.md (thumb2_movsi_vfp): Likewise.
--
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 78a6ea0b10dab97ed6651ce62e99cfd7a81722ab..c7000d0772a7e5887b6d05be188e8eb38c97217d 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -247,8 +247,8 @@ (define_insn "*thumb2_pop_single"
;; regs. The high register alternatives are not taken into account when
;; choosing register preferences in order to reflect their expense.
(define_insn "*thumb2_movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m")
- (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l*rk,m")
+ (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,l*rk"))]
"TARGET_THUMB2 && !TARGET_IWMMXT && !TARGET_HARD_FLOAT
&& ( register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
@@ -262,22 +262,20 @@ (define_insn "*thumb2_movsi_insn"
case 3: return \"mvn%?\\t%0, #%B1\";
case 4: return \"movw%?\\t%0, %1\";
case 5:
- case 6:
/* Cannot load it directly, split to load it via MOV / MOVT. */
if (!MEM_P (operands[1]) && arm_disable_literal_pool)
return \"#\";
return \"ldr%?\\t%0, %1\";
- case 7:
- case 8: return \"str%?\\t%1, %0\";
+ case 6: return \"str%?\\t%1, %0\";
default: gcc_unreachable ();
}
}
- [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,load_4,store_4,store_4")
- (set_attr "length" "2,4,2,4,4,4,4,4,4")
+ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mvn_imm,mov_imm,load_4,store_4")
+ (set_attr "length" "2,4,2,4,4,4,4")
(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
- (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*")
- (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")]
+ (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no")
+ (set_attr "pool_range" "*,*,*,*,*,4094,*")
+ (set_attr "neg_pool_range" "*,*,*,*,*,0,*")]
)
(define_insn "tls_load_dot_plus_four"
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index e0aaa7b00bb41c046da4531a293e123c94e8b9a4..b59dd6b71d228e042feda3a3a06d81dd01d200da 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -258,8 +258,8 @@ (define_insn "*arm_movsi_vfp"
;; is chosen with length 2 when the instruction is predicated for
;; arm_restrict_it.
(define_insn "*thumb2_movsi_vfp"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
- (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*UvTu,*t"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l*rk,m,*t, r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,l*rk, r,*t,*t,*UvTu,*t"))]
"TARGET_THUMB2 && TARGET_HARD_FLOAT
&& ( s_register_operand (operands[0], SImode)
|| s_register_operand (operands[1], SImode))"
@@ -275,32 +275,30 @@ (define_insn "*thumb2_movsi_vfp"
case 4:
return \"movw%?\\t%0, %1\";
case 5:
- case 6:
/* Cannot load it directly, split to load it via MOV / MOVT. */
if (!MEM_P (operands[1]) && arm_disable_literal_pool)
return \"#\";
return \"ldr%?\\t%0, %1\";
- case 7:
- case 8:
+ case 6:
return \"str%?\\t%1, %0\";
- case 9:
+ case 7:
return \"vmov%?\\t%0, %1\\t%@ int\";
- case 10:
+ case 8:
return \"vmov%?\\t%0, %1\\t%@ int\";
- case 11:
+ case 9:
return \"vmov%?.f32\\t%0, %1\\t%@ int\";
- case 12: case 13:
+ case 10: case 11:
return output_move_vfp (operands);
default:
gcc_unreachable ();
}
"
[(set_attr "predicable" "yes")
- (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
- (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
- (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
- (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
- (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
+ (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no")
+ (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
+ (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4")
+ (set_attr "pool_range" "*,*,*,*,*,4094,*,*,*,*,1018,*")
+ (set_attr "neg_pool_range" "*,*,*,*,*, 0,*,*,*,*,1008,*")]
)