This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[COMMITTED][AArch64] Improve TLS Descriptor pattern to release RTL loop IV opt


James Greenhalgh writes:

> On Tue, Jul 28, 2015 at 02:12:36PM +0100, Jiong Wang wrote:
>> 
>> The instruction sequences for preparing argument for TLS descriptor
>> runtime resolver and the later function call to resolver can actually be
>> hoisted out of the loop.
>> 
>> Currently we can't because we have exposed the hard register X0 as the
>> destination of "set".  GCC's RTL data flow infrastructure will skip, or
>> make very conservative assumptions, when a hard register is involved,
>> and thus some loop IV opportunities are missed.
>> 
>> This patch adds another "tlsdesc_small_pseudo_<mode>" pattern, and avoids
>> exposing x0 to GCC generic code.
>> 
>> Generally, we define a new register class FIXED_R0 which only contains register
>> 0, so the instruction sequence generated from the newly added pattern is the same
>> as for tlsdesc_small_<mode>, while operand 0 is wrapped in a pseudo register that
>> RTL IV opt can handle.
>> 
>> Ideally, we should allow operand 0 to be any pseudo register, but then
>> we can't model the override of x0 caused by the function call which is
>> hidden by the UNSPEC.
>> 
>> So here we restrict operand 0 to be x0, so that the override of x0 can be
>> reflected to GCC.
>> 
>> OK for trunk?
>
> OK.
>
> Thanks,
> James

When I was about to commit this patch, I realized I needed to apply the
same trick as I did at

  https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01653.html

so that the included testcase works well on any of the tiny, small, and large code models.

Committed the following patch:

Index: gcc/ChangeLog
===================================================================
--- gcc/ChangeLog	(revision 226682)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,16 @@
+2015-08-06    Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+	      Jiong Wang  <jiong.wang@arm.com>
+
+	* config/aarch64/aarch64.md (tlsdesc_small_pseudo_<mode>): New pattern.
+	* config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0.
+	(REG_CLASS_NAMES): Likewise.
+	(REG_CLASS_CONTENTS): Likewise.
+	* config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise.
+	(aarch64_register_move_cost): Likewise.
+	(aarch64_load_symref_appropriately): Invoke the new added pattern if
+	possible.
+	* config/aarch64/constraints.md (Uc0): New constraint.
+
 2015-08-06  Jiong Wang  <jiong.wang@arm.com>
 
 	* config/aarch64/constraints.md (Usf): Add the test of
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	(revision 226681)
+++ gcc/config/aarch64/aarch64.c	(working copy)
@@ -1048,12 +1048,26 @@
 
 	gcc_assert (mode == Pmode || mode == ptr_mode);
 
-	/* In ILP32, the got entry is always of SImode size.  Unlike
-	   small GOT, the dest is fixed at reg 0.  */
-	if (TARGET_ILP32)
-	  emit_insn (gen_tlsdesc_small_si (imm));
+	if (can_create_pseudo_p ())
+	  {
+	    rtx reg = gen_reg_rtx (mode);
+
+	    if (TARGET_ILP32)
+	      emit_insn (gen_tlsdesc_small_pseudo_si (imm, reg));
+	    else
+	      emit_insn (gen_tlsdesc_small_pseudo_di (imm, reg));
+
+	    emit_use (reg);
+	  }
 	else
-	  emit_insn (gen_tlsdesc_small_di (imm));
+	  {
+	    /* In ILP32, the got entry is always of SImode size.  Unlike
+	       small GOT, the dest is fixed at reg 0.  */
+	    if (TARGET_ILP32)
+	      emit_insn (gen_tlsdesc_small_si (imm));
+	    else
+	      emit_insn (gen_tlsdesc_small_di (imm));
+	  }
 	tp = aarch64_load_tp (NULL);
 
 	if (mode != Pmode)
Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	(revision 226681)
+++ gcc/config/aarch64/aarch64.md	(working copy)
@@ -4549,6 +4549,23 @@
   [(set_attr "type" "call")
    (set_attr "length" "16")])
 
+;; The same as tlsdesc_small_<mode> except that we don't expose hard register X0
+;; as the destination of set as it will cause trouble for RTL loop iv.
+;; RTL loop iv will abort ongoing optimization once it finds there is hard reg
+;; as destination of set.  This pattern thus could help these tlsdesc
+;; instruction sequences hoisted out of loop.
+(define_insn "tlsdesc_small_pseudo_<mode>"
+  [(set (match_operand:PTR 1 "register_operand" "=r")
+        (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
+		   UNSPEC_TLSDESC))
+   (clobber (reg:DI R0_REGNUM))
+   (clobber (reg:DI LR_REGNUM))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_TLS_DESC"
+  "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
+  [(set_attr "type" "call")
+   (set_attr "length" "16")])
+
 (define_insn "stack_tie"
   [(set (mem:BLK (scratch))
 	(unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
Index: gcc/testsuite/ChangeLog
===================================================================
--- gcc/testsuite/ChangeLog	(revision 226682)
+++ gcc/testsuite/ChangeLog	(working copy)
@@ -1,5 +1,9 @@
 2015-08-06  Jiong Wang  <jiong.wang@arm.com>
 
+	* gcc.target/aarch64/tlsdesc_hoist.c: New testcase.
+
+2015-08-06  Jiong Wang  <jiong.wang@arm.com>
+
 	* gcc.target/aarch64/noplt_3.c: New testcase.
 
 2015-08-06  Jiong Wang  <jiong.wang@arm.com>
Index: gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c
===================================================================
--- gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c	(working copy)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target tls_native } */
+/* { dg-options "-O2 -fpic -fdump-rtl-loop2_invariant" } */
+/* { dg-skip-if "-mcmodel=large, no support for -fpic" { aarch64-*-* }  { "-mcmodel=large" } { "" } } */
+
+int cal (int, int);
+__thread int tls_data;
+
+int
+foo (int bound)
+{
+  int i = 0;
+  int sum = 0;
+
+  for (i; i < bound; i++)
+    sum = cal (sum, tls_data);
+
+  return sum;
+}
+
+/* Insn sequences for TLS descriptor should be hoisted out of the loop.  */
+/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]