This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Improve prepare_shrink_wrap to sink more instructions


On 19/09/14 21:43, Jeff Law wrote:

On 09/15/14 08:33, Jiong Wang wrote:
Jeff,

   thanks, I partially understand your meaning here.

    take the function "ira_implicitly_set_insn_hard_regs" in ira-lives.c
for example,

    when generating address rtl, gcc will automatically generate "const"
operator to prefix
    the address expression, like the following. so a simple CONSTANT_P
check is enough in
    case there is no embedded register.

    (insn 309 310 308 3 (set (reg:DI 44 r15 [orig:94 ivtmp.674 ] [94])
          (const:DI (plus:DI (symbol_ref:DI ("recog_data") [flags 0x40]
<var_decl 0x2b2c2ff91510 recog_data>)
                  (const_int 480 [0x1e0])))) -1


    but for architecture like aarch64, the following instruction
sequences to forming address
    may be generated

(insn 73 14 74 4 (set (reg/f:DI 20 x20 [99])
          (high:DI (symbol_ref:DI ("global_a") [flags 0xc0]  <var_decl
0x7ff755a60900 stats>))) 35 {*movdi_aarch64}
       (expr_list:REG_EQUIV (high:DI (symbol_ref:DI ("global_a") [flags
0xc0]  <var_decl 0x7ff755a60900 stats>))
          (nil)))

(insn 17 30 25 5 (set (reg/f:DI 4 x4 [83])
          (lo_sum:DI (reg/f:DI 20 x20 [99])
              (symbol_ref:DI ("global_a") [flags 0xc0]  <var_decl
0x7ff755a60900 stats>))) {add_losym_di}
       (expr_list:REG_EQUIV (symbol_ref:DI ("global_a") [flags 0xc0]
<var_decl 0x7ff755a60900 stats>)
          (nil)))

   while CONSTANT_P could not catch the latter lo_sum case, as the
RTX_CLASS of lo_sum is RTX_OBJ not RTX_CONST_OBJ,
Hmm, it's been ~15 years since I regularly worked on a target that uses
HIGH/LO_SUM, I thought we wrapped the LO_SUM expression inside a CONST
as well, but reading the docs for CONST, that clearly isn't the case.

Sorry for that.  Can you (re) send your current patch for this for review?

patch attached.

please review, thanks.

gcc/
  * shrink-wrap.c (move_insn_for_shrink_wrap): Add further check when !REG_P (src) to
  release more instruction sink opportunities.

gcc/testsuite/
  * gcc.target/aarch64/shrink_wrap_symbol_ref_1.c: New testcase.
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index fd24135..739e957 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "bb-reorder.h"
 #include "shrink-wrap.h"
 #include "regcprop.h"
+#include "rtl-iter.h"

 #ifdef HAVE_simple_return

@@ -169,7 +170,9 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
 {
   rtx set, src, dest;
   bitmap live_out, live_in, bb_uses, bb_defs;
-  unsigned int i, dregno, end_dregno, sregno, end_sregno;
+  unsigned int i, dregno, end_dregno;
+  unsigned int sregno = FIRST_PSEUDO_REGISTER;
+  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
   basic_block next_block;
   edge live_edge;

@@ -179,7 +182,34 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
     return false;
   src = SET_SRC (set);
   dest = SET_DEST (set);
-  if (!REG_P (dest) || !REG_P (src)
+
+  if (!REG_P (src))
+    {
+      unsigned int reg_num = 0;
+      unsigned int nonconstobj_num = 0;
+      rtx src_inner = NULL_RTX;
+
+      subrtx_var_iterator::array_type array;
+      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
+	{
+	  rtx x = *iter;
+	  if (REG_P (x))
+	    {
+	      reg_num++;
+	      src_inner = x;
+	    }
+	  else if (!CONSTANT_P (x) && OBJECT_P (x))
+	    nonconstobj_num++;
+	}
+
+      if (nonconstobj_num > 0
+	  || reg_num > 1)
+	src = NULL_RTX;
+      else if (reg_num == 1)
+	src = src_inner;
+    }
+
+  if (!REG_P (dest) || src == NULL_RTX
       /* STACK or FRAME related adjustment might be part of prologue.
 	 So keep them in the entry block.  */
       || dest == stack_pointer_rtx
@@ -188,10 +218,13 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
     return false;

   /* Make sure that the source register isn't defined later in BB.  */
-  sregno = REGNO (src);
-  end_sregno = END_REGNO (src);
-  if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
-    return false;
+  if (REG_P (src))
+    {
+      sregno = REGNO (src);
+      end_sregno = END_REGNO (src);
+      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
+	return false;
+    }

   /* Make sure that the destination register isn't referenced later in BB.  */
   dregno = REGNO (dest);
diff --git a/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c b/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c
new file mode 100644
index 0000000..ad2e588
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */
+
+extern char *asm_out_file;
+extern void default_elf_asm_output_ascii (char *, const char *, int);
+
+void
+assemble_string (const char *p, int size)
+{
+  int pos = 0;
+  int maximum = 2000;
+
+  while (pos < size)
+    {
+      int thissize = size - pos;
+
+      if (thissize > maximum)
+	thissize = maximum;
+
+      default_elf_asm_output_ascii (asm_out_file, p, thissize);;
+
+      pos += thissize;
+      p += thissize;
+    }
+}
+
+/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue"  } } */
+/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]