This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PR 62173, re-shuffle insns for RTL loop invariant hoisting


On 11/02/15 14:21, Kenneth Zadeck wrote:
On 02/11/2015 06:20 AM, Jiong Wang wrote:
2014-12-19 15:21 GMT+00:00 Kenneth Zadeck <zadeck@naturalbridge.com>:
however, since i am a nice person ....

loop-invariant solves the DF_UD_CHAIN which builds a data structure that
connects each use with all of the defs that reach it.   I believe that this
is the opposite of what you want here.

if you really need this, you need to also turn on the DF_DU_CHAIN which
builds the opposite structure.    Both structures can be space hogs, but
they are both turned on in other places in the compiler so it is unlikely to
be an issue.
Exactly, Thanks, Kenneth.

This approach works from my experiment and look much better than
previous REG_NOTE approach.
while it do have one problem. We need the UD/DU chain built before we
do insn re-shuffling.
While after re-shuffling, UD chain needs update, otherwise, the later
"check_dependecies" in find_invariant_insn may fail.

although we have re-shuffle instruction 1 into 2, the later check
still using old UD info, thus
decide instruction 2 is not iv.

1: regA <- vfp + regB
2: regA <- vfp + const

my current fix is to insert those re-shuffled insn into a table named
"vfp_const_iv", then skip those
dependencies check  for them as they don't have any dependencies.
You now are beginning to understand why Mark Wegman and I invented SSA
form almost 30 years ago!!!!

Indeed, thanks.

attachment is the new draft patch, pass x86-84 bootstrap
(actually it will not affect x86-64, because of addressing mode),
while failed on aarch64 bootstrap during stage2/3 binary comparision,
there must be some glitches needs to be fixed.

Will clean up later after I finished my upcoming long flight and what I
am seeking now is whether the general thoughts, code logic & framework, in this
patch is acceptable to the community?

personally, I think this version is much better than previous version.
new added code integrated with existed code in a more natural way. no use
of unsafe REG_NOTE info which is not updated in this pass.

and from AArch64 gcc bootstrap, 239 new loop invariant found by this
re-shuffling. so, this small improvement on rtl loop invariant pass do have
some value.

please review and give comments.

Thanks.

2015-02-11  Jiong Wang  <jiong.wang@arm.com>

gcc/
  * loop-invariant.c (find_defs): Enable DF_DU_CHAIN build.
  (vfp_const_iv): New hash table.
  (expensive_addr_check_p): New boolean.
  (init_inv_motion_data): Initialize new variables.
  (free_inv_motion_data): Release hash table.
  (create_new_invariant): Set cheap_address to false for iv in
  vfp_const_iv table.
  (find_invariant_insn): Skip dependencies check for iv in vfp_const_iv table.
  (use_for_single_du): New function.
  (reshuffle_insn_with_vfp): Likewise.
  (find_invariants_bb): Call reshuffle_insn_with_vfp.
gcc/testsuite/
  * gcc.dg/pr62173.c: New testcase.
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index f79b497..b1c4760 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -203,6 +203,8 @@ typedef struct invariant *invariant_p;
 /* The invariants.  */
 
 static vec<invariant_p> invariants;
+static hash_table <pointer_hash <rtx_insn> > *vfp_const_iv;
+static bool need_expensive_addr_check_p;
 
 /* Check the size of the invariant table and realloc if necessary.  */
 
@@ -695,7 +697,7 @@ find_defs (struct loop *loop)
 
   df_remove_problem (df_chain);
   df_process_deferred_rescans ();
-  df_chain_add_problem (DF_UD_CHAIN);
+  df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
   df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
   df_analyze_loop (loop);
   check_invariant_table_size ();
@@ -742,6 +744,9 @@ create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on,
 	 See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html .  */
       inv->cheap_address = address_cost (SET_SRC (set), word_mode,
 					 ADDR_SPACE_GENERIC, speed) < 3;
+
+      if (need_expensive_addr_check_p && vfp_const_iv->find (insn))
+	inv->cheap_address = false;
     }
   else
     {
@@ -952,7 +957,8 @@ find_invariant_insn (rtx_insn *insn, bool always_reached, bool always_executed)
     return;
 
   depends_on = BITMAP_ALLOC (NULL);
-  if (!check_dependencies (insn, depends_on))
+  if (!vfp_const_iv->find (insn)
+      && !check_dependencies (insn, depends_on))
     {
       BITMAP_FREE (depends_on);
       return;
@@ -1007,6 +1013,143 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
   record_uses (insn);
 }

+/*  Find the single use of def of insn. At the same time,
+    make sure def of insn is the single def which reach the use.  */
+    
+static rtx_insn *
+use_for_single_du (rtx_insn *insn)
+{
+  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
+  df_ref def;
+
+  FOR_EACH_INSN_INFO_DEF (def, insn_info)
+    {
+      struct df_link *chain = DF_REF_CHAIN (def);
+
+      if (!chain
+	  || chain->next)
+	return NULL;
+
+      df_ref use = chain->ref;
+      chain = DF_REF_CHAIN (use);
+
+      if (!chain
+	  || chain->next)
+	return NULL;
+
+      return DF_REF_INSN (use);
+    }
+
+  return NULL;
+}
+
+/* This function also try to transform
+
+     RA <- fixed_reg + RC
+     RD <- MEM (RA + const_offset)
+
+   into:
+
+     RA <- fixed_reg + const_offset
+     RD <- MEM (RA + RC) <- pos0
+
+  If use of RA in the second insn is the single use, and the define of
+  RA in the first insn is the single def reach the second insn.
+
+  After this change, the first instruction is loop invariant.  */
+
+static void
+reshuffle_insn_with_vfp (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (!set
+      || GET_CODE (SET_SRC (set)) != PLUS)
+    return;
+
+  rtx dest = SET_DEST (set);
+  rtx op0 = XEXP (SET_SRC (set), 0);
+  rtx op1 = XEXP (SET_SRC (set), 1);
+  rtx non_vfp_op = op1;
+
+  if (op1 == frame_pointer_rtx
+      || op1 == stack_pointer_rtx
+      || op1 == virtual_stack_vars_rtx)
+    {
+      non_vfp_op = op0;
+      std::swap (op0, op1);
+    }
+
+  if (GET_CODE (op1) == SIGN_EXTEND
+      || GET_CODE (op1) == ZERO_EXTEND)
+    op1 = XEXP (op1, 0);
+
+  if (!(REG_P (dest) && REG_P (op0) && REG_P (op1)
+	&& (op0 == frame_pointer_rtx
+	    || op0 == stack_pointer_rtx
+	    || op0 == virtual_stack_vars_rtx)))
+    return;
+
+  rtx_insn *use_insn;
+
+  if (!(use_insn = use_for_single_du (insn)))
+    return;
+
+  rtx u_set = single_set (use_insn);
+
+  if (!(u_set && (REG_P (SET_DEST (u_set)) || MEM_P (SET_DEST (u_set)))))
+    return;
+
+  rtx mem_addr;
+
+  if (REG_P (SET_DEST (u_set)))
+    mem_addr = SET_SRC (u_set);
+  else
+    mem_addr = SET_DEST (u_set);
+
+  if (GET_CODE (mem_addr) == ZERO_EXTEND
+      || GET_CODE (mem_addr) == SIGN_EXTEND
+      || GET_CODE (mem_addr) == TRUNCATE)
+    mem_addr = XEXP (mem_addr, 0);
+
+  if (!MEM_P (mem_addr)
+      || GET_CODE (XEXP (mem_addr, 0)) != PLUS)
+    return;
+
+  rtx *mem_plus_loc = &XEXP (mem_addr, 0);
+  rtx u_op0 = XEXP (XEXP (mem_addr, 0), 0);
+  rtx u_op1 = XEXP (XEXP (mem_addr, 0), 1);
+
+  if (!(REG_P (u_op0) && CONST_INT_P (u_op1)
+	&& REGNO (dest) == REGNO (u_op0)))
+    return;
+
+  rtx new_src = plus_constant (GET_MODE (op0), op0, INTVAL (u_op1));
+  validate_change (insn, &SET_SRC (set), new_src, 1);
+  new_src = gen_rtx_PLUS (GET_MODE (u_op0), u_op0, non_vfp_op);
+  validate_change (use_insn, mem_plus_loc, new_src, 1);
+
+  if (apply_change_group ())
+    {
+      rtx_insn **slot = vfp_const_iv->find_slot (insn, INSERT);
+
+      if (*slot)
+	gcc_unreachable ();
+      else
+	*slot = insn;
+
+      need_expensive_addr_check_p = true;
+
+      if (dump_file)
+	fprintf (dump_file,
+		 "\nRe-associate insn %d and %d for later"
+		 " RTL loop invariant hoisting.\n",
+		 INSN_UID (insn), INSN_UID (use_insn));
+    }
+
+  return;
+}
+
 /* Finds invariants in basic block BB.  ALWAYS_REACHED is true if the
    basic block is always executed.  ALWAYS_EXECUTED is true if the basic
    block is always executed, unless the program ends due to a function
@@ -1022,6 +1147,8 @@ find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
       if (!NONDEBUG_INSN_P (insn))
 	continue;
 
+      reshuffle_insn_with_vfp (insn);
+
       find_invariants_insn (insn, always_reached, always_executed);
 
       if (always_reached
@@ -1647,6 +1774,9 @@ move_invariants (struct loop *loop)
 static void
 init_inv_motion_data (void)
 {
+  need_expensive_addr_check_p = false;
+  vfp_const_iv = new hash_table <pointer_hash <rtx_insn> > (4);
+
   actual_stamp = 1;
 
   invariants.create (100);
@@ -1682,6 +1812,8 @@ free_inv_motion_data (void)
       free (inv);
     }
   invariants.release ();
+
+  delete vfp_const_iv;
 }
 
 /* Move the invariants out of the LOOP.  */
diff --git a/gcc/testsuite/gcc.dg/pr62173.c b/gcc/testsuite/gcc.dg/pr62173.c
new file mode 100644
index 0000000..f059dee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr62173.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc64-*-* } } } */
+/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */
+
+void foo (char);
+
+void
+bar(int i)
+{
+  char A[10];
+  int d = 0;
+  while (i > 0)
+    A[d++] = i--;
+
+  while (d > 0)
+    foo (A[d--]);
+}
+
+/* { dg-final { scan-rtl-dump "Re-associate insn .* loop invariant hoisting" "loop2_invariant" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]