This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PR 62173, re-shuffle insns for RTL loop invariant hoisting


For PR 62173, the ideal solution is to resolve the problem in the tree-level ivopts pass.

However, apart from the tree-level issue, PR 62173 also exposed two RTL-level issues.
One of them is that it looks like we could improve RTL-level loop invariant hoisting by re-shuffling insns.

for Seb's testcase

/* Seb's testcase for PR 62173 (foo is not declared in this excerpt).  */
void bar(int i) {
  char A[10];
  int d = 0;
  /* Store i, i-1, ..., 1 into A[0], A[1], ...; d counts the stores.  */
  while (i > 0)
  A[d++] = i--;

  /* Pass A[d], A[d-1], ..., A[1] to foo; this is the access whose
     address computation is discussed below.  */
  while (d > 0)
  foo(A[d--]);
}

the insn sequence that calculates A[i]'s address looks like:

(insn 76 75 77 22 (set (reg/f:DI 109)
  (plus:DI (reg/f:DI 64 sfp)
  (reg:DI 108 [ i ]))) seb-pop.c:8 84 {*adddi3_aarch64}
  (expr_list:REG_DEAD (reg:DI 108 [ i ])
  (nil)))
(insn 77 76 78 22 (set (reg:SI 110 [ D.2633 ])
  (zero_extend:SI (mem/j:QI (plus:DI (reg/f:DI 109)
  (const_int -16 [0xfffffffffffffff0])) [0 A S1 A8]))) seb-pop.c:8 76 {*zero_extendqisi2_aarch64}
  (expr_list:REG_DEAD (reg/f:DI 109)
  (nil)))

For most RISC architectures, however, reg + reg addressing is typical, so we could re-shuffle
the instruction sequence into the following:

(insn 96 94 97 22 (set (reg/f:DI 129)
  (plus:DI (reg/f:DI 64 sfp)
  (const_int -16 [0xfffffffffffffff0]))) seb-pop.c:8 84 {*adddi3_aarch64}
  (nil))
(insn 97 96 98 22 (set (reg:DI 130 [ i ])
  (sign_extend:DI (reg/v:SI 97 [ i ]))) seb-pop.c:8 70 {*extendsidi2_aarch64}
  (expr_list:REG_DEAD (reg/v:SI 97 [ i ])
  (nil)))
(insn 98 97 99 22 (set (reg:SI 131 [ D.2633 ])
  (zero_extend:SI (mem/j:QI (plus:DI (reg/f:DI 129)
  (reg:DI 130 [ i ])) [0 A S1 A8]))) seb-pop.c:8 76 {*zero_extendqisi2_aarch64}
  (expr_list:REG_DEAD (reg:DI 130 [ i ])
  (expr_list:REG_DEAD (reg/f:DI 129)
  (nil))))

which means re-associating the constant immediate with the virtual frame pointer.

transform

     RA <- fixed_reg + RC
     RD <- MEM (RA + const_offset)

  into:

     RA <- fixed_reg + const_offset
     RD <- MEM (RA + RC)

Then RA <- fixed_reg + const_offset is actually loop invariant, so the later
RTL GCSE PRE pass can catch it and do the hoisting, and thus ameliorate what the
tree-level ivopts pass could not sort out.

This patch only tries to re-shuffle instructions within a single basic block that
is an inner loop, since such loops are performance critical.

I am reusing the loop info in fwprop because loop info is available there and fwprop runs before
GCSE.

Verified on aarch64 and mips64: the array base address is hoisted out of the loop.

bootstrap ok on x86-64 and aarch64.

comments?

thanks.

gcc/
  PR62173
  fwprop.c (prepare_for_gcse_pre): New function.
  (fwprop_done): Call it.
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 377b33c..b2a5918 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -1399,6 +1399,133 @@ forward_propagate_into (df_ref use)
   return false;
 }
 
+/* Hoisting loop-invariant computations out of hot loops has an
+   important impact on performance.  The RTL GCSE PRE pass can detect
+   more hoisting opportunities if we re-shuffle the instructions so
+   that a fixed register is associated with the constant offset.
+
+   For each loop whose header and latch are the same basic block,
+   this function tries to transform
+
+     RA <- RB_fixed + RC
+     RD <- MEM (RA + const_offset)
+
+   into:
+
+     RA <- RB_fixed + const_offset
+     RD <- MEM (RA + RC)
+
+   when a following insn's MEM (possibly wrapped in an extension or
+   truncation) uses RA and that insn has a REG_DEAD note for RA.
+   RB_fixed is frame_pointer_rtx, stack_pointer_rtx or
+   virtual_stack_vars_rtx; the first insn then becomes loop invariant.  */
+
+static void
+prepare_for_gcse_pre ()
+{
+  struct loop *loop;
+
+  if (! current_loops)
+    return;
+
+  FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
+    {
+      if (loop && loop->header && loop->latch
+	  && loop->header->index == loop->latch->index)
+	{
+	  rtx_insn *insn, *next_insn;
+	  rtx single_set1, single_set2, old_dest;
+	  rtx op0, op0_;
+	  rtx op1, op1_;
+	  rtx inner;
+	  rtx *mem_plus_loc;
+
+	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, loop->header->index);
+	  /* Look for a first insn that is a single set from a PLUS.  */
+	  FOR_BB_INSNS (bb, insn)
+	    {
+	      if (! NONDEBUG_INSN_P (insn))
+		continue;
+
+	      single_set1 = single_set (insn);
+
+	      if (! single_set1
+		  || GET_CODE (SET_SRC (single_set1)) != PLUS)
+		continue;
+
+	      old_dest = SET_DEST (single_set1);
+	      op0 = XEXP (SET_SRC (single_set1), 0);
+	      op1 = XEXP (SET_SRC (single_set1), 1);
+	      /* Canonicalize so that a fixed register, if any, is in OP0.  */
+	      if (op1 == frame_pointer_rtx
+		  || op1 == stack_pointer_rtx
+		  || op1 == virtual_stack_vars_rtx)
+		std::swap (op0, op1);
+	      /* Require reg <- fixed_reg + reg, with the fixed reg in OP0.  */
+	      if (! (REG_P (old_dest) && REG_P (op0) && REG_P (op1)
+		     && (op0 == frame_pointer_rtx
+			 || op0 == stack_pointer_rtx
+			 || op0 == virtual_stack_vars_rtx)))
+		continue;
+	      /* Scan the following real insns for a MEM based on OLD_DEST.  */
+	      if (! (next_insn = next_real_insn (insn)))
+		break;
+
+	      do
+		{
+		  if (DEBUG_INSN_P (next_insn))
+		    continue;
+		  /* NOTE(review): skipped insns are not checked for uses of OLD_DEST.  */
+		  single_set2 = single_set (next_insn);
+
+		  if (!single_set2 || ! REG_P (SET_DEST (single_set2)))
+		    continue;
+
+		  inner = SET_SRC (single_set2);
+		  /* Look through an extension or truncation of the MEM.  */
+		  if (GET_CODE (inner) == ZERO_EXTEND
+		      || GET_CODE (inner) == SIGN_EXTEND
+		      || GET_CODE (inner) == TRUNCATE)
+		    inner = XEXP (inner, 0);
+
+		  if (! MEM_P (inner)
+		      || GET_CODE (XEXP (inner, 0)) != PLUS)
+		    continue;
+
+		  mem_plus_loc = &XEXP (inner, 0);
+		  op0_ = XEXP (XEXP (inner, 0), 0);
+		  op1_ = XEXP (XEXP (inner, 0), 1);
+		  /* The address must be OLD_DEST + const_int, in OP1's mode.  */
+		  if (REG_P (op0_) && CONST_INT_P (op1_)
+		      && rtx_equal_p (op0_, old_dest)
+		      && GET_MODE (op0_) == GET_MODE (op1))
+		    {
+		      rtx new_src;
+		      /* Rewrite only if NEXT_INSN has a REG_DEAD note for it.  */
+		      if (find_regno_note (next_insn, REG_DEAD,
+					   REGNO (old_dest)))
+			{
+			  new_src = plus_constant (GET_MODE (op0), op0,
+						   INTVAL (op1_));
+			  validate_change (insn, &SET_SRC (single_set1),
+					   new_src, 1);
+			  new_src = gen_rtx_PLUS (GET_MODE (op0_), op0_, op1);
+			  validate_change (next_insn, mem_plus_loc, new_src, 1);
+			  if (apply_change_group () && dump_file)
+			    fprintf (dump_file,
+				     "\nRe-associate insn %d and %d for later"
+				     " RTL loop invariant hoisting.\n",
+				     INSN_UID (insn), INSN_UID (next_insn));
+			}
+		      break;
+		    }
+		} while ((next_insn = next_real_insn (next_insn))
+			 && bb == BLOCK_FOR_INSN (next_insn));
+	    }
+	}
+    }
+}
+
 
 static void
 fwprop_init (void)
@@ -1424,6 +1551,7 @@ fwprop_init (void)
 static void
 fwprop_done (void)
 {
+  prepare_for_gcse_pre ();
   loop_optimizer_finalize ();
 
   use_def_ref.release ();

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]