This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

loop.c patch: do some copy propagation


One of our customers wants better optimization for testcases like the
following, where the loop counter is a global variable:

int i;
struct
{
    int a;
    int b[20];
} a[10];
void foo ()
{
	for (i = 0; i < 10; i++)
		a[i].a = 5;
}

The current gcc can hoist the loads and stores to i out of the loop, but
the register that is used as replacement isn't recognized as a biv, since
it isn't incremented directly.  Instead, we have a sequence where the
loop counter is copied into a temporary, the temporary is incremented, and
then stored back into the loop counter.  The patch below tries to optimize
this by doing some copy propagation if we can determine that a certain
register is only used to hold a copy of the hoisted memory location.

Currently, this only works thanks to flag_rerun_loop_opt; the new registers
created by load_mems are ignored by the subsequent loop pass since their
numbers are too high.  This is something I'm investigating.  Also, unrolling
doesn't work very well yet for loops like this, since we don't recognize the
initial value yet.  I'll submit a patch for this soon.

One drawback of this patch is that it could potentially be very expensive
if there are many such temporaries.  However, that seems unlikely to me.

Here are the results for the program above, as a before - after diff
(target arm-elf):

foo:					foo:
	ldr	r3, .L9				ldr	r3, .L9
	mov	ip, sp				mov	ip, sp
	mov	r2, #0				mov	r2, #0
	stmfd	sp!, {fp, ip, lr, pc}		stmfd	sp!, {fp, ip, lr, pc}
	mov	r1, #5		      |		mov	r0, #5
	sub	fp, ip, #4			sub	fp, ip, #4
	mov	ip, r3		      <
	str	r2, [r3, #0]			str	r2, [r3, #0]
	ldr	r0, .L9+4	      |		ldr	r1, .L9+4
.L6:					.L6:
	add	r3, r2, r2, asl #1    <
	add	r2, r2, #1			add	r2, r2, #1
	rsb	r3, r3, r3, asl #3    <
	cmp	r2, #9				cmp	r2, #9
	str	r1, [r0, r3, asl #2]  |		str	r0, [r1], #84
	ble	.L6				ble	.L6
	str	r2, [ip, #0]	      |		str	r2, [r3, #0]
	ldmea	fp, {fp, sp, pc}		ldmea	fp, {fp, sp, pc}
.L9:					.L9:
	.word	i				.word	i
	.word	a				.word	a

An earlier version of this patch was tested with bootstrap & make check
on i586-linux.

Bernd

	* Makefile.in: Update dependencies.
	* loop.c: Include "basic-block.h".
	(try_copy_prop, replace_loop_reg): New functions.
	
Index: loop.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/loop.c,v
retrieving revision 1.195
diff -c -p -r1.195 loop.c
*** loop.c	1999/11/01 23:19:44	1.195
--- loop.c	1999/11/05 13:55:25
*************** Boston, MA 02111-1307, USA.  */
*** 41,46 ****
--- 41,47 ----
  #include "obstack.h"
  #include "function.h"
  #include "expr.h"
+ #include "basic-block.h"
  #include "insn-config.h"
  #include "insn-flags.h"
  #include "regs.h"
*************** static void load_mems_and_recount_loop_r
*** 337,342 ****
--- 338,345 ----
  static void load_mems PROTO((rtx, rtx, rtx, rtx));
  static int insert_loop_mem PROTO((rtx *, void *));
  static int replace_loop_mem PROTO((rtx *, void *));
+ static int replace_loop_reg PROTO((rtx *, void *));
+ static void try_copy_prop PROTO((rtx, rtx, rtx, rtx, int));
  static int replace_label PROTO((rtx *, void *));
  
  typedef struct rtx_and_int {
*************** load_mems (scan_start, end, loop_top, st
*** 9703,9868 ****
    rtx p;
    rtx label = NULL_RTX;
    rtx end_label = NULL_RTX;
  
!   if (loop_mems_idx > 0) 
!     {
!       /* Nonzero if the next instruction may never be executed.  */
!       int next_maybe_never = 0;
  
!       /* Check to see if it's possible that some instructions in the
! 	 loop are never executed.  */
!       for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top); 
! 	   p != NULL_RTX && !maybe_never; 
! 	   p = next_insn_in_loop (p, scan_start, end, loop_top))
  	{
! 	  if (GET_CODE (p) == CODE_LABEL)
  	    maybe_never = 1;
! 	  else if (GET_CODE (p) == JUMP_INSN
! 		   /* If we enter the loop in the middle, and scan
! 		      around to the beginning, don't set maybe_never
! 		      for that.  This must be an unconditional jump,
! 		      otherwise the code at the top of the loop might
! 		      never be executed.  Unconditional jumps are
! 		      followed a by barrier then loop end.  */
! 		   && ! (GET_CODE (p) == JUMP_INSN 
! 			 && JUMP_LABEL (p) == loop_top
! 			 && NEXT_INSN (NEXT_INSN (p)) == end
! 			 && simplejump_p (p)))
! 	    {
! 	      if (!condjump_p (p))
! 		/* Something complicated.  */
! 		maybe_never = 1;
! 	      else
! 		/* If there are any more instructions in the loop, they
! 		   might not be reached.  */
! 		next_maybe_never = 1; 
! 	    } 
! 	  else if (next_maybe_never)
! 	    maybe_never = 1;
! 	}
  
!       /* Actually move the MEMs.  */
!       for (i = 0; i < loop_mems_idx; ++i) 
  	{
! 	  int written = 0;
! 	  rtx reg;
! 	  rtx mem = loop_mems[i].mem;
! 	  rtx mem_list_entry;
! 
! 	  if (MEM_VOLATILE_P (mem) 
! 	      || invariant_p (XEXP (mem, 0)) != 1)
! 	    /* There's no telling whether or not MEM is modified.  */
! 	    loop_mems[i].optimize = 0;
! 
! 	  /* Go through the MEMs written to in the loop to see if this
! 	     one is aliased by one of them.  */
! 	  mem_list_entry = loop_store_mems;
! 	  while (mem_list_entry)
! 	    {
! 	      if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
! 		written = 1;
! 	      else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
! 					mem, rtx_varies_p))
! 		{
! 		  /* MEM is indeed aliased by this store.  */
! 		  loop_mems[i].optimize = 0;
! 		  break;
! 		}
! 	      mem_list_entry = XEXP (mem_list_entry, 1);
  	    }
  	  
! 	  /* If this MEM is written to, we must be sure that there
! 	     are no reads from another MEM that aliases this one.  */ 
! 	  if (loop_mems[i].optimize && written)
! 	    {
! 	      int j;
  
! 	      for (j = 0; j < loop_mems_idx; ++j)
! 		{
! 		  if (j == i)
! 		    continue;
! 		  else if (true_dependence (mem,
! 					    VOIDmode,
! 					    loop_mems[j].mem,
! 					    rtx_varies_p))
! 		    {
! 		      /* It's not safe to hoist loop_mems[i] out of
! 			 the loop because writes to it might not be
! 			 seen by reads from loop_mems[j].  */
! 		      loop_mems[i].optimize = 0;
! 		      break;
! 		    }
  		}
  	    }
  
! 	  if (maybe_never && may_trap_p (mem))
! 	    /* We can't access the MEM outside the loop; it might
! 	       cause a trap that wouldn't have happened otherwise.  */
! 	    loop_mems[i].optimize = 0;
  	  
! 	  if (!loop_mems[i].optimize)
! 	    /* We thought we were going to lift this MEM out of the
! 	       loop, but later discovered that we could not.  */
! 	    continue;
  
! 	  /* Allocate a pseudo for this MEM.  We set REG_USERVAR_P in
! 	     order to keep scan_loop from moving stores to this MEM
! 	     out of the loop just because this REG is neither a
! 	     user-variable nor used in the loop test.  */
! 	  reg = gen_reg_rtx (GET_MODE (mem));
! 	  REG_USERVAR_P (reg) = 1;
! 	  loop_mems[i].reg = reg;
! 
! 	  /* Now, replace all references to the MEM with the
! 	     corresponding pesudos.  */
! 	  for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top);
! 	       p != NULL_RTX;
! 	       p = next_insn_in_loop (p, scan_start, end, loop_top))
  	    {
! 	      rtx_and_int ri;
  	      ri.r = p;
  	      ri.i = i;
  	      for_each_rtx (&p, replace_loop_mem, &ri);
  	    }
  
! 	  if (!apply_change_group ())
! 	    /* We couldn't replace all occurrences of the MEM.  */
! 	    loop_mems[i].optimize = 0;
! 	  else
! 	    {
! 	      rtx set;
! 
! 	      /* Load the memory immediately before START, which is
! 		 the NOTE_LOOP_BEG.  */
! 	      set = gen_move_insn (reg, mem);
! 	      emit_insn_before (set, start);
  
! 	      if (written)
! 		{
! 		  if (label == NULL_RTX)
! 		    {
! 		      /* We must compute the former
! 			 right-after-the-end label before we insert
! 			 the new one.  */
! 		      end_label = next_label (end);
! 		      label = gen_label_rtx ();
! 		      emit_label_after (label, end);
! 		    }
  
! 		  /* Store the memory immediately after END, which is
! 		   the NOTE_LOOP_END.  */
! 		  set = gen_move_insn (copy_rtx (mem), reg); 
! 		  emit_insn_after (set, label);
! 		}
  
! 	      if (loop_dump_stream)
! 		{
! 		  fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
! 			   REGNO (reg), (written ? "r/w" : "r/o"));
! 		  print_rtl (loop_dump_stream, mem);
! 		  fputc ('\n', loop_dump_stream);
! 		}
  	    }
  	}
      }
  
--- 9706,9907 ----
    rtx p;
    rtx label = NULL_RTX;
    rtx end_label = NULL_RTX;
+   /* Nonzero if the next instruction may never be executed.  */
+   int next_maybe_never = 0;
+   int last_max_reg = max_reg_num ();
  
!   if (loop_mems_idx == 0)
!     return;
  
!   /* Check to see if it's possible that some instructions in the
!      loop are never executed.  */
!   for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top); 
!        p != NULL_RTX && !maybe_never; 
!        p = next_insn_in_loop (p, scan_start, end, loop_top))
!     {
!       if (GET_CODE (p) == CODE_LABEL)
! 	maybe_never = 1;
!       else if (GET_CODE (p) == JUMP_INSN
! 	       /* If we enter the loop in the middle, and scan
! 		  around to the beginning, don't set maybe_never
! 		  for that.  This must be an unconditional jump,
! 		  otherwise the code at the top of the loop might
! 		  never be executed.  Unconditional jumps are
! 		  followed a by barrier then loop end.  */
! 	       && ! (GET_CODE (p) == JUMP_INSN 
! 		     && JUMP_LABEL (p) == loop_top
! 		     && NEXT_INSN (NEXT_INSN (p)) == end
! 		     && simplejump_p (p)))
  	{
! 	  if (!condjump_p (p))
! 	    /* Something complicated.  */
  	    maybe_never = 1;
! 	  else
! 	    /* If there are any more instructions in the loop, they
! 	       might not be reached.  */
! 	    next_maybe_never = 1; 
! 	} 
!       else if (next_maybe_never)
! 	maybe_never = 1;
!     }
  
!   /* Actually move the MEMs.  */
!   for (i = 0; i < loop_mems_idx; ++i) 
!     {
!       regset copies;
!       int written = 0;
!       rtx reg;
!       rtx mem = loop_mems[i].mem;
!       rtx mem_list_entry;
! 
!       if (MEM_VOLATILE_P (mem) 
! 	  || invariant_p (XEXP (mem, 0)) != 1)
! 	/* There's no telling whether or not MEM is modified.  */
! 	loop_mems[i].optimize = 0;
! 
!       /* Go through the MEMs written to in the loop to see if this
! 	 one is aliased by one of them.  */
!       mem_list_entry = loop_store_mems;
!       while (mem_list_entry)
  	{
! 	  if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
! 	    written = 1;
! 	  else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
! 				    mem, rtx_varies_p))
! 	    {
! 	      /* MEM is indeed aliased by this store.  */
! 	      loop_mems[i].optimize = 0;
! 	      break;
  	    }
+ 	  mem_list_entry = XEXP (mem_list_entry, 1);
+ 	}
  	  
!       /* If this MEM is written to, we must be sure that there
! 	 are no reads from another MEM that aliases this one.  */ 
!       if (loop_mems[i].optimize && written)
! 	{
! 	  int j;
  
! 	  for (j = 0; j < loop_mems_idx; ++j)
! 	    {
! 	      if (j == i)
! 		continue;
! 	      else if (true_dependence (mem,
! 					VOIDmode,
! 					loop_mems[j].mem,
! 					rtx_varies_p))
! 		{
! 		  /* It's not safe to hoist loop_mems[i] out of
! 		     the loop because writes to it might not be
! 		     seen by reads from loop_mems[j].  */
! 		  loop_mems[i].optimize = 0;
! 		  break;
  		}
  	    }
+ 	}
  
!       if (maybe_never && may_trap_p (mem))
! 	/* We can't access the MEM outside the loop; it might
! 	   cause a trap that wouldn't have happened otherwise.  */
! 	loop_mems[i].optimize = 0;
  	  
!       if (!loop_mems[i].optimize)
! 	/* We thought we were going to lift this MEM out of the
! 	   loop, but later discovered that we could not.  */
! 	continue;
! 
!       copies = ALLOCA_REG_SET ();
! 
!       /* Allocate a pseudo for this MEM.  We set REG_USERVAR_P in
! 	 order to keep scan_loop from moving stores to this MEM
! 	 out of the loop just because this REG is neither a
! 	 user-variable nor used in the loop test.  */
!       reg = gen_reg_rtx (GET_MODE (mem));
!       REG_USERVAR_P (reg) = 1;
!       loop_mems[i].reg = reg;
! 
!       /* Now, replace all references to the MEM with the
! 	 corresponding pesudos.  */
!       for (p = next_insn_in_loop (scan_start, scan_start, end, loop_top);
! 	   p != NULL_RTX;
! 	   p = next_insn_in_loop (p, scan_start, end, loop_top))
! 	{
! 	  rtx_and_int ri;
! 	  rtx set;
  
! 	  if (GET_RTX_CLASS (GET_CODE (p)) == 'i')
  	    {
! 	      /* See if this copies the mem into a register that isn't
! 		 modified afterwards.  We'll try to do copy propagation
! 		 a little further on.  */
! 	      set = single_set (p);
! 	      if (set
! 		  /* @@@ This test is _way_ too conservative.  */
! 		  && ! maybe_never
! 		  && GET_CODE (SET_DEST (set)) == REG
! 		  && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
! 		  && REGNO (SET_DEST (set)) < last_max_reg
! 		  && VARRAY_INT (n_times_set, REGNO (SET_DEST (set))) == 1
! 		  && rtx_equal_p (SET_SRC (set), loop_mems[i].mem))
! 		SET_REGNO_REG_SET (copies, REGNO (SET_DEST (set)));
  	      ri.r = p;
  	      ri.i = i;
  	      for_each_rtx (&p, replace_loop_mem, &ri);
  	    }
  
! 	  if (GET_CODE (p) == CODE_LABEL
! 	      || GET_CODE (p) == JUMP_INSN)
! 	    maybe_never = 1;
! 	}
  
!       if (!apply_change_group ())
! 	/* We couldn't replace all occurrences of the MEM.  */
! 	loop_mems[i].optimize = 0;
!       else
! 	{
! 	  int j;
! 	  rtx set;
  
! 	  /* Load the memory immediately before START, which is
! 	     the NOTE_LOOP_BEG.  */
! 	  set = gen_move_insn (reg, mem);
! 	  emit_insn_before (set, start);
! 
! 	  if (written)
! 	    {
! 	      if (label == NULL_RTX)
! 		{
! 		  /* We must compute the former
! 		     right-after-the-end label before we insert
! 		     the new one.  */
! 		  end_label = next_label (end);
! 		  label = gen_label_rtx ();
! 		  emit_label_after (label, end);
! 		}
! 
! 	      /* Store the memory immediately after END, which is
! 		 the NOTE_LOOP_END.  */
! 	      set = gen_move_insn (copy_rtx (mem), reg); 
! 	      emit_insn_after (set, label);
! 	    }
  
! 	  if (loop_dump_stream)
! 	    {
! 	      fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
! 		       REGNO (reg), (written ? "r/w" : "r/o"));
! 	      print_rtl (loop_dump_stream, mem);
! 	      fputc ('\n', loop_dump_stream);
  	    }
+ 
+ 	  /* Attempt a bit of copy propagation.  This helps untangle the
+ 	     data flow, and enables {basic,general}_induction_var to find
+ 	     more bivs/givs.  */
+ 	  EXECUTE_IF_SET_IN_REG_SET
+ 	    (copies, FIRST_PSEUDO_REGISTER, j,
+ 	     {
+ 	       try_copy_prop (scan_start, loop_top, end, loop_mems[i].reg, j);
+ 	     });
+ 	  FREE_REG_SET (copies);
  	}
      }
  
*************** load_mems (scan_start, end, loop_top, st
*** 9890,9895 ****
--- 9929,9975 ----
      }
  }
  
+ /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
+    There must be exactly one insn that sets this pseudo; it will be
+    deleted if all replacements succeed.
+    The arguments SCAN_START, LOOP_TOP and END are as in load_mems.  */
+ static void
+ try_copy_prop (scan_start, loop_top, end, replacement, regno)
+      rtx scan_start, loop_top, end, replacement;
+      int regno;
+ {
+   rtx init_insn = 0;
+   rtx insn;
+   for (insn = next_insn_in_loop (scan_start, scan_start, end, loop_top);
+        insn != NULL_RTX;
+        insn = next_insn_in_loop (insn, scan_start, end, loop_top))
+     {
+       rtx set;
+       rtx array[3] = { regno_reg_rtx[regno], replacement, insn };
+       if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ 	continue;
+       set = single_set (insn);
+       if (set
+ 	  && GET_CODE (SET_DEST (set)) == REG
+ 	  && REGNO (SET_DEST (set)) == regno)
+ 	{
+ 	  if (init_insn)
+ 	    abort ();
+ 	  init_insn = insn;
+ 	}
+       for_each_rtx (&insn, replace_loop_reg, array);
+     }
+   if (! init_insn)
+     abort ();
+   if (apply_change_group ())
+     {
+       PUT_CODE (init_insn, NOTE);
+       NOTE_LINE_NUMBER (init_insn) = NOTE_INSN_DELETED;
+       if (loop_dump_stream)
+ 	fprintf (loop_dump_stream, "  Replaced reg %d.\n", regno);
+     }
+ }
+ 
  /* Replace MEM with its associated pseudo register.  This function is
     called from load_mems via for_each_rtx.  DATA is actually an
     rtx_and_int * describing the instruction currently being scanned
*************** replace_loop_mem (mem, data)
*** 9934,9939 ****
--- 10014,10041 ----
  
    /* Actually replace the MEM.  */
    validate_change (insn, mem, loop_mems[i].reg, 1);
+ 
+   return 0;
+ }
+ 
+ /* Replace one register with another.  Called through for_each_rtx; PX points
+    to the rtx being scanned.  DATA is actually an array of three rtx's; the
+    first one is the one to be replaced, and the second one the replacement.
+    The third one is the current insn.  */
+ 
+ static int
+ replace_loop_reg (px, data)
+      rtx *px;
+      void *data;
+ {
+   rtx x = *px;
+   rtx *array = (rtx *)data;
+ 
+   if (x == NULL_RTX)
+     return 0;
+ 
+   if (x == array[0])
+     validate_change (array[2], px, array[1], 1);
  
    return 0;
  }
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/egcs/gcc/Makefile.in,v
retrieving revision 1.333
diff -c -p -r1.333 Makefile.in
*** Makefile.in	1999/11/02 15:48:26	1.333
--- Makefile.in	1999/11/05 13:55:29
*************** profile.o : profile.c $(CONFIG_H) system
*** 1548,1555 ****
     gcov-io.h $(TREE_H) output.h $(REGS_H) toplev.h function.h insn-config.h \
     ggc.h
  loop.o : loop.c $(CONFIG_H) system.h $(RTL_H) flags.h loop.h insn-config.h \
!    insn-flags.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) real.h \
!    function.h toplev.h varray.h
  unroll.o : unroll.c $(CONFIG_H) system.h $(RTL_H) insn-config.h function.h \
     integrate.h $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) loop.h toplev.h varray.h
  flow.o : flow.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) flags.h insn-config.h \
--- 1548,1555 ----
     gcov-io.h $(TREE_H) output.h $(REGS_H) toplev.h function.h insn-config.h \
     ggc.h
  loop.o : loop.c $(CONFIG_H) system.h $(RTL_H) flags.h loop.h insn-config.h \
!    insn-flags.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) basic-block.h \
!    real.h function.h toplev.h varray.h
  unroll.o : unroll.c $(CONFIG_H) system.h $(RTL_H) insn-config.h function.h \
     integrate.h $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) loop.h toplev.h varray.h
  flow.o : flow.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) flags.h insn-config.h \


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]