PATCH for alias sets/stack slots, again

Mark Mitchell mark@markmitchell.com
Wed Feb 17 16:37:00 GMT 1999


Folks --

  Here's another go at the alias set/stack slot problem I mentioned
earlier.  In that mail, I suggested that we CLOBBER the stack slot
before reusing it.  That still seems the right eventual solution to
me.  However, there are infrastructure difficulties: reload moves
things around without adjusting CLOBBERs appropriately, and so we
remove all CLOBBERs after reload.  Apparently, reload can extend the
life of a REG past a CLOBBER for that REG, fooling later passes into
thinking the REG is dead.  It seems to me that this is, while correct,
not the optimal approach: we should instead fix reload to adjust the
CLOBBERs appropriately.  One (hokey) option would be to have reload
only delete CLOBBERs of REGs, and not of MEMs.  Additionally, the
memory clobbered would have to be marked MEM_VOLATILE_P to keep dead
store elimination from eliminating it.  And, there are a couple of
other nasty details, too obscure to mention here.

  So, this patch restores correctness by (possibly) increasing the
amount of stack space required for a function; we only reuse a stack
slot if the thing to be placed there has the same type (well, alias
set) as the thing that was there before.  This fixes the problems that
I'm seeing, and does not seem, after some experimentation, to blow up
stack usage too badly.

  Jeff, may I check this in?

-- 
Mark Mitchell 			mark@markmitchell.com
Mark Mitchell Consulting	http://www.markmitchell.com

Wed Feb 17 16:36:05 1999  Mark Mitchell  <mark@markmitchell.com>

	* cse.c (dump_class): New function.
	(invalidate_memory): Fix typo in comment.
	* function.c (temp_slot): Add an alias set field.  
	(assign_stack_temp_for_type): Only reuse slots if they will
	have the same alias set as before.
	(combine_temp_slots): Don't combine if -fstrict-aliasing;
	that's unsafe.
	* rtl.c (copy_rtx): Copy all the flags (in particular,
	MEM_SCALAR_P).
	
Index: cse.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/cse.c,v
retrieving revision 1.62
diff -c -p -r1.62 cse.c
*** cse.c	1999/01/27 01:42:10	1.62
--- cse.c	1999/02/18 00:17:24
*************** static void cse_check_loop_start PROTO((
*** 663,671 ****
--- 663,691 ----
  static void cse_set_around_loop	PROTO((rtx, rtx, rtx));
  static rtx cse_basic_block	PROTO((rtx, rtx, struct branch_path *, int));
  static void count_reg_usage	PROTO((rtx, int *, rtx, int));
+ static void dump_class          PROTO((struct table_elt*));
  
  extern int rtx_equal_function_value_matters;
  
+ /* Dump the expressions in the equivalence class indicated by CLASSP.
+    This function is used only for debugging.  */
+ static void
+ dump_class (classp)
+      struct table_elt *classp;
+ {
+   struct table_elt *elt;
+ 
+   fprintf (stderr, "Equivalence chain for ");
+   print_rtl (stderr, classp->exp);
+   fprintf (stderr, ": \n");
+   
+   for (elt = classp->first_same_value; elt; elt = elt->next_same_value)
+     {
+       print_rtl (stderr, elt->exp);
+       fprintf (stderr, "\n");
+     }
+ }
+ 
  /* Return an estimate of the cost of computing rtx X.
     One use is in cse, to decide which expression to keep in the hash table.
     Another is in rtl generation, to pick the cheapest way to multiply.
*************** cse_insn (insn, libcall_insn)
*** 7821,7827 ****
    prev_insn = insn;
  }
  
! /* Remove from the ahsh table all expressions that reference memory.  */
  static void
  invalidate_memory ()
  {
--- 7841,7847 ----
    prev_insn = insn;
  }
  
! /* Remove from the hash table all expressions that reference memory.  */
  static void
  invalidate_memory ()
  {
Index: function.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/function.c,v
retrieving revision 1.73
diff -c -p -r1.73 function.c
*** function.c	1999/02/10 23:10:39	1.73
--- function.c	1999/02/18 00:17:28
*************** struct temp_slot
*** 394,399 ****
--- 394,410 ----
    int align;
    /* The size, in units, of the slot.  */
    HOST_WIDE_INT size;
+   /* The alias set for the slot.  If the alias set is zero, we don't
+      know anything about the alias set of the slot.  We must only
+      reuse a slot if it is assigned an object of the same alias set.
+      Otherwise, the rest of the compiler may assume that the new use
+      of the slot cannot alias the old use of the slot, which is
+      false.  If the slot has alias set zero, then we can't reuse the
+      slot at all, since we have no idea what alias set may have been
+      imposed on the memory.  For example, if the stack slot is the
+      call frame for an inlined function, we have no idea what alias
+      sets will be assigned to various pieces of the call frame.  */
+   int alias_set;
    /* The value of `sequence_rtl_expr' when this temporary is allocated.  */
    tree rtl_expr;
    /* Non-zero if this temporary is currently in use.  */
*************** assign_outer_stack_local (mode, size, al
*** 875,882 ****
     with this flag.  KEEP is 2 if we allocate a longer term temporary,
     whose lifetime is controlled by CLEANUP_POINT_EXPRs.  KEEP is 3
     if we are to allocate something at an inner level to be treated as
!    a variable in the block (e.g., a SAVE_EXPR).  */
  
  static rtx
  assign_stack_temp_for_type (mode, size, keep, type)
       enum machine_mode mode;
--- 886,895 ----
     with this flag.  KEEP is 2 if we allocate a longer term temporary,
     whose lifetime is controlled by CLEANUP_POINT_EXPRs.  KEEP is 3
     if we are to allocate something at an inner level to be treated as
!    a variable in the block (e.g., a SAVE_EXPR).  
  
+    TYPE is the type that will be used for the stack slot.  */
+ 
  static rtx
  assign_stack_temp_for_type (mode, size, keep, type)
       enum machine_mode mode;
*************** assign_stack_temp_for_type (mode, size, 
*** 885,890 ****
--- 898,904 ----
       tree type;
  {
    int align;
+   int alias_set;
    struct temp_slot *p, *best_p = 0;
  
    /* If SIZE is -1 it means that somebody tried to allocate a temporary
*************** assign_stack_temp_for_type (mode, size, 
*** 892,897 ****
--- 906,919 ----
    if (size == -1)
      abort ();
  
+   /* If we know the alias set for the memory that will be used, use
+      it.  If there's no TYPE, then we don't know anything about the
+      alias set for the memory.  */
+   if (type)
+     alias_set = get_alias_set (type);
+   else 
+     alias_set = 0;
+ 
    align = GET_MODE_ALIGNMENT (mode);
    if (mode == BLKmode)
      align = BIGGEST_ALIGNMENT;
*************** assign_stack_temp_for_type (mode, size, 
*** 907,912 ****
--- 929,936 ----
    for (p = temp_slots; p; p = p->next)
      if (p->align >= align && p->size >= size && GET_MODE (p->slot) == mode
  	&& ! p->in_use
+ 	&& (!flag_strict_aliasing
+ 	    || (alias_set && p->alias_set == alias_set))
  	&& (best_p == 0 || best_p->size > p->size
  	    || (best_p->size == p->size && best_p->align > p->align)))
        {
*************** assign_stack_temp_for_type (mode, size, 
*** 924,930 ****
        /* If there are enough aligned bytes left over, make them into a new
  	 temp_slot so that the extra bytes don't get wasted.  Do this only
  	 for BLKmode slots, so that we can be sure of the alignment.  */
!       if (GET_MODE (best_p->slot) == BLKmode)
  	{
  	  int alignment = best_p->align / BITS_PER_UNIT;
  	  HOST_WIDE_INT rounded_size = CEIL_ROUND (size, alignment);
--- 948,958 ----
        /* If there are enough aligned bytes left over, make them into a new
  	 temp_slot so that the extra bytes don't get wasted.  Do this only
  	 for BLKmode slots, so that we can be sure of the alignment.  */
!       if (GET_MODE (best_p->slot) == BLKmode
! 	  /* We can't split slots if -fstrict-aliasing because the
! 	     information about the alias set for the new slot will be
! 	     lost.  */
! 	  && !flag_strict_aliasing)
  	{
  	  int alignment = best_p->align / BITS_PER_UNIT;
  	  HOST_WIDE_INT rounded_size = CEIL_ROUND (size, alignment);
*************** assign_stack_temp_for_type (mode, size, 
*** 966,971 ****
--- 994,1000 ----
        p->slot = assign_stack_local (mode, size, align);
  
        p->align = align;
+       p->alias_set = alias_set;
  
        /* The following slot size computation is necessary because we don't
  	 know the actual size of the temporary slot until assign_stack_local
*************** combine_temp_slots ()
*** 1090,1095 ****
--- 1119,1129 ----
    struct temp_slot *p, *q;
    struct temp_slot *prev_p, *prev_q;
    int num_slots;
+ 
+   /* We can't combine slots, because the information about which slot
+      is in which alias set will be lost.  */
+   if (flag_strict_aliasing)
+     return;
  
    /* If there are a lot of temp slots, don't do anything unless 
       high levels of optimizaton.  */
Index: rtl.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/rtl.c,v
retrieving revision 1.26
diff -c -p -r1.26 rtl.c
*** rtl.c	1999/01/12 01:31:09	1.26
--- rtl.c	1999/02/18 00:17:28
*************** copy_rtx (orig)
*** 319,329 ****
      }
  
    copy = rtx_alloc (code);
!   PUT_MODE (copy, GET_MODE (orig));
!   copy->in_struct = orig->in_struct;
!   copy->volatil = orig->volatil;
!   copy->unchanging = orig->unchanging;
!   copy->integrated = orig->integrated;
    
    format_ptr = GET_RTX_FORMAT (GET_CODE (copy));
  
--- 319,342 ----
      }
  
    copy = rtx_alloc (code);
! 
!   /* Copy the various flags, and other information.  We assume that
!      all fields need copying, and then clear the fields that should
!      not be copied.  That is the sensible default behavior, and forces
!      us to explicitly document why we are *not* copying a flag.  */
!   bcopy (orig, copy, sizeof (struct rtx_def) - sizeof (rtunion));
! 
!   /* We do not copy the USED flag, which is used as a mark bit during
!      walks over the RTL.  */
!   copy->used = 0;
! 
!   /* We do not copy JUMP, CALL, or FRAME_RELATED for INSNs.  */
!   if (GET_RTX_CLASS (code) == 'i')
!     {
!       copy->jump = 0;
!       copy->call = 0;
!       copy->frame_related = 0;
!     }
    
    format_ptr = GET_RTX_FORMAT (GET_CODE (copy));
  


More information about the Gcc-patches mailing list