PATCH for alias set/stack temporary bug

Mark Mitchell mark@markmitchell.com
Wed Feb 10 13:16:00 GMT 1999


This patch fixes a rather serious bug involving alias sets and stack
temporaries.  Consider:

  void f(void*);

  struct S {
    int i;
    double d;
    int j;
  };

  struct T {
    double d;
    int i;
    int j;
  };

  void g()
  {
    {
      S s;
      f(&s);
      s.i = 3;
    }
    {
      T t;
      f(&t);
      t.d = 2.7;
    }
  }

The compiler will cleverly reuse the same stack space for `s' and `t'.
Unfortunately, the first few bytes of `s' and `t' are of different
types, and are therefore assigned different alias sets.  Therefore,
the compiler "knows" that `s.i' and `t.d' do not alias, and feels free
to make transformations depending on this fact.  In this example,
nothing untoward occurs, but I have real code involving many inline
functions where this results in bad code.

The solution is to clobber a stack slot (using alias set zero) right
before reusing it.  That lets the compiler know that uses of the
(conceptually) new temporary should not be intermingled with uses of
the original.

Much of the changes to function.c below are just changes in
indentation.  The change to rtl.c is somewhat orthogonal: copy_rtx was
not copying all of the fields that it should have been copying.

Jeff, may I check this in?

-- 
Mark Mitchell 			mark@markmitchell.com
Mark Mitchell Consulting	http://www.markmitchell.com

Wed Feb 10 10:00:29 1999  Mark Mitchell  <mark@markmitchell.com>

	* cse.c (dump_class): New function.
	(invalidate_memory): Fix typo in comment.
	* function.c (assign_stack_temp): Copy and clobber slots before
	reusing them.
	* rtl.c (copy_rtx): Copy all the flags (in particular,
	MEM_SCALAR_P).
	
Index: cse.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/cse.c,v
retrieving revision 1.62
diff -c -p -r1.62 cse.c
*** cse.c	1999/01/27 01:42:10	1.62
--- cse.c	1999/02/10 17:50:41
*************** static void cse_check_loop_start PROTO((
*** 663,671 ****
--- 663,691 ----
  static void cse_set_around_loop	PROTO((rtx, rtx, rtx));
  static rtx cse_basic_block	PROTO((rtx, rtx, struct branch_path *, int));
  static void count_reg_usage	PROTO((rtx, int *, rtx, int));
+ static void dump_class          PROTO((struct table_elt*));
  
  extern int rtx_equal_function_value_matters;
  
+ /* Dump the expressions in the equivalence class indicated by CLASSP.
+    This function is used only for debugging.  */
+ static void
+ dump_class (classp)
+      struct table_elt *classp;
+ {
+   struct table_elt *elt;
+ 
+   fprintf (stderr, "Equivalence chain for ");
+   print_rtl (stderr, classp->exp);
+   fprintf (stderr, ": \n");
+   
+   for (elt = classp->first_same_value; elt; elt = elt->next_same_value)
+     {
+       print_rtl (stderr, elt->exp);
+       fprintf (stderr, "\n");
+     }
+ }
+ 
  /* Return an estimate of the cost of computing rtx X.
     One use is in cse, to decide which expression to keep in the hash table.
     Another is in rtl generation, to pick the cheapest way to multiply.
*************** cse_insn (insn, libcall_insn)
*** 7821,7827 ****
    prev_insn = insn;
  }
  
! /* Remove from the ahsh table all expressions that reference memory.  */
  static void
  invalidate_memory ()
  {
--- 7841,7847 ----
    prev_insn = insn;
  }
  
! /* Remove from the hash table all expressions that reference memory.  */
  static void
  invalidate_memory ()
  {
Index: function.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/function.c,v
retrieving revision 1.70
diff -c -p -r1.70 function.c
*** function.c	1999/01/29 15:25:15	1.70
--- function.c	1999/02/10 17:50:45
*************** assign_stack_temp (mode, size, keep)
*** 856,862 ****
       HOST_WIDE_INT size;
       int keep;
  {
!   struct temp_slot *p, *best_p = 0;
  
    /* If SIZE is -1 it means that somebody tried to allocate a temporary
       of a variable size.  */
--- 856,864 ----
       HOST_WIDE_INT size;
       int keep;
  {
!   struct temp_slot *p;
!   /* Nonzero if we are reuse an existing MEM for the new slot.  */
!   int reusing_existing_slot_p = 0;
  
    /* If SIZE is -1 it means that somebody tried to allocate a temporary
       of a variable size.  */
*************** assign_stack_temp (mode, size, keep)
*** 867,937 ****
       exact size we require.  */
    for (p = temp_slots; p; p = p->next)
      if (p->size == size && GET_MODE (p->slot) == mode && ! p->in_use)
!       break;
  
    /* If we didn't find, one, try one that is larger than what we want.  We
       find the smallest such.  */
    if (p == 0)
!     for (p = temp_slots; p; p = p->next)
!       if (p->size > size && GET_MODE (p->slot) == mode && ! p->in_use
! 	  && (best_p == 0 || best_p->size > p->size))
! 	best_p = p;
! 
!   /* Make our best, if any, the one to use.  */
!   if (best_p)
!     {
!       /* If there are enough aligned bytes left over, make them into a new
! 	 temp_slot so that the extra bytes don't get wasted.  Do this only
! 	 for BLKmode slots, so that we can be sure of the alignment.  */
!       if (GET_MODE (best_p->slot) == BLKmode)
! 	{
! 	  int alignment = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
! 	  HOST_WIDE_INT rounded_size = CEIL_ROUND (size, alignment);
  
! 	  if (best_p->size - rounded_size >= alignment)
  	    {
! 	      p = (struct temp_slot *) oballoc (sizeof (struct temp_slot));
! 	      p->in_use = p->addr_taken = 0;
! 	      p->size = best_p->size - rounded_size;
! 	      p->base_offset = best_p->base_offset + rounded_size;
! 	      p->full_size = best_p->full_size - rounded_size;
! 	      p->slot = gen_rtx_MEM (BLKmode,
! 				     plus_constant (XEXP (best_p->slot, 0),
! 						    rounded_size));
! 	      p->address = 0;
! 	      p->rtl_expr = 0;
! 	      p->next = temp_slots;
! 	      temp_slots = p;
  
! 	      stack_slot_list = gen_rtx_EXPR_LIST (VOIDmode, p->slot,
! 						   stack_slot_list);
  
! 	      best_p->size = rounded_size;
! 	      best_p->full_size = rounded_size;
  	    }
  	}
  
!       p = best_p;
      }
! 	      
    /* If we still didn't find one, make a new temporary.  */
    if (p == 0)
      {
        HOST_WIDE_INT frame_offset_old = frame_offset;
  
        p = (struct temp_slot *) oballoc (sizeof (struct temp_slot));
! 
!       /* If the temp slot mode doesn't indicate the alignment,
! 	 use the largest possible, so no one will be disappointed.  */
        p->slot = assign_stack_local (mode, size, mode == BLKmode ? -1 : 0);
  
!       /* The following slot size computation is necessary because we don't
! 	 know the actual size of the temporary slot until assign_stack_local
! 	 has performed all the frame alignment and size rounding for the
! 	 requested temporary.  Note that extra space added for alignment
! 	 can be either above or below this stack slot depending on which
! 	 way the frame grows.  We include the extra space if and only if it
! 	 is above this slot.  */
  #ifdef FRAME_GROWS_DOWNWARD
        p->size = frame_offset_old - frame_offset;
  #else
--- 869,989 ----
       exact size we require.  */
    for (p = temp_slots; p; p = p->next)
      if (p->size == size && GET_MODE (p->slot) == mode && ! p->in_use)
!       {
! 	reusing_existing_slot_p = 1;
! 	break;
!       }
  
    /* If we didn't find, one, try one that is larger than what we want.  We
       find the smallest such.  */
    if (p == 0)
!     {
!       struct temp_slot *best_p = 0;
  
!       for (p = temp_slots; p; p = p->next)
! 	if (p->size > size && GET_MODE (p->slot) == mode && ! p->in_use
! 	    && (best_p == 0 || best_p->size > p->size))
! 	  best_p = p;
!       
!       /* Make our best, if any, the one to use.  */
!       if (best_p)
! 	{
! 	  /* Usually, we reuse BEST_P.  If we actually generate a new
! 	     MEM we clear this flag.  */
! 	  reusing_existing_slot_p = 1;
! 
! 	  /* If there are enough aligned bytes left over, make them
! 	     into a new temp_slot so that the extra bytes don't get
! 	     wasted.  Do this only for BLKmode slots, so that we can
! 	     be sure of the alignment.  */
! 	  if (GET_MODE (best_p->slot) == BLKmode)
  	    {
! 	      int alignment = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
! 	      HOST_WIDE_INT rounded_size = CEIL_ROUND (size, alignment);
! 
! 	      if (best_p->size - rounded_size >= alignment)
! 		{
! 		  /* Clobber the slot.  For the rationale, see the
! 		     comments under the REUSING_EXISTING_SLOT_P case
! 		     below.  (In this case, we're not reusing an
! 		     existing slot; we're splitting a piece off of
! 		     an existing slot, and using that piece.)  */
! 		  emit_insn (gen_rtx_CLOBBER (GET_MODE (best_p->slot), 
! 					      copy_rtx (best_p->slot)));
! 
! 		  p = (struct temp_slot *) oballoc (sizeof (struct temp_slot));
! 		  p->in_use = p->addr_taken = 0;
! 		  p->size = best_p->size - rounded_size;
! 		  p->base_offset = best_p->base_offset + rounded_size;
! 		  p->full_size = best_p->full_size - rounded_size;
! 		  p->slot = gen_rtx_MEM (BLKmode,
! 					 plus_constant (XEXP (best_p->slot, 0),
! 							rounded_size));
! 		  p->address = 0;
! 		  p->rtl_expr = 0;
! 		  p->next = temp_slots;
! 		  temp_slots = p;
! 
! 		  stack_slot_list = gen_rtx_EXPR_LIST (VOIDmode, p->slot,
! 						       stack_slot_list);
  
! 		  best_p->size = rounded_size;
! 		  best_p->full_size = rounded_size;
  
! 		  reusing_existing_slot_p = 0;
! 		}
  	    }
+ 
+ 	  p = best_p;
  	}
+     }
  
!   if (reusing_existing_slot_p)
!     {
!       /* We are going to reuse an existing temporary slot.  If one of
! 	 the MEMs gets an alias set, that should not affect the other
! 	 (conceptually new) temporary.  */
!       p->slot = copy_rtx (p->slot);
! 
!       /* Clear any MEM flags that may have been set from before; they
! 	 do not affect the new temporary.  */
!       RTX_UNCHANGING_P (p->slot) = 0;
!       MEM_IN_STRUCT_P (p->slot) = 0;
!       MEM_SCALAR_P (p->slot) = 0;
!       MEM_ALIAS_SET (p->slot) = 0;
! 
!       /* Consider what will happen if this slot has previously been
! 	 assigned one alias set, and is then assigned a new one.
! 	 Since the two alias sets are not the same, the compiler will
! 	 assume that they cannot conflict, and may perform incorrect
! 	 optimizations.  We therefore clobber the old value here,
! 	 using alias set zero, to indicate that this clobber conflicts
! 	 with any other read/write from this same temporary slot.
! 	 It's a shame that CLOBBER doesn't give us a way to say how
! 	 much memory is clobbered; for a BLKmode MEM this could
! 	 conceivably give the rest of the compiler a lot more
! 	 information about exactly what slot was being clobberred.  */
!       emit_insn (gen_rtx_CLOBBER (GET_MODE (p->slot), copy_rtx (p->slot)));
      }
!   
    /* If we still didn't find one, make a new temporary.  */
    if (p == 0)
      {
        HOST_WIDE_INT frame_offset_old = frame_offset;
  
        p = (struct temp_slot *) oballoc (sizeof (struct temp_slot));
!       
!       /* If the temp slot mode doesn't indicate the alignment, use the
! 	 largest possible, so no one will be disappointed.  */
        p->slot = assign_stack_local (mode, size, mode == BLKmode ? -1 : 0);
  
!       /* The following slot size computation is necessary because we
! 	 don't know the actual size of the temporary slot until
! 	 assign_stack_local has performed all the frame alignment and
! 	 size rounding for the requested temporary.  Note that extra
! 	 space added for alignment can be either above or below this
! 	 stack slot depending on which way the frame grows.  We
! 	 include the extra space if and only if it is above this slot.  */
  #ifdef FRAME_GROWS_DOWNWARD
        p->size = frame_offset_old - frame_offset;
  #else
*************** assign_stack_temp (mode, size, keep)
*** 971,982 ****
        p->keep = keep;
      }
  
-   /* We may be reusing an old slot, so clear any MEM flags that may have been
-      set from before.  */
-   RTX_UNCHANGING_P (p->slot) = 0;
-   MEM_IN_STRUCT_P (p->slot) = 0;
-   MEM_SCALAR_P (p->slot) = 0;
-   MEM_ALIAS_SET (p->slot) = 0;
    return p->slot;
  }
  
--- 1023,1028 ----
Index: rtl.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/rtl.c,v
retrieving revision 1.26
diff -c -p -r1.26 rtl.c
*** rtl.c	1999/01/12 01:31:09	1.26
--- rtl.c	1999/02/10 17:50:48
*************** copy_rtx (orig)
*** 319,329 ****
      }
  
    copy = rtx_alloc (code);
!   PUT_MODE (copy, GET_MODE (orig));
!   copy->in_struct = orig->in_struct;
!   copy->volatil = orig->volatil;
!   copy->unchanging = orig->unchanging;
!   copy->integrated = orig->integrated;
    
    format_ptr = GET_RTX_FORMAT (GET_CODE (copy));
  
--- 319,342 ----
      }
  
    copy = rtx_alloc (code);
! 
!   /* Copy the various flags, and other information.  We assume that
!      all fields need copying, and then clear the fields that should
!      not be copied.  That is the sensible default behavior, and forces
!      us to explicitly document why we are *not* copying a flag.  */
!   bcopy (orig, copy, sizeof (struct rtx_def) - sizeof (rtunion));
! 
!   /* We do not copy the USED flag, which is used as a mark bit during
!      walks over the RTL.  */
!   copy->used = 0;
! 
!   /* We do not copy JUMP, CALL, or FRAME_RELATED for INSNs.  */
!   if (GET_RTX_CLASS (code) == 'i')
!     {
!       copy->jump = 0;
!       copy->call = 0;
!       copy->frame_related = 0;
!     }
    
    format_ptr = GET_RTX_FORMAT (GET_CODE (copy));
  


More information about the Gcc-patches mailing list