[RFC] Handle memset in DSE (take 2)

Jakub Jelinek jakub@redhat.com
Mon Dec 15 13:26:00 GMT 2008


Hi!

On Fri, Dec 12, 2008 at 06:28:57PM +0100, Jakub Jelinek wrote:
> This (on top of http://gcc.gnu.org/ml/gcc-patches/2008-12/msg00751.html)
> is a step toward fixing PR31150 in RTL DSE.

As discussed on IRC, this updated patch uses the unsigned HOST_WIDE_INT
bitmask for positions_needed of <= HOST_BITS_PER_WIDE_INT bytes
long stores (the most common case, all non-BLKmode stores and also many
smaller BLKmode stores), but uses a real bitmap to track bytes that are not
needed for larger BLKmode stores.  It adds a couple of static inline
accessor functions which hide this implementation detail from the callers.

Bootstrapped/regtested on x86_64-linux.

2008-12-15  Jakub Jelinek  <jakub@redhat.com>

	* dse.c (struct store_info): Add is_large bool field, change
	positions_needed into a union of a bitmask and bitmap + count.
	(free_store_info): Free bitmap if is_large.
	(set_usage_bits): Don't look at stores where
	offset + width >= MAX_OFFSET.
	(set_position_unneeded, set_all_positions_unneeded,
	any_positions_needed_p, all_positions_needed_p): New static inline
	functions.
	(record_store): Handle BLKmode stores of CONST_INT, if
	MEM_SIZE is set on the MEM.  Use the new positions_needed
	accessor inlines.
	(replace_read): Handle reads from BLKmode CONST_INT stores.
	(check_mem_read_rtx): Use all_positions_needed_p function.
	(dse_step1): Free large positions_needed bitmaps and clear is_large.

	* dse.c (struct store_info): Change begin and end types to
	HOST_WIDE_INT.

	* dse.c (record_store): Fix check for unused store.

	* expr.c (block_clear_fn): No longer static.
	* expr.h (block_clear_fn): Declare.
	* dse.c (scan_insn): Memset and bzero can just read their
	arguments.

--- gcc/expr.c.jj	2008-11-20 14:26:03.000000000 +0100
+++ gcc/expr.c	2008-12-12 16:44:33.000000000 +0100
@@ -2698,7 +2698,7 @@ set_storage_via_libcall (rtx object, rtx
    for the function we use for block clears.  The first time FOR_CALL
    is true, we call assemble_external.  */
 
-static GTY(()) tree block_clear_fn;
+tree block_clear_fn;
 
 void
 init_block_clear_fn (const char *asmspec)
--- gcc/expr.h.jj	2008-12-11 11:40:19.000000000 +0100
+++ gcc/expr.h	2008-12-12 16:44:20.000000000 +0100
@@ -404,6 +404,7 @@ enum block_op_methods
   BLOCK_OP_TAILCALL
 };
 
+extern GTY(()) tree block_clear_fn;
 extern void init_block_move_fn (const char *);
 extern void init_block_clear_fn (const char *);
 
--- gcc/dse.c.jj	2008-11-27 17:08:26.000000000 +0100
+++ gcc/dse.c	2008-12-15 11:19:21.000000000 +0100
@@ -205,6 +205,9 @@ struct store_info 
   /* False means this is a clobber.  */
   bool is_set;
 
+  /* False if a single HOST_WIDE_INT bitmap is used for positions_needed.  */
+  bool is_large;
+
   /* The id of the mem group of the base address.  If rtx_varies_p is
      true, this is -1.  Otherwise, it is the index into the group
      table.  */
@@ -224,12 +227,26 @@ struct store_info 
 
   /* The offset of the first and byte before the last byte associated
      with the operation.  */
-  int begin, end;
+  HOST_WIDE_INT begin, end;
 
-  /* An bitmask as wide as the number of bytes in the word that
-     contains a 1 if the byte may be needed.  The store is unused if
-     all of the bits are 0.  */
-  unsigned HOST_WIDE_INT positions_needed;
+  union
+    {
+      /* A bitmask as wide as the number of bytes in the word that
+	 contains a 1 if the byte may be needed.  The store is unused if
+	 all of the bits are 0.  This is used if IS_LARGE is false.  */
+      unsigned HOST_WIDE_INT small_bitmask;
+
+      struct
+	{
+	  /* A bitmap with one bit per byte.  Cleared bit means the position
+	     is needed.  Used if IS_LARGE is true.  */
+	  bitmap bitmap;
+
+	  /* Number of set bits (i.e. unneeded bytes) in BITMAP.  If it is
+	     equal to END - BEGIN, the whole store is unused.  */
+	  int count;
+	} large;
+    } positions_needed;
 
   /* The next store info for this insn.  */
   struct store_info *next;
@@ -748,6 +765,8 @@ free_store_info (insn_info_t insn_info)
   while (store_info)
     {
       store_info_t next = store_info->next;
+      if (store_info->is_large)
+	BITMAP_FREE (store_info->positions_needed.large.bitmap);
       if (store_info->cse_base)
 	pool_free (cse_store_info_pool, store_info);
       else
@@ -899,7 +918,7 @@ set_usage_bits (group_info_t group, HOST
 {
   HOST_WIDE_INT i;
 
-  if ((offset > -MAX_OFFSET) && (offset < MAX_OFFSET))
+  if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET)
     for (i=offset; i<offset+width; i++)
       {
 	bitmap store1;
@@ -1161,6 +1180,75 @@ clear_rhs_from_active_local_stores (void
 }
 
 
+/* Mark the byte at offset POS from the beginning of store S_INFO as unneeded.  */
+
+static inline void
+set_position_unneeded (store_info_t s_info, int pos)
+{
+  if (__builtin_expect (s_info->is_large, false))
+    {
+      if (!bitmap_bit_p (s_info->positions_needed.large.bitmap, pos))
+	{
+	  s_info->positions_needed.large.count++;
+	  bitmap_set_bit (s_info->positions_needed.large.bitmap, pos);
+	}
+    }
+  else
+    s_info->positions_needed.small_bitmask
+      &= ~(((unsigned HOST_WIDE_INT) 1) << pos);
+}
+
+/* Mark the whole store S_INFO as unneeded.  */
+
+static inline void
+set_all_positions_unneeded (store_info_t s_info)
+{
+  if (__builtin_expect (s_info->is_large, false))
+    {
+      int pos, end = s_info->end - s_info->begin;
+      for (pos = 0; pos < end; pos++)
+	bitmap_set_bit (s_info->positions_needed.large.bitmap, pos);
+      s_info->positions_needed.large.count = end;
+    }
+  else
+    s_info->positions_needed.small_bitmask = (unsigned HOST_WIDE_INT) 0;
+}
+
+/* Return TRUE if any bytes from S_INFO store are needed.  */
+
+static inline bool
+any_positions_needed_p (store_info_t s_info)
+{
+  if (__builtin_expect (s_info->is_large, false))
+    return (s_info->positions_needed.large.count
+	    < s_info->end - s_info->begin);
+  else
+    return (s_info->positions_needed.small_bitmask
+	    != (unsigned HOST_WIDE_INT) 0);
+}
+
+/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
+   store are needed.  */
+
+static inline bool
+all_positions_needed_p (store_info_t s_info, int start, int width)
+{
+  if (__builtin_expect (s_info->is_large, false))
+    {
+      int end = start + width;
+      while (start < end)
+	if (bitmap_bit_p (s_info->positions_needed.large.bitmap, start++))
+	  return false;
+      return true;
+    }
+  else
+    {
+      unsigned HOST_WIDE_INT mask = lowpart_bitmask (width) << start;
+      return (s_info->positions_needed.small_bitmask & mask) == mask;
+    }
+}
+
+
 /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
    there is a candidate store, after adding it to the appropriate
    local store group if so.  */
@@ -1182,14 +1270,15 @@ record_store (rtx body, bb_info_t bb_inf
   if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
     return 0;
 
+  mem = SET_DEST (body);
+
   /* If this is not used, then this cannot be used to keep the insn
      from being deleted.  On the other hand, it does provide something
      that can be used to prove that another store is dead.  */
   store_is_unused
-    = (find_reg_note (insn_info->insn, REG_UNUSED, body) != NULL);
+    = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
 
   /* Check whether that value is a suitable memory location.  */
-  mem = SET_DEST (body);
   if (!MEM_P (mem))
     {
       /* If the set or clobber is unused, then it does not effect our
@@ -1208,15 +1297,26 @@ record_store (rtx body, bb_info_t bb_inf
 	    fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
 	  add_wild_read (bb_info);
 	  insn_info->cannot_delete = true;
+	  return 0;
 	}
-      else if (!store_is_unused)
-	{
-	  /* If the set or clobber is unused, then it does not effect our
-	     ability to get rid of the entire insn.  */
-	  insn_info->cannot_delete = true;
-	  clear_rhs_from_active_local_stores ();
+      /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
+	 as memset (addr, 0, 36);  */
+      else if (!MEM_SIZE (mem)
+	       || !CONST_INT_P (MEM_SIZE (mem))
+	       || GET_CODE (body) != SET
+	       || INTVAL (MEM_SIZE (mem)) <= 0
+	       || INTVAL (MEM_SIZE (mem)) > MAX_OFFSET
+	       || !CONST_INT_P (SET_SRC (body)))
+	{
+	  if (!store_is_unused)
+	    {
+	      /* If the set or clobber is unused, then it does not affect our
+		 ability to get rid of the entire insn.  */
+	      insn_info->cannot_delete = true;
+	      clear_rhs_from_active_local_stores ();
+	    }
+	  return 0;
 	}
-      return 0;
     }
 
   /* We can still process a volatile mem, we just cannot delete it.  */
@@ -1229,12 +1329,20 @@ record_store (rtx body, bb_info_t bb_inf
       return 0;
     }
 
-  width = GET_MODE_SIZE (GET_MODE (mem));
+  if (GET_MODE (mem) == BLKmode)
+    width = INTVAL (MEM_SIZE (mem));
+  else
+    {
+      width = GET_MODE_SIZE (GET_MODE (mem));
+      gcc_assert ((unsigned) width <= HOST_BITS_PER_WIDE_INT);
+    }
 
   if (spill_alias_set)
     {
       bitmap store1 = clear_alias_group->store1_p;
       bitmap store2 = clear_alias_group->store2_p;
+
+      gcc_assert (GET_MODE (mem) != BLKmode);
       
       if (bitmap_bit_p (store1, spill_alias_set))
 	bitmap_set_bit (store2, spill_alias_set);
@@ -1315,7 +1423,7 @@ record_store (rtx body, bb_info_t bb_inf
 	      && (GET_MODE (mem) == entry->mode))
 	    {
 	      del = true;
-	      s_info->positions_needed = (unsigned HOST_WIDE_INT) 0;
+	      set_all_positions_unneeded (s_info);
 	    }
 	  if (dump_file)
 	    fprintf (dump_file, "    trying spill store in insn=%d alias_set=%d\n",
@@ -1329,10 +1437,10 @@ record_store (rtx body, bb_info_t bb_inf
 	    fprintf (dump_file, "    trying store in insn=%d gid=%d[%d..%d)\n",
 		     INSN_UID (ptr->insn), s_info->group_id, 
 		     (int)s_info->begin, (int)s_info->end);
-	  for (i = offset; i < offset+width; i++)
-	    if (i >= s_info->begin && i < s_info->end)
-	      s_info->positions_needed
-		&= ~(((unsigned HOST_WIDE_INT) 1) << (i - s_info->begin));
+	  for (i = MAX (offset, s_info->begin);
+	       i < offset + width && i < s_info->end;
+	       i++)
+	    set_position_unneeded (s_info, i - s_info->begin);
 	}
       else if (s_info->rhs)
 	/* Need to see if it is possible for this store to overwrite
@@ -1348,9 +1456,9 @@ record_store (rtx body, bb_info_t bb_inf
       
       /* An insn can be deleted if every position of every one of
 	 its s_infos is zero.  */
-      if (s_info->positions_needed != (unsigned HOST_WIDE_INT) 0)
+      if (any_positions_needed_p (s_info))
 	del = false;
-      
+
       if (del)
 	{
 	  insn_info_t insn_to_delete = ptr;
@@ -1368,8 +1476,6 @@ record_store (rtx body, bb_info_t bb_inf
       ptr = next;
     }
   
-  gcc_assert ((unsigned) width <= HOST_BITS_PER_WIDE_INT);
-  
   /* Finish filling in the store_info.  */
   store_info->next = insn_info->store_rec;
   insn_info->store_rec = store_info;
@@ -1377,7 +1483,17 @@ record_store (rtx body, bb_info_t bb_inf
   store_info->alias_set = spill_alias_set;
   store_info->mem_addr = get_addr (XEXP (mem, 0));
   store_info->cse_base = base;
-  store_info->positions_needed = lowpart_bitmask (width);
+  if (width > HOST_BITS_PER_WIDE_INT)
+    {
+      store_info->is_large = true;
+      store_info->positions_needed.large.count = 0;
+      store_info->positions_needed.large.bitmap = BITMAP_ALLOC (NULL);
+    }
+  else
+    {
+      store_info->is_large = false;
+      store_info->positions_needed.small_bitmask = lowpart_bitmask (width);
+    }
   store_info->group_id = group_id;
   store_info->begin = offset;
   store_info->end = offset + width;
@@ -1580,7 +1696,9 @@ replace_read (store_info_t store_info, i
   /* To get here the read is within the boundaries of the write so
      shift will never be negative.  Start out with the shift being in
      bytes.  */
-  if (BYTES_BIG_ENDIAN)
+  if (store_mode == BLKmode)
+    shift = 0;
+  else if (BYTES_BIG_ENDIAN)
     shift = store_info->end - read_info->end;
   else
     shift = read_info->begin - store_info->begin;
@@ -1608,6 +1726,31 @@ replace_read (store_info_t store_info, i
   if (shift)
     read_reg = find_shift_sequence (access_size, store_info, read_info, shift,
     				    optimize_bb_for_speed_p (BLOCK_FOR_INSN (read_insn->insn)));
+  else if (store_mode == BLKmode)
+    {
+      /* The store is a memset (addr, const_val, const_size).  */
+      gcc_assert (CONST_INT_P (store_info->rhs));
+      store_mode = int_mode_for_mode (read_mode);
+      if (store_mode == BLKmode)
+	read_reg = NULL_RTX;
+      else if (store_info->rhs == const0_rtx)
+	read_reg = extract_low_bits (read_mode, store_mode, const0_rtx);
+      else if (GET_MODE_BITSIZE (store_mode) > HOST_BITS_PER_WIDE_INT)
+	read_reg = NULL_RTX;
+      else
+	{
+	  unsigned HOST_WIDE_INT c
+	    = INTVAL (store_info->rhs) & (BITS_PER_UNIT - 1);
+	  int shift = BITS_PER_UNIT;
+	  while (shift < HOST_BITS_PER_WIDE_INT)
+	    {
+	      c |= (c << shift);
+	      shift <<= 1;
+	    }
+	  read_reg = GEN_INT (trunc_int_for_mode (c, store_mode));
+	  read_reg = extract_low_bits (read_mode, store_mode, read_reg);
+	}
+    }
   else
     read_reg = extract_low_bits (read_mode, store_mode,
 				 copy_rtx (store_info->rhs));
@@ -1833,18 +1976,15 @@ check_mem_read_rtx (rtx *loc, void *data
 	      else 
 		{
 		  if (store_info->rhs
-		      && (offset >= store_info->begin)
-		      && (offset + width <= store_info->end))
-		    {
-		      unsigned HOST_WIDE_INT mask
-			= (lowpart_bitmask (width)
-			   << (offset - store_info->begin));
-
-		      if ((store_info->positions_needed & mask) == mask
-			  && replace_read (store_info, i_ptr, 
-					   read_info, insn_info, loc))
-			return 0;
-		    }
+		      && offset >= store_info->begin
+		      && offset + width <= store_info->end
+		      && all_positions_needed_p (store_info,
+						 offset - store_info->begin,
+						 width)
+		      && replace_read (store_info, i_ptr, read_info,
+				       insn_info, loc))
+		    return 0;
+
 		  /* The bases are the same, just see if the offsets
 		     overlap.  */
 		  if ((offset < store_info->end) 
@@ -1902,18 +2042,12 @@ check_mem_read_rtx (rtx *loc, void *data
 	  if (store_info->rhs
 	      && store_info->group_id == -1
 	      && store_info->cse_base == base
-	      && (offset >= store_info->begin)
-	      && (offset + width <= store_info->end))
-	    {
-	      unsigned HOST_WIDE_INT mask
-		= (lowpart_bitmask (width)
-		   << (offset - store_info->begin));
-
-	      if ((store_info->positions_needed & mask) == mask
-		  && replace_read (store_info, i_ptr, 
-				   read_info, insn_info, loc))
-		return 0;
-	    }
+	      && offset >= store_info->begin
+	      && offset + width <= store_info->end
+	      && all_positions_needed_p (store_info,
+					 offset - store_info->begin, width)
+	      && replace_read (store_info, i_ptr, read_info, insn_info, loc))
+	    return 0;
 
 	  if (!store_info->alias_set)
 	    remove = canon_true_dependence (store_info->mem, 
@@ -1986,18 +2120,53 @@ scan_insn (bb_info_t bb_info, rtx insn)
 
   if (CALL_P (insn))
     {
+      bool const_call, memset_call = false;
+
       insn_info->cannot_delete = true;
 
       /* Const functions cannot do anything bad i.e. read memory,
 	 however, they can read their parameters which may have
-	 been pushed onto the stack.  */
-      if (RTL_CONST_CALL_P (insn))
+	 been pushed onto the stack.
+	 memset and bzero don't read memory either.  */
+      const_call = RTL_CONST_CALL_P (insn);
+      if (!const_call)
+	{
+	  rtx call = PATTERN (insn);
+	  if (GET_CODE (call) == PARALLEL)
+	    call = XVECEXP (call, 0, 0);
+	  if (GET_CODE (call) == SET)
+	    call = SET_SRC (call);
+	  if (GET_CODE (call) == CALL
+	      && MEM_P (XEXP (call, 0))
+	      && GET_CODE (XEXP (XEXP (call, 0), 0)) == SYMBOL_REF)
+	    {
+	      rtx symbol = XEXP (XEXP (call, 0), 0);
+	      if (SYMBOL_REF_DECL (symbol)
+		  && TREE_CODE (SYMBOL_REF_DECL (symbol)) == FUNCTION_DECL)
+		{
+		  if (DECL_BUILT_IN_CLASS (SYMBOL_REF_DECL (symbol))
+		      == BUILT_IN_NORMAL)
+		    switch (DECL_FUNCTION_CODE (SYMBOL_REF_DECL (symbol)))
+		      {
+		      case BUILT_IN_MEMSET:
+		      case BUILT_IN_BZERO:
+			memset_call = true;
+		      default:
+			break;
+		      }
+		  else if (SYMBOL_REF_DECL (symbol) == block_clear_fn)
+		    memset_call = true;
+		}
+	    }
+	}
+      if (const_call || memset_call)
 	{
 	  insn_info_t i_ptr = active_local_stores;
 	  insn_info_t last = NULL;
 
 	  if (dump_file)
-	    fprintf (dump_file, "const call %d\n", INSN_UID (insn));
+	    fprintf (dump_file, "%s call %d\n",
+		     const_call ? "const" : "memset", INSN_UID (insn));
 
 	  /* See the head comment of the frame_read field.  */
 	  if (reload_completed)
@@ -2233,6 +2402,18 @@ dse_step1 (void)
 	    {
 	      if (ptr->contains_cselib_groups)
 		free_store_info (ptr);
+	      else
+		{
+		  store_info_t s_info;
+
+		  /* Free at least positions_needed bitmaps.  */
+		  for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
+		    if (s_info->is_large)
+		      {
+			BITMAP_FREE (s_info->positions_needed.large.bitmap);
+			s_info->is_large = false;
+		      }
+		}
 	      ptr = ptr->prev_insn;
 	    }
 


	Jakub



More information about the Gcc-patches mailing list