This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Handle memset in DSE (take 2)


Jakub Jelinek wrote:
> Hi!
>
> On Fri, Dec 12, 2008 at 06:28:57PM +0100, Jakub Jelinek wrote:
>   
>> This (on top of http://gcc.gnu.org/ml/gcc-patches/2008-12/msg00751.html)
>> is a step toward fixing PR31150 in RTL DSE.
>>     
>
>   
You need to add the bugzilla reference to the changelog.  Aside from that,
approved for whatever release you think is appropriate (you are a
release manager).

Kenny
> As discussed on IRC, this updated patch uses the unsigned HOST_WIDE_INT
> bitmask for positions_needed of <= HOST_BITS_PER_WIDE_INT bytes
> long stores (the most common case, all non-BLKmode stores and also many
> smaller BLKmode stores), but uses a real bitmap to track bytes that are not
> needed for larger BLKmode stores.  It adds a couple of static inline
> accessor functions which hide this implementation detail from the callers.
>
> Bootstrapped/regtested on x86_64-linux.
>
> 2008-12-15  Jakub Jelinek  <jakub@redhat.com>
>
> 	* dse.c (struct store_info): Add is_large bool field, change
> 	positions_needed into a union of a bitmask and bitmap + count.
> 	(free_store_info): Free bitmap if is_large.
> 	(set_usage_bits): Don't look at stores where
> 	offset + width >= MAX_OFFSET.
> 	(set_position_unneeded, set_all_positions_unneeded,
> 	any_positions_needed_p, all_positions_needed_p): New static inline
> 	functions.
> 	(record_store): Handle BLKmode stores of CONST_INT, if
> 	MEM_SIZE is set on the MEM.  Use the new positions_needed
> 	accessor inlines.
> 	(replace_read): Handle reads from BLKmode CONST_INT stores.
> 	(check_mem_read_rtx): Use all_positions_needed_p function.
> 	(dse_step1): Free large positions_needed bitmaps and clear is_large.
>
> 	* dse.c (struct store_info): Change begin and end types to
> 	HOST_WIDE_INT.
>
> 	* dse.c (record_store): Fix check for unused store.
>
> 	* expr.c (block_clear_fn): No longer static.
> 	* expr.h (block_clear_fn): Declare.
> 	* dse.c (scan_insn): Memset and bzero can just read their
> 	arguments.
>
> --- gcc/expr.c.jj	2008-11-20 14:26:03.000000000 +0100
> +++ gcc/expr.c	2008-12-12 16:44:33.000000000 +0100
> @@ -2698,7 +2698,7 @@ set_storage_via_libcall (rtx object, rtx
>     for the function we use for block clears.  The first time FOR_CALL
>     is true, we call assemble_external.  */
>  
> -static GTY(()) tree block_clear_fn;
> +tree block_clear_fn;
>  
>  void
>  init_block_clear_fn (const char *asmspec)
> --- gcc/expr.h.jj	2008-12-11 11:40:19.000000000 +0100
> +++ gcc/expr.h	2008-12-12 16:44:20.000000000 +0100
> @@ -404,6 +404,7 @@ enum block_op_methods
>    BLOCK_OP_TAILCALL
>  };
>  
> +extern tree GTY(()) block_clear_fn;
>  extern void init_block_move_fn (const char *);
>  extern void init_block_clear_fn (const char *);
>  
> --- gcc/dse.c.jj	2008-11-27 17:08:26.000000000 +0100
> +++ gcc/dse.c	2008-12-15 11:19:21.000000000 +0100
> @@ -205,6 +205,9 @@ struct store_info 
>    /* False means this is a clobber.  */
>    bool is_set;
>  
> +  /* False if a single HOST_WIDE_INT bitmap is used for positions_needed.  */
> +  bool is_large;
> +
>    /* The id of the mem group of the base address.  If rtx_varies_p is
>       true, this is -1.  Otherwise, it is the index into the group
>       table.  */
> @@ -224,12 +227,26 @@ struct store_info 
>  
>    /* The offset of the first and byte before the last byte associated
>       with the operation.  */
> -  int begin, end;
> +  HOST_WIDE_INT begin, end;
>  
> -  /* An bitmask as wide as the number of bytes in the word that
> -     contains a 1 if the byte may be needed.  The store is unused if
> -     all of the bits are 0.  */
> -  unsigned HOST_WIDE_INT positions_needed;
> +  union
> +    {
> +      /* A bitmask as wide as the number of bytes in the word that
> +	 contains a 1 if the byte may be needed.  The store is unused if
> +	 all of the bits are 0.  This is used if IS_LARGE is false.  */
> +      unsigned HOST_WIDE_INT small_bitmask;
> +
> +      struct
> +	{
> +	  /* A bitmap with one bit per byte.  Cleared bit means the position
> +	     is needed.  Used if IS_LARGE is true.  */
> +	  bitmap bitmap;
> +
> +	  /* Number of set bits (i.e. unneeded bytes) in BITMAP.  If it is
> +	     equal to END - BEGIN, the whole store is unused.  */
> +	  int count;
> +	} large;
> +    } positions_needed;
>  
>    /* The next store info for this insn.  */
>    struct store_info *next;
> @@ -748,6 +765,8 @@ free_store_info (insn_info_t insn_info)
>    while (store_info)
>      {
>        store_info_t next = store_info->next;
> +      if (store_info->is_large)
> +	BITMAP_FREE (store_info->positions_needed.large.bitmap);
>        if (store_info->cse_base)
>  	pool_free (cse_store_info_pool, store_info);
>        else
> @@ -899,7 +918,7 @@ set_usage_bits (group_info_t group, HOST
>  {
>    HOST_WIDE_INT i;
>  
> -  if ((offset > -MAX_OFFSET) && (offset < MAX_OFFSET))
> +  if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET)
>      for (i=offset; i<offset+width; i++)
>        {
>  	bitmap store1;
> @@ -1161,6 +1180,75 @@ clear_rhs_from_active_local_stores (void
>  }
>  
>  
> +/* Mark byte POS bytes from the beginning of store S_INFO as unneeded.  */
> +
> +static inline void
> +set_position_unneeded (store_info_t s_info, int pos)
> +{
> +  if (__builtin_expect (s_info->is_large, false))
> +    {
> +      if (!bitmap_bit_p (s_info->positions_needed.large.bitmap, pos))
> +	{
> +	  s_info->positions_needed.large.count++;
> +	  bitmap_set_bit (s_info->positions_needed.large.bitmap, pos);
> +	}
> +    }
> +  else
> +    s_info->positions_needed.small_bitmask
> +      &= ~(((unsigned HOST_WIDE_INT) 1) << pos);
> +}
> +
> +/* Mark the whole store S_INFO as unneeded.  */
> +
> +static inline void
> +set_all_positions_unneeded (store_info_t s_info)
> +{
> +  if (__builtin_expect (s_info->is_large, false))
> +    {
> +      int pos, end = s_info->end - s_info->begin;
> +      for (pos = 0; pos < end; pos++)
> +	bitmap_set_bit (s_info->positions_needed.large.bitmap, pos);
> +      s_info->positions_needed.large.count = end;
> +    }
> +  else
> +    s_info->positions_needed.small_bitmask = (unsigned HOST_WIDE_INT) 0;
> +}
> +
> +/* Return TRUE if any bytes from S_INFO store are needed.  */
> +
> +static inline bool
> +any_positions_needed_p (store_info_t s_info)
> +{
> +  if (__builtin_expect (s_info->is_large, false))
> +    return (s_info->positions_needed.large.count
> +	    < s_info->end - s_info->begin);
> +  else
> +    return (s_info->positions_needed.small_bitmask
> +	    != (unsigned HOST_WIDE_INT) 0);
> +}
> +
> +/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
> +   store are needed.  */
> +
> +static inline bool
> +all_positions_needed_p (store_info_t s_info, int start, int width)
> +{
> +  if (__builtin_expect (s_info->is_large, false))
> +    {
> +      int end = start + width;
> +      while (start < end)
> +	if (bitmap_bit_p (s_info->positions_needed.large.bitmap, start++))
> +	  return false;
> +      return true;
> +    }
> +  else
> +    {
> +      unsigned HOST_WIDE_INT mask = lowpart_bitmask (width) << start;
> +      return (s_info->positions_needed.small_bitmask & mask) == mask;
> +    }
> +}
> +
> +
>  /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
>     there is a candidate store, after adding it to the appropriate
>     local store group if so.  */
> @@ -1182,14 +1270,15 @@ record_store (rtx body, bb_info_t bb_inf
>    if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
>      return 0;
>  
> +  mem = SET_DEST (body);
> +
>    /* If this is not used, then this cannot be used to keep the insn
>       from being deleted.  On the other hand, it does provide something
>       that can be used to prove that another store is dead.  */
>    store_is_unused
> -    = (find_reg_note (insn_info->insn, REG_UNUSED, body) != NULL);
> +    = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
>  
>    /* Check whether that value is a suitable memory location.  */
> -  mem = SET_DEST (body);
>    if (!MEM_P (mem))
>      {
>        /* If the set or clobber is unused, then it does not effect our
> @@ -1208,15 +1297,26 @@ record_store (rtx body, bb_info_t bb_inf
>  	    fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
>  	  add_wild_read (bb_info);
>  	  insn_info->cannot_delete = true;
> +	  return 0;
>  	}
> -      else if (!store_is_unused)
> -	{
> -	  /* If the set or clobber is unused, then it does not effect our
> -	     ability to get rid of the entire insn.  */
> -	  insn_info->cannot_delete = true;
> -	  clear_rhs_from_active_local_stores ();
> +      /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
> +	 as memset (addr, 0, 36);  */
> +      else if (!MEM_SIZE (mem)
> +	       || !CONST_INT_P (MEM_SIZE (mem))
> +	       || GET_CODE (body) != SET
> +	       || INTVAL (MEM_SIZE (mem)) <= 0
> +	       || INTVAL (MEM_SIZE (mem)) > MAX_OFFSET
> +	       || !CONST_INT_P (SET_SRC (body)))
> +	{
> +	  if (!store_is_unused)
> +	    {
> +	      /* If the set or clobber is unused, then it does not affect our
> +		 ability to get rid of the entire insn.  */
> +	      insn_info->cannot_delete = true;
> +	      clear_rhs_from_active_local_stores ();
> +	    }
> +	  return 0;
>  	}
> -      return 0;
>      }
>  
>    /* We can still process a volatile mem, we just cannot delete it.  */
> @@ -1229,12 +1329,20 @@ record_store (rtx body, bb_info_t bb_inf
>        return 0;
>      }
>  
> -  width = GET_MODE_SIZE (GET_MODE (mem));
> +  if (GET_MODE (mem) == BLKmode)
> +    width = INTVAL (MEM_SIZE (mem));
> +  else
> +    {
> +      width = GET_MODE_SIZE (GET_MODE (mem));
> +      gcc_assert ((unsigned) width <= HOST_BITS_PER_WIDE_INT);
> +    }
>  
>    if (spill_alias_set)
>      {
>        bitmap store1 = clear_alias_group->store1_p;
>        bitmap store2 = clear_alias_group->store2_p;
> +
> +      gcc_assert (GET_MODE (mem) != BLKmode);
>        
>        if (bitmap_bit_p (store1, spill_alias_set))
>  	bitmap_set_bit (store2, spill_alias_set);
> @@ -1315,7 +1423,7 @@ record_store (rtx body, bb_info_t bb_inf
>  	      && (GET_MODE (mem) == entry->mode))
>  	    {
>  	      del = true;
> -	      s_info->positions_needed = (unsigned HOST_WIDE_INT) 0;
> +	      set_all_positions_unneeded (s_info);
>  	    }
>  	  if (dump_file)
>  	    fprintf (dump_file, "    trying spill store in insn=%d alias_set=%d\n",
> @@ -1329,10 +1437,10 @@ record_store (rtx body, bb_info_t bb_inf
>  	    fprintf (dump_file, "    trying store in insn=%d gid=%d[%d..%d)\n",
>  		     INSN_UID (ptr->insn), s_info->group_id, 
>  		     (int)s_info->begin, (int)s_info->end);
> -	  for (i = offset; i < offset+width; i++)
> -	    if (i >= s_info->begin && i < s_info->end)
> -	      s_info->positions_needed
> -		&= ~(((unsigned HOST_WIDE_INT) 1) << (i - s_info->begin));
> +	  for (i = MAX (offset, s_info->begin);
> +	       i < offset + width && i < s_info->end;
> +	       i++)
> +	    set_position_unneeded (s_info, i - s_info->begin);
>  	}
>        else if (s_info->rhs)
>  	/* Need to see if it is possible for this store to overwrite
> @@ -1348,9 +1456,9 @@ record_store (rtx body, bb_info_t bb_inf
>        
>        /* An insn can be deleted if every position of every one of
>  	 its s_infos is zero.  */
> -      if (s_info->positions_needed != (unsigned HOST_WIDE_INT) 0)
> +      if (any_positions_needed_p (s_info))
>  	del = false;
> -      
> +
>        if (del)
>  	{
>  	  insn_info_t insn_to_delete = ptr;
> @@ -1368,8 +1476,6 @@ record_store (rtx body, bb_info_t bb_inf
>        ptr = next;
>      }
>    
> -  gcc_assert ((unsigned) width <= HOST_BITS_PER_WIDE_INT);
> -  
>    /* Finish filling in the store_info.  */
>    store_info->next = insn_info->store_rec;
>    insn_info->store_rec = store_info;
> @@ -1377,7 +1483,17 @@ record_store (rtx body, bb_info_t bb_inf
>    store_info->alias_set = spill_alias_set;
>    store_info->mem_addr = get_addr (XEXP (mem, 0));
>    store_info->cse_base = base;
> -  store_info->positions_needed = lowpart_bitmask (width);
> +  if (width > HOST_BITS_PER_WIDE_INT)
> +    {
> +      store_info->is_large = true;
> +      store_info->positions_needed.large.count = 0;
> +      store_info->positions_needed.large.bitmap = BITMAP_ALLOC (NULL);
> +    }
> +  else
> +    {
> +      store_info->is_large = false;
> +      store_info->positions_needed.small_bitmask = lowpart_bitmask (width);
> +    }
>    store_info->group_id = group_id;
>    store_info->begin = offset;
>    store_info->end = offset + width;
> @@ -1580,7 +1696,9 @@ replace_read (store_info_t store_info, i
>    /* To get here the read is within the boundaries of the write so
>       shift will never be negative.  Start out with the shift being in
>       bytes.  */
> -  if (BYTES_BIG_ENDIAN)
> +  if (store_mode == BLKmode)
> +    shift = 0;
> +  else if (BYTES_BIG_ENDIAN)
>      shift = store_info->end - read_info->end;
>    else
>      shift = read_info->begin - store_info->begin;
> @@ -1608,6 +1726,31 @@ replace_read (store_info_t store_info, i
>    if (shift)
>      read_reg = find_shift_sequence (access_size, store_info, read_info, shift,
>      				    optimize_bb_for_speed_p (BLOCK_FOR_INSN (read_insn->insn)));
> +  else if (store_mode == BLKmode)
> +    {
> +      /* The store is a memset (addr, const_val, const_size).  */
> +      gcc_assert (CONST_INT_P (store_info->rhs));
> +      store_mode = int_mode_for_mode (read_mode);
> +      if (store_mode == BLKmode)
> +	read_reg = NULL_RTX;
> +      else if (store_info->rhs == const0_rtx)
> +	read_reg = extract_low_bits (read_mode, store_mode, const0_rtx);
> +      else if (GET_MODE_BITSIZE (store_mode) > HOST_BITS_PER_WIDE_INT)
> +	read_reg = NULL_RTX;
> +      else
> +	{
> +	  unsigned HOST_WIDE_INT c
> +	    = INTVAL (store_info->rhs) & (BITS_PER_UNIT - 1);
> +	  int shift = BITS_PER_UNIT;
> +	  while (shift < HOST_BITS_PER_WIDE_INT)
> +	    {
> +	      c |= (c << shift);
> +	      shift <<= 1;
> +	    }
> +	  read_reg = GEN_INT (trunc_int_for_mode (c, store_mode));
> +	  read_reg = extract_low_bits (read_mode, store_mode, read_reg);
> +	}
> +    }
>    else
>      read_reg = extract_low_bits (read_mode, store_mode,
>  				 copy_rtx (store_info->rhs));
> @@ -1833,18 +1976,15 @@ check_mem_read_rtx (rtx *loc, void *data
>  	      else 
>  		{
>  		  if (store_info->rhs
> -		      && (offset >= store_info->begin)
> -		      && (offset + width <= store_info->end))
> -		    {
> -		      unsigned HOST_WIDE_INT mask
> -			= (lowpart_bitmask (width)
> -			   << (offset - store_info->begin));
> -
> -		      if ((store_info->positions_needed & mask) == mask
> -			  && replace_read (store_info, i_ptr, 
> -					   read_info, insn_info, loc))
> -			return 0;
> -		    }
> +		      && offset >= store_info->begin
> +		      && offset + width <= store_info->end
> +		      && all_positions_needed_p (store_info,
> +						 offset - store_info->begin,
> +						 width)
> +		      && replace_read (store_info, i_ptr, read_info,
> +				       insn_info, loc))
> +		    return 0;
> +
>  		  /* The bases are the same, just see if the offsets
>  		     overlap.  */
>  		  if ((offset < store_info->end) 
> @@ -1902,18 +2042,12 @@ check_mem_read_rtx (rtx *loc, void *data
>  	  if (store_info->rhs
>  	      && store_info->group_id == -1
>  	      && store_info->cse_base == base
> -	      && (offset >= store_info->begin)
> -	      && (offset + width <= store_info->end))
> -	    {
> -	      unsigned HOST_WIDE_INT mask
> -		= (lowpart_bitmask (width)
> -		   << (offset - store_info->begin));
> -
> -	      if ((store_info->positions_needed & mask) == mask
> -		  && replace_read (store_info, i_ptr, 
> -				   read_info, insn_info, loc))
> -		return 0;
> -	    }
> +	      && offset >= store_info->begin
> +	      && offset + width <= store_info->end
> +	      && all_positions_needed_p (store_info,
> +					 offset - store_info->begin, width)
> +	      && replace_read (store_info, i_ptr,  read_info, insn_info, loc))
> +	    return 0;
>  
>  	  if (!store_info->alias_set)
>  	    remove = canon_true_dependence (store_info->mem, 
> @@ -1986,18 +2120,53 @@ scan_insn (bb_info_t bb_info, rtx insn)
>  
>    if (CALL_P (insn))
>      {
> +      bool const_call, memset_call = false;
> +
>        insn_info->cannot_delete = true;
>  
>        /* Const functions cannot do anything bad i.e. read memory,
>  	 however, they can read their parameters which may have
> -	 been pushed onto the stack.  */
> -      if (RTL_CONST_CALL_P (insn))
> +	 been pushed onto the stack.
> +	 memset and bzero don't read memory either.  */
> +      const_call = RTL_CONST_CALL_P (insn);
> +      if (!const_call)
> +	{
> +	  rtx call = PATTERN (insn);
> +	  if (GET_CODE (call) == PARALLEL)
> +	    call = XVECEXP (call, 0, 0);
> +	  if (GET_CODE (call) == SET)
> +	    call = SET_SRC (call);
> +	  if (GET_CODE (call) == CALL
> +	      && MEM_P (XEXP (call, 0))
> +	      && GET_CODE (XEXP (XEXP (call, 0), 0)) == SYMBOL_REF)
> +	    {
> +	      rtx symbol = XEXP (XEXP (call, 0), 0);
> +	      if (SYMBOL_REF_DECL (symbol)
> +		  && TREE_CODE (SYMBOL_REF_DECL (symbol)) == FUNCTION_DECL)
> +		{
> +		  if (DECL_BUILT_IN_CLASS (SYMBOL_REF_DECL (symbol))
> +		      == BUILT_IN_NORMAL)
> +		    switch (DECL_FUNCTION_CODE (SYMBOL_REF_DECL (symbol)))
> +		      {
> +		      case BUILT_IN_MEMSET:
> +		      case BUILT_IN_BZERO:
> +			memset_call = true;
> +		      default:
> +			break;
> +		      }
> +		  else if (SYMBOL_REF_DECL (symbol) == block_clear_fn)
> +		    memset_call = true;
> +		}
> +	    }
> +	}
> +      if (const_call || memset_call)
>  	{
>  	  insn_info_t i_ptr = active_local_stores;
>  	  insn_info_t last = NULL;
>  
>  	  if (dump_file)
> -	    fprintf (dump_file, "const call %d\n", INSN_UID (insn));
> +	    fprintf (dump_file, "%s call %d\n",
> +		     const_call ? "const" : "memset", INSN_UID (insn));
>  
>  	  /* See the head comment of the frame_read field.  */
>  	  if (reload_completed)
> @@ -2233,6 +2402,18 @@ dse_step1 (void)
>  	    {
>  	      if (ptr->contains_cselib_groups)
>  		free_store_info (ptr);
> +	      else
> +		{
> +		  store_info_t s_info;
> +
> +		  /* Free at least positions_needed bitmaps.  */
> +		  for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
> +		    if (s_info->is_large)
> +		      {
> +			BITMAP_FREE (s_info->positions_needed.large.bitmap);
> +			s_info->is_large = false;
> +		      }
> +		}
>  	      ptr = ptr->prev_insn;
>  	    }
>  
>
>
> 	Jakub
>   


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]