This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Merge adjacent stores of constants (PR middle-end/22141)


Let me try this updated patch and I'll let you know.

Thanks,
Luis
On Fri, 2008-11-28 at 10:33 +0100, Jakub Jelinek wrote:
> On Fri, Nov 28, 2008 at 12:26:28AM -0200, Luis Machado wrote:
> > I've noticed 2% degradation on eon and 3% degradation on perlbmk
> > benchmarks for 32-bit PPC. I'm still going through the 64-bit numbers.
> 
> That's not very encouraging.  Anyway, the version of the patch I've posted
> yesterday, as I wrote, will not (usually) merge adjacent stores on ppc
> in loops (as the setting of the constants is hoisted before the loop, thus
> cselib didn't see them and didn't consider them to be constant stores).
> 
> Here is a newer version of the patch that should
> 1) enable the optimization on STRICT_ALIGNMENT targets if the memory is
>    sufficiently aligned
> 2) use REG_EQUAL notes if they say the store writes a constant.  This
>    makes ppc{,64} merge adjacent stores even in loops.
> With the small benchmark I posted yesterday, time shows:
> 		vanilla gcc		with the patch
> -O2 -m32	0m3.595s		0m1.908s
> -O2 -m64	0m3.606s		0m1.601s
> 
> So this patch could make a larger difference on ppc{,64}, no difference
> at all on x86_64 or i386.
> 
> Anyway, it would be interesting to understand what caused the 3% degradation
> on perlbmk.
> 
> The patch has been bootstrapped/regtested on x86_64-linux and
> powerpc64-linux (--with-cpu=default32).
> 
> 2008-11-28  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/22141
> 	* dse.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
> 	(struct adjacent_store_info): New type.
> 	(dse_encode_int, dse_decode_int, merge_adjacent_stores): New
> 	functions.
> 	(record_store): Add adjacent_store argument, fill it in.
> 	(scan_insn): Add adjacent_store variable, update record_store
> 	callers, call merge_adjacent_stores.
> 	* cselib.c (cselib_expand_value_rtx): Don't wrap CONST_INTs
> 	into CONST.  Handle SUBREG specially, to be able to simplify
> 	subregs of constants.
> 	(cselib_notice_new_pseudos): New function.
> 	* cselib.h (cselib_notice_new_pseudos): New prototype.
> 
> 	* dse.c (replace_read): Don't optimize PDP-endian targets.
> 
> 	* gcc.c-torture/execute/pr22141-1.c: New test.
> 	* gcc.c-torture/execute/pr22141-2.c: New test.
> 	* gcc.target/i386/pr22141.c: New test.
> 
> --- gcc/dse.c.jj	2008-11-24 12:05:40.000000000 +0100
> +++ gcc/dse.c	2008-11-28 01:00:40.000000000 +0100
> @@ -188,6 +188,10 @@ along with GCC; see the file COPYING3.  
>       does, assuming that the alias sets can be manipulated in the same
>       way.  */
> 
> +#ifndef SLOW_UNALIGNED_ACCESS
> +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
> +#endif
> +
>  /* There are limits to the size of constant offsets we model for the
>     global problem.  There are certainly test cases, that exceed this
>     limit, however, it is unlikely that there are important programs
> @@ -535,6 +539,13 @@ static alloc_pool clear_alias_mode_pool;
>     this for vararg functions because they play games with the frame.  */
>  static bool stores_off_frame_dead_at_return;
> 
> +struct adjacent_store_info
> +{
> +  unsigned HOST_WIDE_INT mask;
> +  unsigned char value[8];
> +  unsigned HOST_WIDE_INT alignment[8];
> +};
> +
>  /* Counter for stats.  */
>  static int globally_deleted; 
>  static int locally_deleted; 
> @@ -1161,16 +1172,53 @@ clear_rhs_from_active_local_stores (void
>  }
> 
> 
> +/* Store an integer into memory in target byte order starting
> +   at PTR + START and ending at PTR + START + WIDTH, modifying
> +   only bytes set in MASK.  */
> +
> +static void
> +dse_encode_int (unsigned HOST_WIDE_INT value, unsigned char *ptr,
> +		unsigned int start, unsigned int width,
> +		unsigned HOST_WIDE_INT mask)
> +{
> +  unsigned int byte, end = start + width;
> +  for (byte = start; byte < end; byte++)
> +    {
> +      if (mask & (((unsigned HOST_WIDE_INT) 1) << byte))
> +	ptr[byte]
> +	  = value >> ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
> +		      * BITS_PER_UNIT);
> +    }
> +}
> +
> +
> +/* Read back an integer from memory in target byte order.  */
> +
> +static unsigned HOST_WIDE_INT
> +dse_decode_int (unsigned char *ptr, unsigned int start, unsigned int width)
> +{
> +  unsigned HOST_WIDE_INT value = 0;
> +  unsigned int byte, end = start + width;
> +  for (byte = start; byte < end; byte++)
> +    value |= ((unsigned HOST_WIDE_INT) ptr[byte])
> +	     << ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
> +		 * BITS_PER_UNIT);
> +  return value;
> +}
> +
> +
>  /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
>     there is a candidate store, after adding it to the appropriate
>     local store group if so.  */
> 
>  static int
> -record_store (rtx body, bb_info_t bb_info)
> +record_store (rtx body, bb_info_t bb_info,
> +	      struct adjacent_store_info *adjacent_store)
>  {
> -  rtx mem;
> +  rtx mem, rhs;
>    HOST_WIDE_INT offset = 0;
>    HOST_WIDE_INT width = 0;
> +  HOST_WIDE_INT beg = 0;
>    alias_set_type spill_alias_set;
>    insn_info_t insn_info = bb_info->last_insn;
>    store_info_t store_info = NULL;
> @@ -1221,7 +1269,7 @@ record_store (rtx body, bb_info_t bb_inf
> 
>    /* We can still process a volatile mem, we just cannot delete it.  */
>    if (MEM_VOLATILE_P (mem))
> -      insn_info->cannot_delete = true;
> +    insn_info->cannot_delete = true;
> 
>    if (!canon_address (mem, &spill_alias_set, &group_id, &offset, &base))
>      {
> @@ -1283,6 +1331,61 @@ record_store (rtx body, bb_info_t bb_inf
>  		 (int)offset, (int)(offset+width));
>      }
> 
> +  if (GET_CODE (body) == SET
> +      /* No place to keep the value after ra.  */
> +      && !reload_completed
> +      && (REG_P (SET_SRC (body))
> +	  || GET_CODE (SET_SRC (body)) == SUBREG
> +	  || CONSTANT_P (SET_SRC (body)))
> +      /* Sometimes the store and reload is used for truncation and
> +	 rounding.  */
> +      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
> +    {
> +      rhs = NULL;
> +      if (body == PATTERN (insn_info->insn))
> +	{
> +	  rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
> +	  if (tem && GET_CODE (XEXP (tem, 0)) == CONST_INT)
> +	    rhs = XEXP (tem, 0);
> +	}
> +      if (!rhs)
> +	rhs = cselib_expand_value_rtx (SET_SRC (body), scratch, 5);
> +      if (!rhs || !CONSTANT_P (rhs))
> +	rhs = SET_SRC (body);
> +    }
> +  else
> +    rhs = NULL;
> +
> +  /* Attempt to merge adjacent subword stores.  */
> +  if (width < UNITS_PER_WORD
> +      && CHAR_BIT == 8
> +      && BITS_PER_UNIT == 8
> +      && !reload_completed
> +      && GET_CODE (body) == SET
> +      && rhs
> +      && GET_CODE (rhs) == CONST_INT
> +      && GET_MODE_CLASS (GET_MODE (mem)) == MODE_INT
> +      && !insn_info->cannot_delete
> +      && !spill_alias_set)
> +    {
> +      beg = offset & (UNITS_PER_WORD - 1);
> +      if (beg + width > UNITS_PER_WORD
> +	  || (beg % width) != 0)
> +	adjacent_store->mask = 0;
> +      else
> +	{
> +	  adjacent_store->mask = ~(unsigned HOST_WIDE_INT) 0;
> +	  adjacent_store->mask <<= HOST_BITS_PER_WIDE_INT - width;
> +	  adjacent_store->mask >>= HOST_BITS_PER_WIDE_INT - (width + beg);
> +	  dse_encode_int (INTVAL (rhs), adjacent_store->value, beg, width,
> +			  adjacent_store->mask);
> +	  adjacent_store->alignment[beg] = MEM_ALIGN (mem);
> +	  beg = offset - beg;
> +	}
> +    }
> +  else
> +    adjacent_store->mask = 0;
> +
>    /* Check to see if this stores causes some other stores to be
>       dead.  */
>    ptr = active_local_stores;
> @@ -1333,6 +1436,42 @@ record_store (rtx body, bb_info_t bb_inf
>  	    if (i >= s_info->begin && i < s_info->end)
>  	      s_info->positions_needed
>  		&= ~(((unsigned HOST_WIDE_INT) 1) << (i - s_info->begin));
> +
> +	  /* Attempt to merge adjacent subword stores.  */
> +	  if (adjacent_store->mask
> +	      && s_info->begin < beg + UNITS_PER_WORD
> +	      && s_info->end > beg)
> +	    {
> +	      unsigned HOST_WIDE_INT mask;
> +
> +	      if (ptr->cannot_delete
> +		  || s_info->begin < beg
> +		  || s_info->end > beg + UNITS_PER_WORD
> +		  || ((s_info->begin - beg)
> +		       & (s_info->end - s_info->begin - 1))
> +		  || s_info->rhs == NULL
> +		  || GET_CODE (s_info->rhs) != CONST_INT
> +		  || GET_MODE_CLASS (GET_MODE (s_info->mem)) != MODE_INT
> +		  || (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem)
> +		      && MEM_ALIAS_SET (s_info->mem)
> +		      && MEM_ALIAS_SET (mem)))
> +		adjacent_store->mask = 0;
> +	      else
> +		{
> +		  mask = ~(unsigned HOST_WIDE_INT) 0;
> +		  mask <<= HOST_BITS_PER_WIDE_INT
> +			   - (s_info->end - s_info->begin);
> +		  mask >>= HOST_BITS_PER_WIDE_INT - (s_info->end - beg);
> +		  mask &= ~adjacent_store->mask;
> +		  dse_encode_int (INTVAL (s_info->rhs), adjacent_store->value,
> +				  s_info->begin - beg,
> +				  s_info->end - s_info->begin, mask);
> +		  adjacent_store->mask |= mask;
> +		  adjacent_store->alignment[s_info->begin - beg]
> +		    = MAX (adjacent_store->alignment[s_info->begin - beg],
> +			   MEM_ALIGN (s_info->mem));
> +		}
> +	    }
>  	}
>        else if (s_info->rhs)
>  	/* Need to see if it is possible for this store to overwrite
> @@ -1382,20 +1521,8 @@ record_store (rtx body, bb_info_t bb_inf
>    store_info->begin = offset;
>    store_info->end = offset + width;
>    store_info->is_set = GET_CODE (body) == SET;
> +  store_info->rhs = rhs;
> 
> -  if (store_info->is_set 
> -      /* No place to keep the value after ra.  */
> -      && !reload_completed
> -      && (REG_P (SET_SRC (body))
> -	  || GET_CODE (SET_SRC (body)) == SUBREG
> -	  || CONSTANT_P (SET_SRC (body)))
> -      /* Sometimes the store and reload is used for truncation and
> -	 rounding.  */
> -      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
> -    store_info->rhs = SET_SRC (body);
> -  else
> -    store_info->rhs = NULL;
> -  
>    /* If this is a clobber, we return 0.  We will only be able to
>       delete this insn if there is only one store USED store, but we
>       can use the clobber to delete other stores earlier.  */
> @@ -1574,7 +1701,10 @@ replace_read (store_info_t store_info, i
>    int access_size; /* In bytes.  */
>    rtx insns, read_reg;
> 
> -  if (!dbg_cnt (dse))
> +  if (!dbg_cnt (dse)
> +      || (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
> +	  && (GET_MODE_BITSIZE (store_mode) > BITS_PER_WORD
> +	      || GET_MODE_BITSIZE (read_mode) > BITS_PER_WORD)))
>      return false;
> 
>    /* To get here the read is within the boundaries of the write so
> @@ -1949,6 +2079,180 @@ check_mem_read_use (rtx *loc, void *data
>    for_each_rtx (loc, check_mem_read_rtx, data);
>  }
> 
> +/* Attempt to merge adjacent stores of constants into one bigger
> +   store.  */
> +
> +static void
> +merge_adjacent_stores (struct adjacent_store_info *adjacent_store)
> +{
> +  store_info_t store_info = active_local_stores->store_rec;
> +  insn_info_t ptr;
> +  enum machine_mode mode, chosen_mode = VOIDmode;
> +  unsigned HOST_WIDE_INT chosen_offset = 0;
> +  HOST_WIDE_INT beg, mem_offset, value;
> +  int count = 0;
> +  rtx cst, mem, insn, insns, set, canon_addr;
> +  tree expr;
> +
> +  /* Skip the clobbers.  */
> +  while (!store_info->is_set)
> +    store_info = store_info->next;
> +
> +  for (mode = GET_MODE_WIDER_MODE (GET_MODE (store_info->mem));
> +       GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
> +       mode = GET_MODE_WIDER_MODE (mode))
> +    {
> +      unsigned HOST_WIDE_INT mask, offset;
> +
> +      offset = ((store_info->begin & (UNITS_PER_WORD - 1))
> +		- (store_info->begin & (GET_MODE_SIZE (mode) - 1)));
> +      mask = (((HOST_WIDE_INT) 1 << GET_MODE_SIZE (mode)) - 1) << offset;
> +      if ((adjacent_store->mask & mask) == mask
> +	  && (GET_MODE_ALIGNMENT (mode) <= adjacent_store->alignment[offset]
> +	      || !SLOW_UNALIGNED_ACCESS (mode,
> +					 adjacent_store->alignment[offset])))
> +	{
> +	  chosen_mode = mode;
> +	  chosen_offset = offset;
> +	}
> +    }
> +  if (chosen_mode == VOIDmode)
> +    return;
> +
> +  value = dse_decode_int (adjacent_store->value, chosen_offset,
> +			  GET_MODE_SIZE (chosen_mode));
> +
> +  start_sequence ();
> +  cst = GEN_INT (trunc_int_for_mode (value, chosen_mode));
> +  canon_addr = XEXP (store_info->mem, 0);
> +  XEXP (store_info->mem, 0) = store_info->mem_addr;
> +  mem = adjust_address (store_info->mem, chosen_mode,
> +			chosen_offset
> +			- (store_info->begin & (UNITS_PER_WORD - 1)));
> +  XEXP (store_info->mem, 0) = canon_addr;
> +  if (MEM_ALIGN (mem) < adjacent_store->alignment[chosen_offset])
> +    set_mem_align (mem, adjacent_store->alignment[chosen_offset]);
> +  expr = MEM_EXPR (mem);
> +  if (MEM_OFFSET (mem) && GET_CODE (MEM_OFFSET (mem)) != CONST_INT)
> +    set_mem_offset (mem, NULL_RTX);
> +  mem_offset = MEM_OFFSET (mem) ? INTVAL (MEM_OFFSET (mem)) : 0;
> +  beg = store_info->begin
> +	- (store_info->begin & (GET_MODE_SIZE (chosen_mode) - 1));
> +
> +  for (ptr = active_local_stores; ptr; ptr = ptr->next_local_store)
> +    {
> +      store_info_t s_info = ptr->store_rec;
> +
> +      /* Skip the clobbers.  */
> +      while (!s_info->is_set)
> +	s_info = s_info->next;
> +
> +      if (s_info->alias_set
> +	  || s_info->group_id != store_info->group_id
> +	  || s_info->cse_base != store_info->cse_base
> +	  || s_info->begin >= beg + GET_MODE_SIZE (chosen_mode)
> +	  || s_info->end <= beg)
> +	continue;
> +
> +      count++;
> +
> +      if (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem))
> +	{
> +	  gcc_assert (MEM_ALIAS_SET (s_info->mem) == 0
> +		      || MEM_ALIAS_SET (mem) == 0);
> +	  if (MEM_ALIAS_SET (s_info->mem) == 0)
> +	    set_mem_alias_set (mem, 0);
> +	}
> +
> +      if (expr && MEM_EXPR (s_info->mem) != expr)
> +	{
> +	  tree op0 = expr;
> +	  tree op1 = MEM_EXPR (s_info->mem);
> +	  int depth0 = 0, depth1 = 0;
> +
> +	  while (op0 && TREE_CODE (op0) == COMPONENT_REF)
> +	    {
> +	      op0 = TREE_OPERAND (op0, 0);
> +	      depth0++;
> +	    }
> +	  while (op1 && TREE_CODE (op1) == COMPONENT_REF)
> +	    {
> +	      op1 = TREE_OPERAND (op1, 0);
> +	      depth1++;
> +	    }
> +	  if ((op0 == NULL) != (op1 == NULL)
> +	      || (op0 != NULL && !operand_equal_p (op0, op1, 0)))
> +	    expr = NULL_TREE;
> +	  else
> +	    {
> +	      op1 = MEM_EXPR (s_info->mem);
> +	      while (depth1 > depth0)
> +		{
> +		  op1 = TREE_OPERAND (op1, 0);
> +		  depth1--;
> +		}
> +	      while (depth0)
> +		{
> +		  if (depth0 == depth1
> +		      && TREE_OPERAND (expr, 1) == TREE_OPERAND (op1, 1))
> +		    break;
> +		  if (MEM_OFFSET (mem))
> +		    {
> +		      tree off = component_ref_field_offset (expr);
> +		      tree bit_off
> +			= DECL_FIELD_BIT_OFFSET (TREE_OPERAND (expr, 1));
> +		      if (off == NULL_TREE
> +			  || !host_integerp (off, 0)
> +			  || bit_off == NULL_TREE
> +			  || !host_integerp (bit_off, 0))
> +			{
> +			  expr = NULL_TREE;
> +			  break;
> +			}
> +		      mem_offset += tree_low_cst (off, 0);
> +		      mem_offset += tree_low_cst (bit_off, 0) / BITS_PER_UNIT;
> +		    }
> +		  expr = TREE_OPERAND (expr, 0);
> +		  if (depth0 == depth1)
> +		    {
> +		      op1 = TREE_OPERAND (op1, 0);
> +		      depth1--;
> +		    }
> +		  depth0--;
> +		}
> +	    }
> +	}
> +    }
> +  if (MEM_EXPR (mem) != expr)
> +    set_mem_expr (mem, expr);
> +  if (MEM_OFFSET (mem))
> +    {
> +      if (!expr)
> +	set_mem_offset (mem, NULL_RTX);
> +      else if (INTVAL (MEM_OFFSET (mem)) != mem_offset)
> +	set_mem_offset (mem, GEN_INT (mem_offset));
> +    }
> +
> +  emit_move_insn (mem, cst);
> +  insn = get_last_insn ();
> +  set = single_set (insn);
> +  if (set && SET_DEST (set) == mem && SET_SRC (set) != cst)
> +    set_unique_reg_note (insn, REG_EQUAL, cst);
> +  insns = get_insns ();
> +  end_sequence ();
> +
> +  emit_insn_after (insns, active_local_stores->insn);
> +  cselib_notice_new_pseudos ();
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "Merging %d adjacent constant stores into ", count);
> +      print_inline_rtx (dump_file, mem, 0);
> +      fprintf (dump_file, " = ");
> +      print_inline_rtx (dump_file, cst, 0);
> +      fprintf (dump_file, "\n");
> +    }
> +}
> +
>  /* Apply record_store to all candidate stores in INSN.  Mark INSN
>     if some part of it is not a candidate store and assigns to a
>     non-register target.  */
> @@ -1959,6 +2263,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
>    rtx body;
>    insn_info_t insn_info = (insn_info_t) pool_alloc (insn_info_pool);
>    int mems_found = 0;
> +  struct adjacent_store_info adjacent_store;
> +
>    memset (insn_info, 0, sizeof (struct insn_info));
> 
>    if (dump_file)
> @@ -2060,16 +2366,20 @@ scan_insn (bb_info_t bb_info, rtx insn)
>        || (RTX_FRAME_RELATED_P (insn))
>        || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
>      insn_info->cannot_delete = true;
> -  
> +
> +  memset (&adjacent_store, '\0', sizeof (adjacent_store));
> +  gcc_assert (BITS_PER_WORD <= HOST_BITS_PER_WIDE_INT);
> +  gcc_assert (UNITS_PER_WORD <= ARRAY_SIZE (adjacent_store.alignment));
>    body = PATTERN (insn);
>    if (GET_CODE (body) == PARALLEL)
>      {
>        int i;
>        for (i = 0; i < XVECLEN (body, 0); i++)
> -	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
> +	mems_found += record_store (XVECEXP (body, 0, i), bb_info,
> +				    &adjacent_store);
>      }
>    else
> -    mems_found += record_store (body, bb_info);
> +    mems_found += record_store (body, bb_info, &adjacent_store);
> 
>    if (dump_file)
>      fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n", 
> @@ -2083,6 +2393,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
>      {
>        insn_info->next_local_store = active_local_stores;
>        active_local_stores = insn_info;
> +      if (adjacent_store.mask)
> +	merge_adjacent_stores (&adjacent_store);
>      }
>    else
>      insn_info->cannot_delete = true;
> --- gcc/cselib.c.jj	2008-11-26 20:44:28.000000000 +0100
> +++ gcc/cselib.c	2008-11-27 11:05:29.000000000 +0100
> @@ -1007,25 +1007,30 @@ cselib_expand_value_rtx (rtx orig, bitma
>  	return orig;
>        break;
> 
> -
> -    case VALUE:
> +    case SUBREG:
>        {
> -	rtx result;
> -	if (dump_file)
> -	  fprintf (dump_file, "expanding value %s into: ", GET_MODE_NAME (GET_MODE (orig)));
> -	
> -	result = expand_loc (CSELIB_VAL_PTR (orig)->locs, regs_active, max_depth);
> -	if (result 
> -	    && GET_CODE (result) == CONST_INT
> -	    && GET_MODE (orig) != VOIDmode)
> -	  {
> -	    result = gen_rtx_CONST (GET_MODE (orig), result);
> -	    if (dump_file)
> -	      fprintf (dump_file, "  wrapping const_int result in const to preserve mode %s\n", 
> -		       GET_MODE_NAME (GET_MODE (orig)));
> -	  }
> -	return result;
> +	rtx subreg = cselib_expand_value_rtx (SUBREG_REG (orig), regs_active,
> +					      max_depth - 1);
> +	if (!subreg)
> +	  return NULL;
> +	scopy = simplify_gen_subreg (GET_MODE (orig), subreg,
> +				     GET_MODE (SUBREG_REG (orig)),
> +				     SUBREG_BYTE (orig));
> +	if (scopy == NULL
> +	    || (GET_CODE (scopy) == SUBREG
> +		&& !REG_P (SUBREG_REG (scopy))
> +		&& !MEM_P (SUBREG_REG (scopy))))
> +	  return shallow_copy_rtx (orig);
> +	return scopy;
>        }
> +
> +    case VALUE:
> +      if (dump_file)
> +	fprintf (dump_file, "expanding value %s into: ",
> +		 GET_MODE_NAME (GET_MODE (orig)));
> +
> +      return expand_loc (CSELIB_VAL_PTR (orig)->locs, regs_active, max_depth);
> +
>      default:
>        break;
>      }
> @@ -1761,6 +1766,29 @@ cselib_init (bool record_memory)
>  				   entry_and_rtx_equal_p, NULL);
>  }
> 
> +/* Called when new pseudos were created between cselib_init and
> +   cselib_finish and cselib_* routines might see them.  */
> +
> +void
> +cselib_notice_new_pseudos (void)
> +{
> +  unsigned int nregs = max_reg_num ();
> +
> +  if (nregs > reg_values_size)
> +    {
> +      unsigned int new_size = nregs + (63 + nregs) / 16;
> +      reg_values = XRESIZEVEC (struct elt_list *, reg_values, new_size);
> +      memset (&reg_values[reg_values_size], '\0',
> +	      (new_size - reg_values_size) * sizeof (struct elt_list *));
> +      reg_values_size = new_size;
> +    }
> +  if (nregs > cselib_nregs)
> +    {
> +      cselib_nregs = nregs + (63 + nregs) / 16;
> +      used_regs = XRESIZEVEC (unsigned int, used_regs, cselib_nregs);
> +    }
> +}
> +
>  /* Called when the current user is done with cselib.  */
> 
>  void
> --- gcc/cselib.h.jj	2008-09-30 16:57:11.000000000 +0200
> +++ gcc/cselib.h	2008-11-27 09:41:20.000000000 +0100
> @@ -60,6 +60,7 @@ extern void (*cselib_discard_hook) (csel
> 
>  extern cselib_val *cselib_lookup (rtx, enum machine_mode, int);
>  extern void cselib_init (bool record_memory);
> +extern void cselib_notice_new_pseudos (void);
>  extern void cselib_clear_table (void);
>  extern void cselib_finish (void);
>  extern void cselib_process_insn (rtx);
> --- gcc/testsuite/gcc.target/i386/pr22141.c.jj	2008-11-26 20:45:52.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/pr22141.c	2008-11-26 20:45:52.000000000 +0100
> @@ -0,0 +1,126 @@
> +/* PR middle-end/22141 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  struct T
> +    {
> +      char a;
> +      char b;
> +      char c;
> +      char d;
> +    } t;
> +} u;
> +
> +struct U
> +{
> +  struct S s[4];
> +};
> +
> +void __attribute__((noinline))
> +c1 (struct T *p)
> +{
> +  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
> +    abort ();
> +  __builtin_memset (p, 0xaa, sizeof (*p));
> +}
> +
> +void __attribute__((noinline))
> +c2 (struct S *p)
> +{
> +  c1 (&p->t);
> +}
> +
> +void __attribute__((noinline))
> +c3 (struct U *p)
> +{
> +  c2 (&p->s[2]);
> +}
> +
> +void __attribute__((noinline))
> +f1 (void)
> +{
> +  u = (struct S) { { 1, 2, 3, 4 } };
> +}
> +
> +void __attribute__((noinline))
> +f2 (void)
> +{
> +  u.t.a = 1;
> +  u.t.b = 2;
> +  u.t.c = 3;
> +  u.t.d = 4;
> +}
> +
> +void __attribute__((noinline))
> +f3 (void)
> +{
> +  u.t.d = 4;
> +  u.t.b = 2;
> +  u.t.a = 1;
> +  u.t.c = 3;
> +}
> +
> +void __attribute__((noinline))
> +f4 (void)
> +{
> +  struct S v;
> +  v.t.a = 1;
> +  v.t.b = 2;
> +  v.t.c = 3;
> +  v.t.d = 4;
> +  c2 (&v);
> +}
> +
> +void __attribute__((noinline))
> +f5 (struct S *p)
> +{
> +  p->t.a = 1;
> +  p->t.c = 3;
> +  p->t.d = 4;
> +  p->t.b = 2;
> +}
> +
> +void __attribute__((noinline))
> +f6 (void)
> +{
> +  struct U v;
> +  v.s[2].t.a = 1;
> +  v.s[2].t.b = 2;
> +  v.s[2].t.c = 3;
> +  v.s[2].t.d = 4;
> +  c3 (&v);
> +}
> +
> +void __attribute__((noinline))
> +f7 (struct U *p)
> +{
> +  p->s[2].t.a = 1;
> +  p->s[2].t.c = 3;
> +  p->s[2].t.d = 4;
> +  p->s[2].t.b = 2;
> +}
> +
> +int
> +main (void)
> +{
> +  struct U w;
> +  f1 ();
> +  c2 (&u);
> +  f2 ();
> +  c1 (&u.t);
> +  f3 ();
> +  c2 (&u);
> +  f4 ();
> +  f5 (&u);
> +  c2 (&u);
> +  f6 ();
> +  f7 (&w);
> +  c3 (&w);
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
> --- gcc/testsuite/gcc.c-torture/execute/pr22141-2.c.jj	2008-11-26 20:45:52.000000000 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr22141-2.c	2008-11-26 20:45:52.000000000 +0100
> @@ -0,0 +1,122 @@
> +/* PR middle-end/22141 */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  struct T
> +    {
> +      char a;
> +      char b;
> +      char c;
> +      char d;
> +    } t;
> +} u __attribute__((aligned));
> +
> +struct U
> +{
> +  struct S s[4];
> +};
> +
> +void __attribute__((noinline))
> +c1 (struct T *p)
> +{
> +  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
> +    abort ();
> +  __builtin_memset (p, 0xaa, sizeof (*p));
> +}
> +
> +void __attribute__((noinline))
> +c2 (struct S *p)
> +{
> +  c1 (&p->t);
> +}
> +
> +void __attribute__((noinline))
> +c3 (struct U *p)
> +{
> +  c2 (&p->s[2]);
> +}
> +
> +void __attribute__((noinline))
> +f1 (void)
> +{
> +  u = (struct S) { { 1, 2, 3, 4 } };
> +}
> +
> +void __attribute__((noinline))
> +f2 (void)
> +{
> +  u.t.a = 1;
> +  u.t.b = 2;
> +  u.t.c = 3;
> +  u.t.d = 4;
> +}
> +
> +void __attribute__((noinline))
> +f3 (void)
> +{
> +  u.t.d = 4;
> +  u.t.b = 2;
> +  u.t.a = 1;
> +  u.t.c = 3;
> +}
> +
> +void __attribute__((noinline))
> +f4 (void)
> +{
> +  struct S v __attribute__((aligned));
> +  v.t.a = 1;
> +  v.t.b = 2;
> +  v.t.c = 3;
> +  v.t.d = 4;
> +  c2 (&v);
> +}
> +
> +void __attribute__((noinline))
> +f5 (struct S *p)
> +{
> +  p->t.a = 1;
> +  p->t.c = 3;
> +  p->t.d = 4;
> +  p->t.b = 2;
> +}
> +
> +void __attribute__((noinline))
> +f6 (void)
> +{
> +  struct U v __attribute__((aligned));
> +  v.s[2].t.a = 1;
> +  v.s[2].t.b = 2;
> +  v.s[2].t.c = 3;
> +  v.s[2].t.d = 4;
> +  c3 (&v);
> +}
> +
> +void __attribute__((noinline))
> +f7 (struct U *p)
> +{
> +  p->s[2].t.a = 1;
> +  p->s[2].t.c = 3;
> +  p->s[2].t.d = 4;
> +  p->s[2].t.b = 2;
> +}
> +
> +int
> +main (void)
> +{
> +  struct U w __attribute__((aligned));
> +  f1 ();
> +  c2 (&u);
> +  f2 ();
> +  c1 (&u.t);
> +  f3 ();
> +  c2 (&u);
> +  f4 ();
> +  f5 (&u);
> +  c2 (&u);
> +  f6 ();
> +  f7 (&w);
> +  c3 (&w);
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr22141-1.c.jj	2008-11-26 20:45:52.000000000 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr22141-1.c	2008-11-26 20:45:52.000000000 +0100
> @@ -0,0 +1,122 @@
> +/* PR middle-end/22141 */
> +
> +extern void abort (void);
> +
> +struct S
> +{
> +  struct T
> +    {
> +      char a;
> +      char b;
> +      char c;
> +      char d;
> +    } t;
> +} u;
> +
> +struct U
> +{
> +  struct S s[4];
> +};
> +
> +void __attribute__((noinline))
> +c1 (struct T *p)
> +{
> +  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
> +    abort ();
> +  __builtin_memset (p, 0xaa, sizeof (*p));
> +}
> +
> +void __attribute__((noinline))
> +c2 (struct S *p)
> +{
> +  c1 (&p->t);
> +}
> +
> +void __attribute__((noinline))
> +c3 (struct U *p)
> +{
> +  c2 (&p->s[2]);
> +}
> +
> +void __attribute__((noinline))
> +f1 (void)
> +{
> +  u = (struct S) { { 1, 2, 3, 4 } };
> +}
> +
> +void __attribute__((noinline))
> +f2 (void)
> +{
> +  u.t.a = 1;
> +  u.t.b = 2;
> +  u.t.c = 3;
> +  u.t.d = 4;
> +}
> +
> +void __attribute__((noinline))
> +f3 (void)
> +{
> +  u.t.d = 4;
> +  u.t.b = 2;
> +  u.t.a = 1;
> +  u.t.c = 3;
> +}
> +
> +void __attribute__((noinline))
> +f4 (void)
> +{
> +  struct S v;
> +  v.t.a = 1;
> +  v.t.b = 2;
> +  v.t.c = 3;
> +  v.t.d = 4;
> +  c2 (&v);
> +}
> +
> +void __attribute__((noinline))
> +f5 (struct S *p)
> +{
> +  p->t.a = 1;
> +  p->t.c = 3;
> +  p->t.d = 4;
> +  p->t.b = 2;
> +}
> +
> +void __attribute__((noinline))
> +f6 (void)
> +{
> +  struct U v;
> +  v.s[2].t.a = 1;
> +  v.s[2].t.b = 2;
> +  v.s[2].t.c = 3;
> +  v.s[2].t.d = 4;
> +  c3 (&v);
> +}
> +
> +void __attribute__((noinline))
> +f7 (struct U *p)
> +{
> +  p->s[2].t.a = 1;
> +  p->s[2].t.c = 3;
> +  p->s[2].t.d = 4;
> +  p->s[2].t.b = 2;
> +}
> +
> +int
> +main (void)
> +{
> +  struct U w;
> +  f1 ();
> +  c2 (&u);
> +  f2 ();
> +  c1 (&u.t);
> +  f3 ();
> +  c2 (&u);
> +  f4 ();
> +  f5 (&u);
> +  c2 (&u);
> +  f6 ();
> +  f7 (&w);
> +  c3 (&w);
> +  return 0;
> +}
> 
> 
> 	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]