[PATCH] Merge adjacent stores of constants (PR middle-end/22141)

Jakub Jelinek jakub@redhat.com
Thu Nov 27 15:56:00 GMT 2008


Hi!

This patch merges adjacent stores of constants into larger
stores (up to word size; going larger isn't IMNSHO beneficial).
The optimization is done as part of DSE1, even though it is only
loosely related to DSE (in order to eliminate some stores, it first
appends a new, larger store after them and then eliminates
2 or more of them as dead stores), because DSE has all the
infrastructure and information needed for this optimization.

E.g.
struct A { char a, b, c, d, e, f, g, h; };
void __attribute__((noinline)) foo (int count, struct A *p)
{
  for (int i = 0; i < count; i++, p++)
    {
      p->a = 1; p->b = 2; p->c = 3; p->d = 4;
      p->e = 5; p->f = 6; p->g = 7; p->h = 8;
    }
}
struct A a[10000] __attribute__((aligned (8)));
int main (void) { for (int i = 0; i < 100000; i++) foo (10000, a); return 0; }
got 3 times faster on AMD Barcelona at -O2.  Before the patch:
        movb    $1, (%rsi)
        movb    $2, 1(%rsi)
        movb    $3, 2(%rsi)
        movb    $4, 3(%rsi)
        movb    $5, 4(%rsi)
        movb    $6, 5(%rsi)
        movb    $7, 6(%rsi)
        movb    $8, 7(%rsi)
and with the patch:
        movabsq $578437695752307201, %rdx
        movq    %rdx, (%rsi)
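(For reference, 578437695752307201 is 0x0807060504030201, i.e. the
eight byte constants 1..8 combined into one word in little-endian
byte order.)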
Similarly on ppc32, instead of generating (when not in a loop):
        li 0,8
        stb 0,7(4)
        li 0,7
        stb 0,6(4)
        li 0,6
        stb 0,5(4)
        li 0,5
        stb 0,4(4)
        li 0,4
        stb 0,3(4)
        li 0,3
        stb 0,2(4)
        li 0,2
        stb 0,1(4)
        li 0,1
        stb 0,0(4)
it generates:
        lis 0,0x506
        ori 0,0,1800
        stw 0,4(4)
        lis 0,0x102
        ori 0,0,772
        stw 0,0(4)
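(Again for reference: the lis/ori pairs build 0x0506 << 16 | 1800
== 0x05060708 and 0x0102 << 16 | 772 == 0x01020304, i.e. the byte
constants combined in big-endian byte order.)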
This optimization triggered 54197 times during x86_64-linux
bootstrap/regtest.  cc1's .text shrank by approx. 3KB, libgcj.so's
.text by 3.5KB.

Unfortunately, when I said for PR37135 that my patch transforms that
bug into a dup of PR22141, that isn't the case, as the transformation
needs both DSE1 (with my PR37135 change) and combine.  But DSE2 is
run after reload, and thus merging adjacent stores there is much
harder (not done in the patch below).  As the code already calls
cselib_expand_value_rtx for the rhs (not needed for i386/x86_64, but
needed e.g. for ppc/ppc64), I wonder whether it shouldn't also, as a
follow-up patch, special-case common bitfield-setting patterns (i.e.
read from memory, AND with a constant, optionally OR with some
constant, store into the same memory).  Also, on ppc (and probably
any arch whose store insns don't allow the RHS to be an immediate)
the testcase above isn't optimized, because the constant setters are
hoisted before the loop and so cselib doesn't see them, as they are
in a separate BB.  I wonder whether I could trust REG_EQUAL notes
containing a CONST_INT that forwprop generates (and in which cases
should I trust them?  Only if the insn is a single_set?).
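
To make the bitfield case concrete, the source-level pattern I have
in mind is something like this (a made-up illustration, not a
testcase from any PR); each such assignment expands at the RTL level
into a read from memory, an AND with a constant mask, an OR with a
constant, and a store back into the same memory:

struct B { unsigned int lo : 4, hi : 4; };
void
set_bits (struct B *p)
{
  /* Both stores become read-modify-write sequences with
     constant masks and constant OR operands at the RTL level.  */
  p->lo = 5;
  p->hi = 11;
}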

I'll bootstrap/regtest it on ppc-linux and ia64-linux soon (already
done on x86_64-linux); could somebody please benchmark this with
SPEC?

2008-11-27  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/22141
	* dse.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
	(struct adjacent_store_info): New type.
	(dse_encode_int, dse_decode_int, merge_adjacent_stores): New
	functions.
	(record_store): Add adjacent_store argument, fill it in.
	(scan_insn): Add adjacent_store variable, update record_store
	callers, call merge_adjacent_stores.
	* cselib.c (cselib_expand_value_rtx): Don't wrap CONST_INTs
	into CONST.  Handle SUBREG specially, to be able to simplify
	subregs of constants.
	(cselib_notice_new_pseudos): New function.
	* cselib.h (cselib_notice_new_pseudos): New prototype.

	* dse.c (replace_read): Don't optimize PDP-endian targets.

	* gcc.c-torture/execute/pr22141-1.c: New test.
	* gcc.c-torture/execute/pr22141-2.c: New test.
	* gcc.target/i386/pr22141.c: New test.

--- gcc/dse.c.jj	2008-11-24 12:05:40.000000000 +0100
+++ gcc/dse.c	2008-11-27 13:18:42.000000000 +0100
@@ -188,6 +188,10 @@ along with GCC; see the file COPYING3.  
      does, assuming that the alias sets can be manipulated in the same
      way.  */
 
+#ifndef SLOW_UNALIGNED_ACCESS
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
+#endif
+
 /* There are limits to the size of constant offsets we model for the
    global problem.  There are certainly test cases, that exceed this
    limit, however, it is unlikely that there are important programs
@@ -535,6 +539,13 @@ static alloc_pool clear_alias_mode_pool;
    this for vararg functions because they play games with the frame.  */
 static bool stores_off_frame_dead_at_return;
 
+struct adjacent_store_info
+{
+  unsigned HOST_WIDE_INT mask;
+  unsigned char value[8];
+  unsigned HOST_WIDE_INT alignment[8];
+};
+
 /* Counter for stats.  */
 static int globally_deleted; 
 static int locally_deleted; 
@@ -1161,16 +1172,53 @@ clear_rhs_from_active_local_stores (void
 }
 
 
+/* Store an integer into memory in target byte order starting
+   at PTR + START and ending at PTR + START + WIDTH, modifying
+   only bytes set in MASK.  */
+
+static void
+dse_encode_int (unsigned HOST_WIDE_INT value, unsigned char *ptr,
+		unsigned int start, unsigned int width,
+		unsigned HOST_WIDE_INT mask)
+{
+  unsigned int byte, end = start + width;
+  for (byte = start; byte < end; byte++)
+    {
+      if (mask & (((unsigned HOST_WIDE_INT) 1) << byte))
+	ptr[byte]
+	  = value >> ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
+		      * BITS_PER_UNIT);
+    }
+}
+
+
+/* Read back an integer from memory in target byte order.  */
+
+static unsigned HOST_WIDE_INT
+dse_decode_int (unsigned char *ptr, unsigned int start, unsigned int width)
+{
+  unsigned HOST_WIDE_INT value = 0;
+  unsigned int byte, end = start + width;
+  for (byte = start; byte < end; byte++)
+    value |= ((unsigned HOST_WIDE_INT) ptr[byte])
+	     << ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
+		 * BITS_PER_UNIT);
+  return value;
+}
+
+
 /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
    there is a candidate store, after adding it to the appropriate
    local store group if so.  */
 
 static int
-record_store (rtx body, bb_info_t bb_info)
+record_store (rtx body, bb_info_t bb_info,
+	      struct adjacent_store_info *adjacent_store)
 {
-  rtx mem;
+  rtx mem, rhs;
   HOST_WIDE_INT offset = 0;
   HOST_WIDE_INT width = 0;
+  HOST_WIDE_INT beg = 0;
   alias_set_type spill_alias_set;
   insn_info_t insn_info = bb_info->last_insn;
   store_info_t store_info = NULL;
@@ -1221,7 +1269,7 @@ record_store (rtx body, bb_info_t bb_inf
 
   /* We can still process a volatile mem, we just cannot delete it.  */
   if (MEM_VOLATILE_P (mem))
-      insn_info->cannot_delete = true;
+    insn_info->cannot_delete = true;
 
   if (!canon_address (mem, &spill_alias_set, &group_id, &offset, &base))
     {
@@ -1283,6 +1331,53 @@ record_store (rtx body, bb_info_t bb_inf
 		 (int)offset, (int)(offset+width));
     }
 
+  if (GET_CODE (body) == SET
+      /* No place to keep the value after ra.  */
+      && !reload_completed
+      && (REG_P (SET_SRC (body))
+	  || GET_CODE (SET_SRC (body)) == SUBREG
+	  || CONSTANT_P (SET_SRC (body)))
+      /* Sometimes the store and reload is used for truncation and
+	 rounding.  */
+      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
+    {
+      rhs = cselib_expand_value_rtx (SET_SRC (body), scratch, 5);
+      if (!rhs || !CONSTANT_P (rhs))
+	rhs = SET_SRC (body);
+    }
+  else
+    rhs = NULL;
+
+  /* Attempt to merge adjacent subword stores.  */
+  if (width < UNITS_PER_WORD
+      && CHAR_BIT == 8
+      && BITS_PER_UNIT == 8
+      && !reload_completed
+      && GET_CODE (body) == SET
+      && rhs
+      && GET_CODE (rhs) == CONST_INT
+      && GET_MODE_CLASS (GET_MODE (mem)) == MODE_INT
+      && !insn_info->cannot_delete
+      && !spill_alias_set)
+    {
+      beg = offset & (UNITS_PER_WORD - 1);
+      if (beg + width > UNITS_PER_WORD
+	  || (beg % width) != 0)
+	adjacent_store->mask = 0;
+      else
+	{
+	  adjacent_store->mask = ~(unsigned HOST_WIDE_INT) 0;
+	  adjacent_store->mask <<= HOST_BITS_PER_WIDE_INT - width;
+	  adjacent_store->mask >>= HOST_BITS_PER_WIDE_INT - (width + beg);
+	  dse_encode_int (INTVAL (rhs), adjacent_store->value, beg, width,
+			  adjacent_store->mask);
+	  adjacent_store->alignment[beg] = MEM_ALIGN (mem);
+	  beg = offset - beg;
+	}
+    }
+  else
+    adjacent_store->mask = 0;
+
   /* Check to see if this stores causes some other stores to be
      dead.  */
   ptr = active_local_stores;
@@ -1333,6 +1428,42 @@ record_store (rtx body, bb_info_t bb_inf
 	    if (i >= s_info->begin && i < s_info->end)
 	      s_info->positions_needed
 		&= ~(((unsigned HOST_WIDE_INT) 1) << (i - s_info->begin));
+
+	  /* Attempt to merge adjacent subword stores.  */
+	  if (adjacent_store->mask
+	      && s_info->begin < beg + UNITS_PER_WORD
+	      && s_info->end > beg)
+	    {
+	      unsigned HOST_WIDE_INT mask;
+
+	      if (ptr->cannot_delete
+		  || s_info->begin < beg
+		  || s_info->end > beg + UNITS_PER_WORD
+		  || ((s_info->begin - beg)
+		       & (s_info->end - s_info->begin - 1))
+		  || s_info->rhs == NULL
+		  || GET_CODE (s_info->rhs) != CONST_INT
+		  || GET_MODE_CLASS (GET_MODE (s_info->mem)) != MODE_INT
+		  || (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem)
+		      && MEM_ALIAS_SET (s_info->mem)
+		      && MEM_ALIAS_SET (mem)))
+		adjacent_store->mask = 0;
+	      else
+		{
+		  mask = ~(unsigned HOST_WIDE_INT) 0;
+		  mask <<= HOST_BITS_PER_WIDE_INT
+			   - (s_info->end - s_info->begin);
+		  mask >>= HOST_BITS_PER_WIDE_INT - (s_info->end - beg);
+		  mask &= ~adjacent_store->mask;
+		  dse_encode_int (INTVAL (s_info->rhs), adjacent_store->value,
+				  s_info->begin - beg,
+				  s_info->end - s_info->begin, mask);
+		  adjacent_store->mask |= mask;
+		  adjacent_store->alignment[s_info->begin - beg]
+		    = MAX (adjacent_store->alignment[s_info->begin - beg],
+			   MEM_ALIGN (s_info->mem));
+		}
+	    }
 	}
       else if (s_info->rhs)
 	/* Need to see if it is possible for this store to overwrite
@@ -1382,20 +1513,8 @@ record_store (rtx body, bb_info_t bb_inf
   store_info->begin = offset;
   store_info->end = offset + width;
   store_info->is_set = GET_CODE (body) == SET;
+  store_info->rhs = rhs;
 
-  if (store_info->is_set 
-      /* No place to keep the value after ra.  */
-      && !reload_completed
-      && (REG_P (SET_SRC (body))
-	  || GET_CODE (SET_SRC (body)) == SUBREG
-	  || CONSTANT_P (SET_SRC (body)))
-      /* Sometimes the store and reload is used for truncation and
-	 rounding.  */
-      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
-    store_info->rhs = SET_SRC (body);
-  else
-    store_info->rhs = NULL;
-  
   /* If this is a clobber, we return 0.  We will only be able to
      delete this insn if there is only one store USED store, but we
      can use the clobber to delete other stores earlier.  */
@@ -1574,7 +1693,10 @@ replace_read (store_info_t store_info, i
   int access_size; /* In bytes.  */
   rtx insns, read_reg;
 
-  if (!dbg_cnt (dse))
+  if (!dbg_cnt (dse)
+      || (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+	  && (GET_MODE_BITSIZE (store_mode) > BITS_PER_WORD
+	      || GET_MODE_BITSIZE (read_mode) > BITS_PER_WORD)))
     return false;
 
   /* To get here the read is within the boundaries of the write so
@@ -1949,6 +2071,178 @@ check_mem_read_use (rtx *loc, void *data
   for_each_rtx (loc, check_mem_read_rtx, data);
 }
 
+/* Attempt to merge adjacent stores of constants into one bigger
+   store.  */
+
+static void
+merge_adjacent_stores (struct adjacent_store_info *adjacent_store)
+{
+  store_info_t store_info = active_local_stores->store_rec;
+  insn_info_t ptr;
+  enum machine_mode mode, chosen_mode = VOIDmode;
+  unsigned HOST_WIDE_INT chosen_offset = 0;
+  HOST_WIDE_INT beg, mem_offset, value;
+  int count = 0;
+  rtx cst, mem, insn, insns, set, canon_addr;
+  tree expr;
+
+  /* Skip the clobbers.  */
+  while (!store_info->is_set)
+    store_info = store_info->next;
+
+  for (mode = GET_MODE_WIDER_MODE (GET_MODE (store_info->mem));
+       GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
+       mode = GET_MODE_WIDER_MODE (mode))
+    {
+      unsigned HOST_WIDE_INT mask, offset;
+
+      offset = ((store_info->begin & (UNITS_PER_WORD - 1))
+		- (store_info->begin & (GET_MODE_SIZE (mode) - 1)));
+      mask = (((HOST_WIDE_INT) 1 << GET_MODE_SIZE (mode)) - 1) << offset;
+      if ((adjacent_store->mask & mask) == mask
+	  && !SLOW_UNALIGNED_ACCESS (mode, adjacent_store->alignment[offset]))
+	{
+	  chosen_mode = mode;
+	  chosen_offset = offset;
+	}
+    }
+  if (chosen_mode == VOIDmode)
+    return;
+
+  value = dse_decode_int (adjacent_store->value, chosen_offset,
+			  GET_MODE_SIZE (chosen_mode));
+
+  start_sequence ();
+  cst = GEN_INT (trunc_int_for_mode (value, chosen_mode));
+  canon_addr = XEXP (store_info->mem, 0);
+  XEXP (store_info->mem, 0) = store_info->mem_addr;
+  mem = adjust_address (store_info->mem, chosen_mode,
+			chosen_offset
+			- (store_info->begin & (UNITS_PER_WORD - 1)));
+  XEXP (store_info->mem, 0) = canon_addr;
+  if (MEM_ALIGN (mem) < adjacent_store->alignment[chosen_offset])
+    set_mem_align (mem, adjacent_store->alignment[chosen_offset]);
+  expr = MEM_EXPR (mem);
+  if (MEM_OFFSET (mem) && GET_CODE (MEM_OFFSET (mem)) != CONST_INT)
+    set_mem_offset (mem, NULL_RTX);
+  mem_offset = MEM_OFFSET (mem) ? INTVAL (MEM_OFFSET (mem)) : 0;
+  beg = store_info->begin
+	- (store_info->begin & (GET_MODE_SIZE (chosen_mode) - 1));
+
+  for (ptr = active_local_stores; ptr; ptr = ptr->next_local_store)
+    {
+      store_info_t s_info = ptr->store_rec;
+
+      /* Skip the clobbers.  */
+      while (!s_info->is_set)
+	s_info = s_info->next;
+
+      if (s_info->alias_set
+	  || s_info->group_id != store_info->group_id
+	  || s_info->cse_base != store_info->cse_base
+	  || s_info->begin >= beg + GET_MODE_SIZE (chosen_mode)
+	  || s_info->end <= beg)
+	continue;
+
+      count++;
+
+      if (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem))
+	{
+	  gcc_assert (MEM_ALIAS_SET (s_info->mem) == 0
+		      || MEM_ALIAS_SET (mem) == 0);
+	  if (MEM_ALIAS_SET (s_info->mem) == 0)
+	    set_mem_alias_set (mem, 0);
+	}
+
+      if (expr && MEM_EXPR (s_info->mem) != expr)
+	{
+	  tree op0 = expr;
+	  tree op1 = MEM_EXPR (s_info->mem);
+	  int depth0 = 0, depth1 = 0;
+
+	  while (op0 && TREE_CODE (op0) == COMPONENT_REF)
+	    {
+	      op0 = TREE_OPERAND (op0, 0);
+	      depth0++;
+	    }
+	  while (op1 && TREE_CODE (op1) == COMPONENT_REF)
+	    {
+	      op1 = TREE_OPERAND (op1, 0);
+	      depth1++;
+	    }
+	  if ((op0 == NULL) != (op1 == NULL)
+	      || (op0 != NULL && !operand_equal_p (op0, op1, 0)))
+	    expr = NULL_TREE;
+	  else
+	    {
+	      op1 = MEM_EXPR (s_info->mem);
+	      while (depth1 > depth0)
+		{
+		  op1 = TREE_OPERAND (op1, 0);
+		  depth1--;
+		}
+	      while (depth0)
+		{
+		  if (depth0 == depth1
+		      && TREE_OPERAND (expr, 1) == TREE_OPERAND (op1, 1))
+		    break;
+		  if (MEM_OFFSET (mem))
+		    {
+		      tree off = component_ref_field_offset (expr);
+		      tree bit_off
+			= DECL_FIELD_BIT_OFFSET (TREE_OPERAND (expr, 1));
+		      if (off == NULL_TREE
+			  || !host_integerp (off, 0)
+			  || bit_off == NULL_TREE
+			  || !host_integerp (bit_off, 0))
+			{
+			  expr = NULL_TREE;
+			  break;
+			}
+		      mem_offset += tree_low_cst (off, 0);
+		      mem_offset += tree_low_cst (bit_off, 0) / BITS_PER_UNIT;
+		    }
+		  expr = TREE_OPERAND (expr, 0);
+		  if (depth0 == depth1)
+		    {
+		      op1 = TREE_OPERAND (op1, 0);
+		      depth1--;
+		    }
+		  depth0--;
+		}
+	    }
+	}
+    }
+  if (MEM_EXPR (mem) != expr)
+    set_mem_expr (mem, expr);
+  if (MEM_OFFSET (mem))
+    {
+      if (!expr)
+	set_mem_offset (mem, NULL_RTX);
+      else if (INTVAL (MEM_OFFSET (mem)) != mem_offset)
+	set_mem_offset (mem, GEN_INT (mem_offset));
+    }
+
+  emit_move_insn (mem, cst);
+  insn = get_last_insn ();
+  set = single_set (insn);
+  if (set && SET_DEST (set) == mem && SET_SRC (set) != cst)
+    set_unique_reg_note (insn, REG_EQUAL, cst);
+  insns = get_insns ();
+  end_sequence ();
+
+  emit_insn_after (insns, active_local_stores->insn);
+  cselib_notice_new_pseudos ();
+  if (dump_file)
+    {
+      fprintf (dump_file, "Merging %d adjacent constant stores into ", count);
+      print_inline_rtx (dump_file, mem, 0);
+      fprintf (dump_file, " = ");
+      print_inline_rtx (dump_file, cst, 0);
+      fprintf (dump_file, "\n");
+    }
+}
+
 /* Apply record_store to all candidate stores in INSN.  Mark INSN
    if some part of it is not a candidate store and assigns to a
    non-register target.  */
@@ -1959,6 +2253,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
   rtx body;
   insn_info_t insn_info = (insn_info_t) pool_alloc (insn_info_pool);
   int mems_found = 0;
+  struct adjacent_store_info adjacent_store;
+
   memset (insn_info, 0, sizeof (struct insn_info));
 
   if (dump_file)
@@ -2060,16 +2356,20 @@ scan_insn (bb_info_t bb_info, rtx insn)
       || (RTX_FRAME_RELATED_P (insn))
       || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
     insn_info->cannot_delete = true;
-  
+
+  memset (&adjacent_store, '\0', sizeof (adjacent_store));
+  gcc_assert (BITS_PER_WORD <= HOST_BITS_PER_WIDE_INT);
+  gcc_assert (UNITS_PER_WORD <= ARRAY_SIZE (adjacent_store.alignment));
   body = PATTERN (insn);
   if (GET_CODE (body) == PARALLEL)
     {
       int i;
       for (i = 0; i < XVECLEN (body, 0); i++)
-	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
+	mems_found += record_store (XVECEXP (body, 0, i), bb_info,
+				    &adjacent_store);
     }
   else
-    mems_found += record_store (body, bb_info);
+    mems_found += record_store (body, bb_info, &adjacent_store);
 
   if (dump_file)
     fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n", 
@@ -2083,6 +2383,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
     {
       insn_info->next_local_store = active_local_stores;
       active_local_stores = insn_info;
+      if (adjacent_store.mask)
+	merge_adjacent_stores (&adjacent_store);
     }
   else
     insn_info->cannot_delete = true;
--- gcc/cselib.c.jj	2008-11-26 20:44:28.000000000 +0100
+++ gcc/cselib.c	2008-11-27 11:05:29.000000000 +0100
@@ -1007,25 +1007,30 @@ cselib_expand_value_rtx (rtx orig, bitma
 	return orig;
       break;
 
-
-    case VALUE:
+    case SUBREG:
       {
-	rtx result;
-	if (dump_file)
-	  fprintf (dump_file, "expanding value %s into: ", GET_MODE_NAME (GET_MODE (orig)));
-	
-	result = expand_loc (CSELIB_VAL_PTR (orig)->locs, regs_active, max_depth);
-	if (result 
-	    && GET_CODE (result) == CONST_INT
-	    && GET_MODE (orig) != VOIDmode)
-	  {
-	    result = gen_rtx_CONST (GET_MODE (orig), result);
-	    if (dump_file)
-	      fprintf (dump_file, "  wrapping const_int result in const to preserve mode %s\n", 
-		       GET_MODE_NAME (GET_MODE (orig)));
-	  }
-	return result;
+	rtx subreg = cselib_expand_value_rtx (SUBREG_REG (orig), regs_active,
+					      max_depth - 1);
+	if (!subreg)
+	  return NULL;
+	scopy = simplify_gen_subreg (GET_MODE (orig), subreg,
+				     GET_MODE (SUBREG_REG (orig)),
+				     SUBREG_BYTE (orig));
+	if (scopy == NULL
+	    || (GET_CODE (scopy) == SUBREG
+		&& !REG_P (SUBREG_REG (scopy))
+		&& !MEM_P (SUBREG_REG (scopy))))
+	  return shallow_copy_rtx (orig);
+	return scopy;
       }
+
+    case VALUE:
+      if (dump_file)
+	fprintf (dump_file, "expanding value %s into: ",
+		 GET_MODE_NAME (GET_MODE (orig)));
+
+      return expand_loc (CSELIB_VAL_PTR (orig)->locs, regs_active, max_depth);
+
     default:
       break;
     }
@@ -1761,6 +1766,29 @@ cselib_init (bool record_memory)
 				   entry_and_rtx_equal_p, NULL);
 }
 
+/* Called when new pseudos were created between cselib_init and
+   cselib_finish and cselib_* routines might see them.  */
+
+void
+cselib_notice_new_pseudos (void)
+{
+  unsigned int nregs = max_reg_num ();
+
+  if (nregs > reg_values_size)
+    {
+      unsigned int new_size = nregs + (63 + nregs) / 16;
+      reg_values = XRESIZEVEC (struct elt_list *, reg_values, new_size);
+      memset (&reg_values[reg_values_size], '\0',
+	      (new_size - reg_values_size) * sizeof (struct elt_list *));
+      reg_values_size = new_size;
+    }
+  if (nregs > cselib_nregs)
+    {
+      cselib_nregs = nregs + (63 + nregs) / 16;
+      used_regs = XRESIZEVEC (unsigned int, used_regs, cselib_nregs);
+    }
+}
+
 /* Called when the current user is done with cselib.  */
 
 void
--- gcc/cselib.h.jj	2008-09-30 16:57:11.000000000 +0200
+++ gcc/cselib.h	2008-11-27 09:41:20.000000000 +0100
@@ -60,6 +60,7 @@ extern void (*cselib_discard_hook) (csel
 
 extern cselib_val *cselib_lookup (rtx, enum machine_mode, int);
 extern void cselib_init (bool record_memory);
+extern void cselib_notice_new_pseudos (void);
 extern void cselib_clear_table (void);
 extern void cselib_finish (void);
 extern void cselib_process_insn (rtx);
--- gcc/testsuite/gcc.target/i386/pr22141.c.jj	2008-11-26 20:45:52.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr22141.c	2008-11-26 20:45:52.000000000 +0100
@@ -0,0 +1,126 @@
+/* PR middle-end/22141 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+struct S
+{
+  struct T
+    {
+      char a;
+      char b;
+      char c;
+      char d;
+    } t;
+} u;
+
+struct U
+{
+  struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+    abort ();
+  __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+  c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+  c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+  u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+  u.t.a = 1;
+  u.t.b = 2;
+  u.t.c = 3;
+  u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+  u.t.d = 4;
+  u.t.b = 2;
+  u.t.a = 1;
+  u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+  struct S v;
+  v.t.a = 1;
+  v.t.b = 2;
+  v.t.c = 3;
+  v.t.d = 4;
+  c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+  p->t.a = 1;
+  p->t.c = 3;
+  p->t.d = 4;
+  p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+  struct U v;
+  v.s[2].t.a = 1;
+  v.s[2].t.b = 2;
+  v.s[2].t.c = 3;
+  v.s[2].t.d = 4;
+  c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+  p->s[2].t.a = 1;
+  p->s[2].t.c = 3;
+  p->s[2].t.d = 4;
+  p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+  struct U w;
+  f1 ();
+  c2 (&u);
+  f2 ();
+  c1 (&u.t);
+  f3 ();
+  c2 (&u);
+  f4 ();
+  f5 (&u);
+  c2 (&u);
+  f6 ();
+  f7 (&w);
+  c3 (&w);
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
--- gcc/testsuite/gcc.c-torture/execute/pr22141-2.c.jj	2008-11-26 20:45:52.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr22141-2.c	2008-11-26 20:45:52.000000000 +0100
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+  struct T
+    {
+      char a;
+      char b;
+      char c;
+      char d;
+    } t;
+} u __attribute__((aligned));
+
+struct U
+{
+  struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+    abort ();
+  __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+  c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+  c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+  u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+  u.t.a = 1;
+  u.t.b = 2;
+  u.t.c = 3;
+  u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+  u.t.d = 4;
+  u.t.b = 2;
+  u.t.a = 1;
+  u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+  struct S v __attribute__((aligned));
+  v.t.a = 1;
+  v.t.b = 2;
+  v.t.c = 3;
+  v.t.d = 4;
+  c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+  p->t.a = 1;
+  p->t.c = 3;
+  p->t.d = 4;
+  p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+  struct U v __attribute__((aligned));
+  v.s[2].t.a = 1;
+  v.s[2].t.b = 2;
+  v.s[2].t.c = 3;
+  v.s[2].t.d = 4;
+  c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+  p->s[2].t.a = 1;
+  p->s[2].t.c = 3;
+  p->s[2].t.d = 4;
+  p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+  struct U w __attribute__((aligned));
+  f1 ();
+  c2 (&u);
+  f2 ();
+  c1 (&u.t);
+  f3 ();
+  c2 (&u);
+  f4 ();
+  f5 (&u);
+  c2 (&u);
+  f6 ();
+  f7 (&w);
+  c3 (&w);
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr22141-1.c.jj	2008-11-26 20:45:52.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr22141-1.c	2008-11-26 20:45:52.000000000 +0100
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+  struct T
+    {
+      char a;
+      char b;
+      char c;
+      char d;
+    } t;
+} u;
+
+struct U
+{
+  struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+    abort ();
+  __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+  c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+  c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+  u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+  u.t.a = 1;
+  u.t.b = 2;
+  u.t.c = 3;
+  u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+  u.t.d = 4;
+  u.t.b = 2;
+  u.t.a = 1;
+  u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+  struct S v;
+  v.t.a = 1;
+  v.t.b = 2;
+  v.t.c = 3;
+  v.t.d = 4;
+  c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+  p->t.a = 1;
+  p->t.c = 3;
+  p->t.d = 4;
+  p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+  struct U v;
+  v.s[2].t.a = 1;
+  v.s[2].t.b = 2;
+  v.s[2].t.c = 3;
+  v.s[2].t.d = 4;
+  c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+  p->s[2].t.a = 1;
+  p->s[2].t.c = 3;
+  p->s[2].t.d = 4;
+  p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+  struct U w;
+  f1 ();
+  c2 (&u);
+  f2 ();
+  c1 (&u.t);
+  f3 ();
+  c2 (&u);
+  f4 ();
+  f5 (&u);
+  c2 (&u);
+  f6 ();
+  f7 (&w);
+  c3 (&w);
+  return 0;
+}

	Jakub


