[PATCH] Merge adjacent stores of constants (PR middle-end/22141)

Jakub Jelinek jakub@redhat.com
Fri Nov 28 10:48:00 GMT 2008


On Fri, Nov 28, 2008 at 12:26:28AM -0200, Luis Machado wrote:
> I've noticed 2% degradation on eon and 3% degradation on perlbmk
> benchmarks for 32-bit PPC. I'm still going through the 64-bit numbers.

That's not very encouraging.  Anyway, the version of the patch I posted
yesterday, as I wrote, will not (usually) merge adjacent stores on ppc
in loops (as the setting of the constants is hoisted before the loop, thus
cselib didn't see them and didn't consider them to be constant stores).

Here is a newer version of the patch that should
1) enable the optimization on STRICT_ALIGNMENT targets if the memory is
   sufficiently aligned
2) use REG_EQUAL notes if they say the store writes a constant.  This
   makes ppc{,64} merge adjacent stores even in loops.
With the small benchmark I posted yesterday, time shows:
		vanilla gcc		with the patch
-O2 -m32	0m3.595s		0m1.908s
-O2 -m64	0m3.606s		0m1.601s

So this patch could make a larger difference on ppc{,64}, and no difference
at all on x86_64 or i386.

Anyway, it would be interesting to understand what caused the 3% degradation
on perlbmk.

The patch has been bootstrapped/regtested on x86_64-linux and
powerpc64-linux (--with-cpu=default32).

2008-11-28  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/22141
	* dse.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
	(struct adjacent_store_info): New type.
	(dse_encode_int, dse_decode_int, merge_adjacent_stores): New
	functions.
	(record_store): Add adjacent_store argument, fill it in.
	(scan_insn): Add adjacent_store variable, update record_store
	callers, call merge_adjacent_stores.
	* cselib.c (cselib_expand_value_rtx): Don't wrap CONST_INTs
	into CONST.  Handle SUBREG specially, to be able to simplify
	subregs of constants.
	(cselib_notice_new_pseudos): New function.
	* cselib.h (cselib_notice_new_pseudos): New prototype.

	* dse.c (replace_read): Don't optimize PDP-endian targets.

	* gcc.c-torture/execute/pr22141-1.c: New test.
	* gcc.c-torture/execute/pr22141-2.c: New test.
	* gcc.target/i386/pr22141.c: New test.

--- gcc/dse.c.jj	2008-11-24 12:05:40.000000000 +0100
+++ gcc/dse.c	2008-11-28 01:00:40.000000000 +0100
@@ -188,6 +188,10 @@ along with GCC; see the file COPYING3.  
      does, assuming that the alias sets can be manipulated in the same
      way.  */
 
+#ifndef SLOW_UNALIGNED_ACCESS
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
+#endif
+
 /* There are limits to the size of constant offsets we model for the
    global problem.  There are certainly test cases, that exceed this
    limit, however, it is unlikely that there are important programs
@@ -535,6 +539,13 @@ static alloc_pool clear_alias_mode_pool;
    this for vararg functions because they play games with the frame.  */
 static bool stores_off_frame_dead_at_return;
 
+struct adjacent_store_info
+{
+  unsigned HOST_WIDE_INT mask;
+  unsigned char value[8];
+  unsigned HOST_WIDE_INT alignment[8];
+};
+
 /* Counter for stats.  */
 static int globally_deleted; 
 static int locally_deleted; 
@@ -1161,16 +1172,53 @@ clear_rhs_from_active_local_stores (void
 }
 
 
+/* Store an integer into memory in target byte order starting
+   at PTR + START and ending at PTR + START + WIDTH, modifying
+   only bytes set in MASK.  */
+
+static void
+dse_encode_int (unsigned HOST_WIDE_INT value, unsigned char *ptr,
+		unsigned int start, unsigned int width,
+		unsigned HOST_WIDE_INT mask)
+{
+  unsigned int byte, end = start + width;
+  for (byte = start; byte < end; byte++)
+    {
+      if (mask & (((unsigned HOST_WIDE_INT) 1) << byte))
+	ptr[byte]
+	  = value >> ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
+		      * BITS_PER_UNIT);
+    }
+}
+
+
+/* Read back an integer from memory in target byte order.  */
+
+static unsigned HOST_WIDE_INT
+dse_decode_int (unsigned char *ptr, unsigned int start, unsigned int width)
+{
+  unsigned HOST_WIDE_INT value = 0;
+  unsigned int byte, end = start + width;
+  for (byte = start; byte < end; byte++)
+    value |= ((unsigned HOST_WIDE_INT) ptr[byte])
+	     << ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
+		 * BITS_PER_UNIT);
+  return value;
+}
+
+
 /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
    there is a candidate store, after adding it to the appropriate
    local store group if so.  */
 
 static int
-record_store (rtx body, bb_info_t bb_info)
+record_store (rtx body, bb_info_t bb_info,
+	      struct adjacent_store_info *adjacent_store)
 {
-  rtx mem;
+  rtx mem, rhs;
   HOST_WIDE_INT offset = 0;
   HOST_WIDE_INT width = 0;
+  HOST_WIDE_INT beg = 0;
   alias_set_type spill_alias_set;
   insn_info_t insn_info = bb_info->last_insn;
   store_info_t store_info = NULL;
@@ -1221,7 +1269,7 @@ record_store (rtx body, bb_info_t bb_inf
 
   /* We can still process a volatile mem, we just cannot delete it.  */
   if (MEM_VOLATILE_P (mem))
-      insn_info->cannot_delete = true;
+    insn_info->cannot_delete = true;
 
   if (!canon_address (mem, &spill_alias_set, &group_id, &offset, &base))
     {
@@ -1283,6 +1331,61 @@ record_store (rtx body, bb_info_t bb_inf
 		 (int)offset, (int)(offset+width));
     }
 
+  if (GET_CODE (body) == SET
+      /* No place to keep the value after ra.  */
+      && !reload_completed
+      && (REG_P (SET_SRC (body))
+	  || GET_CODE (SET_SRC (body)) == SUBREG
+	  || CONSTANT_P (SET_SRC (body)))
+      /* Sometimes the store and reload is used for truncation and
+	 rounding.  */
+      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
+    {
+      rhs = NULL;
+      if (body == PATTERN (insn_info->insn))
+	{
+	  rtx tem = find_reg_note (insn_info->insn, REG_EQUAL, NULL_RTX);
+	  if (tem && GET_CODE (XEXP (tem, 0)) == CONST_INT)
+	    rhs = XEXP (tem, 0);
+	}
+      if (!rhs)
+	rhs = cselib_expand_value_rtx (SET_SRC (body), scratch, 5);
+      if (!rhs || !CONSTANT_P (rhs))
+	rhs = SET_SRC (body);
+    }
+  else
+    rhs = NULL;
+
+  /* Attempt to merge adjacent subword stores.  */
+  if (width < UNITS_PER_WORD
+      && CHAR_BIT == 8
+      && BITS_PER_UNIT == 8
+      && !reload_completed
+      && GET_CODE (body) == SET
+      && rhs
+      && GET_CODE (rhs) == CONST_INT
+      && GET_MODE_CLASS (GET_MODE (mem)) == MODE_INT
+      && !insn_info->cannot_delete
+      && !spill_alias_set)
+    {
+      beg = offset & (UNITS_PER_WORD - 1);
+      if (beg + width > UNITS_PER_WORD
+	  || (beg % width) != 0)
+	adjacent_store->mask = 0;
+      else
+	{
+	  adjacent_store->mask = ~(unsigned HOST_WIDE_INT) 0;
+	  adjacent_store->mask <<= HOST_BITS_PER_WIDE_INT - width;
+	  adjacent_store->mask >>= HOST_BITS_PER_WIDE_INT - (width + beg);
+	  dse_encode_int (INTVAL (rhs), adjacent_store->value, beg, width,
+			  adjacent_store->mask);
+	  adjacent_store->alignment[beg] = MEM_ALIGN (mem);
+	  beg = offset - beg;
+	}
+    }
+  else
+    adjacent_store->mask = 0;
+
   /* Check to see if this stores causes some other stores to be
      dead.  */
   ptr = active_local_stores;
@@ -1333,6 +1436,42 @@ record_store (rtx body, bb_info_t bb_inf
 	    if (i >= s_info->begin && i < s_info->end)
 	      s_info->positions_needed
 		&= ~(((unsigned HOST_WIDE_INT) 1) << (i - s_info->begin));
+
+	  /* Attempt to merge adjacent subword stores.  */
+	  if (adjacent_store->mask
+	      && s_info->begin < beg + UNITS_PER_WORD
+	      && s_info->end > beg)
+	    {
+	      unsigned HOST_WIDE_INT mask;
+
+	      if (ptr->cannot_delete
+		  || s_info->begin < beg
+		  || s_info->end > beg + UNITS_PER_WORD
+		  || ((s_info->begin - beg)
+		       & (s_info->end - s_info->begin - 1))
+		  || s_info->rhs == NULL
+		  || GET_CODE (s_info->rhs) != CONST_INT
+		  || GET_MODE_CLASS (GET_MODE (s_info->mem)) != MODE_INT
+		  || (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem)
+		      && MEM_ALIAS_SET (s_info->mem)
+		      && MEM_ALIAS_SET (mem)))
+		adjacent_store->mask = 0;
+	      else
+		{
+		  mask = ~(unsigned HOST_WIDE_INT) 0;
+		  mask <<= HOST_BITS_PER_WIDE_INT
+			   - (s_info->end - s_info->begin);
+		  mask >>= HOST_BITS_PER_WIDE_INT - (s_info->end - beg);
+		  mask &= ~adjacent_store->mask;
+		  dse_encode_int (INTVAL (s_info->rhs), adjacent_store->value,
+				  s_info->begin - beg,
+				  s_info->end - s_info->begin, mask);
+		  adjacent_store->mask |= mask;
+		  adjacent_store->alignment[s_info->begin - beg]
+		    = MAX (adjacent_store->alignment[s_info->begin - beg],
+			   MEM_ALIGN (s_info->mem));
+		}
+	    }
 	}
       else if (s_info->rhs)
 	/* Need to see if it is possible for this store to overwrite
@@ -1382,20 +1521,8 @@ record_store (rtx body, bb_info_t bb_inf
   store_info->begin = offset;
   store_info->end = offset + width;
   store_info->is_set = GET_CODE (body) == SET;
+  store_info->rhs = rhs;
 
-  if (store_info->is_set 
-      /* No place to keep the value after ra.  */
-      && !reload_completed
-      && (REG_P (SET_SRC (body))
-	  || GET_CODE (SET_SRC (body)) == SUBREG
-	  || CONSTANT_P (SET_SRC (body)))
-      /* Sometimes the store and reload is used for truncation and
-	 rounding.  */
-      && !(FLOAT_MODE_P (GET_MODE (mem)) && (flag_float_store)))
-    store_info->rhs = SET_SRC (body);
-  else
-    store_info->rhs = NULL;
-  
   /* If this is a clobber, we return 0.  We will only be able to
      delete this insn if there is only one store USED store, but we
      can use the clobber to delete other stores earlier.  */
@@ -1574,7 +1701,10 @@ replace_read (store_info_t store_info, i
   int access_size; /* In bytes.  */
   rtx insns, read_reg;
 
-  if (!dbg_cnt (dse))
+  if (!dbg_cnt (dse)
+      || (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+	  && (GET_MODE_BITSIZE (store_mode) > BITS_PER_WORD
+	      || GET_MODE_BITSIZE (read_mode) > BITS_PER_WORD)))
     return false;
 
   /* To get here the read is within the boundaries of the write so
@@ -1949,6 +2079,180 @@ check_mem_read_use (rtx *loc, void *data
   for_each_rtx (loc, check_mem_read_rtx, data);
 }
 
+/* Attempt to merge adjacent stores of constants into one bigger
+   store.  */
+
+static void
+merge_adjacent_stores (struct adjacent_store_info *adjacent_store)
+{
+  store_info_t store_info = active_local_stores->store_rec;
+  insn_info_t ptr;
+  enum machine_mode mode, chosen_mode = VOIDmode;
+  unsigned HOST_WIDE_INT chosen_offset = 0;
+  HOST_WIDE_INT beg, mem_offset, value;
+  int count = 0;
+  rtx cst, mem, insn, insns, set, canon_addr;
+  tree expr;
+
+  /* Skip the clobbers.  */
+  while (!store_info->is_set)
+    store_info = store_info->next;
+
+  for (mode = GET_MODE_WIDER_MODE (GET_MODE (store_info->mem));
+       GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
+       mode = GET_MODE_WIDER_MODE (mode))
+    {
+      unsigned HOST_WIDE_INT mask, offset;
+
+      offset = ((store_info->begin & (UNITS_PER_WORD - 1))
+		- (store_info->begin & (GET_MODE_SIZE (mode) - 1)));
+      mask = (((HOST_WIDE_INT) 1 << GET_MODE_SIZE (mode)) - 1) << offset;
+      if ((adjacent_store->mask & mask) == mask
+	  && (GET_MODE_ALIGNMENT (mode) <= adjacent_store->alignment[offset]
+	      || !SLOW_UNALIGNED_ACCESS (mode,
+					 adjacent_store->alignment[offset])))
+	{
+	  chosen_mode = mode;
+	  chosen_offset = offset;
+	}
+    }
+  if (chosen_mode == VOIDmode)
+    return;
+
+  value = dse_decode_int (adjacent_store->value, chosen_offset,
+			  GET_MODE_SIZE (chosen_mode));
+
+  start_sequence ();
+  cst = GEN_INT (trunc_int_for_mode (value, chosen_mode));
+  canon_addr = XEXP (store_info->mem, 0);
+  XEXP (store_info->mem, 0) = store_info->mem_addr;
+  mem = adjust_address (store_info->mem, chosen_mode,
+			chosen_offset
+			- (store_info->begin & (UNITS_PER_WORD - 1)));
+  XEXP (store_info->mem, 0) = canon_addr;
+  if (MEM_ALIGN (mem) < adjacent_store->alignment[chosen_offset])
+    set_mem_align (mem, adjacent_store->alignment[chosen_offset]);
+  expr = MEM_EXPR (mem);
+  if (MEM_OFFSET (mem) && GET_CODE (MEM_OFFSET (mem)) != CONST_INT)
+    set_mem_offset (mem, NULL_RTX);
+  mem_offset = MEM_OFFSET (mem) ? INTVAL (MEM_OFFSET (mem)) : 0;
+  beg = store_info->begin
+	- (store_info->begin & (GET_MODE_SIZE (chosen_mode) - 1));
+
+  for (ptr = active_local_stores; ptr; ptr = ptr->next_local_store)
+    {
+      store_info_t s_info = ptr->store_rec;
+
+      /* Skip the clobbers.  */
+      while (!s_info->is_set)
+	s_info = s_info->next;
+
+      if (s_info->alias_set
+	  || s_info->group_id != store_info->group_id
+	  || s_info->cse_base != store_info->cse_base
+	  || s_info->begin >= beg + GET_MODE_SIZE (chosen_mode)
+	  || s_info->end <= beg)
+	continue;
+
+      count++;
+
+      if (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem))
+	{
+	  gcc_assert (MEM_ALIAS_SET (s_info->mem) == 0
+		      || MEM_ALIAS_SET (mem) == 0);
+	  if (MEM_ALIAS_SET (s_info->mem) == 0)
+	    set_mem_alias_set (mem, 0);
+	}
+
+      if (expr && MEM_EXPR (s_info->mem) != expr)
+	{
+	  tree op0 = expr;
+	  tree op1 = MEM_EXPR (s_info->mem);
+	  int depth0 = 0, depth1 = 0;
+
+	  while (op0 && TREE_CODE (op0) == COMPONENT_REF)
+	    {
+	      op0 = TREE_OPERAND (op0, 0);
+	      depth0++;
+	    }
+	  while (op1 && TREE_CODE (op1) == COMPONENT_REF)
+	    {
+	      op1 = TREE_OPERAND (op1, 0);
+	      depth1++;
+	    }
+	  if ((op0 == NULL) != (op1 == NULL)
+	      || (op0 != NULL && !operand_equal_p (op0, op1, 0)))
+	    expr = NULL_TREE;
+	  else
+	    {
+	      op1 = MEM_EXPR (s_info->mem);
+	      while (depth1 > depth0)
+		{
+		  op1 = TREE_OPERAND (op1, 0);
+		  depth1--;
+		}
+	      while (depth0)
+		{
+		  if (depth0 == depth1
+		      && TREE_OPERAND (expr, 1) == TREE_OPERAND (op1, 1))
+		    break;
+		  if (MEM_OFFSET (mem))
+		    {
+		      tree off = component_ref_field_offset (expr);
+		      tree bit_off
+			= DECL_FIELD_BIT_OFFSET (TREE_OPERAND (expr, 1));
+		      if (off == NULL_TREE
+			  || !host_integerp (off, 0)
+			  || bit_off == NULL_TREE
+			  || !host_integerp (bit_off, 0))
+			{
+			  expr = NULL_TREE;
+			  break;
+			}
+		      mem_offset += tree_low_cst (off, 0);
+		      mem_offset += tree_low_cst (bit_off, 0) / BITS_PER_UNIT;
+		    }
+		  expr = TREE_OPERAND (expr, 0);
+		  if (depth0 == depth1)
+		    {
+		      op1 = TREE_OPERAND (op1, 0);
+		      depth1--;
+		    }
+		  depth0--;
+		}
+	    }
+	}
+    }
+  if (MEM_EXPR (mem) != expr)
+    set_mem_expr (mem, expr);
+  if (MEM_OFFSET (mem))
+    {
+      if (!expr)
+	set_mem_offset (mem, NULL_RTX);
+      else if (INTVAL (MEM_OFFSET (mem)) != mem_offset)
+	set_mem_offset (mem, GEN_INT (mem_offset));
+    }
+
+  emit_move_insn (mem, cst);
+  insn = get_last_insn ();
+  set = single_set (insn);
+  if (set && SET_DEST (set) == mem && SET_SRC (set) != cst)
+    set_unique_reg_note (insn, REG_EQUAL, cst);
+  insns = get_insns ();
+  end_sequence ();
+
+  emit_insn_after (insns, active_local_stores->insn);
+  cselib_notice_new_pseudos ();
+  if (dump_file)
+    {
+      fprintf (dump_file, "Merging %d adjacent constant stores into ", count);
+      print_inline_rtx (dump_file, mem, 0);
+      fprintf (dump_file, " = ");
+      print_inline_rtx (dump_file, cst, 0);
+      fprintf (dump_file, "\n");
+    }
+}
+
 /* Apply record_store to all candidate stores in INSN.  Mark INSN
    if some part of it is not a candidate store and assigns to a
    non-register target.  */
@@ -1959,6 +2263,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
   rtx body;
   insn_info_t insn_info = (insn_info_t) pool_alloc (insn_info_pool);
   int mems_found = 0;
+  struct adjacent_store_info adjacent_store;
+
   memset (insn_info, 0, sizeof (struct insn_info));
 
   if (dump_file)
@@ -2060,16 +2366,20 @@ scan_insn (bb_info_t bb_info, rtx insn)
       || (RTX_FRAME_RELATED_P (insn))
       || find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
     insn_info->cannot_delete = true;
-  
+
+  memset (&adjacent_store, '\0', sizeof (adjacent_store));
+  gcc_assert (BITS_PER_WORD <= HOST_BITS_PER_WIDE_INT);
+  gcc_assert (UNITS_PER_WORD <= ARRAY_SIZE (adjacent_store.alignment));
   body = PATTERN (insn);
   if (GET_CODE (body) == PARALLEL)
     {
       int i;
       for (i = 0; i < XVECLEN (body, 0); i++)
-	mems_found += record_store (XVECEXP (body, 0, i), bb_info);
+	mems_found += record_store (XVECEXP (body, 0, i), bb_info,
+				    &adjacent_store);
     }
   else
-    mems_found += record_store (body, bb_info);
+    mems_found += record_store (body, bb_info, &adjacent_store);
 
   if (dump_file)
     fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n", 
@@ -2083,6 +2393,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
     {
       insn_info->next_local_store = active_local_stores;
       active_local_stores = insn_info;
+      if (adjacent_store.mask)
+	merge_adjacent_stores (&adjacent_store);
     }
   else
     insn_info->cannot_delete = true;
--- gcc/cselib.c.jj	2008-11-26 20:44:28.000000000 +0100
+++ gcc/cselib.c	2008-11-27 11:05:29.000000000 +0100
@@ -1007,25 +1007,30 @@ cselib_expand_value_rtx (rtx orig, bitma
 	return orig;
       break;
 
-
-    case VALUE:
+    case SUBREG:
       {
-	rtx result;
-	if (dump_file)
-	  fprintf (dump_file, "expanding value %s into: ", GET_MODE_NAME (GET_MODE (orig)));
-	
-	result = expand_loc (CSELIB_VAL_PTR (orig)->locs, regs_active, max_depth);
-	if (result 
-	    && GET_CODE (result) == CONST_INT
-	    && GET_MODE (orig) != VOIDmode)
-	  {
-	    result = gen_rtx_CONST (GET_MODE (orig), result);
-	    if (dump_file)
-	      fprintf (dump_file, "  wrapping const_int result in const to preserve mode %s\n", 
-		       GET_MODE_NAME (GET_MODE (orig)));
-	  }
-	return result;
+	rtx subreg = cselib_expand_value_rtx (SUBREG_REG (orig), regs_active,
+					      max_depth - 1);
+	if (!subreg)
+	  return NULL;
+	scopy = simplify_gen_subreg (GET_MODE (orig), subreg,
+				     GET_MODE (SUBREG_REG (orig)),
+				     SUBREG_BYTE (orig));
+	if (scopy == NULL
+	    || (GET_CODE (scopy) == SUBREG
+		&& !REG_P (SUBREG_REG (scopy))
+		&& !MEM_P (SUBREG_REG (scopy))))
+	  return shallow_copy_rtx (orig);
+	return scopy;
       }
+
+    case VALUE:
+      if (dump_file)
+	fprintf (dump_file, "expanding value %s into: ",
+		 GET_MODE_NAME (GET_MODE (orig)));
+
+      return expand_loc (CSELIB_VAL_PTR (orig)->locs, regs_active, max_depth);
+
     default:
       break;
     }
@@ -1761,6 +1766,29 @@ cselib_init (bool record_memory)
 				   entry_and_rtx_equal_p, NULL);
 }
 
+/* Called when new pseudos were created between cselib_init and
+   cselib_finish and cselib_* routines might see them.  */
+
+void
+cselib_notice_new_pseudos (void)
+{
+  unsigned int nregs = max_reg_num ();
+
+  if (nregs > reg_values_size)
+    {
+      unsigned int new_size = nregs + (63 + nregs) / 16;
+      reg_values = XRESIZEVEC (struct elt_list *, reg_values, new_size);
+      memset (&reg_values[reg_values_size], '\0',
+	      (new_size - reg_values_size) * sizeof (struct elt_list *));
+      reg_values_size = new_size;
+    }
+  if (nregs > cselib_nregs)
+    {
+      cselib_nregs = nregs + (63 + nregs) / 16;
+      used_regs = XRESIZEVEC (unsigned int, used_regs, cselib_nregs);
+    }
+}
+
 /* Called when the current user is done with cselib.  */
 
 void
--- gcc/cselib.h.jj	2008-09-30 16:57:11.000000000 +0200
+++ gcc/cselib.h	2008-11-27 09:41:20.000000000 +0100
@@ -60,6 +60,7 @@ extern void (*cselib_discard_hook) (csel
 
 extern cselib_val *cselib_lookup (rtx, enum machine_mode, int);
 extern void cselib_init (bool record_memory);
+extern void cselib_notice_new_pseudos (void);
 extern void cselib_clear_table (void);
 extern void cselib_finish (void);
 extern void cselib_process_insn (rtx);
--- gcc/testsuite/gcc.target/i386/pr22141.c.jj	2008-11-26 20:45:52.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/pr22141.c	2008-11-26 20:45:52.000000000 +0100
@@ -0,0 +1,126 @@
+/* PR middle-end/22141 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+struct S
+{
+  struct T
+    {
+      char a;
+      char b;
+      char c;
+      char d;
+    } t;
+} u;
+
+struct U
+{
+  struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+    abort ();
+  __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+  c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+  c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+  u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+  u.t.a = 1;
+  u.t.b = 2;
+  u.t.c = 3;
+  u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+  u.t.d = 4;
+  u.t.b = 2;
+  u.t.a = 1;
+  u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+  struct S v;
+  v.t.a = 1;
+  v.t.b = 2;
+  v.t.c = 3;
+  v.t.d = 4;
+  c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+  p->t.a = 1;
+  p->t.c = 3;
+  p->t.d = 4;
+  p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+  struct U v;
+  v.s[2].t.a = 1;
+  v.s[2].t.b = 2;
+  v.s[2].t.c = 3;
+  v.s[2].t.d = 4;
+  c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+  p->s[2].t.a = 1;
+  p->s[2].t.c = 3;
+  p->s[2].t.d = 4;
+  p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+  struct U w;
+  f1 ();
+  c2 (&u);
+  f2 ();
+  c1 (&u.t);
+  f3 ();
+  c2 (&u);
+  f4 ();
+  f5 (&u);
+  c2 (&u);
+  f6 ();
+  f7 (&w);
+  c3 (&w);
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
--- gcc/testsuite/gcc.c-torture/execute/pr22141-2.c.jj	2008-11-26 20:45:52.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr22141-2.c	2008-11-26 20:45:52.000000000 +0100
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+  struct T
+    {
+      char a;
+      char b;
+      char c;
+      char d;
+    } t;
+} u __attribute__((aligned));
+
+struct U
+{
+  struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+    abort ();
+  __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+  c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+  c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+  u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+  u.t.a = 1;
+  u.t.b = 2;
+  u.t.c = 3;
+  u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+  u.t.d = 4;
+  u.t.b = 2;
+  u.t.a = 1;
+  u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+  struct S v __attribute__((aligned));
+  v.t.a = 1;
+  v.t.b = 2;
+  v.t.c = 3;
+  v.t.d = 4;
+  c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+  p->t.a = 1;
+  p->t.c = 3;
+  p->t.d = 4;
+  p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+  struct U v __attribute__((aligned));
+  v.s[2].t.a = 1;
+  v.s[2].t.b = 2;
+  v.s[2].t.c = 3;
+  v.s[2].t.d = 4;
+  c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+  p->s[2].t.a = 1;
+  p->s[2].t.c = 3;
+  p->s[2].t.d = 4;
+  p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+  struct U w __attribute__((aligned));
+  f1 ();
+  c2 (&u);
+  f2 ();
+  c1 (&u.t);
+  f3 ();
+  c2 (&u);
+  f4 ();
+  f5 (&u);
+  c2 (&u);
+  f6 ();
+  f7 (&w);
+  c3 (&w);
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr22141-1.c.jj	2008-11-26 20:45:52.000000000 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr22141-1.c	2008-11-26 20:45:52.000000000 +0100
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+  struct T
+    {
+      char a;
+      char b;
+      char c;
+      char d;
+    } t;
+} u;
+
+struct U
+{
+  struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+  if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+    abort ();
+  __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+  c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+  c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+  u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+  u.t.a = 1;
+  u.t.b = 2;
+  u.t.c = 3;
+  u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+  u.t.d = 4;
+  u.t.b = 2;
+  u.t.a = 1;
+  u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+  struct S v;
+  v.t.a = 1;
+  v.t.b = 2;
+  v.t.c = 3;
+  v.t.d = 4;
+  c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+  p->t.a = 1;
+  p->t.c = 3;
+  p->t.d = 4;
+  p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+  struct U v;
+  v.s[2].t.a = 1;
+  v.s[2].t.b = 2;
+  v.s[2].t.c = 3;
+  v.s[2].t.d = 4;
+  c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+  p->s[2].t.a = 1;
+  p->s[2].t.c = 3;
+  p->s[2].t.d = 4;
+  p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+  struct U w;
+  f1 ();
+  c2 (&u);
+  f2 ();
+  c1 (&u.t);
+  f3 ();
+  c2 (&u);
+  f4 ();
+  f5 (&u);
+  c2 (&u);
+  f6 ();
+  f7 (&w);
+  c3 (&w);
+  return 0;
+}


	Jakub



More information about the Gcc-patches mailing list