[PATCH] Updated patch for adjacent store merging (PR middle-end/22141)
- From: Jakub Jelinek <jakub@redhat.com>
- To: gcc-patches@gcc.gnu.org
- Date: Thu, 24 Sep 2009 21:31:25 +0200
- Subject: [PATCH] Updated patch for adjacent store merging (PR middle-end/22141)
- Reply-to: Jakub Jelinek <jakub@redhat.com>
Hi!
This is an updated version of the adjacent store merging patch from last
November. Bootstrapped/regtested on x86_64-linux and i686-linux.
At the moment it always attempts to merge when possible, unless the merged
store is not known to be aligned and SLOW_UNALIGNED_ACCESS is true; if you
have ideas on how to tune that decision better, I'd appreciate them a lot.
For -Os, or for basic blocks optimized for size, probably just the
instruction sizes should count (perhaps using get_attr_length); for
optimization for speed, merging could either be done only for aligned
stores, or could differentiate between code inside loops and outside of
them, etc. It might also be useful to check rtx_cost, though that alone
will not be ideal (e.g. on x86_64 the rtx cost of a CONST_INT is usually 0;
only for constants needing movabsq is it 3).
2009-09-24 Jakub Jelinek <jakub@redhat.com>
PR middle-end/22141
* dse.c (SLOW_UNALIGNED_ACCESS): Define if not defined.
(struct adjacent_store_info): New type.
(dse_encode_int, dse_decode_int, merge_adjacent_stores): New
functions.
(record_store): Add adjacent_store argument, fill it in.
(scan_insn): Add adjacent_store variable, update record_store
callers, call merge_adjacent_stores.
* cselib.c (cselib_notice_new_pseudos): New function.
* cselib.h (cselib_notice_new_pseudos): New prototype.
* dse.c (replace_read): Don't optimize PDP-endian targets.
* gcc.c-torture/execute/pr22141-1.c: New test.
* gcc.c-torture/execute/pr22141-2.c: New test.
* gcc.target/i386/pr22141.c: New test.
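A note for reviewers on the byte-order handling in the new dse_encode_int
and dse_decode_int helpers: the scheme can be modeled outside GCC by the
standalone sketch below, where uint64_t stands in for unsigned
HOST_WIDE_INT, a big_endian flag for BYTES_BIG_ENDIAN, 8 for BITS_PER_UNIT,
and the model_* names are mine, not from the patch. With a full mask,
encoding and then decoding the same byte range round-trips the value for
either endianness.

#include <stdint.h>

/* Standalone model of dse_encode_int (sketch, not GCC code): write the
   WIDTH low-order bytes of VALUE into PTR at offsets START through
   START + WIDTH - 1 in target byte order, touching only bytes whose bit
   is set in MASK.  */
static void
model_encode (uint64_t value, unsigned char *ptr, unsigned start,
              unsigned width, uint64_t mask, int big_endian)
{
  unsigned byte, end = start + width;
  for (byte = start; byte < end; byte++)
    if (mask & ((uint64_t) 1 << byte))
      ptr[byte] = (unsigned char)
        (value >> ((big_endian ? end - byte - 1 : byte - start) * 8));
}

/* Standalone model of dse_decode_int (sketch, not GCC code): reassemble
   the host integer from the bytes written by model_encode.  */
static uint64_t
model_decode (const unsigned char *ptr, unsigned start, unsigned width,
              int big_endian)
{
  uint64_t value = 0;
  unsigned byte, end = start + width;
  for (byte = start; byte < end; byte++)
    value |= (uint64_t) ptr[byte]
             << ((big_endian ? end - byte - 1 : byte - start) * 8);
  return value;
}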
--- gcc/dse.c.jj 2009-09-16 09:06:29.000000000 +0200
+++ gcc/dse.c 2009-09-24 14:57:42.000000000 +0200
@@ -192,6 +192,10 @@ along with GCC; see the file COPYING3.
does, assuming that the alias sets can be manipulated in the same
way. */
+#ifndef SLOW_UNALIGNED_ACCESS
+#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
+#endif
+
/* There are limits to the size of constant offsets we model for the
global problem. There are certainly test cases, that exceed this
limit, however, it is unlikely that there are important programs
@@ -577,6 +581,13 @@ static alloc_pool clear_alias_mode_pool;
this for vararg functions because they play games with the frame. */
static bool stores_off_frame_dead_at_return;
+struct adjacent_store_info
+{
+ unsigned HOST_WIDE_INT mask;
+ unsigned char value[8];
+ unsigned HOST_WIDE_INT alignment[8];
+};
+
/* Counter for stats. */
static int globally_deleted;
static int locally_deleted;
@@ -1297,16 +1308,53 @@ static rtx get_stored_val (store_info_t,
HOST_WIDE_INT, basic_block, bool);
+/* Store an integer into memory in target byte order starting
+ at PTR + START and ending at PTR + START + WIDTH, modifying
+ only bytes set in MASK. */
+
+static void
+dse_encode_int (unsigned HOST_WIDE_INT value, unsigned char *ptr,
+ unsigned int start, unsigned int width,
+ unsigned HOST_WIDE_INT mask)
+{
+ unsigned int byte, end = start + width;
+ for (byte = start; byte < end; byte++)
+ {
+ if (mask & (((unsigned HOST_WIDE_INT) 1) << byte))
+ ptr[byte]
+ = value >> ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
+ * BITS_PER_UNIT);
+ }
+}
+
+
+/* Read back an integer from memory in target byte order. */
+
+static unsigned HOST_WIDE_INT
+dse_decode_int (unsigned char *ptr, unsigned int start, unsigned int width)
+{
+ unsigned HOST_WIDE_INT value = 0;
+ unsigned int byte, end = start + width;
+ for (byte = start; byte < end; byte++)
+ value |= ((unsigned HOST_WIDE_INT) ptr[byte])
+ << ((BYTES_BIG_ENDIAN ? end - byte - 1 : byte - start)
+ * BITS_PER_UNIT);
+ return value;
+}
+
+
/* BODY is an instruction pattern that belongs to INSN. Return 1 if
there is a candidate store, after adding it to the appropriate
local store group if so. */
static int
-record_store (rtx body, bb_info_t bb_info)
+record_store (rtx body, bb_info_t bb_info,
+ struct adjacent_store_info *adjacent_store)
{
rtx mem, rhs, const_rhs, mem_addr;
HOST_WIDE_INT offset = 0;
HOST_WIDE_INT width = 0;
+ HOST_WIDE_INT beg = 0;
alias_set_type spill_alias_set;
insn_info_t insn_info = bb_info->last_insn;
store_info_t store_info = NULL;
@@ -1469,6 +1517,36 @@ record_store (rtx body, bb_info_t bb_inf
}
}
+ /* Attempt to merge adjacent subword stores. */
+ if (width < UNITS_PER_WORD
+ && CHAR_BIT == 8
+ && BITS_PER_UNIT == 8
+ && !reload_completed
+ && GET_CODE (body) == SET
+ && rhs
+ && GET_CODE (rhs) == CONST_INT
+ && GET_MODE_CLASS (GET_MODE (mem)) == MODE_INT
+ && !insn_info->cannot_delete
+ && !spill_alias_set)
+ {
+ beg = offset & (UNITS_PER_WORD - 1);
+ if (beg + width > UNITS_PER_WORD
+ || (beg % width) != 0)
+ adjacent_store->mask = 0;
+ else
+ {
+ adjacent_store->mask = ~(unsigned HOST_WIDE_INT) 0;
+ adjacent_store->mask <<= HOST_BITS_PER_WIDE_INT - width;
+ adjacent_store->mask >>= HOST_BITS_PER_WIDE_INT - (width + beg);
+ dse_encode_int (INTVAL (rhs), adjacent_store->value, beg, width,
+ adjacent_store->mask);
+ adjacent_store->alignment[beg] = MEM_ALIGN (mem);
+ beg = offset - beg;
+ }
+ }
+ else
+ adjacent_store->mask = 0;
+
/* Check to see if this stores causes some other stores to be
dead. */
ptr = active_local_stores;
@@ -1573,6 +1651,42 @@ record_store (rtx body, bb_info_t bb_inf
i < offset + width && i < s_info->end;
i++)
set_position_unneeded (s_info, i - s_info->begin);
+
+ /* Attempt to merge adjacent subword stores. */
+ if (adjacent_store->mask
+ && s_info->begin < beg + UNITS_PER_WORD
+ && s_info->end > beg)
+ {
+ unsigned HOST_WIDE_INT mask;
+
+ if (ptr->cannot_delete
+ || s_info->begin < beg
+ || s_info->end > beg + UNITS_PER_WORD
+ || ((s_info->begin - beg)
+ & (s_info->end - s_info->begin - 1))
+ || s_info->rhs == NULL
+ || GET_CODE (s_info->rhs) != CONST_INT
+ || GET_MODE_CLASS (GET_MODE (s_info->mem)) != MODE_INT
+ || (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem)
+ && MEM_ALIAS_SET (s_info->mem)
+ && MEM_ALIAS_SET (mem)))
+ adjacent_store->mask = 0;
+ else
+ {
+ mask = ~(unsigned HOST_WIDE_INT) 0;
+ mask <<= HOST_BITS_PER_WIDE_INT
+ - (s_info->end - s_info->begin);
+ mask >>= HOST_BITS_PER_WIDE_INT - (s_info->end - beg);
+ mask &= ~adjacent_store->mask;
+ dse_encode_int (INTVAL (s_info->rhs), adjacent_store->value,
+ s_info->begin - beg,
+ s_info->end - s_info->begin, mask);
+ adjacent_store->mask |= mask;
+ adjacent_store->alignment[s_info->begin - beg]
+ = MAX (adjacent_store->alignment[s_info->begin - beg],
+ MEM_ALIGN (s_info->mem));
+ }
+ }
}
else if (s_info->rhs)
/* Need to see if it is possible for this store to overwrite
@@ -1909,7 +2023,10 @@ replace_read (store_info_t store_info, i
rtx insns, this_insn, read_reg;
basic_block bb;
- if (!dbg_cnt (dse))
+ if (!dbg_cnt (dse)
+ || (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+ && (GET_MODE_BITSIZE (store_mode) > BITS_PER_WORD
+ || GET_MODE_BITSIZE (read_mode) > BITS_PER_WORD)))
return false;
/* Create a sequence of instructions to set up the read register.
@@ -2367,6 +2484,177 @@ get_call_args (rtx call_insn, tree fn, r
}
+/* Attempt to merge adjacent stores of constants into one bigger
+ store. */
+
+static void
+merge_adjacent_stores (struct adjacent_store_info *adjacent_store)
+{
+ store_info_t store_info = active_local_stores->store_rec;
+ insn_info_t ptr;
+ enum machine_mode mode, chosen_mode = VOIDmode;
+ unsigned HOST_WIDE_INT chosen_offset = 0;
+ HOST_WIDE_INT beg, mem_offset, value;
+ int count = 0;
+ rtx cst, mem, insn, insns, set;
+ tree expr;
+
+ /* Skip the clobbers. */
+ while (!store_info->is_set)
+ store_info = store_info->next;
+
+ for (mode = GET_MODE_WIDER_MODE (GET_MODE (store_info->mem));
+ GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
+ mode = GET_MODE_WIDER_MODE (mode))
+ {
+ unsigned HOST_WIDE_INT mask, offset;
+
+ offset = ((store_info->begin & (UNITS_PER_WORD - 1))
+ - (store_info->begin & (GET_MODE_SIZE (mode) - 1)));
+ mask = (((HOST_WIDE_INT) 1 << GET_MODE_SIZE (mode)) - 1) << offset;
+ if ((adjacent_store->mask & mask) == mask
+ && (GET_MODE_ALIGNMENT (mode) <= adjacent_store->alignment[offset]
+ || !SLOW_UNALIGNED_ACCESS (mode,
+ adjacent_store->alignment[offset])))
+ {
+ chosen_mode = mode;
+ chosen_offset = offset;
+ }
+ }
+ if (chosen_mode == VOIDmode)
+ return;
+
+ value = dse_decode_int (adjacent_store->value, chosen_offset,
+ GET_MODE_SIZE (chosen_mode));
+
+ start_sequence ();
+ cst = GEN_INT (trunc_int_for_mode (value, chosen_mode));
+ mem = adjust_address (store_info->mem, chosen_mode,
+ chosen_offset
+ - (store_info->begin & (UNITS_PER_WORD - 1)));
+ if (MEM_ALIGN (mem) < adjacent_store->alignment[chosen_offset])
+ set_mem_align (mem, adjacent_store->alignment[chosen_offset]);
+ expr = MEM_EXPR (mem);
+ if (MEM_OFFSET (mem) && GET_CODE (MEM_OFFSET (mem)) != CONST_INT)
+ set_mem_offset (mem, NULL_RTX);
+ mem_offset = MEM_OFFSET (mem) ? INTVAL (MEM_OFFSET (mem)) : 0;
+ beg = store_info->begin
+ - (store_info->begin & (GET_MODE_SIZE (chosen_mode) - 1));
+
+ for (ptr = active_local_stores; ptr; ptr = ptr->next_local_store)
+ {
+ store_info_t s_info = ptr->store_rec;
+
+ /* Skip the clobbers. */
+ while (!s_info->is_set)
+ s_info = s_info->next;
+
+ if (s_info->alias_set
+ || s_info->group_id != store_info->group_id
+ || s_info->cse_base != store_info->cse_base
+ || s_info->begin >= beg + GET_MODE_SIZE (chosen_mode)
+ || s_info->end <= beg)
+ continue;
+
+ count++;
+
+ if (MEM_ALIAS_SET (s_info->mem) != MEM_ALIAS_SET (mem))
+ {
+ gcc_assert (MEM_ALIAS_SET (s_info->mem) == 0
+ || MEM_ALIAS_SET (mem) == 0);
+ if (MEM_ALIAS_SET (s_info->mem) == 0)
+ set_mem_alias_set (mem, 0);
+ }
+
+ if (expr && MEM_EXPR (s_info->mem) != expr)
+ {
+ tree op0 = expr;
+ tree op1 = MEM_EXPR (s_info->mem);
+ int depth0 = 0, depth1 = 0;
+
+ while (op0 && TREE_CODE (op0) == COMPONENT_REF)
+ {
+ op0 = TREE_OPERAND (op0, 0);
+ depth0++;
+ }
+ while (op1 && TREE_CODE (op1) == COMPONENT_REF)
+ {
+ op1 = TREE_OPERAND (op1, 0);
+ depth1++;
+ }
+ if ((op0 == NULL) != (op1 == NULL)
+ || (op0 != NULL && !operand_equal_p (op0, op1, 0)))
+ expr = NULL_TREE;
+ else
+ {
+ op1 = MEM_EXPR (s_info->mem);
+ while (depth1 > depth0)
+ {
+ op1 = TREE_OPERAND (op1, 0);
+ depth1--;
+ }
+ while (depth0)
+ {
+ if (depth0 == depth1
+ && TREE_OPERAND (expr, 1) == TREE_OPERAND (op1, 1))
+ break;
+ if (MEM_OFFSET (mem))
+ {
+ tree off = component_ref_field_offset (expr);
+ tree bit_off
+ = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (expr, 1));
+ if (off == NULL_TREE
+ || !host_integerp (off, 0)
+ || bit_off == NULL_TREE
+ || !host_integerp (bit_off, 0))
+ {
+ expr = NULL_TREE;
+ break;
+ }
+ mem_offset += tree_low_cst (off, 0);
+ mem_offset += tree_low_cst (bit_off, 0) / BITS_PER_UNIT;
+ }
+ expr = TREE_OPERAND (expr, 0);
+ if (depth0 == depth1)
+ {
+ op1 = TREE_OPERAND (op1, 0);
+ depth1--;
+ }
+ depth0--;
+ }
+ }
+ }
+ }
+ if (MEM_EXPR (mem) != expr)
+ set_mem_expr (mem, expr);
+ if (MEM_OFFSET (mem))
+ {
+ if (!expr)
+ set_mem_offset (mem, NULL_RTX);
+ else if (INTVAL (MEM_OFFSET (mem)) != mem_offset)
+ set_mem_offset (mem, GEN_INT (mem_offset));
+ }
+
+ emit_move_insn (mem, cst);
+ insn = get_last_insn ();
+ set = single_set (insn);
+ if (set && SET_DEST (set) == mem && SET_SRC (set) != cst)
+ set_unique_reg_note (insn, REG_EQUAL, cst);
+ insns = get_insns ();
+ end_sequence ();
+
+ emit_insn_after (insns, active_local_stores->insn);
+ cselib_notice_new_pseudos ();
+ if (dump_file)
+ {
+ fprintf (dump_file, "Merging %d adjacent constant stores into ", count);
+ print_inline_rtx (dump_file, mem, 0);
+ fprintf (dump_file, " = ");
+ print_inline_rtx (dump_file, cst, 0);
+ fprintf (dump_file, "\n");
+ }
+}
+
/* Apply record_store to all candidate stores in INSN. Mark INSN
if some part of it is not a candidate store and assigns to a
non-register target. */
@@ -2377,6 +2665,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
rtx body;
insn_info_t insn_info = (insn_info_t) pool_alloc (insn_info_pool);
int mems_found = 0;
+ struct adjacent_store_info adjacent_store;
+
memset (insn_info, 0, sizeof (struct insn_info));
if (dump_file)
@@ -2407,6 +2697,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
/* Look at all of the uses in the insn. */
note_uses (&PATTERN (insn), check_mem_read_use, bb_info);
+ memset (&adjacent_store, '\0', sizeof (adjacent_store));
+
if (CALL_P (insn))
{
bool const_call;
@@ -2508,7 +2800,7 @@ scan_insn (bb_info_t bb_info, rtx insn)
rtx mem = gen_rtx_MEM (BLKmode, args[0]);
set_mem_size (mem, args[2]);
body = gen_rtx_SET (VOIDmode, mem, args[1]);
- mems_found += record_store (body, bb_info);
+ mems_found += record_store (body, bb_info, &adjacent_store);
if (dump_file)
fprintf (dump_file, "handling memset as BLKmode store\n");
if (mems_found == 1)
@@ -2535,16 +2827,19 @@ scan_insn (bb_info_t bb_info, rtx insn)
|| (RTX_FRAME_RELATED_P (insn))
|| find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX))
insn_info->cannot_delete = true;
-
+
+ gcc_assert (BITS_PER_WORD <= HOST_BITS_PER_WIDE_INT);
+ gcc_assert (UNITS_PER_WORD <= ARRAY_SIZE (adjacent_store.alignment));
body = PATTERN (insn);
if (GET_CODE (body) == PARALLEL)
{
int i;
for (i = 0; i < XVECLEN (body, 0); i++)
- mems_found += record_store (XVECEXP (body, 0, i), bb_info);
+ mems_found += record_store (XVECEXP (body, 0, i), bb_info,
+ &adjacent_store);
}
else
- mems_found += record_store (body, bb_info);
+ mems_found += record_store (body, bb_info, &adjacent_store);
if (dump_file)
fprintf (dump_file, "mems_found = %d, cannot_delete = %s\n",
@@ -2558,6 +2853,8 @@ scan_insn (bb_info_t bb_info, rtx insn)
{
insn_info->next_local_store = active_local_stores;
active_local_stores = insn_info;
+ if (adjacent_store.mask)
+ merge_adjacent_stores (&adjacent_store);
}
else
insn_info->cannot_delete = true;
--- gcc/cselib.c.jj 2009-09-11 13:10:26.000000000 +0200
+++ gcc/cselib.c 2009-09-24 12:36:49.000000000 +0200
@@ -2038,6 +2038,29 @@ cselib_init (bool record_memory)
entry_and_rtx_equal_p, NULL);
}
+/* Called when new pseudos were created between cselib_init and
+ cselib_finish and cselib_* routines might see them. */
+
+void
+cselib_notice_new_pseudos (void)
+{
+ unsigned int nregs = max_reg_num ();
+
+ if (nregs > reg_values_size)
+ {
+ unsigned int new_size = nregs + (63 + nregs) / 16;
+ reg_values = XRESIZEVEC (struct elt_list *, reg_values, new_size);
+ memset (&reg_values[reg_values_size], '\0',
+ (new_size - reg_values_size) * sizeof (struct elt_list *));
+ reg_values_size = new_size;
+ }
+ if (nregs > cselib_nregs)
+ {
+ cselib_nregs = nregs + (63 + nregs) / 16;
+ used_regs = XRESIZEVEC (unsigned int, used_regs, cselib_nregs);
+ }
+}
+
/* Called when the current user is done with cselib. */
void
--- gcc/cselib.h.jj 2009-09-03 09:59:40.000000000 +0200
+++ gcc/cselib.h 2009-09-24 12:36:49.000000000 +0200
@@ -68,6 +68,7 @@ extern void (*cselib_record_sets_hook) (
extern cselib_val *cselib_lookup (rtx, enum machine_mode, int);
extern void cselib_init (bool record_memory);
+extern void cselib_notice_new_pseudos (void);
extern void cselib_clear_table (void);
extern void cselib_finish (void);
extern void cselib_process_insn (rtx);
--- gcc/testsuite/gcc.c-torture/execute/pr22141-2.c.jj 2009-09-24 12:36:49.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr22141-2.c 2009-09-24 12:36:49.000000000 +0200
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u __attribute__((aligned));
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v __attribute__((aligned));
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v __attribute__((aligned));
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w __attribute__((aligned));
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr22141-1.c.jj 2009-09-24 12:36:49.000000000 +0200
+++ gcc/testsuite/gcc.c-torture/execute/pr22141-1.c 2009-09-24 12:36:49.000000000 +0200
@@ -0,0 +1,122 @@
+/* PR middle-end/22141 */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
--- gcc/testsuite/gcc.target/i386/pr22141.c.jj 2009-09-24 12:36:49.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/pr22141.c 2009-09-24 12:36:49.000000000 +0200
@@ -0,0 +1,126 @@
+/* PR middle-end/22141 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void abort (void);
+
+struct S
+{
+ struct T
+ {
+ char a;
+ char b;
+ char c;
+ char d;
+ } t;
+} u;
+
+struct U
+{
+ struct S s[4];
+};
+
+void __attribute__((noinline))
+c1 (struct T *p)
+{
+ if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
+ abort ();
+ __builtin_memset (p, 0xaa, sizeof (*p));
+}
+
+void __attribute__((noinline))
+c2 (struct S *p)
+{
+ c1 (&p->t);
+}
+
+void __attribute__((noinline))
+c3 (struct U *p)
+{
+ c2 (&p->s[2]);
+}
+
+void __attribute__((noinline))
+f1 (void)
+{
+ u = (struct S) { { 1, 2, 3, 4 } };
+}
+
+void __attribute__((noinline))
+f2 (void)
+{
+ u.t.a = 1;
+ u.t.b = 2;
+ u.t.c = 3;
+ u.t.d = 4;
+}
+
+void __attribute__((noinline))
+f3 (void)
+{
+ u.t.d = 4;
+ u.t.b = 2;
+ u.t.a = 1;
+ u.t.c = 3;
+}
+
+void __attribute__((noinline))
+f4 (void)
+{
+ struct S v;
+ v.t.a = 1;
+ v.t.b = 2;
+ v.t.c = 3;
+ v.t.d = 4;
+ c2 (&v);
+}
+
+void __attribute__((noinline))
+f5 (struct S *p)
+{
+ p->t.a = 1;
+ p->t.c = 3;
+ p->t.d = 4;
+ p->t.b = 2;
+}
+
+void __attribute__((noinline))
+f6 (void)
+{
+ struct U v;
+ v.s[2].t.a = 1;
+ v.s[2].t.b = 2;
+ v.s[2].t.c = 3;
+ v.s[2].t.d = 4;
+ c3 (&v);
+}
+
+void __attribute__((noinline))
+f7 (struct U *p)
+{
+ p->s[2].t.a = 1;
+ p->s[2].t.c = 3;
+ p->s[2].t.d = 4;
+ p->s[2].t.b = 2;
+}
+
+int
+main (void)
+{
+ struct U w;
+ f1 ();
+ c2 (&u);
+ f2 ();
+ c1 (&u.t);
+ f3 ();
+ c2 (&u);
+ f4 ();
+ f5 (&u);
+ c2 (&u);
+ f6 ();
+ f7 (&w);
+ c3 (&w);
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
Jakub