[PATCH][v4] GIMPLE store merging pass
Kyrill Tkachov
kyrylo.tkachov@foss.arm.com
Fri Sep 30 15:34:00 GMT 2016
On 30/09/16 15:36, Kyrill Tkachov wrote:
> Hi Richard,
>
> On 29/09/16 11:45, Richard Biener wrote:
>>
>>> +
>>> + /* In some cases get_inner_reference may return a
>>> + MEM_REF [ptr + byteoffset]. For the purposes of this pass
>>> + canonicalize the base_addr to MEM_REF [ptr] and take
>>> + byteoffset into account in the bitpos. This occurs in
>>> + PR 23684 and this way we can catch more chains. */
>>> + if (TREE_CODE (base_addr) == MEM_REF
>>> + && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (base_addr, 0)))
>>> + && TREE_CODE (TREE_OPERAND (base_addr, 1)) == INTEGER_CST
>> This is always an INTEGER_CST.
>>
>>> + && tree_fits_shwi_p (TREE_OPERAND (base_addr, 1))
>> This will never allow negative offsets (but maybe this is a good thing?)
>>
>> )
>>> + {
>>> + bitpos += tree_to_shwi (TREE_OPERAND (base_addr, 1))
>>> + * BITS_PER_UNIT;
>> this multiplication may overflow. There is mem_ref_offset () which
>> you should really use here, see get_inner_reference itself (and
>> how to translate back from offset_int to HOST_WIDE_INT if it fits).
>>
>>> +
>>> + base_addr = fold_build2 (MEM_REF, TREE_TYPE (base_addr),
>>> + TREE_OPERAND (base_addr, 0),
>>> + build_zero_cst (TREE_TYPE (
>>> + TREE_OPERAND (base_addr, 1))));
>> Ugh, building a tree node ... you could use TREE_OPERAND (base_addr, 0)
>> as base_addr instead?
>
> This didn't work for me because aliasing info was lost.
> So in the example:
> void
> foo2 (struct bar *p, struct bar *p2)
> {
> p->b = 0xff;
> p2->b = 0xa;
> p->a = 0xfffff;
> p2->c = 0xc;
> p->c = 0xff;
> p2->d = 0xbf;
> p->d = 0xfff;
> }
>
> we end up merging p->b with p->a even though the p2->b store may alias.
> We'll record the base objects as being 'p' and 'p2' whereas with my approach
> we record them as '*p' and '*p2'. I don't suppose I could just do:
> TREE_OPERAND (base_addr, 1) = build_zero_cst (TREE_TYPE (TREE_OPERAND (base_addr, 1)));
> ?
>
That said, I think I could make it work by relying more on ptr_derefs_may_alias_p in the
alias checks. I'll see what I can do.
Kyrill
> Thanks,
> Kyrill
>
>>
>>> + }
>>> +
>>> + struct imm_store_chain_info **chain_info
>>> + = m_stores.get (base_addr);
>>> +
>>> + if (!invalid)
>>> + {
>>> + store_immediate_info *info;
>>> + if (chain_info)
>>> + {
>>> + info = new store_immediate_info (
>>> + bitsize, bitpos, rhs, lhs, stmt,
>>> + (*chain_info)->m_store_info.length ());
>>> + if (dump_file)
>>> + {
>>> + fprintf (dump_file,
>>> + "Recording immediate store from stmt:\n");
>>> + print_gimple_stmt (dump_file, stmt, 0, 0);
>>> + }
>>> + (*chain_info)->m_store_info.safe_push (info);
>>> + continue;
>>> + }
>>> +
>>> + /* Store aliases any existing chain? */
>>> + terminate_all_aliasing_chains (lhs, base_addr, stmt);
>>> +
>>> + /* Start a new chain. */
>>> + struct imm_store_chain_info *new_chain
>>> + = new imm_store_chain_info;
>>> + info = new store_immediate_info (bitsize, bitpos, rhs, lhs,
>>> + stmt, 0);
>>> + new_chain->m_store_info.safe_push (info);
>>> + m_stores.put (base_addr, new_chain);
>>> + if (dump_file)
>>> + {
>>> + fprintf (dump_file,
>>> + "Starting new chain with statement:\n");
>>> + print_gimple_stmt (dump_file, stmt, 0, 0);
>>> + fprintf (dump_file, "The base object is:\n");
>>> + print_generic_expr (dump_file, base_addr, 0);
>>> + fprintf (dump_file, "\n");
>>> + }
>>> + }
>>> + else
>>> + terminate_all_aliasing_chains (lhs, base_addr, stmt);
>>> +
>>> + continue;
>>> + }
>>> +
>>> + terminate_all_aliasing_chains (NULL_TREE, NULL_TREE, stmt);
>>> + }
>>> + terminate_and_process_all_chains (bb);
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +} // anon namespace
>>> +
>>> +/* Construct and return a store merging pass object. */
>>> +
>>> +gimple_opt_pass *
>>> +make_pass_store_merging (gcc::context *ctxt)
>>> +{
>>> + return new pass_store_merging (ctxt);
>>> +}
>>> diff --git a/gcc/opts.c b/gcc/opts.c
>>> index 45f1f89c..e63d7e4 100644
>>> --- a/gcc/opts.c
>>> +++ b/gcc/opts.c
>>> @@ -463,6 +463,7 @@ static const struct default_options default_options_table[] =
>>> { OPT_LEVELS_1_PLUS, OPT_ftree_dse, NULL, 1 },
>>> { OPT_LEVELS_1_PLUS, OPT_ftree_ter, NULL, 1 },
>>> { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 },
>>> + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fstore_merging, NULL, 1 },
>> Please leave it to -O[2s]+ -- the chain invalidation is quadratic and
>> -O1 should work well even for gigantic basic blocks.
>>
>> Overall the pass looks quite good with the comments addressed.
>>
>> Thanks,
>> Richard.
>>
>>> { OPT_LEVELS_1_PLUS, OPT_ftree_fre, NULL, 1 },
>>> { OPT_LEVELS_1_PLUS, OPT_ftree_copy_prop, NULL, 1 },
>>> { OPT_LEVELS_1_PLUS, OPT_ftree_sink, NULL, 1 },
>>> diff --git a/gcc/params.def b/gcc/params.def
>>> index 8907aa4..e63e594 100644
>>> --- a/gcc/params.def
>>> +++ b/gcc/params.def
>>> @@ -1100,6 +1100,12 @@ DEFPARAM (PARAM_MAX_TAIL_MERGE_COMPARISONS,
>>> "Maximum amount of similar bbs to compare a bb with.",
>>> 10, 0, 0)
>>> +DEFPARAM (PARAM_STORE_MERGING_ALLOW_UNALIGNED,
>>> + "store-merging-allow-unaligned",
>>> + "Allow the store merging pass to introduce unaligned stores "
>>> + "if it is legal to do so",
>>> + 1, 0, 1)
>>> +
>>> DEFPARAM (PARAM_MAX_TAIL_MERGE_ITERATIONS,
>>> "max-tail-merge-iterations",
>>> "Maximum amount of iterations of the pass over a function.",
>>> diff --git a/gcc/passes.def b/gcc/passes.def
>>> index 2830421..ee7dd50 100644
>>> --- a/gcc/passes.def
>>> +++ b/gcc/passes.def
>>> @@ -329,6 +329,7 @@ along with GCC; see the file COPYING3. If not see
>>> NEXT_PASS (pass_phiopt);
>>> NEXT_PASS (pass_fold_builtins);
>>> NEXT_PASS (pass_optimize_widening_mul);
>>> + NEXT_PASS (pass_store_merging);
>>> NEXT_PASS (pass_tail_calls);
>>> /* If DCE is not run before checking for uninitialized uses,
>>> we may get false warnings (e.g., testsuite/gcc.dg/uninit-5.c).
>>> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c b/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c
>>> new file mode 100644
>>> index 0000000..7c888b4
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.c-torture/execute/pr22141-1.c
>>> @@ -0,0 +1,122 @@
>>> +/* PR middle-end/22141 */
>>> +
>>> +extern void abort (void);
>>> +
>>> +struct S
>>> +{
>>> + struct T
>>> + {
>>> + char a;
>>> + char b;
>>> + char c;
>>> + char d;
>>> + } t;
>>> +} u;
>>> +
>>> +struct U
>>> +{
>>> + struct S s[4];
>>> +};
>>> +
>>> +void __attribute__((noinline))
>>> +c1 (struct T *p)
>>> +{
>>> + if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
>>> + abort ();
>>> + __builtin_memset (p, 0xaa, sizeof (*p));
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +c2 (struct S *p)
>>> +{
>>> + c1 (&p->t);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +c3 (struct U *p)
>>> +{
>>> + c2 (&p->s[2]);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f1 (void)
>>> +{
>>> + u = (struct S) { { 1, 2, 3, 4 } };
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f2 (void)
>>> +{
>>> + u.t.a = 1;
>>> + u.t.b = 2;
>>> + u.t.c = 3;
>>> + u.t.d = 4;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f3 (void)
>>> +{
>>> + u.t.d = 4;
>>> + u.t.b = 2;
>>> + u.t.a = 1;
>>> + u.t.c = 3;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f4 (void)
>>> +{
>>> + struct S v;
>>> + v.t.a = 1;
>>> + v.t.b = 2;
>>> + v.t.c = 3;
>>> + v.t.d = 4;
>>> + c2 (&v);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f5 (struct S *p)
>>> +{
>>> + p->t.a = 1;
>>> + p->t.c = 3;
>>> + p->t.d = 4;
>>> + p->t.b = 2;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f6 (void)
>>> +{
>>> + struct U v;
>>> + v.s[2].t.a = 1;
>>> + v.s[2].t.b = 2;
>>> + v.s[2].t.c = 3;
>>> + v.s[2].t.d = 4;
>>> + c3 (&v);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f7 (struct U *p)
>>> +{
>>> + p->s[2].t.a = 1;
>>> + p->s[2].t.c = 3;
>>> + p->s[2].t.d = 4;
>>> + p->s[2].t.b = 2;
>>> +}
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + struct U w;
>>> + f1 ();
>>> + c2 (&u);
>>> + f2 ();
>>> + c1 (&u.t);
>>> + f3 ();
>>> + c2 (&u);
>>> + f4 ();
>>> + f5 (&u);
>>> + c2 (&u);
>>> + f6 ();
>>> + f7 (&w);
>>> + c3 (&w);
>>> + return 0;
>>> +}
>>> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c b/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c
>>> new file mode 100644
>>> index 0000000..cb9cc79
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.c-torture/execute/pr22141-2.c
>>> @@ -0,0 +1,122 @@
>>> +/* PR middle-end/22141 */
>>> +
>>> +extern void abort (void);
>>> +
>>> +struct S
>>> +{
>>> + struct T
>>> + {
>>> + char a;
>>> + char b;
>>> + char c;
>>> + char d;
>>> + } t;
>>> +} u __attribute__((aligned));
>>> +
>>> +struct U
>>> +{
>>> + struct S s[4];
>>> +};
>>> +
>>> +void __attribute__((noinline))
>>> +c1 (struct T *p)
>>> +{
>>> + if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
>>> + abort ();
>>> + __builtin_memset (p, 0xaa, sizeof (*p));
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +c2 (struct S *p)
>>> +{
>>> + c1 (&p->t);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +c3 (struct U *p)
>>> +{
>>> + c2 (&p->s[2]);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f1 (void)
>>> +{
>>> + u = (struct S) { { 1, 2, 3, 4 } };
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f2 (void)
>>> +{
>>> + u.t.a = 1;
>>> + u.t.b = 2;
>>> + u.t.c = 3;
>>> + u.t.d = 4;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f3 (void)
>>> +{
>>> + u.t.d = 4;
>>> + u.t.b = 2;
>>> + u.t.a = 1;
>>> + u.t.c = 3;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f4 (void)
>>> +{
>>> + struct S v __attribute__((aligned));
>>> + v.t.a = 1;
>>> + v.t.b = 2;
>>> + v.t.c = 3;
>>> + v.t.d = 4;
>>> + c2 (&v);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f5 (struct S *p)
>>> +{
>>> + p->t.a = 1;
>>> + p->t.c = 3;
>>> + p->t.d = 4;
>>> + p->t.b = 2;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f6 (void)
>>> +{
>>> + struct U v __attribute__((aligned));
>>> + v.s[2].t.a = 1;
>>> + v.s[2].t.b = 2;
>>> + v.s[2].t.c = 3;
>>> + v.s[2].t.d = 4;
>>> + c3 (&v);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f7 (struct U *p)
>>> +{
>>> + p->s[2].t.a = 1;
>>> + p->s[2].t.c = 3;
>>> + p->s[2].t.d = 4;
>>> + p->s[2].t.b = 2;
>>> +}
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + struct U w __attribute__((aligned));
>>> + f1 ();
>>> + c2 (&u);
>>> + f2 ();
>>> + c1 (&u.t);
>>> + f3 ();
>>> + c2 (&u);
>>> + f4 ();
>>> + f5 (&u);
>>> + c2 (&u);
>>> + f6 ();
>>> + f7 (&w);
>>> + c3 (&w);
>>> + return 0;
>>> +}
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_1.c b/gcc/testsuite/gcc.dg/store_merging_1.c
>>> new file mode 100644
>>> index 0000000..09a4d14
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_1.c
>>> @@ -0,0 +1,35 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +struct bar {
>>> + int a;
>>> + char b;
>>> + char c;
>>> + char d;
>>> + char e;
>>> + char f;
>>> + char g;
>>> +};
>>> +
>>> +void
>>> +foo1 (struct bar *p)
>>> +{
>>> + p->b = 0;
>>> + p->a = 0;
>>> + p->c = 0;
>>> + p->d = 0;
>>> + p->e = 0;
>>> +}
>>> +
>>> +void
>>> +foo2 (struct bar *p)
>>> +{
>>> + p->b = 0;
>>> + p->a = 0;
>>> + p->c = 1;
>>> + p->d = 0;
>>> + p->e = 0;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_2.c b/gcc/testsuite/gcc.dg/store_merging_2.c
>>> new file mode 100644
>>> index 0000000..d3acc2d
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_2.c
>>> @@ -0,0 +1,80 @@
>>> +/* { dg-do run } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +struct bar
>>> +{
>>> + int a;
>>> + unsigned char b;
>>> + unsigned char c;
>>> + short d;
>>> + unsigned char e;
>>> + unsigned char f;
>>> + unsigned char g;
>>> +};
>>> +
>>> +__attribute__ ((noinline)) void
>>> +foozero (struct bar *p)
>>> +{
>>> + p->b = 0;
>>> + p->a = 0;
>>> + p->c = 0;
>>> + p->d = 0;
>>> + p->e = 0;
>>> + p->f = 0;
>>> + p->g = 0;
>>> +}
>>> +
>>> +__attribute__ ((noinline)) void
>>> +foo1 (struct bar *p)
>>> +{
>>> + p->b = 1;
>>> + p->a = 2;
>>> + p->c = 3;
>>> + p->d = 4;
>>> + p->e = 5;
>>> + p->f = 0;
>>> + p->g = 0xff;
>>> +}
>>> +
>>> +__attribute__ ((noinline)) void
>>> +foo2 (struct bar *p, struct bar *p2)
>>> +{
>>> + p->b = 0xff;
>>> + p2->b = 0xa;
>>> + p->a = 0xfffff;
>>> + p2->c = 0xc;
>>> + p->c = 0xff;
>>> + p2->d = 0xbf;
>>> + p->d = 0xfff;
>>> +}
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + struct bar b1, b2;
>>> + foozero (&b1);
>>> + foozero (&b2);
>>> +
>>> + foo1 (&b1);
>>> + if (b1.b != 1 || b1.a != 2 || b1.c != 3 || b1.d != 4 || b1.e != 5
>>> + || b1.f != 0 || b1.g != 0xff)
>>> + __builtin_abort ();
>>> +
>>> + foozero (&b1);
>>> + /* Make sure writes to aliasing struct pointers preserve the
>>> + correct order. */
>>> + foo2 (&b1, &b1);
>>> + if (b1.b != 0xa || b1.a != 0xfffff || b1.c != 0xff || b1.d != 0xfff)
>>> + __builtin_abort ();
>>> +
>>> + foozero (&b1);
>>> + foo2 (&b1, &b2);
>>> + if (b1.a != 0xfffff || b1.b != 0xff || b1.c != 0xff || b1.d != 0xfff
>>> + || b2.b != 0xa || b2.c != 0xc || b2.d != 0xbf)
>>> + __builtin_abort ();
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_3.c b/gcc/testsuite/gcc.dg/store_merging_3.c
>>> new file mode 100644
>>> index 0000000..cd756c1
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_3.c
>>> @@ -0,0 +1,32 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +/* Make sure stores to volatile addresses don't get combined with
>>> + other accesses. */
>>> +
>>> +struct bar
>>> +{
>>> + int a;
>>> + char b;
>>> + char c;
>>> + volatile short d;
>>> + char e;
>>> + char f;
>>> + char g;
>>> +};
>>> +
>>> +void
>>> +foozero (struct bar *p)
>>> +{
>>> + p->b = 0xa;
>>> + p->a = 0xb;
>>> + p->c = 0xc;
>>> + p->d = 0;
>>> + p->e = 0xd;
>>> + p->f = 0xe;
>>> + p->g = 0xf;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump "Volatile access terminates all chains" "store-merging" } } */
>>> +/* { dg-final { scan-tree-dump-times "=\{v\} 0;" 1 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_4.c b/gcc/testsuite/gcc.dg/store_merging_4.c
>>> new file mode 100644
>>> index 0000000..4bf9025
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_4.c
>>> @@ -0,0 +1,32 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +/* Check that we can merge interleaving stores that are guaranteed
>>> + to be non-aliasing. */
>>> +
>>> +struct bar
>>> +{
>>> + int a;
>>> + char b;
>>> + char c;
>>> + short d;
>>> + char e;
>>> + char f;
>>> + char g;
>>> +};
>>> +
>>> +void
>>> +foozero (struct bar *restrict p, struct bar *restrict p2)
>>> +{
>>> + p->b = 0xff;
>>> + p2->b = 0xa;
>>> + p->a = 0xfffff;
>>> + p2->a = 0xab;
>>> + p2->c = 0xc;
>>> + p->c = 0xff;
>>> + p2->d = 0xbf;
>>> + p->d = 0xfff;
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_5.c b/gcc/testsuite/gcc.dg/store_merging_5.c
>>> new file mode 100644
>>> index 0000000..3b82420
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_5.c
>>> @@ -0,0 +1,30 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +/* Make sure that non-aliasing non-constant interspersed stores do not
>>> + stop chains. */
>>> +
>>> +struct bar {
>>> + int a;
>>> + char b;
>>> + char c;
>>> + char d;
>>> + char e;
>>> + char g;
>>> +};
>>> +
>>> +void
>>> +foo1 (struct bar *p, char tmp)
>>> +{
>>> + p->a = 0;
>>> + p->b = 0;
>>> + p->g = tmp;
>>> + p->c = 0;
>>> + p->d = 0;
>>> + p->e = 0;
>>> +}
>>> +
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */
>>> +/* { dg-final { scan-tree-dump-times "MEM\\\[.*\\\]" 1 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_6.c b/gcc/testsuite/gcc.dg/store_merging_6.c
>>> new file mode 100644
>>> index 0000000..7d89baf
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_6.c
>>> @@ -0,0 +1,53 @@
>>> +/* { dg-do run } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +/* Check that we can widen accesses to bitfields. */
>>> +
>>> +struct bar {
>>> + int a : 3;
>>> + unsigned char b : 4;
>>> + unsigned char c : 1;
>>> + char d;
>>> + char e;
>>> + char f;
>>> + char g;
>>> +};
>>> +
>>> +__attribute__ ((noinline)) void
>>> +foozero (struct bar *p)
>>> +{
>>> + p->b = 0;
>>> + p->a = 0;
>>> + p->c = 0;
>>> + p->d = 0;
>>> + p->e = 0;
>>> + p->f = 0;
>>> + p->g = 0;
>>> +}
>>> +
>>> +__attribute__ ((noinline)) void
>>> +foo1 (struct bar *p)
>>> +{
>>> + p->b = 3;
>>> + p->a = 2;
>>> + p->c = 1;
>>> + p->d = 4;
>>> + p->e = 5;
>>> +}
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + struct bar p;
>>> + foozero (&p);
>>> + foo1 (&p);
>>> + if (p.a != 2 || p.b != 3 || p.c != 1 || p.d != 4 || p.e != 5
>>> + || p.f != 0 || p.g != 0)
>>> + __builtin_abort ();
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.dg/store_merging_7.c b/gcc/testsuite/gcc.dg/store_merging_7.c
>>> new file mode 100644
>>> index 0000000..02008f7
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.dg/store_merging_7.c
>>> @@ -0,0 +1,26 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target non_strict_align } */
>>> +/* { dg-options "-O -fdump-tree-store-merging" } */
>>> +
>>> +/* Check that we can merge consecutive array members through the pointer.
>>> + PR rtl-optimization/23684. */
>>> +
>>> +void
>>> +foo (char *input)
>>> +{
>>> + input = __builtin_assume_aligned (input, 8);
>>> + input[0] = 'H';
>>> + input[1] = 'e';
>>> + input[2] = 'l';
>>> + input[3] = 'l';
>>> + input[4] = 'o';
>>> + input[5] = ' ';
>>> + input[6] = 'w';
>>> + input[7] = 'o';
>>> + input[8] = 'r';
>>> + input[9] = 'l';
>>> + input[10] = 'd';
>>> + input[11] = '\0';
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "Merging successful" 1 "store-merging" } } */
>>> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c
>>> index f02e55f..9de4e77 100644
>>> --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c
>>> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c
>>> @@ -3,22 +3,22 @@
>>> int arr[4][4];
>>> void
>>> -foo ()
>>> +foo (int x, int y)
>>> {
>>> - arr[0][1] = 1;
>>> - arr[1][0] = -1;
>>> - arr[2][0] = 1;
>>> - arr[1][1] = -1;
>>> - arr[0][2] = 1;
>>> - arr[0][3] = -1;
>>> - arr[1][2] = 1;
>>> - arr[2][1] = -1;
>>> - arr[3][0] = 1;
>>> - arr[3][1] = -1;
>>> - arr[2][2] = 1;
>>> - arr[1][3] = -1;
>>> - arr[2][3] = 1;
>>> - arr[3][2] = -1;
>>> + arr[0][1] = x;
>>> + arr[1][0] = y;
>>> + arr[2][0] = x;
>>> + arr[1][1] = y;
>>> + arr[0][2] = x;
>>> + arr[0][3] = y;
>>> + arr[1][2] = x;
>>> + arr[2][1] = y;
>>> + arr[3][0] = x;
>>> + arr[3][1] = y;
>>> + arr[2][2] = x;
>>> + arr[1][3] = y;
>>> + arr[2][3] = x;
>>> + arr[3][2] = y;
>>> }
>>> /* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 7 } } */
>>> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c
>>> index 40056b1..824f0d2 100644
>>> --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c
>>> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_4.c
>>> @@ -3,22 +3,22 @@
>>> float arr[4][4];
>>> void
>>> -foo ()
>>> +foo (float x, float y)
>>> {
>>> - arr[0][1] = 1;
>>> - arr[1][0] = -1;
>>> - arr[2][0] = 1;
>>> - arr[1][1] = -1;
>>> - arr[0][2] = 1;
>>> - arr[0][3] = -1;
>>> - arr[1][2] = 1;
>>> - arr[2][1] = -1;
>>> - arr[3][0] = 1;
>>> - arr[3][1] = -1;
>>> - arr[2][2] = 1;
>>> - arr[1][3] = -1;
>>> - arr[2][3] = 1;
>>> - arr[3][2] = -1;
>>> + arr[0][1] = x;
>>> + arr[1][0] = y;
>>> + arr[2][0] = x;
>>> + arr[1][1] = y;
>>> + arr[0][2] = x;
>>> + arr[0][3] = y;
>>> + arr[1][2] = x;
>>> + arr[2][1] = y;
>>> + arr[3][0] = x;
>>> + arr[3][1] = y;
>>> + arr[2][2] = x;
>>> + arr[1][3] = y;
>>> + arr[2][3] = x;
>>> + arr[3][2] = y;
>>> }
>>> /* { dg-final { scan-assembler-times "stp\ts\[0-9\]+, s\[0-9\]" 7 } } */
>>> diff --git a/gcc/testsuite/gcc.target/i386/pr22141.c b/gcc/testsuite/gcc.target/i386/pr22141.c
>>> new file mode 100644
>>> index 0000000..036422e
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/i386/pr22141.c
>>> @@ -0,0 +1,126 @@
>>> +/* PR middle-end/22141 */
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-Os" } */
>>> +
>>> +extern void abort (void);
>>> +
>>> +struct S
>>> +{
>>> + struct T
>>> + {
>>> + char a;
>>> + char b;
>>> + char c;
>>> + char d;
>>> + } t;
>>> +} u;
>>> +
>>> +struct U
>>> +{
>>> + struct S s[4];
>>> +};
>>> +
>>> +void __attribute__((noinline))
>>> +c1 (struct T *p)
>>> +{
>>> + if (p->a != 1 || p->b != 2 || p->c != 3 || p->d != 4)
>>> + abort ();
>>> + __builtin_memset (p, 0xaa, sizeof (*p));
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +c2 (struct S *p)
>>> +{
>>> + c1 (&p->t);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +c3 (struct U *p)
>>> +{
>>> + c2 (&p->s[2]);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f1 (void)
>>> +{
>>> + u = (struct S) { { 1, 2, 3, 4 } };
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f2 (void)
>>> +{
>>> + u.t.a = 1;
>>> + u.t.b = 2;
>>> + u.t.c = 3;
>>> + u.t.d = 4;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f3 (void)
>>> +{
>>> + u.t.d = 4;
>>> + u.t.b = 2;
>>> + u.t.a = 1;
>>> + u.t.c = 3;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f4 (void)
>>> +{
>>> + struct S v;
>>> + v.t.a = 1;
>>> + v.t.b = 2;
>>> + v.t.c = 3;
>>> + v.t.d = 4;
>>> + c2 (&v);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f5 (struct S *p)
>>> +{
>>> + p->t.a = 1;
>>> + p->t.c = 3;
>>> + p->t.d = 4;
>>> + p->t.b = 2;
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f6 (void)
>>> +{
>>> + struct U v;
>>> + v.s[2].t.a = 1;
>>> + v.s[2].t.b = 2;
>>> + v.s[2].t.c = 3;
>>> + v.s[2].t.d = 4;
>>> + c3 (&v);
>>> +}
>>> +
>>> +void __attribute__((noinline))
>>> +f7 (struct U *p)
>>> +{
>>> + p->s[2].t.a = 1;
>>> + p->s[2].t.c = 3;
>>> + p->s[2].t.d = 4;
>>> + p->s[2].t.b = 2;
>>> +}
>>> +
>>> +int
>>> +main (void)
>>> +{
>>> + struct U w;
>>> + f1 ();
>>> + c2 (&u);
>>> + f2 ();
>>> + c1 (&u.t);
>>> + f3 ();
>>> + c2 (&u);
>>> + f4 ();
>>> + f5 (&u);
>>> + c2 (&u);
>>> + f6 ();
>>> + f7 (&w);
>>> + c3 (&w);
>>> + return 0;
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-times "67305985\|4030201" 7 } } */
>>> diff --git a/gcc/testsuite/gcc.target/i386/pr34012.c b/gcc/testsuite/gcc.target/i386/pr34012.c
>>> index 00b1240..d0cffa0 100644
>>> --- a/gcc/testsuite/gcc.target/i386/pr34012.c
>>> +++ b/gcc/testsuite/gcc.target/i386/pr34012.c
>>> @@ -1,7 +1,7 @@
>>> /* PR rtl-optimization/34012 */
>>> /* { dg-do compile } */
>>> /* { dg-require-effective-target lp64 } */
>>> -/* { dg-options "-O2" } */
>>> +/* { dg-options "-O2 -fno-store-merging" } */
>>> void bar (long int *);
>>> void
>>> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
>>> index a706729..b5373a3 100644
>>> --- a/gcc/tree-pass.h
>>> +++ b/gcc/tree-pass.h
>>> @@ -425,6 +425,7 @@ extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_cse_sincos (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_optimize_bswap (gcc::context *ctxt);
>>> +extern gimple_opt_pass *make_pass_store_merging (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_optimize_widening_mul (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_warn_function_return (gcc::context *ctxt);
>>> extern gimple_opt_pass *make_pass_warn_function_noreturn (gcc::context *ctxt);
>
More information about the Gcc-patches
mailing list