This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix PR44900 (SRA miscompilation)
- From: Richard Guenther <rguenther at suse dot de>
- To: Martin Jambor <mjambor at suse dot cz>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 20 Jul 2010 16:02:13 +0200 (CEST)
- Subject: Re: [PATCH] Fix PR44900 (SRA miscompilation)
- References: <20100720133034.GA24667@virgil.arch.suse.de>
On Tue, 20 Jul 2010, Martin Jambor wrote:
> Hi,
>
> the simple patch below fixes PR 44900. Apparently we rarely happen to
> land in the SRA_UDH_LEFT refreshing mode in
> load_assign_lhs_subreplacements because cleaning up after it has not
> worked for a long time. But it does require a special combination of
> assignments between special unions to trigger, such as the one in the
> testcase.
>
> This patch is aginst the 4.5 branch but the one for trunk is exactly
> the same. I have bootstrapped and regression tested both without any
> problems. OK for trunk and the branch?
I belive you can remove test_assert (and thus the printf decl and
the test macro) from the testcase.
Ok with that change.
Thanks,
RIchard.
> Thanks,
>
> Martin
>
>
>
> 2010-07-20 Martin Jambor <mjambor@suse.cz>
>
> PR tree-optimization/44900
> * tree-sra.c (load_assign_lhs_subreplacements): Updated comments.
> (sra_modify_assign): Move gsi to the next statmenent unconditionally.
>
> Index: gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C
> ===================================================================
> --- /dev/null
> +++ gcc-4_5-branch/gcc/testsuite/g++.dg/torture/pr44900.C
> @@ -0,0 +1,91 @@
> +/* { dg-do run { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-msse" } */
> +/* { dg-require-effective-target sse } */
> +
> +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
> +typedef float __v4sf __attribute__ ((__vector_size__ (16)));
> +
> +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__,
> +__artificial__))
> +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
> +{
> + return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z };
> +}
> +
> +struct vec
> +{
> + union {
> + __m128 v;
> + float e[4];
> + };
> +
> + static const vec & zero()
> + {
> + static const vec v = _mm_set_ps(0, 0, 0, 0);
> + return v;
> + }
> +
> + vec() {}
> + vec(const __m128 & a) : v(a) {}
> +
> + operator const __m128&() const { return v; }
> +};
> +
> +struct vec2
> +{
> + vec _v1;
> + vec _v2;
> +
> + vec2() {}
> + vec2(const vec & a, const vec & b) : _v1(a), _v2(b) {}
> +
> + static vec2 load(const float * a)
> + {
> + return vec2(
> + __builtin_ia32_loadups(&a[0]),
> + __builtin_ia32_loadups(&a[4]));
> + }
> +
> + const vec & v1() const { return _v1; }
> + const vec & v2() const { return _v2; }
> +};
> +
> +extern "C" {
> + int printf (const char*, ...);
> + void abort(void);
> +}
> +
> +inline bool test_assert( bool is_succeed, const char * file_name, int line_num
> +)
> +{
> + if ( !is_succeed )
> + {
> + printf("error: %s(%d)\n", file_name, line_num);
> + }
> +
> + return is_succeed;
> +}
> +
> +inline bool operator==(const vec & a, const vec & b)
> +{ return 0xf == __builtin_ia32_movmskps(__builtin_ia32_cmpeqps(a, b)); }
> +
> +#define test(x, y) test_assert( (x)==(y), __FILE__, __LINE__ )
> +
> +int main( int argc, char * argv[] )
> +{
> + __attribute__((aligned(16))) float data[] =
> + { 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5 };
> +
> + float * p = &data[2];
> + vec2 a;
> +
> + a = vec2::load(p);
> +
> + vec v1 = a.v1();
> + vec v2 = a.v2();
> +
> + if (v2.e[3] != 7.0)
> + abort();
> +
> + return 0;
> +}
> Index: gcc-4_5-branch/gcc/tree-sra.c
> ===================================================================
> --- gcc-4_5-branch.orig/gcc/tree-sra.c
> +++ gcc-4_5-branch/gcc/tree-sra.c
> @@ -2445,9 +2445,11 @@ handle_unscalarized_data_in_subtree (str
> (sub)tree. If that is not possible, refresh the TOP_RACC base aggregate and
> load the accesses from it. LEFT_OFFSET is the offset of the left whole
> subtree being copied, RIGHT_OFFSET is the same thing for the right subtree.
> - GSI is stmt iterator used for statement insertions. *REFRESHED is true iff
> - the rhs top aggregate has already been refreshed by contents of its scalar
> - reductions and is set to true if this function has to do it. */
> + NEW_GSI is stmt iterator used for statement insertions after the original
> + assignment, OLD_GSI is used to insert statements before the assignment.
> + *REFRESHED keeps the information whether we have needed to refresh
> + replacements of the LHS and from which side of the assignments this takes
> + place. */
>
> static void
> load_assign_lhs_subreplacements (struct access *lacc, struct access *top_racc,
> @@ -2747,9 +2749,7 @@ sra_modify_assign (gimple *stmt, gimple_
> &orig_gsi, gsi, &refreshed, lhs);
> if (refreshed != SRA_UDH_RIGHT)
> {
> - if (*stmt == gsi_stmt (*gsi))
> - gsi_next (gsi);
> -
> + gsi_next (gsi);
> unlink_stmt_vdef (*stmt);
> gsi_remove (&orig_gsi, true);
> sra_stats.deleted++;
>
>
--
Richard Guenther <rguenther@suse.de>
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex