This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/41089] [4.5 Regression] r147980 (New SRA) breaks stdargs
- From: "ubizjak at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 10 Sep 2009 17:29:39 -0000
- Subject: [Bug tree-optimization/41089] [4.5 Regression] r147980 (New SRA) breaks stdargs
- References: <bug-41089-1649@http.gcc.gnu.org/bugzilla/>
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
------- Comment #20 from ubizjak at gmail dot com 2009-09-10 17:29 -------
I have compared the dumps of two functions:
--function 1, WORKS--
extern void abort (void);
int foo_arg;
long x;
void
f4 (int i, ...)
{
__builtin_va_list ap;
__builtin_va_start(ap,i);
x = __builtin_va_arg(ap,double);
foo_arg = __builtin_va_arg(ap,int);
__builtin_va_end(ap);
}
--stdarg dump of function 1--
;; Function f4 (f4)
f4: va_list escapes 0, needs to save 16 GPR units and 3 FPR units.
f4 (int i)
{
struct ap;
int foo_arg.2;
int D.2032;
int * D.2031;
long unsigned int D.2030;
int * D.2029;
long int D.2028;
long int x.1;
double D.2026;
int D.2025;
double * D.2024;
long int D.2022;
long unsigned int iftmp.0;
double * D.2018;
long int D.2017;
int D.2016;
void * D.2015;
<bb 2>:
__builtin_va_start (&ap, 0);
D.2015_2 = ap.__base;
D.2016_3 = ap.__offset;
D.2017_4 = (long int) D.2016_3;
D.2018_5 = (double *) D.2015_2;
if (D.2017_4 <= 47)
goto <bb 3>;
else
goto <bb 4>;
<bb 3>:
D.2022_6 = D.2017_4 + -48;
iftmp.0_7 = (long unsigned int) D.2022_6;
goto <bb 5>;
<bb 4>:
iftmp.0_8 = (long unsigned int) D.2017_4;
<bb 5>:
# iftmp.0_1 = PHI <iftmp.0_7(3), iftmp.0_8(4)>
D.2024_9 = D.2018_5 + iftmp.0_1;
D.2017_10 = D.2017_4 + 8;
D.2025_11 = (int) D.2017_10;
ap.__offset = D.2025_11;
D.2026_12 = *D.2024_9;
x.1_13 = (long int) D.2026_12;
x = x.1_13;
D.2028_16 = (long int) D.2025_11;
D.2029_17 = (int *) D.2015_2;
D.2030_18 = (long unsigned int) D.2028_16;
D.2031_19 = D.2029_17 + D.2030_18;
D.2028_20 = D.2028_16 + 8;
D.2032_21 = (int) D.2028_20;
ap.__offset = D.2032_21;
foo_arg.2_22 = *D.2031_19;
foo_arg = foo_arg.2_22;
__builtin_va_end (&ap);
return;
}
--function 2, FAILS--
extern void abort (void);
int foo_arg;
long x;
static void
foo (int v, __builtin_va_list ap)
{
switch (v)
{
case 5: foo_arg = __builtin_va_arg(ap,int); break;
default: abort ();
}
}
void
f4 (int i, ...)
{
__builtin_va_list ap;
__builtin_va_start(ap,i);
x = __builtin_va_arg(ap,double);
foo (i, ap);
__builtin_va_end(ap);
}
--stdarg dump of function 2--
;; Function f4 (f4)
f4: va_list escapes 0, needs to save 8 GPR units and 3 FPR units.
f4 (int i)
{
long int D.2051;
int * D.2050;
long unsigned int D.2049;
int * D.2048;
int foo_arg.2;
struct ap;
long int x.1;
double D.2033;
int D.2032;
double * D.2031;
long int D.2029;
long unsigned int iftmp.0;
double * D.2025;
long int D.2024;
int D.2023;
void * D.2022;
<bb 2>:
__builtin_va_start (&ap, 0);
D.2022_2 = ap.__base;
D.2023_3 = ap.__offset;
D.2024_4 = (long int) D.2023_3;
D.2025_5 = (double *) D.2022_2;
if (D.2024_4 <= 47)
goto <bb 3>;
else
goto <bb 4>;
<bb 3>:
D.2029_6 = D.2024_4 + -48;
iftmp.0_7 = (long unsigned int) D.2029_6;
goto <bb 5>;
<bb 4>:
iftmp.0_8 = (long unsigned int) D.2024_4;
<bb 5>:
# iftmp.0_1 = PHI <iftmp.0_7(3), iftmp.0_8(4)>
D.2031_9 = D.2025_5 + iftmp.0_1;
D.2024_10 = D.2024_4 + 8;
D.2032_11 = (int) D.2024_10;
ap.__offset = D.2032_11;
D.2033_12 = *D.2031_9;
x.1_13 = (long int) D.2033_12;
x = x.1_13;
switch (i_14(D)) <default: <L4>, case 5: <L3>>
<L3>:
D.2051_23 = (long int) D.2032_11;
D.2050_24 = (int *) D.2022_2;
D.2049_25 = (long unsigned int) D.2051_23;
D.2048_26 = D.2050_24 + D.2049_25;
foo_arg.2_27 = *D.2048_26;
foo_arg = foo_arg.2_27;
__builtin_va_end (&ap);
return;
<L4>:
abort ();
}
Tracing through tree-stdarg.c, the difference is in the number of calls to
va_list_counter_struct_op made from this place:
{
if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
== GIMPLE_SINGLE_RHS)
{
/* Check for ap[0].field = temp. */
>>> if (va_list_counter_struct_op (&si, lhs, rhs, true))
continue;
/* Check for temp = ap[0].field. */
else if (va_list_counter_struct_op (&si, rhs, lhs,
false))
continue;
}
So indeed, in the failing case there is only one assignment to ap.__offset,
whereas in the working case there are two assignments, resulting in the correct
number of GPR bytes being saved (16 == 2 registers, versus only 8 in the
failing case).
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41089