This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug target/18141] New: mips64-none-elf-gcc: Excessive NOPs with -march=r3000
- From: "niva at niisi dot msk dot ru" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 25 Oct 2004 11:24:50 -0000
- Subject: [Bug target/18141] New: mips64-none-elf-gcc: Excessive NOPs with -march=r3000
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
* the options given when GCC was configured/built;
Configured with: /home/niva/src/gcc-3.4-binutils/configure
--with-headers=/home/vxuser/oc2000/mips/src/include
--without-libs --enable-generated-files-in-srcdir
--enable-threads=posix -with-dwarf2 --disable-shared
--target=mips64-none-elf --verbose --enable-checking
--enable-languages=c --srcdir=/home/niva/svnwork/src
--prefix=/home/niva/local1 --enable-cpp
* the complete command line that triggers the bug;
./mips64-none-elf-gcc -march=r3000 -mabi=32 -G0 -O3 -S flops.i -o f.s -dp
* the compiler output (error messages, warnings, etc.);
No error messages
* There are excessive NOPs in the resulting assembler code,
for example:
lwc1 $f2,12($18) # 4287 load_df_low/2 [length = 4]
nop # 4549 hazard_nop [length = 4]
lwc1 $f3,8($18) # 4288 load_df_high/2 [length = 4]
nop # 4550 hazard_nop [length = 4]
swc1 $f2,%lo(sa+4)($fp) # 4289 movsi_internal/10 [length = 4]
* the preprocessed file (flops.i) (Sorry for large source.)
/* File-scope state for the FLOPS test case.  The definitions are kept in
   their original order; this file is a compiler-bug reproducer. */

/* nulltime: scaled time of the empty loop in main.
   TimeArray: buffer passed to every dtime() call; main reads TimeArray[1]. */
double nulltime = 0.0, TimeArray[3];
/* Time threshold that ends the calibration loop in main. */
double TLimit;
/* Timing and MFLOPS results; main writes T[1] through T[34]. */
double T[36]={0.0};
/* Work variables and small constants (one..five are set to 1.0..5.0 in main). */
double sa,sb,sc,sd,one,two,three;
double four,five,piref,piprg;
double scale,pierr;
/* A0..A6: coefficients of the polynomial in w evaluated in modules 3-6 and 8.
   main negates A3 and A5 after module 3. */
double A0 = 1.0;
double A1 = -0.1666666666671334;
double A2 = 0.833333333809067E-2;
double A3 = 0.198412715551283E-3;
double A4 = 0.27557589750762E-5;
double A5 = 0.2507059876207E-7;
double A6 = 0.164105986683E-9;
/* B0..B6: coefficients of the second polynomial in w (modules 4-6 and 8).
   B0 is not referenced in the visible code. */
double B0 = 1.0;
double B1 = -0.4999999999982;
double B2 = 0.4166666664651E-1;
double B3 = -0.1388888805755E-2;
double B4 = 0.24801428034E-4;
double B5 = -0.2754213324E-6;
double B6 = 0.20189405E-8;
/* C0..C8 are not referenced anywhere in the visible code. */
double C0 = 1.0;
double C1 = 0.99999999668;
double C2 = 0.49999995173;
double C3 = 0.16666704243;
double C4 = 0.4166685027E-1;
double C5 = 0.832672635E-2;
double C6 = 0.140836136E-2;
double C7 = 0.17358267E-3;
double C8 = 0.3931683E-4;
/* D1..D3 and E2..E3: coefficients of the rational expression in module 1. */
double D1 = 0.3999999946405E-1;
double D2 = 0.96E-3;
double D3 = 0.1233153E-5;
double E2 = 0.48E-3;
double E3 = 0.411051E-6;
/* Prototypes only; neither function is defined in this file. */
int printf( const char *format ,...);
int dtime(double p[2]);
/*
 * FLOPS benchmark driver.
 *
 * Runs eight timed floating-point modules.  For each one it prints the module
 * number, an error term, a run time and an MFLOPS figure, and at the end it
 * prints four composite MFLOPS ratings.  argc and argv are unused.
 * Always returns 0.
 *
 * Timing: dtime() is called before and after every timed loop and
 * TimeArray[1] is read afterwards.  dtime() is not defined in this file;
 * presumably TimeArray[1] holds the time elapsed since the previous call
 * (TODO confirm against dtime's definition).
 */
int main(int argc, char *argv[])
{
register double s,u,v,w,x;
long loops, NLimit;
register long i, m, n;
printf( "\n");
dtime(TimeArray);
printf( " FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n");
/* Initial iteration count; the calibration loop below doubles it. */
loops = 15625;
T[1] = 1.0E+06/(double)loops;
TLimit = 15.0;
/* 512000000 == 15625 * 2^15, so n hits NLimit exactly after 15 doublings. */
NLimit = 512000000;
piref = 3.14159265358979324;
one = 1.0;
two = 2.0;
three = 3.0;
four = 4.0;
five = 5.0;
scale = one;
printf( " Module Error RunTime MFLOPS\n");
printf( " (usec)\n");
dtime(TimeArray);
dtime(TimeArray);
/* Module 1 (also calibration): each pass doubles n and times a loop of
   n-1 iterations.  Repeats until the measured time sa reaches TLimit or
   n equals NLimit. */
n = loops;
sa = 0.0;
do
{
n = 2 * n;
x = one / (double)n;
s = 0.0;
v = 0.0;
w = one;
dtime(TimeArray);
for( i = 1 ; i <= n-1 ; i++ )
{
v = v + w;
u = v * x;
s = s + (D1+u*(D2+u*D3))/(w+u*(D1+u*(E2+u*E3)));
}
dtime(TimeArray);
sa = TimeArray[1];
if ( n == NLimit ) break;
} while ( sa < TLimit );
/* T[1] is the factor that scales later TimeArray[1] readings. */
scale = 1.0E+06 / (double)n;
T[1] = scale;
/* Time an empty loop of the same length to estimate loop overhead. */
dtime(TimeArray);
for( i = 1 ; i <= n-1 ; i++ )
{
}
dtime(TimeArray);
nulltime = T[1] * TimeArray[1];
/* Clamp so a negative reading is never subtracted as overhead. */
if ( nulltime < 0.0 ) nulltime = 0.0;
/* Module 1 run time with loop overhead removed. */
T[2] = T[1] * sa - nulltime;
sa = (D1+D2+D3)/(one+D1+E2+E3);
sb = D1;
/* NOTE(review): 14.0 is presumably the per-iteration operation count
   credited to module 1 -- confirm. */
T[3] = T[2] / 14.0;
sa = x * ( sa + sb + two * s ) / two;
sb = one / sa;
/* Iteration count for module 2, derived from the module 1 result and scale. */
n = (long)( (double)( 40000 * (long)sb ) / scale );
/* sc is the value printed in the "Error" column. */
sc = sb - 25.2;
T[4] = one / T[3];
printf( " 1 %13.4e %10.4f %10.4f\n",sc,T[2],T[4]);
/* Module 2, baseline: time only the two statements that the full loop
   below also executes; the result goes to T[5]. */
m = n;
s = -five;
sa = -one;
dtime(TimeArray);
for ( i = 1 ; i <= m ; i++ )
{
s = -s;
sa = sa + s;
}
dtime(TimeArray);
T[5] = T[1] * TimeArray[1];
if ( T[5] < 0.0 ) T[5] = 0.0;
sc = (double)m;
u = sa;
v = 0.0;
w = 0.0;
x = 0.0;
/* Module 2, full loop: the same two statements plus four more updates. */
dtime(TimeArray);
for ( i = 1 ; i <= m ; i++)
{
s = -s;
sa = sa + s;
u = u + two;
x = x +(s - u);
v = v - s * u;
w = w + s / u;
}
dtime(TimeArray);
T[6] = T[1] * TimeArray[1];
/* Time per credited operation: full loop minus baseline, divided by 7.0. */
T[7] = ( T[6] - T[5] ) / 7.0;
/* m is recomputed from the loop results; it is the iteration count for
   modules 3 through 7. */
m = (long)( sa * x / sc );
sa = four * w / five;
sb = sa + five / v;
sc = 31.25;
piprg = sb - sc / (v * v * v);
/* Error term: computed value minus the reference constant piref. */
pierr = piprg - piref;
T[8] = one / T[7];
printf( " 2 %13.4e %10.4f %10.4f\n",pierr,T[6]-T[5],T[8]);
/* Module 3: sum u times a polynomial in w = u*u using A1..A6, with A3 and
   A5 subtracted; step size x = piref / (3*m). */
x = piref / ( three * (double)m );
s = 0.0;
v = 0.0;
dtime(TimeArray);
for( i = 1 ; i <= m-1 ; i++ )
{
v = v + one;
u = v * x;
w = u * u;
s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one);
}
dtime(TimeArray);
T[9] = T[1] * TimeArray[1] - nulltime;
u = piref / three;
w = u * u;
sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one);
T[10] = T[9] / 17.0;
sa = x * ( sa + two * s ) / two;
sb = 0.5;
sc = sa - sb;
T[11] = one / T[10];
printf( " 3 %13.4e %10.4f %10.4f\n",sc,T[9],T[11]);
/* Negate A3 and A5 so later modules can use the all-additions form of
   the same polynomial. */
A3 = -A3;
A5 = -A5;
/* Module 4: sum the B-coefficient polynomial in w, plus one, per step. */
x = piref / ( three * (double)m );
s = 0.0;
v = 0.0;
dtime(TimeArray);
for( i = 1 ; i <= m-1 ; i++ )
{
u = (double)i * x;
w = u * u;
s = s + w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
}
dtime(TimeArray);
T[12] = T[1] * TimeArray[1] - nulltime;
u = piref / three;
w = u * u;
sa = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
T[13] = T[12] / 15.0;
sa = x * ( sa + one + two * s ) / two;
u = piref / three;
w = u * u;
/* Reference value for the error term: the A polynomial evaluated at
   u = piref/3. */
sb = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+A0);
sc = sa - sb;
T[14] = one / T[13];
printf( " 4 %13.4e %10.4f %10.4f\n",sc,T[12],T[14]);
/* Module 5: sum the quotient (A polynomial) / (B polynomial) per step. */
x = piref / ( three * (double)m );
s = 0.0;
v = 0.0;
dtime(TimeArray);
for( i = 1 ; i <= m-1 ; i++ )
{
u = (double)i * x;
w = u * u;
v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
s = s + v / (w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
}
dtime(TimeArray);
T[15] = T[1] * TimeArray[1] - nulltime;
u = piref / three;
w = u * u;
sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
sa = sa / sb;
T[16] = T[15] / 29.0;
sa = x * ( sa + two * s ) / two;
sb = 0.6931471805599453;
sc = sa - sb;
T[17] = one / T[16];
printf( " 5 %13.4e %10.4f %10.4f\n",sc,T[15],T[17]);
/* Module 6: sum the product (A polynomial) * (B polynomial) per step;
   step size x = piref / (4*m). */
x = piref / ( four * (double)m );
s = 0.0;
v = 0.0;
dtime(TimeArray);
for( i = 1 ; i <= m-1 ; i++ )
{
u = (double)i * x;
w = u * u;
v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
s = s + v*(w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
}
dtime(TimeArray);
T[18] = T[1] * TimeArray[1] - nulltime;
u = piref / four;
w = u * u;
sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
sa = sa * sb;
T[19] = T[18] / 29.0;
sa = x * ( sa + two * s ) / two;
sb = 0.25;
sc = sa - sb;
T[20] = one / T[19];
printf( " 6 %13.4e %10.4f %10.4f\n",sc,T[18],T[20]);
/* Module 7: three divisions per iteration, with w fixed at one. */
s = 0.0;
w = one;
sa = 102.3321513995275;
v = sa / (double)m;
dtime(TimeArray);
for ( i = 1 ; i <= m-1 ; i++)
{
x = (double)i * v;
u = x * x;
s = s - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w );
}
dtime(TimeArray);
T[21] = T[1] * TimeArray[1] - nulltime;
T[22] = T[21] / 12.0;
x = sa;
u = x * x;
sa = -w - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w );
sa = 18.0 * v * (sa + two * s );
/* m is recomputed from the module 7 result; it is the iteration count for
   module 8 and the value printed as "Iterations". */
m = -2000 * (long)sa;
m = (long)( (double)m / scale );
sc = sa + 500.2;
T[23] = one / T[22];
printf( " 7 %13.4e %10.4f %10.4f\n",sc,T[21],T[23]);
/* Module 8: sum (B polynomial)^2 * (A polynomial) per step. */
x = piref / ( three * (double)m );
s = 0.0;
v = 0.0;
dtime(TimeArray);
for( i = 1 ; i <= m-1 ; i++ )
{
u = (double)i * x;
w = u * u;
v = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
s = s + v*v*u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
}
dtime(TimeArray);
T[24] = T[1] * TimeArray[1] - nulltime;
u = piref / three;
w = u * u;
sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
sa = sa * sb * sb;
T[25] = T[24] / 30.0;
sa = x * ( sa + two * s ) / two;
sb = 0.29166666666666667;
sc = sa - sb;
T[26] = one / T[25];
printf( " 8 %13.4e %10.4f %10.4f\n",sc,T[24],T[26]);
/* Composite ratings: each combines several module times, divides by a
   fixed constant and inverts.  Printed below as MFLOPS(1)..MFLOPS(4). */
T[27] = ( five * (T[6] - T[5]) + T[9] ) / 52.0;
T[28] = one / T[27];
T[29] = T[2] + T[9] + T[12] + T[15] + T[18];
T[29] = (T[29] + four * T[21]) / 152.0;
T[30] = one / T[29];
T[31] = T[2] + T[9] + T[12] + T[15] + T[18];
T[31] = (T[31] + T[21] + T[24]) / 146.0;
T[32] = one / T[31];
T[33] = (T[9] + T[12] + T[18] + T[24]) / 91.0;
T[34] = one / T[33];
printf( "\n");
printf( " Iterations = %10ld\n",m);
printf( " NullTime (usec) = %10.4f\n",nulltime);
printf( " MFLOPS(1) = %10.4f\n",T[28]);
printf( " MFLOPS(2) = %10.4f\n",T[30]);
printf( " MFLOPS(3) = %10.4f\n",T[32]);
printf( " MFLOPS(4) = %10.4f\n\n",T[34]);
dtime(TimeArray);
return 0;
}
--
Summary: mips64-none-elf-gcc: Excessive NOPs with -march=r3000
Product: gcc
Version: 3.4.2
Status: UNCONFIRMED
Severity: normal
Priority: P2
Component: target
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: niva at niisi dot msk dot ru
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: mips64-none-elf
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18141