This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug target/18141] New: mips64-none-elf-gcc: Excessive NOPs with -march=r3000


* the options given when GCC was configured/built;

Configured with: /home/niva/src/gcc-3.4-binutils/configure
--with-headers=/home/vxuser/oc2000/mips/src/include
--without-libs --enable-generated-files-in-srcdir
--enable-threads=posix -with-dwarf2 --disable-shared
--target=mips64-none-elf --verbose --enable-checking
--enable-languages=c --srcdir=/home/niva/svnwork/src
--prefix=/home/niva/local1 --enable-cpp

     * the complete command line that triggers the bug;

./mips64-none-elf-gcc -march=r3000 -mabi=32 -G0 -O3 -S flops.i -o f.s -dp

     * the compiler output (error messages, warnings, etc.);

No error messages

     * There are excessive NOPs in the resulting assembler code,
       for example:

	lwc1	$f2,12($18)	 # 4287	load_df_low/2	[length = 4]
	nop	 # 4549	hazard_nop	[length = 4]
	lwc1	$f3,8($18)	 # 4288	load_df_high/2	[length = 4]
	nop	 # 4550	hazard_nop	[length = 4]
	swc1	$f2,%lo(sa+4)($fp)	 # 4289	movsi_internal/10 [length = 4]


     * the preprocessed file (flops.i) (sorry for the large source):


/* Timing state.  TimeArray is the buffer main hands to dtime(); main reads
   TimeArray[1] after each timed section.  nulltime is the cost main measures
   for an empty loop and later subtracts from most module timings. */
double nulltime = 0.0, TimeArray[3];
/* Threshold compared against sa in main's calibration loop (set to 15.0
   there). */
double TLimit;

/* Result table filled in by main: T[1] holds the scale factor 1.0E+06/n and
   the other used slots hold per-module run times and values derived from
   them. */
double T[36]={0.0};

/* Working values and named constants.  main assigns one..five, piref and
   scale at run time instead of using literals in the expressions.  sd is not
   referenced by main. */
double sa,sb,sc,sd,one,two,three;
double four,five,piref,piprg;
double scale,pierr;

/* Coefficients of the polynomial u*((((((A6*w+-A5)*w+A4)*w+-A3)*w+A2)*w+A1)*w+one)
   with w = u*u that main evaluates.  main negates A3 and A5 after module 3,
   so later modules use them with a plus sign.
   NOTE(review): the values resemble a sin(x) series approximation - confirm. */
double A0 = 1.0;
double A1 = -0.1666666666671334;
double A2 = 0.833333333809067E-2;
double A3 = 0.198412715551283E-3;
double A4 = 0.27557589750762E-5;
double A5 = 0.2507059876207E-7;
double A6 = 0.164105986683E-9;

/* Coefficients of the polynomial w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one
   with w = u*u that main evaluates (B0 itself is not referenced by main).
   NOTE(review): the values resemble a cos(x) series approximation - confirm. */
double B0 = 1.0;
double B1 = -0.4999999999982;
double B2 = 0.4166666664651E-1;
double B3 = -0.1388888805755E-2;
double B4 = 0.24801428034E-4;
double B5 = -0.2754213324E-6;
double B6 = 0.20189405E-8;

/* C0..C8 are defined but not referenced by main in this file. */
double C0 = 1.0;
double C1 = 0.99999999668;
double C2 = 0.49999995173;
double C3 = 0.16666704243;
double C4 = 0.4166685027E-1;
double C5 = 0.832672635E-2;
double C6 = 0.140836136E-2;
double C7 = 0.17358267E-3;
double C8 = 0.3931683E-4;

/* D1..D3 and E2..E3 are the coefficients of the module 1 integrand
   (D1+u*(D2+u*D3))/(w+u*(D1+u*(E2+u*E3))) in main. */
double D1 = 0.3999999946405E-1;
double D2 = 0.96E-3;
double D3 = 0.1233153E-5;

double E2 = 0.48E-3;
double E3 = 0.411051E-6;

/* Prototypes written out directly; this file contains no #include lines.
   dtime is not defined in this file.  main always passes TimeArray (declared
   with 3 elements, although the parameter is spelled p[2]) and reads
   TimeArray[1] afterwards.
   NOTE(review): presumably dtime stores the time elapsed since its previous
   call in p[1] - confirm against its definition. */
int printf( const char *format ,...);
int dtime(double p[2]);
/*
 * Benchmark driver: runs eight timed floating-point loops ("modules").  For
 * each one it prints a line whose three values fall under the "Error",
 * "RunTime" and "MFLOPS" column headings, and at the end it prints four
 * combined MFLOPS figures.  argc and argv are not used.  Returns 0.
 *
 * Statement order matters throughout: n, m, x, s, scale, nulltime and the
 * globals A3/A5 are carried from one module into the next.
 */
int main(int argc, char *argv[])
{
   register double s,u,v,w,x;
   long loops, NLimit;
   register long i, m, n;

   printf( "\n");
   dtime(TimeArray);
   printf( "   FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n");
   /* Initial iteration count and limits for the calibration loop below. */
   loops = 15625;
   T[1] = 1.0E+06/(double)loops;
   TLimit = 15.0;
   NLimit = 512000000;
   /* Named constants are stored in globals and read back in the kernels. */
   piref = 3.14159265358979324;
   one = 1.0;
   two = 2.0;
   three = 3.0;
   four = 4.0;
   five = 5.0;
   scale = one;
   printf( "   Module     Error        RunTime      MFLOPS\n");
   printf( "                            (usec)\n");
   dtime(TimeArray);
   dtime(TimeArray);
   /* Module 1, which also calibrates n: double n and re-run the timed loop
      until the measured value sa reaches TLimit or n reaches NLimit. */
   n = loops;
   sa = 0.0;
   do
   {
   n = 2 * n;
   x = one / (double)n;
   s = 0.0;
   v = 0.0;
   w = one;
       dtime(TimeArray);
       for( i = 1 ; i <= n-1 ; i++ )
       {
       v = v + w;
       u = v * x;
       s = s + (D1+u*(D2+u*D3))/(w+u*(D1+u*(E2+u*E3)));
       }
       dtime(TimeArray);
       sa = TimeArray[1];

   if ( n == NLimit ) break;

   } while ( sa < TLimit );

   /* Scale factor applied to every raw TimeArray[1] reading from here on. */
   scale = 1.0E+06 / (double)n;
   T[1] = scale;

   /* Time an empty loop of the same length; the scaled result is nulltime,
      clamped so that it is never negative. */
   dtime(TimeArray);
   for( i = 1 ; i <= n-1 ; i++ )
   {
   }
   dtime(TimeArray);
   nulltime = T[1] * TimeArray[1];
   if ( nulltime < 0.0 ) nulltime = 0.0;

   /* Module 1 run time: scaled loop time minus the empty-loop time. */
   T[2] = T[1] * sa - nulltime;

   sa = (D1+D2+D3)/(one+D1+E2+E3);
   sb = D1;

   /* NOTE(review): 14.0 is presumably the operation count per iteration -
      confirm. */
   T[3] = T[2] / 14.0;
   /* Combine the end terms sa and sb with twice the interior sum s. */
   sa = x * ( sa + sb + two * s ) / two;
   sb = one / sa;
   /* Iteration count for module 2, derived from this module's result. */
   n = (long)( (double)( 40000 * (long)sb ) / scale );
   sc = sb - 25.2;
   T[4] = one / T[3];
   printf( "     1   %13.4e  %10.4f  %10.4f\n",sc,T[2],T[4]);
   /* Module 2.  First time a loop containing only the sign flip of s and the
      update of sa; its time T[5] is subtracted from the full loop's time. */
   m = n;
   s = -five;
   sa = -one;
   dtime(TimeArray);
   for ( i = 1 ; i <= m ; i++ )
   {
   s = -s;
   sa = sa + s;
   }
   dtime(TimeArray);
   T[5] = T[1] * TimeArray[1];
   if ( T[5] < 0.0 ) T[5] = 0.0;
   sc = (double)m;

   u = sa;
   v = 0.0;
   w = 0.0;
   x = 0.0;

   /* Full module 2 loop: the same two statements plus four more updates. */
   dtime(TimeArray);
   for ( i = 1 ; i <= m ; i++)
   {
   s = -s;
   sa = sa + s;
   u = u + two;
   x = x +(s - u);
   v = v - s * u;
   w = w + s / u;
   }
   dtime(TimeArray);
   T[6] = T[1] * TimeArray[1];

   T[7] = ( T[6] - T[5] ) / 7.0;
   /* m is recomputed here and becomes the iteration count for modules 3-7. */
   m = (long)( sa * x / sc );
   sa = four * w / five;
   sb = sa + five / v;
   sc = 31.25;
   piprg = sb - sc / (v * v * v);
   /* Printed error: computed value piprg minus the reference piref. */
   pierr = piprg - piref;
   T[8] = one / T[7];

   printf( "     2   %13.4e  %10.4f  %10.4f\n",pierr,T[6]-T[5],T[8]);
   /* Module 3: sum the A polynomial (with -A5 and -A3) at u = v*x, using a
      step x of piref/(three*m). */
   x = piref / ( three * (double)m );
   s = 0.0;
   v = 0.0;

   dtime(TimeArray);
   for( i = 1 ; i <= m-1 ; i++ )
   {
   v = v + one;
   u = v * x;
   w = u * u;
   s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one);
   }
   dtime(TimeArray);
   T[9] = T[1] * TimeArray[1] - nulltime;

   /* Same polynomial evaluated once more at u = piref/three. */
   u = piref / three;
   w = u * u;
   sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one);

   T[10] = T[9] / 17.0;
   sa = x * ( sa + two * s ) / two;
   /* sc = sa - sb is the value printed in the Error column. */
   sb = 0.5;
   sc = sa - sb;
   T[11] = one / T[10];

   printf( "     3   %13.4e  %10.4f  %10.4f\n",sc,T[9],T[11]);
   /* Flip the signs of the globals A3 and A5; every later use of the A
      polynomial adds them instead of subtracting. */
   A3 = -A3;
   A5 = -A5;
   /* Module 4: sum the B polynomial at u = i*x. */
   x = piref / ( three * (double)m );
   s = 0.0;
   v = 0.0;

   dtime(TimeArray);
   for( i = 1 ; i <= m-1 ; i++ )
   {
   u = (double)i * x;
   w = u * u;
   s = s + w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
   }
   dtime(TimeArray);
   T[12] = T[1] * TimeArray[1] - nulltime;

   u = piref / three;
   w = u * u;
   sa = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;

   T[13] = T[12] / 15.0;
   sa = x * ( sa + one + two * s ) / two;
   /* The comparison value sb is the A polynomial at u = piref/three. */
   u = piref / three;
   w = u * u;
   sb = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+A0);
   sc = sa - sb;
   T[14] = one / T[13];

   printf( "     4   %13.4e  %10.4f  %10.4f\n",sc,T[12],T[14]);
   /* Module 5: sum the quotient (A polynomial) / (B polynomial) at u = i*x. */
   x = piref / ( three * (double)m );
   s = 0.0;
   v = 0.0;

   dtime(TimeArray);
   for( i = 1 ; i <= m-1 ; i++ )
   {
   u = (double)i * x;
   w = u * u;
   v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
   s = s + v / (w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
   }
   dtime(TimeArray);
   T[15] = T[1] * TimeArray[1] - nulltime;

   u = piref / three;
   w = u * u;
   sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
   sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
   sa = sa / sb;

   T[16] = T[15] / 29.0;
   sa = x * ( sa + two * s ) / two;
   sb = 0.6931471805599453;
   sc = sa - sb;
   T[17] = one / T[16];

   printf( "     5   %13.4e  %10.4f  %10.4f\n",sc,T[15],T[17]);
   /* Module 6: sum the product (A polynomial) * (B polynomial); the step x
      is piref/(four*m) here rather than piref/(three*m). */
   x = piref / ( four * (double)m );
   s = 0.0;
   v = 0.0;

   dtime(TimeArray);
   for( i = 1 ; i <= m-1 ; i++ )
   {
   u = (double)i * x;
   w = u * u;
   v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
   s = s + v*(w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
   }
   dtime(TimeArray);
   T[18] = T[1] * TimeArray[1] - nulltime;

   u = piref / four;
   w = u * u;
   sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
   sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
   sa = sa * sb;

   T[19] = T[18] / 29.0;
   sa = x * ( sa + two * s ) / two;
   sb = 0.25;
   sc = sa - sb;
   T[20] = one / T[19];

   printf( "     6   %13.4e  %10.4f  %10.4f\n",sc,T[18],T[20]);
   /* Module 7: accumulate three quotients per iteration at x = i*v, where
      the step v is sa/m. */
   s = 0.0;
   w = one;
   sa = 102.3321513995275;
   v = sa / (double)m;

   dtime(TimeArray);
   for ( i = 1 ; i <= m-1 ; i++)
   {
   x = (double)i * v;
   u = x * x;
   s = s - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w );
   }
   dtime(TimeArray);
   T[21] = T[1] * TimeArray[1] - nulltime;
   T[22] = T[21] / 12.0;
   x = sa;
   u = x * x;
   sa = -w - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w );
   sa = 18.0 * v * (sa + two * s );

   /* Re-derive m from this module's result; module 8 iterates m-1 times and
      the final "Iterations" line prints this value. */
   m = -2000 * (long)sa;
   m = (long)( (double)m / scale );

   sc = sa + 500.2;
   T[23] = one / T[22];
   printf( "     7   %13.4e  %10.4f  %10.4f\n",sc,T[21],T[23]);
   /* Module 8: sum (B polynomial)^2 * (A polynomial) at u = i*x. */
   x = piref / ( three * (double)m );
   s = 0.0;
   v = 0.0;

   dtime(TimeArray);
   for( i = 1 ; i <= m-1 ; i++ )
   {
   u = (double)i * x;
   w = u * u;
   v = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
   s = s + v*v*u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
   }
   dtime(TimeArray);
   T[24] = T[1] * TimeArray[1] - nulltime;

   u = piref / three;
   w = u * u;
   sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
   sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
   sa = sa * sb * sb;

   T[25] = T[24] / 30.0;
   sa = x * ( sa + two * s ) / two;
   sb = 0.29166666666666667;
   sc = sa - sb;
   T[26] = one / T[25];
   printf( "     8   %13.4e  %10.4f  %10.4f\n",sc,T[24],T[26]);
   /* Combined figures: each pair below is a weighted sum of module run times
      divided by a constant, followed by its reciprocal, which is the value
      printed as MFLOPS(1)..MFLOPS(4). */
   T[27] = ( five * (T[6] - T[5]) + T[9] ) / 52.0;
   T[28] = one / T[27];
   T[29] = T[2] + T[9] + T[12] + T[15] + T[18];
   T[29] = (T[29] + four * T[21]) / 152.0;
   T[30] = one / T[29];
   T[31] = T[2] + T[9] + T[12] + T[15] + T[18];
   T[31] = (T[31] + T[21] + T[24]) / 146.0;
   T[32] = one / T[31];
   T[33] = (T[9] + T[12] + T[18] + T[24]) / 91.0;
   T[34] = one / T[33];
   printf( "\n");
   printf( "   Iterations      = %10ld\n",m);
   printf( "   NullTime (usec) = %10.4f\n",nulltime);
   printf( "   MFLOPS(1)       = %10.4f\n",T[28]);
   printf( "   MFLOPS(2)       = %10.4f\n",T[30]);
   printf( "   MFLOPS(3)       = %10.4f\n",T[32]);
   printf( "   MFLOPS(4)       = %10.4f\n\n",T[34]);

   dtime(TimeArray);

   return 0;
}

-- 
           Summary: mips64-none-elf-gcc: Excessive NOPs with -march=r3000
           Product: gcc
           Version: 3.4.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: target
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: niva at niisi dot msk dot ru
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: mips64-none-elf


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18141


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]