This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: Code much slower. Why?
- To: gcc at gcc dot gnu dot org
- Subject: Re: Code much slower. Why?
- From: David Ronis <ronis at ronispc dot chem dot mcgill dot ca>
- Date: Wed, 29 Sep 1999 22:48:15 -0400
- CC: N8TM at aol dot com
- Reply-to: ronis at onsager dot chem dot mcgill dot ca
Hi Tim
Thanks for the reply. I've appended the relevant portions of the .i
files that I get when I preprocess with the libc5/6 includes. Both
cases use gcc-2.95.1 (compiled in the glibc2 environment). As far as
I can tell the two are identical. The main differences arise from
/usr/include/bits/mathinline.h, which defines sqrt() and exp() in the
libc6 version, but leaves them as externs in the libc5 one. Since
only exp is called in the latter, I presume gcc is smart enough to
turn sqrt into the sqrt asm instruction (why isn't it doing it for the
exp too?). Is the FPU that much faster on an i586? If so, how do I
turn off the asm code in mathinline.h?
David
===========================Some notes on the code============================
There are some automatic defines in the file (either to doubles or
ints), but they don't change when I preprocess. The routine
matrix_multiply is defined as:
static inline void matrix_multiply( double a[3][3],
double b[3][3], double c[3][3])
and does what the name implies: a=b.c. Other than that, double drand()
is a uniform random number generator on the range [0.0, 1.0), and is
explicitly defined in the file. I'm usually reasonably careful about
mixed mode arithmetic, and explicitly cast when I'm mixing types.
You'll notice that there is no serious looping in the function, which
again accounts for > 90% of the profiled times. (If you're
interested, this is the guts of a monte carlo simulation for polymer
chains on a surface).
============================with glibc2 includes=============================
/*
 * do_hydrocarbon_move: attempt one trial move that applies a rotation to
 * every monomer beyond a randomly chosen index, then accept or reject it.
 *
 * Takes no arguments and returns nothing; it works on state declared
 * outside this function (monomer, new_monomer, nmonomer, nrigid, seed,
 * plate_type, plate_gap, inverse_grid_spacing, steric_potential,
 * electric_field, fail_core, fail_swap).
 *
 * Outcomes:
 *   - a rotated z coordinate is out of range: fail_core++ and return;
 *   - the potential rises and the random test fails: fail_swap++ and return;
 *   - otherwise the monomer and new_monomer pointers are exchanged.
 *
 * The static locals (ry, rz, dihedral, mdihedral, last_monomer) keep their
 * values from one call to the next.
 */
static void do_hydrocarbon_move()
{
double r[3];
double rotation[3][3];
/* ry and rz: only the constant entries are set here; the others are
   overwritten on each call before the matrices are used. */
static double ry[3][3]={
{0.0,0.0,0.0},
{0.0,1.0,0.0},
{0.0,0.0,0.0}
};
static double rz[3][3]={
{0.0,0.0,0.0},
{0.0,0.0,0.0},
{0.0,0.0,1.0}
};
/* dihedral and mdihedral start out identical; their [0][1] and [1][0]
   entries are filled in below with opposite signs. */
static double dihedral[3][3]={
{-0.5,0.0,0.0},
{0.0,-0.5,0.0},
{0.0,0.0,1.0}
};
static double mdihedral[3][3]={
{-0.5,0.0,0.0},
{0.0,-0.5,0.0},
{0.0,0.0,1.0}
};
double rtmp[3][3],rtmp1[3][3];
double tmp;
/* Highest index chosen so far for which new_monomer has been brought
   in line with monomer by the copy loop below. */
static unsigned int last_monomer=0;
/* Random index: nrigid plus the truncated value of
   drand()*(nmonomer-nrigid).
   NOTE(review): assumes drand() < 1.0 and nmonomer > nrigid, so that
   nrigid <= i <= nmonomer-1 -- confirm against the definition of drand. */
unsigned int i=(unsigned int)(drand()*(nmonomer-nrigid))+nrigid;
unsigned int j,k;
struct monomer *ptr;
/* One chained assignment sets four entries:
     dihedral[1][0] = mdihedral[0][1] =  SIN_120
     dihedral[0][1] = mdihedral[1][0] = -SIN_120
   so mdihedral is the transpose of dihedral.
   Presumably SIN_120 is sin(120 degrees), which with the -0.5 diagonal
   would make these rotations by +120/-120 degrees about z -- TODO confirm. */
dihedral[0][1]=mdihedral[1][0]=-(dihedral[1][0]=mdihedral[0][1]=SIN_120);
/* Copy position and potential of monomers last_monomer+1 .. i from
   monomer into new_monomer. */
if(i>last_monomer)
for(j=last_monomer+1;j<=i;j++)
{
for(k=0;k<3;k++)
new_monomer[j].r[k]=monomer[j].r[k];
new_monomer[j].potential=monomer[j].potential;
}
/* Assigned unconditionally, so last_monomer can also move downwards. */
last_monomer=i;
/* r = monomer[i].r - monomer[i-1].r, tmp = squared length of r.
   NOTE(review): i-1 wraps around if i == 0 (i is unsigned); this relies
   on nrigid >= 1 -- confirm. */
tmp=0.0;
for(j=0;j<3;j++)
{
r[j]=(monomer[i].r[j]-monomer[i-1].r[j]);
tmp+=r[j]*r[j];
}
/* Only the z component is normalised; r[0] and r[1] keep their original
   scale and are divided by their own norm further down. */
r[2]/=sqrt(tmp);
/* Fill ry as
     [  c  0 -s ]
     [  0  1  0 ]     with c = r[2], s = sqrt(1 - r[2]*r[2]).
     [  s  0  c ] */
ry[0][0]=ry[2][2]=r[2];
ry[0][2]=-(ry[2][0]=sqrt(1.0-r[2]*r[2]));
/* Advance seed with a linear congruential step (modulus 1771875).
   It is compared with 885937 (the modulus halved, rounded down) below to
   choose between dihedral and mdihedral. */
seed=(seed* 2416L + 374441L )% 1771875L ;
/* Squared length of the x-y part of r. */
tmp=r[0]*r[0]+r[1]*r[1];
if(tmp!=0.0)
{
/* tmp is overwritten with 1/sqrt(tmp) inside this expression and reused on
   the next line.  rz becomes
     [  c  s  0 ]
     [ -s  c  0 ]     with c = r[0]*tmp, s = r[1]*tmp.
     [  0  0  1 ] */
rz[0][0]=rz[1][1]=r[0]*(tmp=1.0/sqrt(tmp));
rz[1][0]=-(rz[0][1]=r[1]*tmp);
/* Per the accompanying notes, matrix_multiply(a,b,c) stores b.c in a,
   so rtmp = ry.rz and rtmp1 = (dihedral or mdihedral).rtmp. */
matrix_multiply(rtmp,ry,rz);
if(seed> 885937L )
matrix_multiply(rtmp1,dihedral,rtmp);
else
matrix_multiply(rtmp1,mdihedral,rtmp);
/* rotation = transpose(rtmp).rtmp1 (the sum runs over the first index
   of both factors). */
for(j=0;j<3;j++)
for(k=0;k<3;k++)
{
unsigned int n;
for(n=0,rotation[j][k]=0.0;n<3;n++)
rotation[j][k]+=rtmp[n][j]*rtmp1[n][k];
}
}
else
{
/* r has no x-y component: use dihedral or mdihedral directly as the
   rotation, without ry and rz. */
if(seed> 885937L )
for(j=0;j<3;j++)
for(k=0;k<3;k++)
rotation[j][k]=dihedral[j][k];
else
for(j=0;j<3;j++)
for(k=0;k<3;k++)
rotation[j][k]=mdihedral[j][k];
}
/* From here on i refers to the monomer before the one chosen above. */
i--;
/* r = monomer[i].r - rotation.monomer[i].r, i.e. the offset for which
   x -> r + rotation.x leaves monomer[i] where it is. */
for(j=0;j<3;j++)
{
r[j]=monomer[i].r[j];
for(k=0;k<3;k++)
r[j]-=rotation[j][k]*monomer[i].r[k];
}
/* Transform monomers i+2 .. nmonomer (inclusive) into new_monomer.
   NOTE(review): the inclusive bound means index nmonomer itself must be a
   valid element of both arrays -- confirm against their allocation. */
for(j=i+2;j<=nmonomer;j++)
{
/* z is computed first so an out-of-range position can be rejected before
   x and y are computed. */
new_monomer[j].r[2]=r[2];
for(k=0;k<3;k++)
new_monomer[j].r[2]+=rotation[2][k]*monomer[j].r[k];
/* Reject if z is negative, or if plate_type is 2 and z exceeds plate_gap. */
if((new_monomer[j].r[2]<0.0) ||
((plate_type==2) && (new_monomer[j].r[2] > plate_gap )))
{
fail_core++;
return;
}
new_monomer[j].r[0]=r[0];
new_monomer[j].r[1]=r[1];
for(k=0;k<3;k++)
{
new_monomer[j].r[0]+=rotation[0][k]*monomer[j].r[k];
new_monomer[j].r[1]+=rotation[1][k]*monomer[j].r[k];
}
/* k is reused as a table index: z times inverse_grid_spacing, converted
   (truncated) to unsigned int. */
k=new_monomer[j].r[2]*inverse_grid_spacing;
/* Running sum: the previous monomer's potential plus the table entry. */
new_monomer[j].potential=new_monomer[j-1].potential
+steric_potential[k];
}
/* k still holds the index computed for j == nmonomer in the last loop
   iteration.
   NOTE(review): if the loop above ever ran zero times, k would be 3 here
   (left over from the earlier k-loop) -- confirm i+2 <= nmonomer always. */
new_monomer[nmonomer].potential-=electric_field[k]*
(new_monomer[nmonomer].r[2]-new_monomer[nmonomer-1].r[2])
*new_monomer[nmonomer].mu;
/* tmp = change in the last monomer's potential.  A decrease (or no change)
   is always accepted; an increase is rejected when drand() > exp(-tmp). */
if((tmp=new_monomer[nmonomer].potential
-monomer[nmonomer].potential)>0.0)
{
if(drand()>exp(-tmp))
{
fail_swap++;
return;
}
}
/* Accepted: exchange the two pointers so new_monomer becomes monomer. */
ptr=new_monomer;
new_monomer=monomer;
monomer=ptr;
}
============================with libc5 includes==============================
/*
 * do_hydrocarbon_move: attempt one trial move that applies a rotation to
 * every monomer beyond a randomly chosen index, then accept or reject it.
 *
 * Takes no arguments and returns nothing; it works on state declared
 * outside this function (monomer, new_monomer, nmonomer, nrigid, seed,
 * plate_type, plate_gap, inverse_grid_spacing, steric_potential,
 * electric_field, fail_core, fail_swap).
 *
 * Outcomes:
 *   - a rotated z coordinate is out of range: fail_core++ and return;
 *   - the potential rises and the random test fails: fail_swap++ and return;
 *   - otherwise the monomer and new_monomer pointers are exchanged.
 *
 * The static locals (ry, rz, dihedral, mdihedral, last_monomer) keep their
 * values from one call to the next.
 */
static void do_hydrocarbon_move()
{
double r[3];
double rotation[3][3];
/* ry and rz: only the constant entries are set here; the others are
   overwritten on each call before the matrices are used. */
static double ry[3][3]={
{0.0,0.0,0.0},
{0.0,1.0,0.0},
{0.0,0.0,0.0}
};
static double rz[3][3]={
{0.0,0.0,0.0},
{0.0,0.0,0.0},
{0.0,0.0,1.0}
};
/* dihedral and mdihedral start out identical; their [0][1] and [1][0]
   entries are filled in below with opposite signs. */
static double dihedral[3][3]={
{-0.5,0.0,0.0},
{0.0,-0.5,0.0},
{0.0,0.0,1.0}
};
static double mdihedral[3][3]={
{-0.5,0.0,0.0},
{0.0,-0.5,0.0},
{0.0,0.0,1.0}
};
double rtmp[3][3],rtmp1[3][3];
double tmp;
/* Highest index chosen so far for which new_monomer has been brought
   in line with monomer by the copy loop below. */
static unsigned int last_monomer=0;
/* Random index: nrigid plus the truncated value of
   drand()*(nmonomer-nrigid).
   NOTE(review): assumes drand() < 1.0 and nmonomer > nrigid, so that
   nrigid <= i <= nmonomer-1 -- confirm against the definition of drand. */
unsigned int i=(unsigned int)(drand()*(nmonomer-nrigid))+nrigid;
unsigned int j,k;
struct monomer *ptr;
/* One chained assignment sets four entries:
     dihedral[1][0] = mdihedral[0][1] =  SIN_120
     dihedral[0][1] = mdihedral[1][0] = -SIN_120
   so mdihedral is the transpose of dihedral.
   Presumably SIN_120 is sin(120 degrees), which with the -0.5 diagonal
   would make these rotations by +120/-120 degrees about z -- TODO confirm. */
dihedral[0][1]=mdihedral[1][0]=-(dihedral[1][0]=mdihedral[0][1]=SIN_120);
/* Copy position and potential of monomers last_monomer+1 .. i from
   monomer into new_monomer. */
if(i>last_monomer)
for(j=last_monomer+1;j<=i;j++)
{
for(k=0;k<3;k++)
new_monomer[j].r[k]=monomer[j].r[k];
new_monomer[j].potential=monomer[j].potential;
}
/* Assigned unconditionally, so last_monomer can also move downwards. */
last_monomer=i;
/* r = monomer[i].r - monomer[i-1].r, tmp = squared length of r.
   NOTE(review): i-1 wraps around if i == 0 (i is unsigned); this relies
   on nrigid >= 1 -- confirm. */
tmp=0.0;
for(j=0;j<3;j++)
{
r[j]=(monomer[i].r[j]-monomer[i-1].r[j]);
tmp+=r[j]*r[j];
}
/* Only the z component is normalised; r[0] and r[1] keep their original
   scale and are divided by their own norm further down. */
r[2]/=sqrt(tmp);
/* Fill ry as
     [  c  0 -s ]
     [  0  1  0 ]     with c = r[2], s = sqrt(1 - r[2]*r[2]).
     [  s  0  c ] */
ry[0][0]=ry[2][2]=r[2];
ry[0][2]=-(ry[2][0]=sqrt(1.0-r[2]*r[2]));
/* Advance seed with a linear congruential step (modulus 1771875).
   It is compared with 885937 (the modulus halved, rounded down) below to
   choose between dihedral and mdihedral. */
seed=(seed* 2416L + 374441L )% 1771875L ;
/* Squared length of the x-y part of r. */
tmp=r[0]*r[0]+r[1]*r[1];
if(tmp!=0.0)
{
/* tmp is overwritten with 1/sqrt(tmp) inside this expression and reused on
   the next line.  rz becomes
     [  c  s  0 ]
     [ -s  c  0 ]     with c = r[0]*tmp, s = r[1]*tmp.
     [  0  0  1 ] */
rz[0][0]=rz[1][1]=r[0]*(tmp=1.0/sqrt(tmp));
rz[1][0]=-(rz[0][1]=r[1]*tmp);
/* Per the accompanying notes, matrix_multiply(a,b,c) stores b.c in a,
   so rtmp = ry.rz and rtmp1 = (dihedral or mdihedral).rtmp. */
matrix_multiply(rtmp,ry,rz);
if(seed> 885937L )
matrix_multiply(rtmp1,dihedral,rtmp);
else
matrix_multiply(rtmp1,mdihedral,rtmp);
/* rotation = transpose(rtmp).rtmp1 (the sum runs over the first index
   of both factors). */
for(j=0;j<3;j++)
for(k=0;k<3;k++)
{
unsigned int n;
for(n=0,rotation[j][k]=0.0;n<3;n++)
rotation[j][k]+=rtmp[n][j]*rtmp1[n][k];
}
}
else
{
/* r has no x-y component: use dihedral or mdihedral directly as the
   rotation, without ry and rz. */
if(seed> 885937L )
for(j=0;j<3;j++)
for(k=0;k<3;k++)
rotation[j][k]=dihedral[j][k];
else
for(j=0;j<3;j++)
for(k=0;k<3;k++)
rotation[j][k]=mdihedral[j][k];
}
/* From here on i refers to the monomer before the one chosen above. */
i--;
/* r = monomer[i].r - rotation.monomer[i].r, i.e. the offset for which
   x -> r + rotation.x leaves monomer[i] where it is. */
for(j=0;j<3;j++)
{
r[j]=monomer[i].r[j];
for(k=0;k<3;k++)
r[j]-=rotation[j][k]*monomer[i].r[k];
}
/* Transform monomers i+2 .. nmonomer (inclusive) into new_monomer.
   NOTE(review): the inclusive bound means index nmonomer itself must be a
   valid element of both arrays -- confirm against their allocation. */
for(j=i+2;j<=nmonomer;j++)
{
/* z is computed first so an out-of-range position can be rejected before
   x and y are computed. */
new_monomer[j].r[2]=r[2];
for(k=0;k<3;k++)
new_monomer[j].r[2]+=rotation[2][k]*monomer[j].r[k];
/* Reject if z is negative, or if plate_type is 2 and z exceeds plate_gap. */
if((new_monomer[j].r[2]<0.0) ||
((plate_type==2) && (new_monomer[j].r[2] > plate_gap )))
{
fail_core++;
return;
}
new_monomer[j].r[0]=r[0];
new_monomer[j].r[1]=r[1];
for(k=0;k<3;k++)
{
new_monomer[j].r[0]+=rotation[0][k]*monomer[j].r[k];
new_monomer[j].r[1]+=rotation[1][k]*monomer[j].r[k];
}
/* k is reused as a table index: z times inverse_grid_spacing, converted
   (truncated) to unsigned int. */
k=new_monomer[j].r[2]*inverse_grid_spacing;
/* Running sum: the previous monomer's potential plus the table entry. */
new_monomer[j].potential=new_monomer[j-1].potential
+steric_potential[k];
}
/* k still holds the index computed for j == nmonomer in the last loop
   iteration.
   NOTE(review): if the loop above ever ran zero times, k would be 3 here
   (left over from the earlier k-loop) -- confirm i+2 <= nmonomer always. */
new_monomer[nmonomer].potential-=electric_field[k]*
(new_monomer[nmonomer].r[2]-new_monomer[nmonomer-1].r[2])
*new_monomer[nmonomer].mu;
/* tmp = change in the last monomer's potential.  A decrease (or no change)
   is always accepted; an increase is rejected when drand() > exp(-tmp). */
if((tmp=new_monomer[nmonomer].potential
-monomer[nmonomer].potential)>0.0)
{
if(drand()>exp(-tmp))
{
fail_swap++;
return;
}
}
/* Accepted: exchange the two pointers so new_monomer becomes monomer. */
ptr=new_monomer;
new_monomer=monomer;
monomer=ptr;
}