This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: Code much slower. Why?



Hi Tim

Thanks for the reply.  I've appended the relevant portions of the .i
files that I get when I preprocess with the libc5/6 includes.  Both
cases use gcc-2.95.1 (compiled in the glibc2 environment).  As far as
I can tell the two are identical.  The main differences arise from
/usr/include/bits/mathinline.h, which defines sqrt() and exp() in the
libc6 version, but leaves them as externs in the libc5 one.  Since
only exp is called in the latter, I presume gcc is smart enough to
turn sqrt into the sqrt asm instruction (why isn't it doing it for the
exp too?).  Is the FPU that much faster on an i586?  If so, how do I
turn off the asm code in mathinline.h?

David
===========================Some notes on the code============================
There are some automatic defines in the file (either to doubles or
ints), but they don't change when I preprocess.  The routine
matrix_multiply is defined as:

static inline void matrix_multiply( double a[3][3], 
				    double b[3][3], double c[3][3])

and does what the name implies: a=b.c.  Other than that, double drand()
is a uniform random number generator on the range [0.0, 1.0), and is
explicitly defined in the file.  I'm usually reasonably careful about
mixed mode arithmetic, and explicitly cast when I'm mixing types.

You'll notice that there is no serious looping in the function, which
again accounts for > 90% of the profiled times.  (If you're
interested, this is the guts of a monte carlo simulation for polymer
chains on a surface).


============================with glibc2 includes=============================
static void do_hydrocarbon_move()
{
  double r[3];
  double rotation[3][3];
  static double ry[3][3]={
    {0.0,0.0,0.0},
    {0.0,1.0,0.0},
    {0.0,0.0,0.0}
  };
  static double rz[3][3]={
    {0.0,0.0,0.0},
    {0.0,0.0,0.0},
    {0.0,0.0,1.0}
  };
  static double dihedral[3][3]={
      {-0.5,0.0,0.0},
      {0.0,-0.5,0.0},
      {0.0,0.0,1.0}
  };
  static double mdihedral[3][3]={
      {-0.5,0.0,0.0},
      {0.0,-0.5,0.0},
      {0.0,0.0,1.0}
  };
  double rtmp[3][3],rtmp1[3][3];
  double tmp;
  static unsigned int last_monomer=0;
  unsigned int i=(unsigned int)(drand()*(nmonomer-nrigid))+nrigid;
  unsigned int j,k;
  struct monomer *ptr;
  dihedral[0][1]=mdihedral[1][0]=-(dihedral[1][0]=mdihedral[0][1]=SIN_120);



  if(i>last_monomer)
    for(j=last_monomer+1;j<=i;j++)
      {
	for(k=0;k<3;k++)
	  new_monomer[j].r[k]=monomer[j].r[k];
	new_monomer[j].potential=monomer[j].potential;
      }
  last_monomer=i;
  tmp=0.0;
  for(j=0;j<3;j++)
    {
      r[j]=(monomer[i].r[j]-monomer[i-1].r[j]);
      tmp+=r[j]*r[j];
    }
  r[2]/=sqrt(tmp);
  ry[0][0]=ry[2][2]=r[2];
  ry[0][2]=-(ry[2][0]=sqrt(1.0-r[2]*r[2]));
  seed=(seed* 2416L + 374441L )% 1771875L ;
  tmp=r[0]*r[0]+r[1]*r[1];
  if(tmp!=0.0)
  {
    rz[0][0]=rz[1][1]=r[0]*(tmp=1.0/sqrt(tmp));
    rz[1][0]=-(rz[0][1]=r[1]*tmp);
    matrix_multiply(rtmp,ry,rz);
    if(seed> 885937L )
      matrix_multiply(rtmp1,dihedral,rtmp);
    else
      matrix_multiply(rtmp1,mdihedral,rtmp);
    for(j=0;j<3;j++)
      for(k=0;k<3;k++)
	{
	  unsigned int n;
	  for(n=0,rotation[j][k]=0.0;n<3;n++)
	    rotation[j][k]+=rtmp[n][j]*rtmp1[n][k];
	}
  }
  else
    {
      if(seed> 885937L )
	for(j=0;j<3;j++)
	  for(k=0;k<3;k++)
	    rotation[j][k]=dihedral[j][k];
      else
	for(j=0;j<3;j++)
	  for(k=0;k<3;k++)
	    rotation[j][k]=mdihedral[j][k];
    }
  i--;
  for(j=0;j<3;j++)
    {
      r[j]=monomer[i].r[j];
      for(k=0;k<3;k++)
	r[j]-=rotation[j][k]*monomer[i].r[k];
    }
  for(j=i+2;j<=nmonomer;j++)
    {
      new_monomer[j].r[2]=r[2];
      for(k=0;k<3;k++)
	new_monomer[j].r[2]+=rotation[2][k]*monomer[j].r[k];
      if((new_monomer[j].r[2]<0.0) ||
	 ((plate_type==2) && (new_monomer[j].r[2] > plate_gap )))
	{
	  fail_core++;
	  return;
	}
      new_monomer[j].r[0]=r[0];
      new_monomer[j].r[1]=r[1];
      for(k=0;k<3;k++)
	{
	  new_monomer[j].r[0]+=rotation[0][k]*monomer[j].r[k];
	  new_monomer[j].r[1]+=rotation[1][k]*monomer[j].r[k];
	}
      k=new_monomer[j].r[2]*inverse_grid_spacing;


      new_monomer[j].potential=new_monomer[j-1].potential
	+steric_potential[k];
    }



  new_monomer[nmonomer].potential-=electric_field[k]*
    (new_monomer[nmonomer].r[2]-new_monomer[nmonomer-1].r[2])
    *new_monomer[nmonomer].mu;
  if((tmp=new_monomer[nmonomer].potential
	    -monomer[nmonomer].potential)>0.0)
    {
      if(drand()>exp(-tmp))
	{
	  fail_swap++;
	  return;
	}
    }
  ptr=new_monomer;
  new_monomer=monomer;
  monomer=ptr;
}

============================with libc5 includes==============================

static void do_hydrocarbon_move()
{
  double r[3];
  double rotation[3][3];
  static double ry[3][3]={
    {0.0,0.0,0.0},
    {0.0,1.0,0.0},
    {0.0,0.0,0.0}
  };
  static double rz[3][3]={
    {0.0,0.0,0.0},
    {0.0,0.0,0.0},
    {0.0,0.0,1.0}
  };
  static double dihedral[3][3]={
      {-0.5,0.0,0.0},
      {0.0,-0.5,0.0},
      {0.0,0.0,1.0}
  };
  static double mdihedral[3][3]={
      {-0.5,0.0,0.0},
      {0.0,-0.5,0.0},
      {0.0,0.0,1.0}
  };
  double rtmp[3][3],rtmp1[3][3];
  double tmp;

  static unsigned int last_monomer=0;
  unsigned int i=(unsigned int)(drand()*(nmonomer-nrigid))+nrigid;
  unsigned int j,k;
  struct monomer *ptr;

  dihedral[0][1]=mdihedral[1][0]=-(dihedral[1][0]=mdihedral[0][1]=SIN_120);

  if(i>last_monomer)
    for(j=last_monomer+1;j<=i;j++)
      {
	for(k=0;k<3;k++)
	  new_monomer[j].r[k]=monomer[j].r[k];
	new_monomer[j].potential=monomer[j].potential;
      }
  last_monomer=i;

  tmp=0.0;
  for(j=0;j<3;j++)
    {
      r[j]=(monomer[i].r[j]-monomer[i-1].r[j]);
      tmp+=r[j]*r[j];
    }
  r[2]/=sqrt(tmp);
  ry[0][0]=ry[2][2]=r[2];
  ry[0][2]=-(ry[2][0]=sqrt(1.0-r[2]*r[2]));

  seed=(seed* 2416L + 374441L )% 1771875L ;
  tmp=r[0]*r[0]+r[1]*r[1];
  if(tmp!=0.0)
  {

    rz[0][0]=rz[1][1]=r[0]*(tmp=1.0/sqrt(tmp));
    rz[1][0]=-(rz[0][1]=r[1]*tmp);
    matrix_multiply(rtmp,ry,rz);
    if(seed> 885937L )
      matrix_multiply(rtmp1,dihedral,rtmp);
    else
      matrix_multiply(rtmp1,mdihedral,rtmp);
    for(j=0;j<3;j++)
      for(k=0;k<3;k++)
	{
	  unsigned int n;
	  for(n=0,rotation[j][k]=0.0;n<3;n++)
	    rotation[j][k]+=rtmp[n][j]*rtmp1[n][k];
	}
  }
  else
    {
      if(seed> 885937L )
	for(j=0;j<3;j++)
	  for(k=0;k<3;k++)
	    rotation[j][k]=dihedral[j][k];
      else
	for(j=0;j<3;j++)
	  for(k=0;k<3;k++)
	    rotation[j][k]=mdihedral[j][k];
    }

  i--;
  for(j=0;j<3;j++)
    {
      r[j]=monomer[i].r[j];
      for(k=0;k<3;k++)
	r[j]-=rotation[j][k]*monomer[i].r[k];
    }
  for(j=i+2;j<=nmonomer;j++)
    {
      new_monomer[j].r[2]=r[2];
      for(k=0;k<3;k++)
	new_monomer[j].r[2]+=rotation[2][k]*monomer[j].r[k];
      if((new_monomer[j].r[2]<0.0) ||
	 ((plate_type==2) && (new_monomer[j].r[2] > plate_gap )))
	{
	  fail_core++;
	  return;
	}
      new_monomer[j].r[0]=r[0];
      new_monomer[j].r[1]=r[1];
      for(k=0;k<3;k++)
	{
	  new_monomer[j].r[0]+=rotation[0][k]*monomer[j].r[k];
	  new_monomer[j].r[1]+=rotation[1][k]*monomer[j].r[k];
	}
      k=new_monomer[j].r[2]*inverse_grid_spacing;




      new_monomer[j].potential=new_monomer[j-1].potential
	+steric_potential[k];
    }

  new_monomer[nmonomer].potential-=electric_field[k]*
    (new_monomer[nmonomer].r[2]-new_monomer[nmonomer-1].r[2])
    *new_monomer[nmonomer].mu;
  if((tmp=new_monomer[nmonomer].potential
	    -monomer[nmonomer].potential)>0.0)
    {
      if(drand()>exp(-tmp))
	{
	  fail_swap++;
	  return;
	}
    }
  ptr=new_monomer;
  new_monomer=monomer;
  monomer=ptr;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]