This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Alpha and egcs performance problem isolated


Keywords: Alpha, egcs, gcc, floating-point, C++

Hi all!

I isolated a piece of code that shows extremely bad
performance using the latest egcs and gcc releases on
ev56 based Alphas. This mail includes a few test results
using different compilers on various machines and the
(small) test code itself.

The code in question is a small C++ class handling
coordinate triplets (double). All methods are inlined,
all (three) data members are public. What I included in
this mail is a stripped down version of my code,
containing only two operators besides C'- and D'-TORs.
The intention to write this class was to have some
common vector operations at hand, whose usage eventually
had to be replaced by "written out" code only in very
performance critical places.

To demonstrate the performance problem, I wrote two
short programs, which do exactly the same. One of them
using the vector class described above named "lfv", the
other doing all operations by hand named "nolfv".

The code itself, together with a Makefile, is attached
to this mail, so I proceed with a presentation of a few
(mini-)benchmark results (user times measured using 
/usr/bin/time on otherwise almost idle machines):

       ENV1   ENV2   ENV3   ENV4   ENV5
lfv    2.82   4.12   4.90   3.69   13.4
nolfv  1.69   1.65   1.59   3.61   18.0

The environments `ENV1', `ENV2' and `ENV3' have the
hardware in common: a machine based on a PC164 board with
a 500 MHz ev56 CPU.

ENV1: RH 4.1, gcc 2.7.2.1 as shipped with RH 4.1
ENV2: RH 5.1, gcc 2.8.1 compiled from sources
ENV3: RH 5.1, egcs 1.1.2 ---""---
ENV4: Intel PII 450 MHz, SuSE 6.0, egcs 1.1.1 as shipped
ENV5: Sparc 5 clone (MicroSparc), 110 MHz, SunOS 4.1.3,
      gcc 2.7.2.1

Remark to ENV4 results:
On the PII there is almost no performance penalty using
the vector class! If I compared only egcs compiled
"lfv" binaries I would have come to the conclusion that
the PII is a better floating point performer!

Remark to ENV5 results:
This is no typo! The version using the vector class is
indeed faster on this platform!

Regards and thanks for your interest,

Stefan Schroepfer
scr@iis.fhg.de

# Builds the two benchmark programs "lfv" (vector-class version) and
# "nolfv" (hand-written version).  Each source is first compiled to
# assembly (.cpp -> .s) and then assembled (.s -> .o) before linking.

# Compiler driver; uncomment the alternative line to use a gcc 2.8.1
# installed under /usr/local instead.
CC = gcc
#CC = /usr/local/gcc-2.8.1/bin/gcc

# Target-architecture options passed to the compile step.  Keep exactly
# one GCCARCH assignment active.
GCCARCH = #				# none (gcc, if conf'd for target arch)
#GCCARCH = -mcpu=ev56 -Wa,-m21164a #	# gcc, explicitly ev56 plus asm option
#GCCARCH = -march=i686 #		# gcc, PPro and siblings
#GCCARCH = -march=i586 -mcpu=i686 #	# gcc, worth a try with K6-2

# CFLAGS1: flags for the .cpp -> .s step (warnings, optimisation, arch).
# CFLAGS2: flags for the .s -> .o step (-c: assemble only, do not link).
CFLAGS1 = -Wall -W -O2 $(GCCARCH) -ffast-math -fforce-addr
CFLAGS2 = -c

# Link through the compiler driver; -s strips symbols from the executables.
LD = $(CC)
LDFLAGS = -s

# Register .cpp as a known suffix so the .cpp.s suffix rule is recognised.
.SUFFIXES:	.cpp

# C++ source -> assembly (-S stops after generating the .s file).
.cpp.s:
	$(CC) $(CFLAGS1) -S $<

# Assembly -> object file.
.s.o:
	$(CC) $(CFLAGS2) $<

# Default target: build both benchmark programs.
all:		lfv nolfv

# Dependencies only; the commands come from the suffix rules above.
lfv.s:		lfv.cpp lfvector.h

lfv.o:		lfv.s

nolfv.s:	nolfv.cpp

nolfv.o:	nolfv.s

# Link steps.
lfv:		lfv.o
	$(LD) $(LDFLAGS) -o lfv lfv.o

nolfv:		nolfv.o
	$(LD) $(LDFLAGS) -o nolfv nolfv.o

# Remove all generated assembly, object and executable files.
clean:
	rm -f lfv.s lfv.o lfv nolfv.s nolfv.o nolfv


#include <stdio.h>
#include "lfvector.h"

// Edge length, in voxels, of the cubic volume that main() walks through.
#define LSIZE	256L

// Returns 1. if the squared length of (lfvPnt - lfvPos) is at most
// lfSqrRadius, and 0. otherwise.  Defined after main().
double doSth (const LFVector &lfvPos,
	      const LFVector &lfvPnt,
	      double lfSqrRadius);

//====================================================================
int main ()
//====================================================================
{
  long		lSizeX, lSizeY, lSizeZ;	// volume dimensions
  long		lActX,  lActY,  lActZ;	// actual voxel numbers
  double 	lfPosX, lfPosY, lfPosZ;	// center of zeroth voxel
  LFVector	lfvAct;			// actual position
  LFVector	lfvPnt;			// 'some' point in space
  double	lfSum = 0.;		// result

  // handling a LSIZE^3 volume (no allocation, only coordinate calculation)
  lSizeX = LSIZE;
  lSizeY = LSIZE;
  lSizeZ = LSIZE;

  // initializing 'some' point
  lfvPnt = LFVector (0.1234,
		     0.3456,
		     0.5678);

  // set center of volume to origin of (cartesian) coordinate system
  lfPosX = - (double) (lSizeX -1L) * 0.5;
  lfPosY = - (double) (lSizeY -1L) * 0.5;
  lfPosZ = - (double) (lSizeZ -1L) * 0.5;

  // going thru voxel centers
  for (lActZ=0L; lActZ<lSizeZ; lActZ++)
  {
    lfvAct.lfZ = lfPosZ + (double) lActZ;
    for (lActY=0L; lActY<lSizeY; lActY++)
    {
      lfvAct.lfY = lfPosY + (double) lActY;
      for (lActX=0L; lActX<lSizeX; lActX++)
      {
	lfvAct.lfX = lfPosX + (double) lActX;

	lfSum += doSth (lfvAct,
			lfvPnt,
			(double) LSIZE * 0.333);
      }
    }
  }

  printf ("lfSum = %10.3f\n", lfSum);

  return (0);
}

//====================================================================
double doSth (const LFVector &lfvPos,
	      const LFVector &lfvPnt,
	      double lfSqrRadius)
//====================================================================
{
  LFVector	lfvDist;

  lfvDist = lfvPnt - lfvPos;

  if (lfvDist * lfvDist <= lfSqrRadius)
    return (1.);
  else
    return (0.);
}


#ifndef _LFVECTOR_HH_
#define _LFVECTOR_HH_

// Triplet of double coordinates with a few common vector operations.
//
// The data members are public on purpose: the class is meant to be used
// as a "better struct".  All member functions are defined inside the
// class body and are therefore implicitly inline.
//
// No destructor is declared.  The class owns no resources, and leaving
// the destructor implicit keeps the type trivially destructible, whereas
// an empty user-written destructor would make it non-trivial.
class LFVector
{
public:
  // public data: use this class as a better struct
  double	lfX, lfY, lfZ;

  // Default C'TOR: intentionally empty.  lfX, lfY and lfZ are left
  // uninitialised, so all three must be assigned before they are read.
  LFVector () {}

  // Builds the vector (lfXA, lfYA, lfZA).
  LFVector (double lfXA, double lfYA, double lfZA)
    : lfX (lfXA), lfY (lfYA), lfZ (lfZA)
  {
  }

  // Component-wise difference: returns (*this) - lfvA as a new vector.
  LFVector operator - (const LFVector &lfvA) const
  {
    return LFVector (lfX - lfvA.lfX, lfY - lfvA.lfY, lfZ - lfvA.lfZ);
  }

  // Dot (scalar) product of *this and lfvA.
  double operator * (const LFVector &lfvA) const
  {
    return lfX * lfvA.lfX + lfY * lfvA.lfY + lfZ * lfvA.lfZ;
  }
};

#endif	// _LFVECTOR_HH_


#include <stdio.h>

// Edge length, in voxels, of the cubic volume that main() walks through.
#define LSIZE	256L

// Returns 1. if the squared distance between the position
// (lfPosX, lfPosY, lfPosZ) and the point (lfPntX, lfPntY, lfPntZ) is at
// most lfSqrRadius, and 0. otherwise.  Defined after main().
double doSth (double lfPosX, double lfPosY, double lfPosZ,
	      double lfPntX, double lfPntY, double lfPntZ,
	      double lfSqrRadius);

//====================================================================
// Benchmark driver, hand-written (no vector class) variant.
//
// Visits the centre of every voxel of an LSIZE^3 volume whose centre
// lies at the origin, passes each centre together with a fixed point
// to doSth() and prints the sum of all results.  Always returns 0.
//====================================================================
int main ()
{
  // Volume dimensions in voxels (nothing is allocated; only the
  // voxel-centre coordinates are computed).
  const long	nVoxX = LSIZE;
  const long	nVoxY = LSIZE;
  const long	nVoxZ = LSIZE;

  // 'Some' point in space that every voxel centre is tested against.
  const double	lfRefX = 0.1234;
  const double	lfRefY = 0.3456;
  const double	lfRefZ = 0.5678;

  // Centre of the zeroth voxel, placed so that the centre of the volume
  // coincides with the origin of the (cartesian) coordinate system.
  const double	lfOrgX = -static_cast<double> (nVoxX - 1L) * 0.5;
  const double	lfOrgY = -static_cast<double> (nVoxY - 1L) * 0.5;
  const double	lfOrgZ = -static_cast<double> (nVoxZ - 1L) * 0.5;

  // Value handed to doSth() as its lfSqrRadius argument.
  const double	lfThreshold = static_cast<double> (LSIZE) * 0.333;

  double	lfTotal = 0.;	// running sum of the doSth() results

  // Walk through all voxel centres: z outermost, x innermost.
  for (long iz = 0L; iz < nVoxZ; ++iz)
  {
    const double lfCurZ = lfOrgZ + static_cast<double> (iz);

    for (long iy = 0L; iy < nVoxY; ++iy)
    {
      const double lfCurY = lfOrgY + static_cast<double> (iy);

      for (long ix = 0L; ix < nVoxX; ++ix)
      {
        const double lfCurX = lfOrgX + static_cast<double> (ix);

        lfTotal += doSth (lfCurX, lfCurY, lfCurZ,
                          lfRefX, lfRefY, lfRefZ,
                          lfThreshold);
      }
    }
  }

  printf ("lfSum = %10.3f\n", lfTotal);

  return 0;
}

//====================================================================
// Distance test on plain scalar coordinates.
//
// Returns 1. when the squared distance between the position
// (lfPosX, lfPosY, lfPosZ) and the point (lfPntX, lfPntY, lfPntZ) does
// not exceed lfSqrRadius, and 0. otherwise.
//====================================================================
double doSth (double lfPosX, double lfPosY, double lfPosZ,
              double lfPntX, double lfPntY, double lfPntZ,
              double lfSqrRadius)
{
  // Per-axis offset from the position to the point.
  const double	lfDx = lfPntX - lfPosX;
  const double	lfDy = lfPntY - lfPosY;
  const double	lfDz = lfPntZ - lfPosZ;

  return (lfDx * lfDx + lfDy * lfDy + lfDz * lfDz <= lfSqrRadius) ? 1. : 0.;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]