This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Alpha and egcs performance problem isolated
- To: egcs at egcs dot cygnus dot com
- Subject: Alpha and egcs performance problem isolated
- From: Stefan Schroepfer <scr at iis dot fhg dot de>
- Date: Sun, 18 Apr 1999 19:53:13 +0200
- Organization: FHG-IIS
Keywords: Alpha, egcs, gcc, floating-point, C++
Hi all!
I isolated a piece of code that shows extremely bad
performance using the latest egcs and gcc releases on
ev56 based Alphas. This mail includes a few test results
using different compilers on various machines and the
(small) test code itself.
The code in question is a small C++ class handling
coordinate triplets (double). All methods are inlined,
all (three) data members are public. What I included in
this mail is a stripped down version of my code,
containing only two operators besides C'- and D'-TORs.
The intention to write this class was to have some
common vector operations at hand, whose usage eventually
had to be replaced by "written out" code only in very
performance critical places.
To demonstrate the performance problem, I wrote two
short programs, which do exactly the same. One of them
using the vector class described above named "lfv", the
other doing all operations by hand named "nolfv".
The code itself, together with a Makefile, is attached
to this mail, so I proceed with a presentation of a few
(mini-)benchmark results (user times measured using
/usr/bin/time on otherwise almost idle machines):
ENV1 ENV2 ENV3 ENV4 ENV5
lfv 2.82 4.12 4.90 3.69 13.4
nolfv 1.69 1.65 1.59 3.61 18.0
The environments `ENV1', `ENV2' and `ENV3' have the
hardware in common: a machine based on a PC164 board with
a 500 MHz ev56 CPU.
ENV1: RH 4.1, gcc 2.7.2.1 as shipped with RH 4.1
ENV2: RH 5.1, gcc 2.8.1 compiled from sources
ENV3: RH 5.1, egcs 1.1.2 ---""---
ENV4: Intel PII 450 MHz, SuSE 6.0, egcs 1.1.1 as shipped
ENV5: Sparc 5 clone (MicroSparc), 110 MHz, SunOS 4.1.3,
gcc 2.7.2.1
Remark to ENV4 results:
On the PII there is almost no performance penalty using
the vector class! If I compared only egcs compiled
"lfv" binaries I would have come to the conclusion that
the PII is a better floating point performer!
Remark to ENV5 results:
This is no typo! The version using the vector class is
indeed faster on this platform!
Regards and thanks for your interest,
Stefan Schroepfer
scr@iis.fhg.de
# Build description for the two benchmark programs `lfv' and `nolfv'.
# Each program is built in three steps: .cpp -> .s -> .o -> executable.
# NOTE(review): make requires every recipe line to start with a TAB; the
# recipe lines below show no leading whitespace in this copy -- confirm
# before use.

# Compiler driver; the commented-out line points at an alternative install.
CC = gcc
#CC = /usr/local/gcc-2.8.1/bin/gcc
# Optional target-specific options, spliced into CFLAGS1 below.
# The active setting is empty; alternatives are kept commented out.
GCCARCH = # # none (gcc, if conf'd for target arch)
#GCCARCH = -mcpu=ev56 -Wa,-m21164a # # gcc, explicitly ev56 plus asm option
#GCCARCH = -march=i686 # # gcc, PPro and siblings
#GCCARCH = -march=i586 -mcpu=i686 # # gcc, worth a try with K6-2
# Options used when translating .cpp sources to assembly (.s).
CFLAGS1 = -Wall -W -O2 $(GCCARCH) -ffast-math -fforce-addr
# Options used when turning assembly (.s) into object files (.o).
CFLAGS2 = -c
# Linking is done through the compiler driver; -s is passed at link time.
LD = $(CC)
LDFLAGS = -s
# Make .cpp a known suffix so the suffix rules below can match it.
.SUFFIXES: .cpp
# Suffix rule: generate assembly from a C++ source.
.cpp.s:
$(CC) $(CFLAGS1) -S $<
# Suffix rule: assemble a .s file into an object file.
.s.o:
$(CC) $(CFLAGS2) $<
# Default goal: build both benchmark executables.
all: lfv nolfv
# Explicit dependencies; only lfv.s additionally depends on lfvector.h.
lfv.s: lfv.cpp lfvector.h
lfv.o: lfv.s
nolfv.s: nolfv.cpp
nolfv.o: nolfv.s
# Link rules for the two executables.
lfv: lfv.o
$(LD) $(LDFLAGS) -o lfv lfv.o
nolfv: nolfv.o
$(LD) $(LDFLAGS) -o nolfv nolfv.o
# Remove every generated file (assembly, objects, executables).
clean:
rm -f lfv.s lfv.o lfv nolfv.s nolfv.o nolfv
#include <stdio.h>
#include "lfvector.h"
#define LSIZE 256L
double doSth (const LFVector &lfvPos,
const LFVector &lfvPnt,
double lfSqrRadius);
//====================================================================
int main ()
//====================================================================
{
long lSizeX, lSizeY, lSizeZ; // volume dimensions
long lActX, lActY, lActZ; // actual voxel numbers
double lfPosX, lfPosY, lfPosZ; // center of zeroth voxel
LFVector lfvAct; // actual position
LFVector lfvPnt; // 'some' point in space
double lfSum = 0.; // result
// handling a LSIZE^3 volume (no allocation, only coordinate calculation)
lSizeX = LSIZE;
lSizeY = LSIZE;
lSizeZ = LSIZE;
// initializing 'some' point
lfvPnt = LFVector (0.1234,
0.3456,
0.5678);
// set center of volume to origin of (cartesian) coordinate system
lfPosX = - (double) (lSizeX -1L) * 0.5;
lfPosY = - (double) (lSizeY -1L) * 0.5;
lfPosZ = - (double) (lSizeZ -1L) * 0.5;
// going thru voxel centers
for (lActZ=0L; lActZ<lSizeZ; lActZ++)
{
lfvAct.lfZ = lfPosZ + (double) lActZ;
for (lActY=0L; lActY<lSizeY; lActY++)
{
lfvAct.lfY = lfPosY + (double) lActY;
for (lActX=0L; lActX<lSizeX; lActX++)
{
lfvAct.lfX = lfPosX + (double) lActX;
lfSum += doSth (lfvAct,
lfvPnt,
(double) LSIZE * 0.333);
}
}
}
printf ("lfSum = %10.3f\n", lfSum);
return (0);
}
//====================================================================
double doSth (const LFVector &lfvPos,
const LFVector &lfvPnt,
double lfSqrRadius)
//====================================================================
{
LFVector lfvDist;
lfvDist = lfvPnt - lfvPos;
if (lfvDist * lfvDist <= lfSqrRadius)
return (1.);
else
return (0.);
}
#ifndef _LFVECTOR_HH_
#define _LFVECTOR_HH_
// Triplet of double coordinates with a small set of vector operations.
// The data members are public, so the class can be used like a struct.
// Every member function is defined inside the class body and is
// therefore implicitly inline.
class LFVector
{
public:
  // Cartesian components, directly accessible.
  double lfX, lfY, lfZ;

  // Empty default constructor: the components are NOT initialized.
  LFVector () {}

  // Builds the vector (lfXA, lfYA, lfZA).
  LFVector (double lfXA, double lfYA, double lfZA)
    : lfX (lfXA), lfY (lfYA), lfZ (lfZA)
  {}

  // Empty destructor.
  ~LFVector () {}

  // Component-wise difference (*this - lfvA), returned by value.
  LFVector operator - (const LFVector &lfvA) const
  {
    const double diffX = lfX - lfvA.lfX;
    const double diffY = lfY - lfvA.lfY;
    const double diffZ = lfZ - lfvA.lfZ;
    return LFVector (diffX, diffY, diffZ);
  }

  // Dot product of *this and lfvA.
  double operator * (const LFVector &lfvA) const
  {
    return lfX * lfvA.lfX + lfY * lfvA.lfY + lfZ * lfvA.lfZ;
  }
};
#endif // _LFVECTOR_HH_
#include <stdio.h>
#define LSIZE 256L
double doSth (double lfPosX, double lfPosY, double lfPosZ,
double lfPntX, double lfPntY, double lfPntZ,
double lfSqrRadius);
//====================================================================
// Benchmark driver, hand-written variant (no vector class).
// Walks over the centres of all voxels of an LSIZE^3 volume (nothing is
// allocated, only coordinates are computed), calls doSth() for each of
// them and prints the sum of the returned values.
int main ()
{
  // Volume dimensions in voxels.
  const long countX = LSIZE;
  const long countY = LSIZE;
  const long countZ = LSIZE;

  // Coordinates of the fixed point passed to every doSth() call.
  const double pointX = 0.1234;
  const double pointY = 0.3456;
  const double pointZ = 0.5678;

  // Centre of voxel (0,0,0); places the centre of the volume at the
  // origin of the cartesian coordinate system.
  const double startX = -static_cast<double> (countX - 1L) * 0.5;
  const double startY = -static_cast<double> (countY - 1L) * 0.5;
  const double startZ = -static_cast<double> (countZ - 1L) * 0.5;

  // Last argument of doSth(); the same for all voxels.
  const double sqrRadius = static_cast<double> (LSIZE) * 0.333;

  double total = 0.;   // accumulated doSth() results

  for (long kz = 0L; kz < countZ; ++kz)
    {
      const double curZ = startZ + static_cast<double> (kz);
      for (long ky = 0L; ky < countY; ++ky)
        {
          const double curY = startY + static_cast<double> (ky);
          for (long kx = 0L; kx < countX; ++kx)
            {
              const double curX = startX + static_cast<double> (kx);
              total += doSth (curX, curY, curZ,
                              pointX, pointY, pointZ,
                              sqrRadius);
            }
        }
    }

  printf ("lfSum = %10.3f\n", total);
  return 0;
}
//====================================================================
// Returns 1. when the squared euclidean distance between the position
// (lfPosX, lfPosY, lfPosZ) and the point (lfPntX, lfPntY, lfPntZ) is
// less than or equal to lfSqrRadius, and 0. otherwise.
double doSth (double lfPosX, double lfPosY, double lfPosZ,
              double lfPntX, double lfPntY, double lfPntZ,
              double lfSqrRadius)
{
  // Per-axis offsets from the position to the point.
  const double dx = lfPntX - lfPosX;
  const double dy = lfPntY - lfPosY;
  const double dz = lfPntZ - lfPosZ;

  // Summed left to right: x, then y, then z.
  const double sqrDist = dx * dx + dy * dy + dz * dz;
  return (sqrDist <= lfSqrRadius) ? 1. : 0.;
}