Performance regression between 4.7.2 and 4.8.1 for shootout's nbody.c
Walter Landry
wlandry@caltech.edu
Wed Jun 5 13:24:00 GMT 2013
Hello Everyone,
I found an odd performance regression going from GCC 4.7.2 to 4.8.1, and I
wanted to make sure that it is not a problem on my end. Specifically,
if I compile the nbody.c code from
http://benchmarksgame.alioth.debian.org/u32/program.php?test=nbody&lang=gcc&id=4
(also attached) with a command like
gcc -O3 -fomit-frame-pointer -march=native -mfpmath=sse -msse3 nbody_c.c -o nbody_c -lm
then run "/usr/bin/time nbody_c 50000000" 4 times, I get the results (elapsed time)
4.7.2: 4.42 4.44 4.42 4.43
4.8.1: 4.71 4.71 4.70 4.71
So 4.8.1 is about 6% slower than 4.7.2 (roughly 4.71 s versus 4.43 s). Has anyone else seen this result?
I am getting my compilers from Debian.
Thanks,
Walter Landry
wlandry@caltech.edu
-------------- next part --------------
/* The Computer Language Benchmarks Game
http://benchmarksgame.alioth.debian.org/
contributed by Mark C. Lewis
modified slightly by Chad Whipkey
converted from java to c++,added sse support, by Branimir Maksimovic
converted from c++ to c, by Alexey Medvedchikov
*/
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <immintrin.h>
/* Value of pi; used below to derive SOLAR_MASS. */
#define PI 3.141592653589793
/* Mass assigned to the sun entry of solar_bodies: 4 * pi^2. The planet
   masses in that table are written as multiples of this value. */
#define SOLAR_MASS ( 4 * PI * PI )
/* Factor by which the tabulated planet velocities in solar_bodies are
   multiplied (presumably converting per-day to per-year values; the units
   are not stated in this file). */
#define DAYS_PER_YEAR 365.24
/*
 * State of a single simulated body. Every member is a double, laid out as
 * eight consecutive values.
 */
struct body {
    double x[3];    /* position components */
    double fill;    /* never read by the code in this file; presumably
                       padding so that v starts on a 32-byte offset */
    double v[3];    /* velocity components */
    double mass;    /* mass of the body */
};
/*
 * Initial state of the simulated system. Entry 0 is the sun, followed by
 * jupiter, saturn, uranus and neptune.
 *
 * Each entry gives a position (.x), a velocity (.v) and a mass (.mass).
 * Planet velocities are multiplied by DAYS_PER_YEAR and planet masses by
 * SOLAR_MASS. The .fill member is not named in any initializer, so it is
 * zero-initialized.
 *
 * The array is not const: offset_momentum() and bodies_advance() modify it
 * in place.
 */
static struct body solar_bodies[] = {
/* sun */
{
.x = { 0., 0., 0. },
.v = { 0., 0., 0. },
.mass = SOLAR_MASS
},
/* jupiter */
{
.x = { 4.84143144246472090e+00,
-1.16032004402742839e+00,
-1.03622044471123109e-01 },
.v = { 1.66007664274403694e-03 * DAYS_PER_YEAR,
7.69901118419740425e-03 * DAYS_PER_YEAR,
-6.90460016972063023e-05 * DAYS_PER_YEAR },
.mass = 9.54791938424326609e-04 * SOLAR_MASS
},
/* saturn */
{
.x = { 8.34336671824457987e+00,
4.12479856412430479e+00,
-4.03523417114321381e-01 },
.v = { -2.76742510726862411e-03 * DAYS_PER_YEAR,
4.99852801234917238e-03 * DAYS_PER_YEAR,
2.30417297573763929e-05 * DAYS_PER_YEAR },
.mass = 2.85885980666130812e-04 * SOLAR_MASS
},
/* uranus */
{
.x = { 1.28943695621391310e+01,
-1.51111514016986312e+01,
-2.23307578892655734e-01 },
.v = { 2.96460137564761618e-03 * DAYS_PER_YEAR,
2.37847173959480950e-03 * DAYS_PER_YEAR,
-2.96589568540237556e-05 * DAYS_PER_YEAR },
.mass = 4.36624404335156298e-05 * SOLAR_MASS
},
/* neptune */
{
.x = { 1.53796971148509165e+01,
-2.59193146099879641e+01,
1.79258772950371181e-01 },
.v = { 2.68067772490389322e-03 * DAYS_PER_YEAR,
1.62824170038242295e-03 * DAYS_PER_YEAR,
-9.51592254519715870e-05 * DAYS_PER_YEAR },
.mass = 5.15138902046611451e-05 * SOLAR_MASS
}
};
/* Number of entries in solar_bodies, derived from the array's size. */
static const int BODIES_SIZE = sizeof(solar_bodies) / sizeof(solar_bodies[0]);
void offset_momentum(struct body *bodies, unsigned int nbodies)
{
unsigned int i, k;
for (i = 0; i < nbodies; ++i)
for (k = 0; k < 3; ++k)
bodies[0].v[k] -= bodies[i].v[k] * bodies[i].mass
/ SOLAR_MASS;
}
/*
 * Advance the system by one time step of length dt, updating velocities and
 * positions of all bodies in place.
 *
 * The work is done in four passes:
 *   1. store the displacement vector of every unordered pair (i < j) in a
 *      scratch array;
 *   2. two pairs at a time, compute dt / distance^3 with SSE, starting from
 *      a single-precision reciprocal-square-root estimate that is refined
 *      by two Newton-Raphson iterations in double precision;
 *   3. apply the resulting velocity changes to both bodies of each pair;
 *   4. move every body by dt times its updated velocity.
 *
 * bodies   array of at least nbodies entries
 * nbodies  number of bodies; 0 and 1 are accepted (no pair interactions)
 * dt       time step
 *
 * The scratch arrays are static and hold at most MAX_PAIRS pair entries
 * (45 bodies); the program is aborted with a message if more are requested.
 * Because of the static scratch storage the function is not reentrant.
 */
void bodies_advance(struct body *bodies, unsigned int nbodies, double dt)
{
    enum { MAX_PAIRS = 1000 };  /* capacity of the scratch arrays below */
    static struct {
        double dx[3], fill;
    } r[MAX_PAIRS];
    static __attribute__((aligned(16))) double mag[MAX_PAIRS];
    unsigned int N, i, j, k, m;
    __m128d dx[3], dsquared, distance, dmag;

    /* Number of unordered pairs. Fewer than two bodies means no pairs;
       spelling that out avoids the unsigned wrap-around of nbodies - 1.
       Counts above MAX_PAIRS are rejected below, which also keeps the
       product from overflowing. */
    N = (nbodies < 2 || nbodies > MAX_PAIRS) ? 0
                                             : (nbodies - 1) * nbodies / 2;

    /* The SIMD pass handles two pairs per iteration, so with an odd pair
       count it also touches entry N; round up to even before comparing
       against the capacity. */
    if (nbodies > MAX_PAIRS || N + (N & 1u) > MAX_PAIRS) {
        fprintf(stderr,
                "bodies_advance: %u bodies exceed the scratch capacity of %d pairs\n",
                nbodies, (int)MAX_PAIRS);
        abort();
    }

    /* Pass 1: displacement vector of every pair, in (i, j) order. */
    for (k = 0, i = 0; i + 1 < nbodies; ++i)
        for (j = i + 1; j < nbodies; ++j, ++k)
            for (m = 0; m < 3; ++m)
                r[k].dx[m] = bodies[i].x[m] - bodies[j].x[m];

    /* Pass 2: mag[k] = dt / distance_k^3, two pairs per iteration. */
    for (i = 0; i < N; i += 2) {
        /* Low lane holds pair i, high lane pair i + 1. Building the vector
           with _mm_set_pd avoids reading dx[m] before it is initialized.
           With an odd N the high lane of the last iteration is computed
           from an entry that pass 1 did not write; its result lands in
           mag[N] and is never read. */
        for (m = 0; m < 3; ++m)
            dx[m] = _mm_set_pd(r[i+1].dx[m], r[i].dx[m]);

        dsquared = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];

        /* Single-precision estimate of 1 / sqrt(dsquared) ... */
        distance = _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(dsquared)));

        /* ... refined twice: y = 1.5 * y - (0.5 * d2 * y) * y^2. */
        for (j = 0; j < 2; ++j)
            distance = distance * _mm_set1_pd(1.5)
                - ((_mm_set1_pd(0.5) * dsquared) * distance)
                * (distance * distance);

        dmag = _mm_set1_pd(dt) / (dsquared) * distance;
        _mm_store_pd(&mag[i], dmag);
    }

    /* Pass 3: update the velocities of both bodies of each pair. */
    for (i = 0, k = 0; i + 1 < nbodies; ++i)
        for (j = i + 1; j < nbodies; ++j, ++k)
            for (m = 0; m < 3; ++m) {
                bodies[i].v[m] -= r[k].dx[m] * bodies[j].mass
                    * mag[k];
                bodies[j].v[m] += r[k].dx[m] * bodies[i].mass
                    * mag[k];
            }

    /* Pass 4: move every body along its updated velocity. */
    for (i = 0; i < nbodies; ++i)
        for (m = 0; m < 3; ++m)
            bodies[i].x[m] += dt * bodies[i].v[m];
}
double bodies_energy(struct body *bodies, unsigned int nbodies) {
double dx[3], distance, e = 0.0;
unsigned int i, j, k;
for (i=0; i < nbodies; ++i) {
e += bodies[i].mass * ( bodies[i].v[0] * bodies[i].v[0]
+ bodies[i].v[1] * bodies[i].v[1]
+ bodies[i].v[2] * bodies[i].v[2] ) / 2.;
for (j=i+1; j < nbodies; ++j) {
for (k = 0; k < 3; ++k)
dx[k] = bodies[i].x[k] - bodies[j].x[k];
distance = sqrt(dx[0] * dx[0] + dx[1] * dx[1]
+ dx[2] * dx[2]);
e -= (bodies[i].mass * bodies[j].mass) / distance;
}
}
return e;
}
/*
 * Program entry point.
 *
 * Usage: <program> <steps>
 *
 * Offsets the momentum of the system, prints its energy, advances it by
 * <steps> time steps of 0.01 and prints the energy again. Each energy is
 * printed on its own line with nine decimal places.
 *
 * Returns 0 on success, or EXIT_FAILURE when the step count is missing, is
 * not a complete decimal integer, or is negative. A value too large for
 * long saturates at LONG_MAX, as strtol does.
 */
int main(int argc, char** argv)
{
    char *end;
    long i, n;

    /* argv[1] must exist before it is parsed. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <steps>\n",
                (argc > 0 && argv[0]) ? argv[0] : "nbody");
        return EXIT_FAILURE;
    }

    /* strtol reports where parsing stopped, unlike atoi, so empty input
       and trailing garbage can be rejected. */
    n = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || n < 0) {
        fprintf(stderr, "invalid step count: %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    offset_momentum(solar_bodies, BODIES_SIZE);
    printf("%.9f\n", bodies_energy(solar_bodies, BODIES_SIZE));
    for (i = 0; i < n; ++i)
        bodies_advance(solar_bodies, BODIES_SIZE, 0.01);
    printf("%.9f\n", bodies_energy(solar_bodies, BODIES_SIZE));
    return 0;
}
More information about the Gcc-help mailing list