This is the mail archive of the
gcc-help@gcc.gnu.org
mailing list for the GCC project.
Why are vector instructions slower than loops?
- From: fniles at gnupooh dot org
- To: gcc-help at gcc dot gnu dot org
- Date: Wed, 24 Sep 2008 15:42:38 -0400 (EDT)
- Subject: Why are vector instructions slower than loops?
/**
* Why are vectors so much slower than plain old loops? Shouldn't
* they be faster? Do I have to actually call the built-in MMX and
* SSE instructions myself? Shouldn't the compiler be able to do this
* given this much information?
*
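* Results on an Intel(R) Core(TM)2 Duo CPU E8500 @ 3.16GHz,
* compiled with "-O3 -march=core2". The first run below times the
* loop version; the second (any extra argument) times the vector version.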
*
* $ gcc -v
* gcc version 4.3.0 20080428 (Red Hat 4.3.0-8) (GCC)
*
* $ time ./test 1000000
*
* real 0m3.639s
* user 0m3.634s
* sys 0m0.001s
* $ time ./test 1000000 b
*
* real 0m9.160s
* user 0m9.148s
* sys 0m0.002s
*
* Please correct what I'm doing wrong here to make the vector
* version faster.
*
* Note: if you make SIZE smaller it only gets worse!
*
**/
#include <stdio.h>
/* Number of char elements in each input and output array. */
#define SIZE 2048
/* GCC vector-extension type: a vector of char occupying SIZE bytes in
   total, so one vSIZEqi covers the same bytes as a char[SIZE] array. */
typedef char vSIZEqi __attribute__ ((vector_size (SIZE)));
/*
 * Scalar reference kernel: walks the SIZE elements one at a time.
 *
 * For every index i it forms the two intermediate products
 *   a[i] * b[i]   and   a[i] * c[i]
 * (each narrowed back to char) and then stores
 *   out[0][i] = d[i] * (a[i] * b[i])
 *   out[1][i] = e[i] * (a[i] * b[i])
 *   out[2][i] = d[i] * (a[i] * c[i])
 *   out[3][i] = e[i] * (a[i] * c[i])
 *
 * out        - four rows of SIZE chars that receive the results
 * a, b, c    - inputs used to build the intermediate products
 * d, e       - inputs multiplied with the intermediates
 */
static void
loop_method (char out[4][SIZE], char a[SIZE], char b[SIZE], char c[SIZE],
char d[SIZE], char e[SIZE])
{
  int idx = 0;

  while (idx < SIZE) {
    /* a[idx] feeds both intermediates; no store happens between the two
       uses, so reading it once is equivalent. */
    const char lhs = a[idx];
    const char prod_ab = lhs * b[idx];
    const char prod_ac = lhs * c[idx];

    /* d[idx] and e[idx] are deliberately re-read before each store, in
       the same order as the stores, rather than cached up front. */
    out[0][idx] = d[idx] * prod_ab;
    out[1][idx] = e[idx] * prod_ab;
    out[2][idx] = d[idx] * prod_ac;
    out[3][idx] = e[idx] * prod_ac;

    idx++;
  }
}
/*
 * Whole-vector kernel: the same arithmetic as the scalar loop version,
 * but written with the `*` operator applied directly to vSIZEqi values
 * so that each statement covers all elements of the vector at once.
 *
 * out        - array of four vectors that receive the results
 * a, b, c    - pointers to the inputs used for the intermediate products
 * d, e       - pointers to the inputs multiplied with the intermediates
 */
static void
vector_method (vSIZEqi out[4], vSIZEqi *a, vSIZEqi *b, vSIZEqi *c,
vSIZEqi *d, vSIZEqi *e)
{
  /* Both intermediates are computed before anything is written to out. */
  const vSIZEqi prod_ab = a[0] * b[0];
  const vSIZEqi prod_ac = a[0] * c[0];

  /* d and e are dereferenced afresh for every store, in store order. */
  out[0] = d[0] * prod_ab;
  out[1] = e[0] * prod_ab;
  out[2] = d[0] * prod_ac;
  out[3] = e[0] * prod_ac;
}
int
main (int argc, char *argv[] __attribute__ ((unused)))
{
int i;
char a[SIZE], b[SIZE], c[SIZE], d[SIZE], e[SIZE];
char out[4][SIZE];
int loops = 1000000;
if (argc > 1)
sscanf (argv[1], "%d", &loops);
for (i = 0; i < loops; i++) {
if (argc > 2)
vector_method( (vSIZEqi *) out, (vSIZEqi *) &a, (vSIZEqi *) &b,
(vSIZEqi *) &c,
(vSIZEqi *) &d, (vSIZEqi *) &e);
else
loop_method (out, a, b, c, d, e);
}
return 0;
}