Why are vector instructions slower than loops?

fniles@gnupooh.org fniles@gnupooh.org
Wed Sep 24 19:43:00 GMT 2008


/**
 * Why are vectors so much slower than plain old loops?  Shouldn't
 * they be faster?  Do I have to actually call the built-in MMX and
 * SSE instructions myself?  Shouldn't the compiler be able to do this
 * given this much information?
 *
 * Results compiled on Intel(R) Core(TM)2 Duo CPU     E8500  @ 3.16GHz
 * Using: "-O3 -march=core2"
 *
 * $ gcc -v
 * gcc version 4.3.0 20080428 (Red Hat 4.3.0-8) (GCC)
 *
 * $ time ./test 1000000
 *
 * real	0m3.639s
 * user	0m3.634s
 * sys	0m0.001s
 * $ time ./test 1000000 b
 *
 * real	0m9.160s
 * user	0m9.148s
 * sys	0m0.002s
 *
 * Please correct what I'm doing wrong here to make the vector
 * version faster.
 *
 * Note: if you make SIZE smaller it only gets worse!
 *
 **/

#include <stdio.h>

/* Number of char elements in each input buffer and in each output row.  */
#define SIZE 2048

/* GCC vector-extension type: one vector of char elements that is SIZE
   bytes wide, so a single value of this type spans a whole buffer.  */
typedef char vSIZEqi __attribute__ ((vector_size (SIZE)));

/**
 * Scalar version of the kernel.  For every one of the SIZE positions
 * the products a*b and a*c are formed (each narrowed back to char) and
 * then multiplied by d and by e:
 *
 *   out[0] = d * (a * b)      out[1] = e * (a * b)
 *   out[2] = d * (a * c)      out[3] = e * (a * c)
 **/
static void
loop_method (char out[4][SIZE], char a[SIZE], char b[SIZE], char c[SIZE],
	     char d[SIZE], char e[SIZE])
{
  char *d_ab = out[0];
  char *e_ab = out[1];
  char *d_ac = out[2];
  char *e_ac = out[3];
  int pos = 0;

  while (pos < SIZE) {
    /* Shared partial products, truncated to char before being reused.  */
    char ab = a[pos] * b[pos];
    char ac = a[pos] * c[pos];

    d_ab[pos] = d[pos] * ab;
    e_ab[pos] = e[pos] * ab;
    d_ac[pos] = d[pos] * ac;
    e_ac[pos] = e[pos] * ac;

    pos++;
  }
}


/**
 * Vector-extension version of the kernel.  Each argument points at one
 * vSIZEqi value; the element-wise products are written to the four
 * entries of out:
 *
 *   out[0] = d * (a * b)      out[1] = e * (a * b)
 *   out[2] = d * (a * c)      out[3] = e * (a * c)
 **/
static void
vector_method (vSIZEqi out[4], vSIZEqi *a, vSIZEqi *b, vSIZEqi *c,
	       vSIZEqi *d, vSIZEqi *e)
{
  /* Partial products shared by two outputs each.  */
  vSIZEqi ab = a[0] * b[0];
  vSIZEqi ac = a[0] * c[0];

  out[0] = d[0] * ab;
  out[1] = e[0] * ab;
  out[2] = d[0] * ac;
  out[3] = e[0] * ac;
}

int
main (int argc, char *argv[]  __attribute__ ((unused)))
{
  int i;
  char a[SIZE], b[SIZE], c[SIZE], d[SIZE], e[SIZE];
  char out[4][SIZE];
  int loops = 1000000;

  if (argc > 1)
    sscanf (argv[1], "%d", &loops);

  for (i = 0; i < loops; i++) {
    if (argc > 2)
      vector_method( (vSIZEqi *) out, (vSIZEqi *) &a, (vSIZEqi *) &b,
(vSIZEqi *) &c,
		     (vSIZEqi *) &d, (vSIZEqi *) &e);
    else
      loop_method (out, a, b, c, d, e);
  }

  return 0;
}




More information about the Gcc-help mailing list