/* Toy examples for vectorization.
   Compile with -O3 -fno-strict-aliasing -fold-unroll-loops -ftree-vectorize -Wa,-force_cpusubtype_ALL */

#include <stdio.h>

/* Number of elements in every array used by these examples.  */
#define N 16

/* Expected output tables.  Each one is compared element by element
   against a computed array by one of the *check_results helpers.  */

/* Element i holds 6*i.  */
short add_results[N] = {0,6,12,18,24,30,36,42,48,54,60,66,72,78,84,90};
int iadd_results[N] = {0,6,12,18,24,30,36,42,48,54,60,66,72,78,84,90};
float fadd_results[N] = {0.0,6.0,12.0,18.0,24.0,30.0,36.0,42.0,48.0,54.0,60.0,66.0,72.0,78.0,84.0,90.0};

/* Element i holds 3*i.  */
short copy_results[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
float fcopy_results[N] = {0.0,3.0,6.0,9.0,12.0,15.0,18.0,21.0,24.0,27.0,30.0,33.0,36.0,39.0,42.0,45.0};

/* Element i holds 3*i*i.  */
short mult_results[N] = {0,3,12,27,48,75,108,147,192,243,300,363,432,507,588,675};
float fmult_results[N] = {0.0,3.0,12.0,27.0,48.0,75.0,108.0,147.0,192.0,243.0,300.0,363.0,432.0,507.0,588.0,675.0};

/* Compare the first N shorts of A against RESULTS and report on stderr.
   A "Check output:" header is printed first; then, for every index, either
   a "Wrong Output" line (on mismatch) or the index and the value.  Nothing
   is returned, so mismatches are only visible in the log.  */
void
check_results (short *a, short *results)
{
  int i;

  fprintf (stderr, "Check output:\n");
  for (i = 0; i < N; i++)
    {
      if (a[i] != results[i])
        {
          fprintf (stderr, "Wrong Output [%d]: %d instead of %d\n", i, a[i], results[i]);
        }
      else
        {
          fprintf (stderr, "[%d]\t%d\n", i, a[i]);
        }
    }
}

/* Same report as check_results, for arrays of N ints.  */
void
icheck_results (int *a, int *results)
{
  int i;

  fprintf (stderr, "Check output:\n");
  for (i = 0; i < N; i++)
    {
      if (a[i] != results[i])
        {
          fprintf (stderr, "Wrong Output [%d]: %d instead of %d\n", i, a[i], results[i]);
        }
      else
        {
          fprintf (stderr, "[%d]\t%d\n", i, a[i]);
        }
    }
}

/* Same report as check_results, for arrays of N floats.  Elements are
   compared with exact floating-point inequality (!=), not a tolerance.  */
void
fcheck_results (float *a, float *results)
{
  int i;

  fprintf (stderr, "Check output:\n");
  for (i = 0; i < N; i++)
    {
      if (a[i] != results[i])
        {
          fprintf (stderr, "Wrong Output [%d]: %f instead of %f\n", i, a[i], results[i]);
        }
      else
        {
          fprintf (stderr, "[%d]\t%f\n", i, a[i]);
        }
    }
}

/****************************************************/

/* Empty sinks: they accept an array and do nothing with it.
   NOTE(review): presumably called so the loops' results count as used
   and are not optimized away -- confirm against the callers.  */
void bar (short *a) {}
void ibar (int *a) {}

/* Check A against the expected float copy table.
   The parameter is float * so that it matches both fcheck_results and
   fcopy_results; it was previously declared short *, which is an
   incompatible pointer type for this call and inconsistent with
   fbar_add / fbar_mult.  */
void
fbar_copy (float *a)
{
  fcheck_results (a, fcopy_results);
}

/* Check A against the expected short addition table.  */
void
bar_add (short *a)
{
  check_results (a, add_results);
}

/* Check A against the expected int addition table.  */
void
ibar_add (int *a)
{
  icheck_results (a, iadd_results);
}

/* Check A against the expected float addition table.  */
void
fbar_add (float *a)
{
  fcheck_results (a, fadd_results);
}

/* Check A against the expected short multiplication table.  */
void
bar_mult (short *a)
{
  check_results (a, mult_results);
}

/* Check A against the expected float multiplication table.  */
void
fbar_mult (float *a)
{
  fcheck_results (a, fmult_results);
}

/****************************************************/
/* Loops that are vectorized by the basic vectorizer */

foo_copy (){ float a[N]; float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; int i; for (i=0; i