Index: testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c (revision 125641) --- testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-68.c (working copy) *************** int main (void) *** 84,89 **** return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ ! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 84,88 ---- return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-fast-math-vect-pr29925.c (revision 0) *************** *** 0 **** --- 1,39 ---- + /* { dg-require-effective-target vect_float } */ + + #include + #include "../../tree-vect.h" + + void interp_pitch(float *exc, float *interp, int pitch, int len) + { + int i,k; + int maxj; + + maxj=3; + for (i=0;i + #include "../../tree-vect.h" + + #define N 32 + + struct t{ + int k[N]; + int l; + }; + + struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ + }; + + int main1 () + { + int i; + struct s tmp; + + /* unaligned */ + for (i = 0; i < N/2; i++) + { + tmp.b[i] = 5; + } + + /* check results: */ + for (i = 0; i + #include "../../tree-vect.h" + + #define N 32 + + struct t{ + int k[N]; + int l; + }; + + struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ + }; + + int main1 () + { + int i; + struct s tmp; + + /* aligned */ + for (i = 0; i < N/2; i++) + { + tmp.c[i] = 6; + } + + /* check results: */ + for (i = 0; i + #include "../../tree-vect.h" + + #define N 16 + struct test { + char ca[N]; + }; + + extern struct test s; + + int main1 () + { + int i; + + for (i = 0; i < N; i++) + { + s.ca[i] = 5; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (s.ca[i] != 5) + abort (); + } + + return 0; + } + + int main (void) + { + return main1 (); + } + + /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-31c.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-31c.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-31c.c (revision 0) *************** *** 0 **** --- 1,50 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 32 + + struct t{ + int k[N]; + int l; + }; + + struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ + }; + + int main1 () + { + int i; + struct s tmp; + + /* aligned */ + for (i = 0; i < N/2; i++) + { + tmp.d.k[i] = 7; + } + + /* check results: */ + for (i = 0; i + #include "../../tree-vect.h" + + #define N 32 + + struct t{ + int k[N]; + int l; + }; + + struct s{ + char a; /* aligned */ + char b[N-1]; /* unaligned (offset 1B) */ + char c[N]; /* aligned (offset NB) */ + struct t d; /* aligned (offset 2NB) */ + struct t e; /* unaligned (offset 2N+4N+4 B) */ + }; + + int main1 () + { + int i; + struct s tmp; + + /* unaligned */ + for (i = 0; i < N/2; i++) + { + tmp.e.k[i] = 8; + } + + /* check results: */ + for (i = 0; i + #include "../../tree-vect.h" + + #define N 8 + #define OFF 4 + + /* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. */ + + int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + int main1 (int *pib) + { + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + ia[i] = pib[i - OFF]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (ia[i] != pib[i - OFF]) + abort (); + } + + return 0; + } + + int main (void) + { + check_vect (); + + main1 (&ib[OFF]); + return 0; + } + + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c (revision 0) *************** *** 0 **** --- 1,51 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 16 + #define DIFF 242 + + void + main1 (unsigned char x, unsigned char max_result, unsigned char min_result) + { + int i; + unsigned char ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; + unsigned char uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + unsigned char udiff = 2; + unsigned char umax = x; + unsigned char umin = x; + + for (i = 0; i < N; i++) { + udiff += (unsigned char)(ub[i] - uc[i]); + } + + for (i = 0; i < N; i++) { + umax = umax < uc[i] ? uc[i] : umax; + } + + for (i = 0; i < N; i++) { + umin = umin > uc[i] ? uc[i] : umin; + } + + /* check results: */ + if (udiff != DIFF) + abort (); + if (umax != max_result) + abort (); + if (umin != min_result) + abort (); + } + + int main (void) + { + check_vect (); + + main1 (100, 100, 1); + main1 (0, 15, 0); + return 0; + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */ + /* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68a.c (revision 0) *************** *** 0 **** --- 1,49 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 32 + + struct s{ + int m; + int n[N][N][N]; + }; + + struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ + }; + + int main1 () + { + int i,j; + struct test1 tmp1; + + /* 1. unaligned */ + for (i = 0; i < N; i++) + { + tmp1.a.n[1][2][i] = 5; + } + + /* check results: */ + for (i = 0; i + #include "../../tree-vect.h" + + #define N 8 + #define OFF 4 + + /* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. */ + + int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + int main1 (int *pib) + { + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + pib[i - OFF] = ic[i]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (pib[i - OFF] != ic[i]) + abort (); + } + + return 0; + } + + int main (void) + { + check_vect (); + + main1 (&ib[OFF]); + return 0; + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */ + /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68b.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68b.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68b.c (revision 0) *************** *** 0 **** --- 1,49 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 32 + + struct s{ + int m; + int n[N][N][N]; + }; + + struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ + }; + + int main1 () + { + int i,j; + struct test1 tmp1; + + /* 2. aligned */ + for (i = 3; i < N-1; i++) + { + tmp1.a.n[1][2][i] = 6; + } + + /* check results: */ + for (i = 3; i < N-1; i++) + { + if (tmp1.a.n[1][2][i] != 6) + abort (); + } + + return 0; + } + + int main (void) + { + check_vect (); + + return main1 (); + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-76c.c (revision 0) *************** *** 0 **** --- 1,47 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 8 + #define OFF 4 + + /* Check handling of accesses for which the "initial condition" - + the expression that represents the first location accessed - is + more involved than just an ssa_name. */ + + int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + int main1 (int *pib) + { + int i; + int ia[N+OFF]; + int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10}; + + for (i = OFF; i < N; i++) + { + ia[i] = ic[i - OFF]; + } + + + /* check results: */ + for (i = OFF; i < N; i++) + { + if (ia[i] != ic[i - OFF]) + abort (); + } + + return 0; + } + + int main (void) + { + check_vect (); + + main1 (&ib[OFF]); + return 0; + } + + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68c.c (revision 0) *************** *** 0 **** --- 1,49 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 32 + + struct s{ + int m; + int n[N][N][N]; + }; + + struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ + }; + + int main1 () + { + int i,j; + struct test1 tmp1; + + /* 3. aligned */ + for (i = 0; i < N; i++) + { + tmp1.e.n[1][2][i] = 7; + } + + /* check results: */ + for (i = 0; i < N; i++) + { + if (tmp1.e.n[1][2][i] != 7) + abort (); + } + + return 0; + } + + int main (void) + { + check_vect (); + + return main1 (); + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { cleanup-tree-dump "vect" } } */ Index: testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c =================================================================== *** testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c (revision 0) --- testsuite/gcc.dg/vect/costmodel/ppc/costmodel-vect-68d.c (revision 0) *************** *** 0 **** --- 1,50 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include "../../tree-vect.h" + + #define N 20 + + struct s{ + int m; + int n[N][N][N]; + }; + + struct test1{ + struct s a; /* array a.n is unaligned */ + int b; + int c; + struct s e; /* array e.n is aligned */ + }; + + int main1 () + { + int i,j; + struct test1 tmp1; + + /* 4. unaligned */ + for (i = 3; i < N-3; i++) + { + tmp1.e.n[1][2][i] = 8; + } + + /* check results: */ + for (i = 3; i bbs #define LOOP_VINFO_EXIT_COND(L) (L)->exit_cond #define LOOP_VINFO_NITERS(L) (L)->num_iters + #define LOOP_VINFO_COST_MODEL_MIN_ITERS(L) (L)->min_profitable_iters #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask *************** typedef struct _stmt_vec_info { *** 329,334 **** --- 337,347 ---- #define TARG_VEC_TO_SCALAR_COST 1 #endif + /* Cost of scalar to vector operation. */ + #ifndef TARG_SCALAR_TO_VEC_COST + #define TARG_SCALAR_TO_VEC_COST 1 + #endif + /* Cost of aligned vector load. */ #ifndef TARG_VEC_LOAD_COST #define TARG_VEC_LOAD_COST 1 Index: tree-vect-analyze.c =================================================================== *** tree-vect-analyze.c (revision 125702) --- tree-vect-analyze.c (working copy) *************** vect_analyze_operations (loop_vec_info l *** 482,488 **** /* Analyze cost. Decide if worth while to vectorize. */ min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); ! if (min_profitable_iters < 0) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) --- 482,488 ---- /* Analyze cost. Decide if worth while to vectorize. */ min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); ! LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters; if (min_profitable_iters < 0) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)) Index: tree-vect-transform.c =================================================================== *** tree-vect-transform.c (revision 125702) --- tree-vect-transform.c (working copy) *************** vect_estimate_min_profitable_iters (loop *** 97,102 **** --- 97,103 ---- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); int nbbs = loop->num_nodes; + int byte_misalign; /* Cost model disabled. */ if (!flag_vect_cost_model) *************** vect_estimate_min_profitable_iters (loop *** 109,115 **** /* Requires loop versioning tests to handle misalignment. FIXME: Make cost depend on number of stmts in may_misalign list. */ ! if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) { vec_outside_cost += TARG_COND_BRANCH_COST; if (vect_print_dump_info (REPORT_DETAILS)) --- 110,116 ---- /* Requires loop versioning tests to handle misalignment. FIXME: Make cost depend on number of stmts in may_misalign list. */ ! if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))) { vec_outside_cost += TARG_COND_BRANCH_COST; if (vect_print_dump_info (REPORT_DETAILS)) *************** vect_estimate_min_profitable_iters (loop *** 117,148 **** "versioning.\n"); } - /* Requires a prologue loop when peeling to handle misalignment. Add cost of - two guards, one for the peeled loop and one for the vector loop. */ - - peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); - if (peel_iters_prologue) - { - vec_outside_cost += 2 * TARG_COND_BRANCH_COST; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "cost model: Adding cost of checks for " - "prologue.\n"); - } - - /* Requires an epilogue loop to finish up remaining iterations after vector - loop. Add cost of two guards, one for the peeled loop and one for the - vector loop. */ - - if ((peel_iters_prologue < 0) - || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf) - { - vec_outside_cost += 2 * TARG_COND_BRANCH_COST; - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "cost model : Adding cost of checks for " - "epilogue.\n"); - } - /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now. --- 118,123 ---- *************** vect_estimate_min_profitable_iters (loop *** 178,186 **** TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. */ ! peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); ! if (peel_iters_prologue < 0) { peel_iters_prologue = vf - 1; if (vect_print_dump_info (REPORT_DETAILS)) --- 153,161 ---- TODO: Build an expression that represents peel_iters for prologue and epilogue to be used in a run-time test. */ ! byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo); ! if (byte_misalign < 0) { peel_iters_prologue = vf - 1; if (vect_print_dump_info (REPORT_DETAILS)) *************** vect_estimate_min_profitable_iters (loop *** 197,202 **** --- 172,189 ---- } else { + if (byte_misalign) + { + struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); + int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); + tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); + int nelements = TYPE_VECTOR_SUBPARTS (vectype); + + peel_iters_prologue = nelements - (byte_misalign / element_size); + } + else + peel_iters_prologue = 0; + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) { peel_iters_epilogue = vf - 1; *************** vect_estimate_min_profitable_iters (loop *** 206,214 **** "loop iterations are unknown ."); } else ! peel_iters_epilogue = ! (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue) ! % vf; } vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) --- 193,229 ---- "loop iterations are unknown ."); } else ! { ! int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); ! peel_iters_prologue = niters < peel_iters_prologue ? ! niters : peel_iters_prologue; ! peel_iters_epilogue = (niters - peel_iters_prologue) % vf; ! } ! } ! ! /* Requires a prologue loop when peeling to handle misalignment. Add cost of ! two guards, one for the peeled loop and one for the vector loop. */ ! ! if (peel_iters_prologue) ! { ! vec_outside_cost += 2 * TARG_COND_BRANCH_COST; ! if (vect_print_dump_info (REPORT_DETAILS)) ! fprintf (vect_dump, "cost model: Adding cost of checks for " ! "prologue.\n"); ! } ! ! /* Requires an epilogue loop to finish up remaining iterations after vector ! loop. Add cost of two guards, one for the peeled loop and one for the ! vector loop. */ ! ! if (peel_iters_epilogue ! || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ! || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf) ! { ! vec_outside_cost += 2 * TARG_COND_BRANCH_COST; ! if (vect_print_dump_info (REPORT_DETAILS)) ! fprintf (vect_dump, "cost model : Adding cost of checks for " ! "epilogue.\n"); } vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) *************** vect_model_induction_cost (stmt_vec_info *** 356,362 **** /* loop cost for vec_loop. */ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; /* prologue cost for vec_init and vec_step. */ ! STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, " --- 371,377 ---- /* loop cost for vec_loop. */ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; /* prologue cost for vec_init and vec_step. */ ! STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, " *************** vect_model_induction_cost (stmt_vec_info *** 372,381 **** be generated for the single vector op. We will handle that shortly. */ static void ! vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies) { STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, " "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), --- 387,405 ---- be generated for the single vector op. We will handle that shortly. */ static void ! vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt) { + int i; + STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST; + /* FORNOW: Assuming maximum 2 args per stmts. */ + for (i=0; i<2; i++) + { + if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def) + STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST; + } + if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, " "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info), *************** vect_cost_strided_group_size (stmt_vec_i *** 407,417 **** has the overhead of the strided access attributed to it. */ static void ! vect_model_store_cost (stmt_vec_info stmt_info, int ncopies) { int cost = 0; int group_size; /* Strided access? */ if (DR_GROUP_FIRST_DR (stmt_info)) group_size = vect_cost_strided_group_size (stmt_info); --- 431,444 ---- has the overhead of the strided access attributed to it. */ static void ! vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt) { int cost = 0; int group_size; + if (dt == vect_constant_def || dt == vect_invariant_def) + STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST; + /* Strided access? */ if (DR_GROUP_FIRST_DR (stmt_info)) group_size = vect_cost_strided_group_size (stmt_info); *************** vect_get_vec_def_for_operand (tree op, t *** 1274,1279 **** --- 1301,1307 ---- /* FIXME: use build_constructor directly. */ vector_type = get_vectype_for_scalar_type (TREE_TYPE (def)); vec_inv = build_constructor_from_list (vector_type, t); + return vect_init_vector (stmt, vec_inv, vector_type); } *************** vectorizable_call (tree stmt, block_stmt *** 2256,2262 **** tree vectype_out, vectype_in; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; ! enum vect_def_type dt[2]; int ncopies, j, nargs; call_expr_arg_iterator iter; --- 2284,2290 ---- tree vectype_out, vectype_in; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type; ! enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; int ncopies, j, nargs; call_expr_arg_iterator iter; *************** vectorizable_call (tree stmt, block_stmt *** 2355,2361 **** STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_call ==="); ! vect_model_simple_cost (stmt_info, ncopies); return true; } --- 2383,2389 ---- STMT_VINFO_TYPE (stmt_info) = call_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_call ==="); ! vect_model_simple_cost (stmt_info, ncopies, dt); return true; } *************** vectorizable_assignment (tree stmt, bloc *** 2750,2756 **** loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); tree new_temp; tree def, def_stmt; ! enum vect_def_type dt; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; --- 2778,2784 ---- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); tree new_temp; tree def, def_stmt; ! enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; int nunits = TYPE_VECTOR_SUBPARTS (vectype); int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits; *************** vectorizable_assignment (tree stmt, bloc *** 2781,2787 **** return false; op = GIMPLE_STMT_OPERAND (stmt, 1); ! if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); --- 2809,2815 ---- return false; op = GIMPLE_STMT_OPERAND (stmt, 1); ! if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); *************** vectorizable_assignment (tree stmt, bloc *** 2793,2799 **** STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_assignment ==="); ! vect_model_simple_cost (stmt_info, ncopies); return true; } --- 2821,2827 ---- STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_assignment ==="); ! vect_model_simple_cost (stmt_info, ncopies, dt); return true; } *************** vectorizable_operation (tree stmt, block *** 2927,2933 **** int icode; enum machine_mode optab_op2_mode; tree def, def_stmt; ! enum vect_def_type dt0, dt1; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in = TYPE_VECTOR_SUBPARTS (vectype); --- 2955,2961 ---- int icode; enum machine_mode optab_op2_mode; tree def, def_stmt; ! enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in = TYPE_VECTOR_SUBPARTS (vectype); *************** vectorizable_operation (tree stmt, block *** 2979,2985 **** } op0 = TREE_OPERAND (operation, 0); ! if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); --- 3007,3013 ---- } op0 = TREE_OPERAND (operation, 0); ! if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); *************** vectorizable_operation (tree stmt, block *** 2989,2995 **** if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); ! if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); --- 3017,3023 ---- if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); ! if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); *************** vectorizable_operation (tree stmt, block *** 3038,3045 **** by a scalar shift operand. */ optab_op2_mode = insn_data[icode].operand[2].mode; if (! (VECTOR_MODE_P (optab_op2_mode) ! || dt1 == vect_constant_def ! || dt1 == vect_invariant_def)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "operand mode requires invariant argument."); --- 3066,3073 ---- by a scalar shift operand. */ optab_op2_mode = insn_data[icode].operand[2].mode; if (! (VECTOR_MODE_P (optab_op2_mode) ! || dt[1] == vect_constant_def ! || dt[1] == vect_invariant_def)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "operand mode requires invariant argument."); *************** vectorizable_operation (tree stmt, block *** 3052,3058 **** STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_operation ==="); ! vect_model_simple_cost (stmt_info, ncopies); return true; } --- 3080,3086 ---- STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_operation ==="); ! vect_model_simple_cost (stmt_info, ncopies, dt); return true; } *************** vectorizable_operation (tree stmt, block *** 3146,3154 **** } else { ! vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); if (op_type == binary_op) ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1); } /* Arguments are ready. create the new vector stmt. */ --- 3174,3182 ---- } else { ! vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); if (op_type == binary_op) ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); } /* Arguments are ready. create the new vector stmt. */ *************** vectorizable_type_demotion (tree stmt, b *** 3196,3202 **** enum tree_code code, code1 = ERROR_MARK; tree new_temp; tree def, def_stmt; ! enum vect_def_type dt0; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in; --- 3224,3230 ---- enum tree_code code, code1 = ERROR_MARK; tree new_temp; tree def, def_stmt; ! enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in; *************** vectorizable_type_demotion (tree stmt, b *** 3254,3260 **** return false; /* Check the operands of the operation. */ ! if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); --- 3282,3288 ---- return false; /* Check the operands of the operation. */ ! if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); *************** vectorizable_type_demotion (tree stmt, b *** 3272,3278 **** STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_demotion ==="); ! vect_model_simple_cost (stmt_info, ncopies); return true; } --- 3300,3306 ---- STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_demotion ==="); ! vect_model_simple_cost (stmt_info, ncopies, dt); return true; } *************** vectorizable_type_demotion (tree stmt, b *** 3295,3306 **** if (j == 0) { vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); } else { ! vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1); ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); } /* Arguments are ready. Create the new vector stmt. */ --- 3323,3334 ---- if (j == 0) { vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); } else { ! vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1); ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); } /* Arguments are ready. Create the new vector stmt. */ *************** vectorizable_type_promotion (tree stmt, *** 3346,3352 **** tree decl1 = NULL_TREE, decl2 = NULL_TREE; int op_type; tree def, def_stmt; ! enum vect_def_type dt0, dt1; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in; --- 3374,3380 ---- tree decl1 = NULL_TREE, decl2 = NULL_TREE; int op_type; tree def, def_stmt; ! enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; tree new_stmt; stmt_vec_info prev_stmt_info; int nunits_in; *************** vectorizable_type_promotion (tree stmt, *** 3404,3410 **** return false; /* Check the operands of the operation. */ ! if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); --- 3432,3438 ---- return false; /* Check the operands of the operation. */ ! if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); *************** vectorizable_type_promotion (tree stmt, *** 3415,3421 **** if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); ! if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); --- 3443,3449 ---- if (op_type == binary_op) { op1 = TREE_OPERAND (operation, 1); ! if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1])) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "use not simple."); *************** vectorizable_type_promotion (tree stmt, *** 3435,3441 **** STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_promotion ==="); ! vect_model_simple_cost (stmt_info, 2*ncopies); return true; } --- 3463,3469 ---- STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type; if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "=== vectorizable_promotion ==="); ! vect_model_simple_cost (stmt_info, 2*ncopies, dt); return true; } *************** vectorizable_type_promotion (tree stmt, *** 3465,3473 **** } else { ! vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); if (op_type == binary_op) ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1); } /* Arguments are ready. Create the new vector stmt. We are creating --- 3493,3501 ---- } else { ! vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0); if (op_type == binary_op) ! vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1); } /* Arguments are ready. Create the new vector stmt. We are creating *************** vectorizable_store (tree stmt, block_stm *** 3756,3762 **** if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; ! vect_model_store_cost (stmt_info, ncopies); return true; } --- 3784,3790 ---- if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; ! vect_model_store_cost (stmt_info, ncopies, dt); return true; } *************** vect_do_peeling_for_loop_bound (loop_vec *** 5332,5339 **** loop_num = loop->num; /* Analyze cost to set threshhold for vectorized loop. */ ! min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo); ! min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) * LOOP_VINFO_VECT_FACTOR (loop_vinfo); --- 5360,5366 ---- loop_num = loop->num; /* Analyze cost to set threshhold for vectorized loop. */ ! min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo); min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) * LOOP_VINFO_VECT_FACTOR (loop_vinfo); *************** vect_do_peeling_for_loop_bound (loop_vec *** 5346,5352 **** || min_profitable_iters > min_scalar_loop_bound)) th = (unsigned) min_profitable_iters; ! if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vectorization may not be profitable."); new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop), --- 5373,5381 ---- || min_profitable_iters > min_scalar_loop_bound)) th = (unsigned) min_profitable_iters; ! if (min_profitable_iters ! && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ! && vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vectorization may not be profitable."); new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),