Index: ChangeLog.autovect =================================================================== RCS file: /cvs/gcc/gcc/gcc/Attic/ChangeLog.autovect,v retrieving revision 1.1.2.47 diff -c -3 -p -r1.1.2.47 ChangeLog.autovect *** ChangeLog.autovect 22 Mar 2005 10:25:13 -0000 1.1.2.47 --- ChangeLog.autovect 3 Apr 2005 11:53:04 -0000 *************** *** 1,3 **** --- 1,75 ---- + 2005-04-03 Dorit Naishlos + + * defaults.h (TARGET_VECT_NUM_PATTERNS): New. + * target-def.h (TARGET_VECTORIZE_BUILTIN_VECT_PATTERN_RECOG): New. + * target.h (builtin_vect_pattern_recog): New. + * tree-vect-analyze.c (target.h): Include. + (vect_pattern_recog_1): Removed static qualifier. Declaration moved to + tree-vectorizer.h. + (vect_recog_unsigned_subsat_pattern): Takes additional argument. + Removed dump print. Handle target-reduction-pattern case. + (vect_pattern_recog): Call targetm.vectorize.builtin_vect_pattern_recog. + (vect_analyze_loop): Move call to vect_pattern_recog to after the call + to vect_analyze_scalar_cycles. + (vect_determine_vectorization_factor): Change skip test from checking + for reduction, to checking for STMT_VINFO_LIVE_P. Handle the case of + vectype being already initialized. + (vect_analyze_operations): Call vectorizable_target_reduction_pattern. + (vect_mark_relevant): When dealing with the last stmt in a sequence + that was recognized as a certain idiom - use the pattern-stmt that + replaces the sequence. + (vect_mark_stmts_to_be_vectorized): Remove special handling of + STMT_VINFO_IN_PATTERN_P. + * tree-vect-transform.c (vect_create_epilog_for_reduction): New + function. Contains functionality that was factored out of + vectorizable_reduction. + (vectorizable_target_reduction_pattern): New function. + (vectorizable_reduction): Epilog creation code was factored out into + a new function vect_create_epilog_for_reduction. + (vect_transform_stmt): Call vectorizable_target_reduction_pattern. 
+ * tree-vectorizer.h (target_reduc_pattern_vec_info_type): New enum + value. + (vect_recog_unsigned_subsat_pattern): Takes additional argument. + (_recog_func_ptr): Takes additional argument. + (vectorizable_target_reduction_pattern): New function declaration. + (vect_pattern_recog_func): Rename to vect_pattern_recog_funcs. + + * tree-inline.c (estimate_num_insns_1): Add missing cases - + REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR. + * tree-ssa-operands.c (get_expr_operands): Remove redundant cases - + REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR. + * tree-vect-transform.c (get_initial_def_for_reduction): Remove case + that is not supported yet. + * tree.def (REDUC_BIT_AND_EXPR, REDUC_BIT_IOR_EXPR, REDUC_BIT_XOR_EXPR) + (REDUC_MULT_EXPR): Remove tree-codes that are not yet supported. + * tree-vectorizer.c (reduction_code_for_scalar_code): Remove cases that + are not yet supported. + (vect_is_simple_reduction): Fix comment. + * tree-vect-analyze.c (vect_analyze_operations): Minor changes to print + messages. + (vect_mark_stmts_to_be_vectorized): Likewise. + (vect_determine_vectorization_factor): Likewise. + + * config/rs6000/rs6000.c (tree-flow.h, tree-data-ref.h) + (tree-vectorizer.h): Include. + (altivec_builtin_widening_summation): New global variable to hold the + decl of the builtin widening_summation. + (target_vect_recog_widening_summation_pattern): New pattern recognition + function. + (target_vect_pattern_recog_funcs): New global array to hold pointers + to pattern-recognition functions. + (rs6000_builtin_vect_pattern_recog): New function. Implements the target + builtin vect_pattern_recog. + (rs6000_expand_builtin): Support ALTIVEC_BUILTIN_WIDENING_SUMMATION + case. + (altivec_init_builtins): Add new function type v4si_ftype_v8hi_v4si. + Initialize a new target builtin __builtin_altivec_widening_summation. + Set altivec_builtin_widening_summation. + * config/rs6000/rs6000.c (TARGET_VECT_NUM_PATTERNS): New define.
+ (ALTIVEC_BUILTIN_WIDENING_SUMMATION): New builtin. + * config/rs6000/t-rs6000 (tree-flow.h, tree-data-ref.h) + (tree-vectorizer.h): Add dependency. + 2005-03-22 Ira Rosen * tree-data-ref.c (address_analysis): Perform alignment Index: defaults.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/defaults.h,v retrieving revision 1.160.4.2 diff -c -3 -p -r1.160.4.2 defaults.h *** defaults.h 30 Jan 2005 18:09:25 -0000 1.160.4.2 --- defaults.h 3 Apr 2005 11:53:05 -0000 *************** do { fputs (integer_asm_op (POINTER_SIZE *** 719,724 **** --- 719,728 ---- #define UNITS_PER_SIMD_WORD 0 #endif + #ifndef TARGET_VECT_NUM_PATTERNS + #define TARGET_VECT_NUM_PATTERNS 0 + #endif + /* Determine whether __cxa_atexit, rather than atexit, is used to register C++ destructors for local statics and global objects. */ #ifndef DEFAULT_USE_CXA_ATEXIT Index: target-def.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/target-def.h,v retrieving revision 1.106.12.2 diff -c -3 -p -r1.106.12.2 target-def.h *** target-def.h 30 Jan 2005 18:10:32 -0000 1.106.12.2 --- target-def.h 3 Apr 2005 11:53:05 -0000 *************** Foundation, 59 Temple Place - Suite 330, *** 274,282 **** TARGET_SCHED_IS_COSTLY_DEPENDENCE} #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0 #define TARGET_VECTORIZE \ ! {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD} /* In except.c */ #define TARGET_EH_RETURN_FILTER_MODE default_eh_return_filter_mode --- 274,284 ---- TARGET_SCHED_IS_COSTLY_DEPENDENCE} #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0 + #define TARGET_VECTORIZE_BUILTIN_VECT_PATTERN_RECOG 0 #define TARGET_VECTORIZE \ ! {TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \ ! 
TARGET_VECTORIZE_BUILTIN_VECT_PATTERN_RECOG} /* In except.c */ #define TARGET_EH_RETURN_FILTER_MODE default_eh_return_filter_mode Index: target.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/target.h,v retrieving revision 1.118.12.2 diff -c -3 -p -r1.118.12.2 target.h *** target.h 30 Jan 2005 18:10:33 -0000 1.118.12.2 --- target.h 3 Apr 2005 11:53:05 -0000 *************** struct gcc_target *** 289,294 **** --- 289,296 ---- by the vectorizer, and return the decl of the target builtin function. */ tree (* builtin_mask_for_load) (void); + + void (* builtin_vect_pattern_recog) (tree); } vectorize; /* Return machine mode for filter value. */ Index: tree-inline.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-inline.c,v retrieving revision 1.150.2.2 diff -c -3 -p -r1.150.2.2 tree-inline.c *** tree-inline.c 30 Jan 2005 18:10:47 -0000 1.150.2.2 --- tree-inline.c 3 Apr 2005 11:53:06 -0000 *************** estimate_num_insns_1 (tree *tp, int *wal *** 1329,1334 **** --- 1329,1337 ---- case ASM_EXPR: case REALIGN_LOAD_EXPR: + case REDUC_MAX_EXPR: + case REDUC_MIN_EXPR: + case REDUC_PLUS_EXPR: case RESX_EXPR: *count += 1; Index: tree-ssa-operands.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-ssa-operands.c,v retrieving revision 2.56.2.4 diff -c -3 -p -r2.56.2.4 tree-ssa-operands.c *** tree-ssa-operands.c 8 Mar 2005 08:29:58 -0000 2.56.2.4 --- tree-ssa-operands.c 3 Apr 2005 11:53:06 -0000 *************** get_expr_operands (tree stmt, tree *expr *** 1193,1201 **** case TRUTH_NOT_EXPR: case BIT_FIELD_REF: case VIEW_CONVERT_EXPR: - case REDUC_MAX_EXPR: - case REDUC_MIN_EXPR: - case REDUC_PLUS_EXPR: do_unary: get_expr_operands (stmt, &TREE_OPERAND (expr, 0), flags); return; --- 1193,1198 ---- Index: tree-vect-analyze.c =================================================================== RCS file: 
/cvs/gcc/gcc/gcc/tree-vect-analyze.c,v retrieving revision 2.4.4.7 diff -c -3 -p -r2.4.4.7 tree-vect-analyze.c *** tree-vect-analyze.c 16 Mar 2005 16:48:44 -0000 2.4.4.7 --- tree-vect-analyze.c 3 Apr 2005 11:53:07 -0000 *************** Software Foundation, 59 Temple Place - S *** 26,31 **** --- 26,32 ---- #include "errors.h" #include "ggc.h" #include "tree.h" + #include "target.h" #include "basic-block.h" #include "diagnostic.h" #include "tree-flow.h" *************** static bool vect_analyze_data_ref_access *** 62,78 **** static bool vect_can_advance_ivs_p (loop_vec_info); static void vect_update_misalignment_for_peel (struct data_reference *, struct data_reference *, int npeel); - static void vect_pattern_recog_1 - (tree (* ) (tree, varray_type *), block_stmt_iterator); /* Pattern recognition functions */ ! _recog_func_ptr vect_pattern_recog_func[NUM_PATTERNS] = { vect_recog_unsigned_subsat_pattern}; - tree vect_recog_unsigned_subsat_pattern (tree, varray_type *); /* Function vect_determine_vectorization_factor */ static bool --- 63,98 ---- static bool vect_can_advance_ivs_p (loop_vec_info); static void vect_update_misalignment_for_peel (struct data_reference *, struct data_reference *, int npeel); /* Pattern recognition functions */ ! tree vect_recog_unsigned_subsat_pattern (tree, tree *, varray_type *); ! _recog_func_ptr vect_pattern_recog_funcs[NUM_PATTERNS] = { vect_recog_unsigned_subsat_pattern}; /* Function vect_determine_vectorization_factor + Determine the vectorization factor (VF). VF is the number of data elements + that are operated upon in parallel in a single iteration of the vectorized + loop. For example, when vectorizing a loop that operates on 4byte elements, + on a target with vector size (VS) 16byte, the VF is set to 4, since 4 + elements can fit in a single vector register. + + We currently support vectorization of loops in which all types operated upon + are of the same size. 
Therefore this function currently sets VF according to + the size of the types operated upon, and fails if there are multiple sizes + in the loop. + + VF is also the factor by which the loop iterations are strip-mined, e.g.: + original loop: + for (i=0; i examining statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } gcc_assert (stmt_info); /* skip stmts which do not need to be vectorized. */ ! if (!STMT_VINFO_RELEVANT_P (stmt_info) ! && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) ! continue; if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) { --- 122,140 ---- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { ! fprintf (vect_dump, "examining statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } gcc_assert (stmt_info); /* skip stmts which do not need to be vectorized. */ ! if (!STMT_VINFO_RELEVANT_P (stmt_info) ! && !STMT_VINFO_LIVE_P (stmt_info)) ! { ! if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) ! fprintf (vect_dump, "skip."); ! continue; ! } if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt)))) { *************** vect_determine_vectorization_factor (loo *** 122,158 **** } return false; } ! ! if (STMT_VINFO_DATA_REF (stmt_info)) ! scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); ! else if (TREE_CODE (stmt) == MODIFY_EXPR) ! scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); ! else ! scalar_type = TREE_TYPE (stmt); ! ! if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { ! fprintf (vect_dump, "get vectype for scalar type: "); ! print_generic_expr (vect_dump, scalar_type, TDF_SLIM); } ! ! vectype = get_vectype_for_scalar_type (scalar_type); ! if (!vectype) ! { ! if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, ! LOOP_LOC (loop_vinfo))) { ! fprintf (vect_dump, "not vectorized: unsupported data-type "); ! print_generic_expr (vect_dump, scalar_type, TDF_SLIM); } ! return false; ! 
} if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { fprintf (vect_dump, "vectype: "); print_generic_expr (vect_dump, vectype, TDF_SLIM); } - STMT_VINFO_VECTYPE (stmt_info) = vectype; nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) --- 146,190 ---- } return false; } ! ! if (STMT_VINFO_VECTYPE (stmt_info)) { ! vectype = STMT_VINFO_VECTYPE (stmt_info); ! scalar_type = TREE_TYPE (vectype); } ! else ! { ! if (STMT_VINFO_DATA_REF (stmt_info)) ! scalar_type = ! TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); ! else if (TREE_CODE (stmt) == MODIFY_EXPR) ! scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0)); ! else ! scalar_type = TREE_TYPE (stmt); ! if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) ! { ! fprintf (vect_dump, "get vectype for scalar type: "); ! print_generic_expr (vect_dump, scalar_type, TDF_SLIM); ! } ! vectype = get_vectype_for_scalar_type (scalar_type); ! if (!vectype) { ! if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, ! LOOP_LOC (loop_vinfo))) ! { ! fprintf (vect_dump, "not vectorized: unsupported data-type "); ! print_generic_expr (vect_dump, scalar_type, TDF_SLIM); ! } ! return false; } ! STMT_VINFO_VECTYPE (stmt_info) = vectype; ! } ! if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { fprintf (vect_dump, "vectype: "); print_generic_expr (vect_dump, vectype, TDF_SLIM); } nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) *************** vect_analyze_operations (loop_vec_info l *** 213,219 **** stmt_vec_info stmt_info; if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) ! fprintf (vect_dump, "\n=== vect_analyze_operations ===\n"); gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); --- 245,251 ---- stmt_vec_info stmt_info; if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) ! 
fprintf (vect_dump, "=== vect_analyze_operations ==="); gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); *************** vect_analyze_operations (loop_vec_info l *** 227,233 **** stmt_info = vinfo_for_stmt (phi); if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { ! fprintf (vect_dump, "==> examining statement: "); print_generic_expr (vect_dump, phi, TDF_SLIM); } --- 259,265 ---- stmt_info = vinfo_for_stmt (phi); if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { ! fprintf (vect_dump, "examining phi: "); print_generic_expr (vect_dump, phi, TDF_SLIM); } *************** vect_analyze_operations (loop_vec_info l *** 252,258 **** if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { ! fprintf (vect_dump, "==> examining statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } --- 284,290 ---- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { ! fprintf (vect_dump, "examining statement: "); print_generic_expr (vect_dump, stmt, TDF_SLIM); } *************** vect_analyze_operations (loop_vec_info l *** 281,287 **** } #endif ! ok = (vectorizable_reduction (stmt, NULL, NULL) || vectorizable_operation (stmt, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL) || vectorizable_load (stmt, NULL, NULL) --- 313,320 ---- } #endif ! ok = (vectorizable_target_reduction_pattern (stmt, NULL, NULL) ! || vectorizable_reduction (stmt, NULL, NULL) || vectorizable_operation (stmt, NULL, NULL) || vectorizable_assignment (stmt, NULL, NULL) || vectorizable_load (stmt, NULL, NULL) *************** vect_analyze_scalar_cycles (loop_vec_inf *** 496,502 **** if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) fprintf (vect_dump, "Detected reduction."); STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; ! 
STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = vect_reduction_def; } else if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) --- 529,536 ---- if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) fprintf (vect_dump, "Detected reduction."); STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; ! STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = ! vect_reduction_def; } else if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo))) *************** vect_analyze_data_refs (loop_vec_info lo *** 1336,1342 **** /* Set vectype for STMT. */ scalar_type = TREE_TYPE (DR_REF (dr)); ! STMT_VINFO_VECTYPE (stmt_info) = get_vectype_for_scalar_type (scalar_type); if (!STMT_VINFO_VECTYPE (stmt_info)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, --- 1370,1377 ---- /* Set vectype for STMT. */ scalar_type = TREE_TYPE (DR_REF (dr)); ! STMT_VINFO_VECTYPE (stmt_info) = ! get_vectype_for_scalar_type (scalar_type); if (!STMT_VINFO_VECTYPE (stmt_info)) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, *************** vect_mark_relevant (varray_type *worklis *** 1372,1391 **** if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p); STMT_VINFO_LIVE_P (stmt_info) |= live_p; - /* CHECKME */ if (TREE_CODE (stmt) == PHI_NODE) /* Don't mark as relevant because it's not going to vectorized. */ return; - if (STMT_VINFO_IN_PATTERN_P (stmt_info)) - { - /* Don't mark as relevant because it's not going to vectorized. 
*/ - VARRAY_PUSH_TREE (*worklist, stmt); - return; - } - STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p; if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p --- 1407,1433 ---- if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p); + if (STMT_VINFO_IN_PATTERN_P (stmt_info) + && STMT_VINFO_RELATED_STMT (stmt_info)) + { + /* This is the last stmt in a sequence that was detected as a + pattern that can potentially be vectorized. Don't mark the stmt + as relevant/live because it's not going to be vectorized. + Instead mark the pattern-stmt that replaces it. */ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live."); + stmt = STMT_VINFO_RELATED_STMT (stmt_info); + stmt_info = vinfo_for_stmt (stmt); + save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info); + save_live_p = STMT_VINFO_LIVE_P (stmt_info); + } + STMT_VINFO_LIVE_P (stmt_info) |= live_p; if (TREE_CODE (stmt) == PHI_NODE) /* Don't mark as relevant because it's not going to vectorized. */ return; STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p; if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p *************** vect_mark_stmts_to_be_vectorized (loop_v *** 1591,1613 **** if (!def_stmt || IS_EMPTY_STMT (def_stmt)) continue; - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "worklist: examine use %d: ", i); - print_generic_expr (vect_dump, use, TDF_SLIM); - } - bb = bb_for_stmt (def_stmt); if (!flow_bb_inside_loop_p (loop, bb)) continue; if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ! { ! gcc_assert (!relevant_p && live_p); ! vect_mark_relevant (&worklist, def_stmt, true, false); ! } ! else if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) !
vect_mark_relevant (&worklist, def_stmt, true, live_p); else vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); } --- 1633,1653 ---- if (!def_stmt || IS_EMPTY_STMT (def_stmt)) continue; bb = bb_for_stmt (def_stmt); if (!flow_bb_inside_loop_p (loop, bb)) continue; + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "def_stmt: "); + print_generic_expr (vect_dump, def_stmt, TDF_SLIM); + } + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ! { ! gcc_assert (!relevant_p && live_p); ! vect_mark_relevant (&worklist, def_stmt, true, false); ! } else vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p); } *************** vect_mark_stmts_to_be_vectorized (loop_v *** 1647,1653 **** */ tree ! vect_recog_unsigned_subsat_pattern (tree last_stmt, varray_type *stmt_list) { tree stmt, expr; tree type; --- 1687,1694 ---- */ tree ! vect_recog_unsigned_subsat_pattern (tree last_stmt, tree *pattern_type, ! varray_type *stmt_list) { tree stmt, expr; tree type; *************** vect_recog_unsigned_subsat_pattern (tree *** 1660,1671 **** tree pattern_expr; tree new; - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "vect_recog_unsigned_subsat_pattern: "); - print_generic_expr (vect_dump, last_stmt, TDF_SLIM); - } - if (TREE_CODE (last_stmt) != MODIFY_EXPR) return NULL; --- 1701,1706 ---- *************** vect_recog_unsigned_subsat_pattern (tree *** 1781,1786 **** --- 1816,1822 ---- /* Pattern detected. Create a stmt to be used to replace the pattern: */ pattern_expr = build (SAT_MINUS_EXPR, type, a, b); + *pattern_type = get_vectype_for_scalar_type (TREE_TYPE (pattern_expr)); return pattern_expr; } *************** vect_recog_unsigned_subsat_pattern (tree *** 1799,1823 **** form by the target and does some bookeeping, as explained in the documentation for vect_recog_pattern. */ ! static void ! 
vect_pattern_recog_1 (tree (* pattern_recog_func) (tree, varray_type *), block_stmt_iterator si) { tree stmt = bsi_stmt (si); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); varray_type stmt_list; tree pattern_expr; enum tree_code code; - tree vectype; - optab optab; - enum machine_mode vec_mode; tree var, var_name; stmt_ann_t ann; VARRAY_TREE_INIT (stmt_list, 10, "stmt list"); ! pattern_expr = (* pattern_recog_func) (stmt, &stmt_list); if (!pattern_expr) { varray_clear (stmt_list); --- 1835,1861 ---- form by the target and does some bookeeping, as explained in the documentation for vect_recog_pattern. */ ! void ! vect_pattern_recog_1 (tree (* pattern_recog_func) (tree, tree *, varray_type *), block_stmt_iterator si) { tree stmt = bsi_stmt (si); stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + stmt_vec_info pattern_stmt_info; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); varray_type stmt_list; tree pattern_expr; + tree pattern_vectype; + tree pattern_expr_type; enum tree_code code; tree var, var_name; stmt_ann_t ann; + bool supported_generic_pattern = false; + bool target_specific_pattern = false; VARRAY_TREE_INIT (stmt_list, 10, "stmt list"); ! pattern_expr = (* pattern_recog_func) (stmt, &pattern_vectype, &stmt_list); if (!pattern_expr) { varray_clear (stmt_list); *************** vect_pattern_recog_1 (tree (* pattern_re *** 1827,1847 **** /* Check that the pattern is supported in vector form: */ code = TREE_CODE (pattern_expr); ! vectype = get_vectype_for_scalar_type (TREE_TYPE (pattern_expr)); ! optab = optab_for_tree_code (code, vectype); ! if (!optab) { ! varray_clear (stmt_list); ! return; } ! vec_mode = TYPE_MODE (vectype); ! if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) { varray_clear (stmt_list); return; } - /* Found a vectorizable pattern! 
*/ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { --- 1865,1901 ---- /* Check that the pattern is supported in vector form: */ code = TREE_CODE (pattern_expr); ! pattern_expr_type = TREE_TYPE (pattern_expr); ! ! /* target specific pattern? */ ! if (code == CALL_EXPR ! && TREE_CODE (TREE_OPERAND (pattern_expr, 0)) == ADDR_EXPR ! && TREE_CODE (TREE_OPERAND (TREE_OPERAND (pattern_expr, 0), 0)) ! == FUNCTION_DECL ! && DECL_BUILT_IN (TREE_OPERAND (TREE_OPERAND (pattern_expr, 0), 0)) ! && DECL_BUILT_IN_CLASS (TREE_OPERAND (TREE_OPERAND (pattern_expr, 0), 0)) ! == BUILT_IN_MD) ! { ! gcc_assert (VECTOR_MODE_P (TYPE_MODE (pattern_expr_type))); ! pattern_expr_type = TREE_TYPE (pattern_expr_type); ! target_specific_pattern = true; ! } ! else { ! /* generic pattern? */ ! optab optab = optab_for_tree_code (code, pattern_vectype); ! enum machine_mode vec_mode = TYPE_MODE (pattern_vectype); ! if (optab ! && optab->handlers[(int) vec_mode].insn_code != CODE_FOR_nothing) ! supported_generic_pattern = true; } ! ! if (!target_specific_pattern && !supported_generic_pattern) { varray_clear (stmt_list); return; } /* Found a vectorizable pattern! */ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) { *************** vect_pattern_recog_1 (tree (* pattern_re *** 1853,1859 **** /* Mark the stmts that are involved in the pattern, and create a new stmt to express the pattern and add it to the code. */ ! var = create_tmp_var (TREE_TYPE (pattern_expr), "patt"); add_referenced_tmp_var (var); var_name = make_ssa_name (var, NULL_TREE); pattern_expr = build (MODIFY_EXPR, void_type_node, var_name, pattern_expr); --- 1907,1913 ---- /* Mark the stmts that are involved in the pattern, and create a new stmt to express the pattern and add it to the code. */ ! 
var = create_tmp_var (pattern_expr_type, "patt"); add_referenced_tmp_var (var); var_name = make_ssa_name (var, NULL_TREE); pattern_expr = build (MODIFY_EXPR, void_type_node, var_name, pattern_expr); *************** vect_pattern_recog_1 (tree (* pattern_re *** 1862,1871 **** get_stmt_operands (pattern_expr); ann = stmt_ann (pattern_expr); set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (pattern_expr, loop_vinfo)); ! STMT_VINFO_RELATED_STMT (vinfo_for_stmt (pattern_expr)) = stmt; ! STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_expr; ! STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_expr)) = true; while (VARRAY_ACTIVE_SIZE (stmt_list) > 0) { --- 1916,1944 ---- get_stmt_operands (pattern_expr); ann = stmt_ann (pattern_expr); set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (pattern_expr, loop_vinfo)); + pattern_stmt_info = vinfo_for_stmt (pattern_expr); ! STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt; ! STMT_VINFO_RELATED_STMT (stmt_info) = pattern_expr; ! STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info); ! STMT_VINFO_EXTERNAL_USE (pattern_stmt_info) = ! STMT_VINFO_EXTERNAL_USE (stmt_info); ! /* We created the following expr: ! X = pattern_expr (args) ! and set the type of X to the type of the pattern_expr. ! The vectype of this new stmt (the type of the stmt when vectorized) is ! usually obtained by: ! get_vectype_for_scalar_type (TREE_TYPE (X)) ! In some cases however, this is not the right vectype for the stmt. ! One such example is a reduction computation - some reductions (like ! accumulation) may require one vectype for the epilog computation (which ! depends on the type of the reduction variable, X in this case), and a ! different vectype is used for the partial-sums computation that takes ! place inside the loop (which depends on the type of the other arguments to ! the stmt, and may or may not be the same as the type of X). ! 'pattern_vectype' is the vectype that will be used inside the loop. ! */ ! 
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; while (VARRAY_ACTIVE_SIZE (stmt_list) > 0) { *************** vect_pattern_recog_1 (tree (* pattern_re *** 1918,1925 **** (the last stmt in the pattern (S4) and the new pattern stmt (S6) point to each other through the RELATED_STMT field). ! S6 is marked as relevant. In vect_mark_stmts_to_be_vectorized the ! stmts {S1,S2,S3,S4} are marked as irrelevant. If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} (because they are marked as irrelevent). It will vectorize S6, and record --- 1991,1999 ---- (the last stmt in the pattern (S4) and the new pattern stmt (S6) point to each other through the RELATED_STMT field). ! S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead ! of S4 because it will replace all its uses. Stmts {S1,S2,S3} will ! remain irrelevant unless used by stmts other than S4. If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} (because they are marked as irrelevent). It will vectorize S6, and record *************** vect_pattern_recog (loop_vec_info loop_v *** 1957,1963 **** block_stmt_iterator si; tree stmt; unsigned int i, j; ! tree (* pattern_recog_func) (tree, varray_type *); if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "\n<>\n"); --- 2031,2037 ---- block_stmt_iterator si; tree stmt; unsigned int i, j; ! tree (* pattern_recog_func) (tree, tree *, varray_type *); if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "\n<>\n"); *************** vect_pattern_recog (loop_vec_info loop_v *** 1971,1980 **** { stmt = bsi_stmt (si); ! /* Scan over all vect_recog_xxx_pattern functions. */ for (j = 0; j < NUM_PATTERNS; j++) { ! pattern_recog_func = vect_pattern_recog_func[j]; vect_pattern_recog_1 (pattern_recog_func, si); } } --- 2045,2059 ---- { stmt = bsi_stmt (si); ! /* Scan over all target specific vect_recog_xxx_pattern functions ! if available. */ ! 
if (targetm.vectorize.builtin_vect_pattern_recog) ! targetm.vectorize.builtin_vect_pattern_recog (stmt); ! ! /* Scan over all generic vect_recog_xxx_pattern functions. */ for (j = 0; j < NUM_PATTERNS; j++) { ! pattern_recog_func = vect_pattern_recog_funcs[j]; vect_pattern_recog_1 (pattern_recog_func, si); } } *************** vect_analyze_loop (struct loop *loop) *** 2285,2297 **** return NULL; } - vect_pattern_recog (loop_vinfo); - /* Check that all cross-iteration scalar data-flow cycles are OK. Cross-iteration cycles caused by virtual phis are analyzed separately. */ vect_analyze_scalar_cycles (loop_vinfo); /* Data-flow analysis to detect stmts that do not need to be vectorized. */ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); --- 2364,2376 ---- return NULL; } /* Check that all cross-iteration scalar data-flow cycles are OK. Cross-iteration cycles caused by virtual phis are analyzed separately. */ vect_analyze_scalar_cycles (loop_vinfo); + vect_pattern_recog (loop_vinfo); + /* Data-flow analysis to detect stmts that do not need to be vectorized. */ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); Index: tree-vect-transform.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-vect-transform.c,v retrieving revision 2.1.4.5 diff -c -3 -p -r2.1.4.5 tree-vect-transform.c *** tree-vect-transform.c 16 Mar 2005 16:48:44 -0000 2.1.4.5 --- tree-vect-transform.c 3 Apr 2005 11:53:08 -0000 *************** static void vect_finish_stmt_generation *** 60,65 **** --- 60,67 ---- (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); static void update_vuses_to_preheader (tree, struct loop*); static bool vect_is_simple_cond (tree, loop_vec_info); + static void vect_create_epilog_for_reduction + (tree, tree, tree, enum tree_code, tree); /* Utility function dealing with loop peeling (not peeling itself). 
*/ static void vect_generate_tmps_on_preheader *************** get_initial_def_for_reduction (tree stmt *** 691,699 **** tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); int nelements = nunits; ! tree operation = TREE_OPERAND (stmt, 1); ! enum tree_code code = TREE_CODE (operation); ! tree type = TREE_TYPE (operation); tree def; tree vec, t = NULL_TREE; int i; --- 693,700 ---- tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype)); int nelements = nunits; ! enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); ! tree type = TREE_TYPE (op); tree def; tree vec, t = NULL_TREE; int i; *************** get_initial_def_for_reduction (tree stmt *** 714,730 **** #endif break; - /* (Not supported yet) */ - case MULT_EXPR: - def = INTEGRAL_TYPE_P (type) ? integer_one_node : - build_real (type, dconst1); - #ifdef ADJUST_IN_EPILOG - *need_epilog_adjust = true; - #else - nelements = nunits - 1; - #endif - break; - case MIN_EXPR: case MAX_EXPR: def = op; --- 715,720 ---- *************** get_initial_def_for_reduction (tree stmt *** 759,764 **** --- 749,1155 ---- } + /* Function vect_create_epilog_for_reduction: + + Create code at the loop-epilog to finalize the result of a reduction + computation. + + LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it + into a single result, by applying the operation REDUC_CODE on the + partial-results-vector. For this, we need to create a new phi node at the + loop exit to preserve loop-closed form, as illustrated below. + + STMT is the original scalar reduction stmt that is being vectorized. + REDUCTION_OP is the scalar reduction-variable. + REDUCTION_PHI is the phi-node that carries the reduction computation. + This function also sets the arguments for the REDUCTION_PHI: + The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP. + The loop-latch argument is VECT_DEF - the vector of partial sums. 
+ + Transform this: + + loop: + vec_def = phi <null, null> # REDUCTION_PHI + .... + VECT_DEF = ... + + loop_exit: + s_out0 = phi <s_loop> # EXIT_PHI + + use <s_out0> + use <s_out0> + + Into: + + loop: + vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI + .... + VECT_DEF = ... + + loop_exit: + s_out0 = phi <s_loop> # EXIT_PHI + v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI + + v_out2 = reduc_expr <v_out1> + s_out3 = extract_field <v_out2, bitpos> + + use <s_out3> + use <s_out3> + */ + + static void + vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op, + enum tree_code reduc_code, tree reduction_phi) + { + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block exit_bb; + tree scalar_dest = TREE_OPERAND (stmt, 0); + tree scalar_type = TREE_TYPE (scalar_dest); + tree new_phi; + block_stmt_iterator exit_bsi; + tree vec_dest; + tree new_temp; + tree epilog_stmt; + tree new_scalar_dest, exit_phi; + tree bitsize, bitpos; + enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); + tree scalar_initial_def; + tree vec_initial_def; + tree orig_name; + bool need_epilog_adjust = false; + + /*** 1. Create the reduction def-use cycle ***/ + + /* 1.1 set the loop-entry arg for the reduction-phi: */ + /* For iteration i, the reduction-operand is expected to be defined: + for i=first_iteration: before the loop + for i>first_iteration: by the previous iteration. + For the case of reduction, vect_get_vec_def_for_operand returns + the scalar def before the loop, that defines the initial value + of the reduction variable.
*/ + scalar_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt); + gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (scalar_initial_def)))); + vec_initial_def = get_initial_def_for_reduction (stmt, scalar_initial_def, + &need_epilog_adjust); + add_phi_arg (reduction_phi, vec_initial_def, loop->entry_edges[0]); + + + /* 1.2 set the loop-latch arg for the reduction-phi: */ + add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "transform reduction: created def-use cycle:"); + print_generic_expr (vect_dump, reduction_phi, TDF_SLIM); + fprintf (vect_dump, "\n"); + print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM); + } + + + /*** 2. Create epilog code ***/ + + /* 2.1 Create new loop-exit-phi to preserve loop-closed form: + v_out1 = phi */ + + exit_bb = loop->single_exit->dest; + new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); + SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); + + exit_bsi = bsi_start (exit_bb); + + + /* 2.2 Create: + v_out2 = reduc_expr + s_out3 = extract_field */ + + vec_dest = vect_create_destination_var (scalar_dest, vectype); + epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, + build1 (reduc_code, vectype, PHI_RESULT (new_phi))); + new_temp = make_ssa_name (vec_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + { + fprintf (vect_dump, "transform reduction: created epilog code:"); + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + } + + new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); + bitsize = TYPE_SIZE (scalar_type); + + /* The result is in the low order bits. 
*/ + if (BITS_BIG_ENDIAN) + bitpos = size_binop (MULT_EXPR, + bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), + TYPE_SIZE (scalar_type)); + else + bitpos = bitsize_zero_node; + + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, + build3 (BIT_FIELD_REF, scalar_type, + new_temp, bitsize, bitpos)); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + + + #ifdef ADJUST_IN_EPILOG + /* 2.3 If needed, consider the initial value of the reduction variable. + Create: + s_out = scalar_expr */ + + if (need_epilog_adjust) + { + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, + build2 (code, scalar_type, new_temp, scalar_initial_def)); + new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); + TREE_OPERAND (epilog_stmt, 0) = new_temp; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + } + #endif + + + /* 2.4 Replace uses of s_out0 with uses of s_out3 */ + + exit_phi = STMT_VINFO_EXTERNAL_USE (stmt_info); + orig_name = PHI_RESULT (exit_phi); + replace_immediate_uses (orig_name, new_temp); + + + /* CHECKME: Alternative solution: + Remove the original scalar exit phi: + s_out0 = phi + and instead create a new assignment to define s_out0: + s_out0 = s_out3 + + remove_phi_node_keeping_name (exit_phi, NULL_TREE, exit_bb); + + epilog_stmt = build2 (MODIFY_EXPR, TREE_TYPE (scalar_dest), orig_name, + new_temp); + SSA_NAME_DEF_STMT (orig_name) = epilog_stmt; + bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); + */ + } + + + /* Function vectorizable_target_reduction_pattern + + Check 
if STMT performs a target-reduction-pattern that can be vectorized. + If VEC_STMT is also passed, vectorize the STMT: create a vectorized + stmt to replace it, put it in VEC_STMT, and insert it at BSI. + Return FALSE if not a vectorizable STMT, TRUE otherwise. */ + + bool + vectorizable_target_reduction_pattern (tree stmt, block_stmt_iterator *bsi, + tree *vec_stmt) + { + tree vec_dest; + tree scalar_dest; + tree op0, op1 = NULL_TREE, reduc_op; + tree loop_vec_def; + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree loop_vectype = STMT_VINFO_VECTYPE (stmt_info); + tree reduction_vectype; + enum machine_mode vec_mode; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree call; + enum tree_code fcode; + enum tree_code orig_code, epilog_reduc_code = 0; + optab reduc_optab; + tree new_temp; + tree def0, def1, rdef; + tree def_stmt0, def_stmt1, rdef_stmt; + enum vect_def_type dt0, dt1, rdt; + tree new_phi; + bool is_simple_use0; + bool is_simple_use1; + bool is_simple_reduc_use; + tree orig_stmt; + stmt_vec_info orig_stmt_info; + tree fncdecl; + tree params; + tree vec_params; + tree new_stmt; + int n_ops = 0; + tree arg; + + /* Is STMT a vectorizable target-reduction-pattern? */ + + /** 1. Is this a reduction? **/ + + /* Check if recognized as a reduction pattern. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) + return false; + + /* Not supportable if the reduction variable is used in the loop. */ + if (STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + /* Reduction is expected to be used out of the loop. */ + if (!STMT_VINFO_LIVE_P (stmt_info)) + return false; + + /* 2. Is this a recognized computation idiom? + + Check if STMT represents a pattern that has been recognized + in earlier analysis stages. For stmts that represent a pattern, + the STMT_VINFO_RELATED_STMT field records the last stmt in + the original sequence that constitutes the pattern. 
*/ + + orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (!orig_stmt) + return false; + orig_stmt_info = vinfo_for_stmt (orig_stmt); + #ifdef ENABLE_CHECKING + gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt); + gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); + #endif + + /* 3. Is this a target-specific pattern? */ + + call = TREE_OPERAND (stmt, 1); + if (TREE_CODE (call) != CALL_EXPR) + return false; + fncdecl = TREE_OPERAND (TREE_OPERAND (call, 0), 0); + fcode = DECL_FUNCTION_CODE (fncdecl); + if (TREE_CODE (fncdecl) != FUNCTION_DECL + || !DECL_BUILT_IN (fncdecl) + || DECL_BUILT_IN_CLASS (fncdecl) != BUILT_IN_MD) + return false; + /* The vectype is expected to have been set */ + if (!loop_vectype) + return false; + + /* 4. Check the operands of the operation. The first operands are defined + inside the loop body. The last operand is the reduction variable, + which is defined by the loop-header-phi. */ + + params = TREE_OPERAND (call, 1); + + /* 4.1 Check the operand that are defined inside the loop body + (i.e. by the current iteration) */ + + for (arg = params; arg; arg = TREE_CHAIN (arg)) + n_ops++; + if (n_ops < 2 || n_ops > 3) + return false; + + op0 = TREE_VALUE (params); + is_simple_use0 = + vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0); + #ifdef ENABLE_CHECKING + gcc_assert (is_simple_use0); + gcc_assert (dt0 == vect_loop_def); + #endif + + if (n_ops == 3) + { + params = TREE_CHAIN (params); + op1 = TREE_VALUE (params); + is_simple_use1 = + vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1); + #ifdef ENABLE_CHECKING + gcc_assert (is_simple_use1); + gcc_assert (dt1 == vect_loop_def); + #endif + } + + /* 4.2 Check the reduction operand. It is expected to be defined by the + loop header phi. 
*/ + + reduc_op = TREE_VALUE (TREE_CHAIN (params)); + is_simple_reduc_use = + vect_is_simple_use (reduc_op, loop_vinfo, &rdef_stmt, &rdef, &rdt); + #ifdef ENABLE_CHECKING + gcc_assert (is_simple_reduc_use); + gcc_assert (rdt == vect_reduction_def); + gcc_assert (TREE_CODE (rdef_stmt) == PHI_NODE); + gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == + vect_is_simple_reduction (loop, rdef_stmt)); + #endif + if (STMT_VINFO_LIVE_P (vinfo_for_stmt (rdef_stmt))) + return false; + + /* 5. Check that the epilog code for finalizing the reduction is supported */ + + orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); + if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) + return false; + reduction_vectype = get_vectype_for_scalar_type (TREE_TYPE (rdef)); + reduc_optab = optab_for_tree_code (epilog_reduc_code, reduction_vectype); + vec_mode = TYPE_MODE (reduction_vectype); + if (!reduc_optab || + reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + return false; + + + /* This is a vectorizable reduction pattern. */ + + if (!vec_stmt) /* transformation not required. */ + { + STMT_VINFO_TYPE (stmt_info) = target_reduc_pattern_vec_info_type; + return true; + } + + /** Transform. **/ + + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) + fprintf (vect_dump, "transforming target reduction pattern."); + + /* Create the destination vector */ + scalar_dest = TREE_OPERAND (stmt, 0); + vec_dest = vect_create_destination_var (scalar_dest, reduction_vectype); + + /* Create the reduction-phi that defines the reduction-operand. 
*/ + new_phi = create_phi_node (vec_dest, loop->header); + vec_params = build_tree_list (NULL_TREE, PHI_RESULT (new_phi)); + + /* Prepare the operands that are defined inside the loop body */ + loop_vec_def = vect_get_vec_def_for_operand (op0, stmt); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def)))); + vec_params = tree_cons (NULL_TREE, loop_vec_def, vec_params); + if (n_ops == 3) + { + loop_vec_def = vect_get_vec_def_for_operand (op1, stmt); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def)))); + vec_params = tree_cons (NULL_TREE, loop_vec_def, vec_params); + } + + /* Create the vectorized operation that computes the partial results */ + /* CHECKME: gcc_assert (TREE_READONLY (fncdecl)); */ + new_stmt = build_function_call_expr (fncdecl, vec_params); + *vec_stmt = build2 (MODIFY_EXPR, reduction_vectype, vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, *vec_stmt); + TREE_OPERAND (*vec_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + new_temp = TREE_OPERAND (*vec_stmt, 0); + + /* Finalize the reduction-phi (set it's arguments) and create the + epilog reduction code. */ + gcc_assert (! STMT_VINFO_VECTYPE (orig_stmt_info)); + STMT_VINFO_VECTYPE (orig_stmt_info) = reduction_vectype; + vect_create_epilog_for_reduction (new_temp, orig_stmt, reduc_op, + epilog_reduc_code, new_phi); + STMT_VINFO_VECTYPE (orig_stmt_info) = NULL; + + return true; + } + + /* Function vectorizable_reduction. Check if STMT performs a reduction operation that can be vectorized. *************** vectorizable_reduction (tree stmt, block *** 772,779 **** tree vec_dest; tree scalar_dest; tree op0, op1; ! tree scalar_initial_def, loop_vec_def, vec_initial_def; ! bool need_epilog_adjust = false; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); --- 1163,1169 ---- tree vec_dest; tree scalar_dest; tree op0, op1; ! 
tree loop_vec_def; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); tree vectype = STMT_VINFO_VECTYPE (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); *************** vectorizable_reduction (tree stmt, block *** 787,800 **** tree def0, def1, def_stmt0, def_stmt1; enum vect_def_type dt0, dt1; tree new_phi; - tree new_scalar_dest, exit_phi; - tree epilog_stmt; - tree bitsize, bitpos; - basic_block exit_bb; - block_stmt_iterator exit_bsi; - tree orig_name; tree scalar_type; ! bool is_simple_use; /* Is vectorizable reduction? */ --- 1177,1185 ---- tree def0, def1, def_stmt0, def_stmt1; enum vect_def_type dt0, dt1; tree new_phi; tree scalar_type; ! bool is_simple_use0; ! bool is_simple_use1; /* Is vectorizable reduction? */ *************** vectorizable_reduction (tree stmt, block *** 817,825 **** code = TREE_CODE (operation); op_type = TREE_CODE_LENGTH (code); ! #ifdef ENABLE_CHECKING ! gcc_assert (op_type == binary_op); ! #endif op0 = TREE_OPERAND (operation, 0); op1 = TREE_OPERAND (operation, 1); --- 1202,1209 ---- code = TREE_CODE (operation); op_type = TREE_CODE_LENGTH (code); ! if (op_type != binary_op) ! return false; op0 = TREE_OPERAND (operation, 0); op1 = TREE_OPERAND (operation, 1); *************** vectorizable_reduction (tree stmt, block *** 827,837 **** scalar_type = TREE_TYPE (scalar_dest); /* Check the first operand. It is expected to be defined inside the loop. */ ! is_simple_use = vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0); ! gcc_assert (is_simple_use); ! is_simple_use = vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1); ! gcc_assert (is_simple_use); #ifdef ENABLE_CHECKING gcc_assert (dt0 == vect_loop_def); gcc_assert (dt1 == vect_reduction_def); gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE); --- 1211,1223 ---- scalar_type = TREE_TYPE (scalar_dest); /* Check the first operand. It is expected to be defined inside the loop. */ ! is_simple_use0 = ! 
vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0); ! is_simple_use1 = ! vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1); #ifdef ENABLE_CHECKING + gcc_assert (is_simple_use0); + gcc_assert (is_simple_use1); gcc_assert (dt0 == vect_loop_def); gcc_assert (dt1 == vect_reduction_def); gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE); *************** vectorizable_reduction (tree stmt, block *** 841,846 **** --- 1227,1233 ---- return false; /* Supportable by target? */ + /* check support for the operation in the loop */ optab = optab_for_tree_code (code, vectype); if (!optab) *************** vectorizable_reduction (tree stmt, block *** 856,861 **** --- 1243,1249 ---- fprintf (vect_dump, "op not supported by target."); return false; } + /* check support for the epilog operation */ if (!reduction_code_for_scalar_code (code, &reduc_code)) return false; *************** vectorizable_reduction (tree stmt, block *** 880,1038 **** } /** Transform. **/ if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "transform reduction."); ! /* Handle def. */ vec_dest = vect_create_destination_var (scalar_dest, vectype); - /* Handle uses. */ ! /* oprnd 0 is expected to be defined in the loop */ loop_vec_def = vect_get_vec_def_for_operand (op0, stmt); gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def)))); - /* For iteration i, oprnd 1 is expected to be defined: - i=first_iteration: before the loop - i>first_iteration: by the previous iteration. - For the case of reduction, vect_get_vec_def_for_operand returns - the scalar def before the loop, that defines the initial value - of the reduction variable. */ - scalar_initial_def = vect_get_vec_def_for_operand (op1, stmt); - gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (scalar_initial_def)))); ! /* Create preheader code and phi */ ! new_phi = create_phi_node (vec_dest, loop->header); ! vec_initial_def = get_initial_def_for_reduction (stmt, scalar_initial_def, ! &need_epilog_adjust); ! 
add_phi_arg (new_phi, vec_initial_def, loop->entry_edges[0]); ! ! /* Create the new vector stmt. */ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi))); new_temp = make_ssa_name (vec_dest, *vec_stmt); TREE_OPERAND (*vec_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, *vec_stmt, bsi); - add_phi_arg (new_phi, new_temp, loop_latch_edge (loop)); - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "transform reduction: created def-use cycle:"); - print_generic_expr (vect_dump, new_phi, TDF_SLIM); - fprintf (vect_dump, "\n"); - print_generic_expr (vect_dump, *vec_stmt, TDF_SLIM); - } - /** Handle epilog code. - - Transform this: - s_out0 = phi - - use - use - - Into: - s_out0 = phi - v_out1 = phi - - v_out2 = reduc_expr - s_out3 = extract_field - - use - use - **/ - - /* 1. Create new loop-exit-phi to preserve loop-closed form: - v_out1 = phi */ - - exit_bb = loop->single_exit->dest; - new_phi = create_phi_node (vec_dest, exit_bb); - SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, new_temp); - - exit_bsi = bsi_start (exit_bb); - - - /* 2. Create: - v_out2 = reduc_expr - s_out3 = extract_field */ - - vec_dest = vect_create_destination_var (scalar_dest, vectype); - epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, - build1 (reduc_code, vectype, PHI_RESULT (new_phi))); - new_temp = make_ssa_name (vec_dest, epilog_stmt); - TREE_OPERAND (epilog_stmt, 0) = new_temp; - bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - { - fprintf (vect_dump, "transform reduction: created epilog code:"); - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); - } - - new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); - bitsize = TYPE_SIZE (scalar_type); - - /* The result is in the low order bits. 
*/ - if (BITS_BIG_ENDIAN) - bitpos = size_binop (MULT_EXPR, - bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), - TYPE_SIZE (scalar_type)); - else - bitpos = bitsize_zero_node; - - epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, - build3 (BIT_FIELD_REF, scalar_type, - new_temp, bitsize, bitpos)); - new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); - TREE_OPERAND (epilog_stmt, 0) = new_temp; - bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); - - - #ifdef ADJUST_IN_EPILOG - /* 3. If needed, consider the initial value of the reduction variable. - Create: - s_out = scalar_expr */ - - if (need_epilog_adjust) - { - epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, - build2 (code, scalar_type, new_temp, scalar_initial_def)); - new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); - TREE_OPERAND (epilog_stmt, 0) = new_temp; - bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); - } - #endif - - - /* 4. Replace uses of s_out0 with uses of s_out3 */ - - exit_phi = STMT_VINFO_EXTERNAL_USE (stmt_info); - orig_name = PHI_RESULT (exit_phi); - replace_immediate_uses (orig_name, new_temp); - - /* CHECKME: Alternative solution: - Remove the original scalar exit phi: - s_out0 = phi - and instead create a new assignment to define s_out0: - s_out0 = s_out3 - - remove_phi_node_keeping_name (exit_phi, NULL_TREE, exit_bb); - - epilog_stmt = build2 (MODIFY_EXPR, TREE_TYPE (scalar_dest), orig_name, - new_temp); - SSA_NAME_DEF_STMT (orig_name) = epilog_stmt; - bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - - if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); - */ return true; } --- 1268,1301 ---- } /** Transform. 
**/ + if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC)) fprintf (vect_dump, "transform reduction."); ! /* Create the destination vector */ vec_dest = vect_create_destination_var (scalar_dest, vectype); ! /* Create the reduction-phi that defines the reduction-operand. */ ! new_phi = create_phi_node (vec_dest, loop->header); ! ! ! /* Prepare the operand that is defined inside the loop body */ loop_vec_def = vect_get_vec_def_for_operand (op0, stmt); gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def)))); ! /* Create the vectorized operation that computes the partial results */ *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi))); new_temp = make_ssa_name (vec_dest, *vec_stmt); TREE_OPERAND (*vec_stmt, 0) = new_temp; vect_finish_stmt_generation (stmt, *vec_stmt, bsi); + /* Finalize the reduction-phi (set it's arguments) and create the + epilog reduction code. */ + vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi); return true; } *************** vectorizable_load (tree stmt, block_stmt *** 1566,1572 **** /* <4> Create msq = phi in loop */ vec_dest = vect_create_destination_var (scalar_dest, vectype); msq = make_ssa_name (vec_dest, NULL_TREE); ! phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */ SSA_NAME_DEF_STMT (msq) = phi_stmt; add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop)); add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop)); --- 1829,1835 ---- /* <4> Create msq = phi in loop */ vec_dest = vect_create_destination_var (scalar_dest, vectype); msq = make_ssa_name (vec_dest, NULL_TREE); ! 
phi_stmt = create_phi_node (msq, loop->header); SSA_NAME_DEF_STMT (msq) = phi_stmt; add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop)); add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop)); *************** vect_transform_stmt (tree stmt, block_st *** 1761,1766 **** --- 2024,2034 ---- switch (STMT_VINFO_TYPE (stmt_info)) { + case target_reduc_pattern_vec_info_type: + done = vectorizable_target_reduction_pattern (stmt, bsi, &vec_stmt); + gcc_assert (done); + break; + case reduc_vec_info_type: done = vectorizable_reduction (stmt, bsi, &vec_stmt); gcc_assert (done); Index: tree-vectorizer.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-vectorizer.c,v retrieving revision 2.25.2.23 diff -c -3 -p -r2.25.2.23 tree-vectorizer.c *** tree-vectorizer.c 16 Mar 2005 16:48:44 -0000 2.25.2.23 --- tree-vectorizer.c 3 Apr 2005 11:53:08 -0000 *************** reduction_code_for_scalar_code (enum tre *** 1847,1870 **** { switch (code) { - #if 0 /* TODO */ - case BIT_AND_EXPR: - *reduc_code = REDUC_BIT_AND_EXPR; - return true; - - case BIT_IOR_EXPR: - *reduc_code = REDUC_BIT_IOR_EXPR; - return true; - - case BIT_XOR_EXPR: - *reduc_code = REDUC_BIT_XOR_EXPR; - return true; - - case MULT_EXPR: - *reduc_code = REDUC_MULT_EXPR; - return true; - #endif - case MAX_EXPR: *reduc_code = REDUC_MAX_EXPR; return true; --- 1847,1852 ---- *************** vect_is_simple_reduction (struct loop *l *** 2010,2020 **** } /* reduction is safe. we're dealing with one of the following: ! 1) wrapping integer arithmetic 2) floating point arithmetic, and special flags permit this optimization. ! 3) TODO: non-wrapping integer arithmetic, but there's a flag that permits ! this optimization. */ ! def1 = SSA_NAME_DEF_STMT (op1); def2 = SSA_NAME_DEF_STMT (op2); if (!def1 || !def2) --- 1992,2000 ---- } /* reduction is safe. we're dealing with one of the following: ! 
1) integer arithmetic and no trapv 2) floating point arithmetic, and special flags permit this optimization. ! */ def1 = SSA_NAME_DEF_STMT (op1); def2 = SSA_NAME_DEF_STMT (op2); if (!def1 || !def2) Index: tree-vectorizer.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree-vectorizer.h,v retrieving revision 2.7.2.14 diff -c -3 -p -r2.7.2.14 tree-vectorizer.h *** tree-vectorizer.h 13 Mar 2005 10:43:21 -0000 2.7.2.14 --- tree-vectorizer.h 3 Apr 2005 11:53:09 -0000 *************** enum stmt_vec_info_type { *** 167,173 **** op_vec_info_type, assignment_vec_info_type, select_vec_info_type, ! reduc_vec_info_type }; typedef struct _stmt_vec_info { --- 167,174 ---- op_vec_info_type, assignment_vec_info_type, select_vec_info_type, ! reduc_vec_info_type, ! target_reduc_pattern_vec_info_type }; typedef struct _stmt_vec_info { *************** extern void vectorize_loops (struct loop *** 340,352 **** /** In tree-vect-analyze.c **/ /* Driver for analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *); ! /* Pattern recognition functions. */ ! tree vect_recog_unsigned_subsat_pattern (tree, varray_type *); ! typedef tree (* _recog_func_ptr) (tree, varray_type *); ! /* Additional pattern recognition functions can (and will) be added in the future. */ #define NUM_PATTERNS 1 ! extern _recog_func_ptr vect_pattern_recog_func[NUM_PATTERNS]; /** In tree-vect-transform.c **/ extern bool vectorizable_load (tree, block_stmt_iterator *, tree *); --- 341,357 ---- /** In tree-vect-analyze.c **/ /* Driver for analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *); ! ! extern void vect_pattern_recog_1 ! (tree (* ) (tree, tree *, varray_type *), block_stmt_iterator); ! ! /* Pattern recognition functions. ! Additional pattern recognition functions can (and will) be added in the future. 
*/ + tree vect_recog_unsigned_subsat_pattern (tree, tree *, varray_type *); + typedef tree (* _recog_func_ptr) (tree, tree *, varray_type *); #define NUM_PATTERNS 1 ! extern _recog_func_ptr vect_pattern_recog_funcs[]; /** In tree-vect-transform.c **/ extern bool vectorizable_load (tree, block_stmt_iterator *, tree *); *************** extern bool vectorizable_store (tree, bl *** 354,359 **** --- 359,366 ---- extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *); extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *); extern bool vectorizable_reduction (tree, block_stmt_iterator *, tree *); + extern bool vectorizable_target_reduction_pattern + (tree, block_stmt_iterator *, tree *); extern bool vectorizable_select (tree, block_stmt_iterator *, tree *); /* Driver for transformation stage. */ extern void vect_transform_loop (loop_vec_info, struct loops *); Index: tree.def =================================================================== RCS file: /cvs/gcc/gcc/gcc/tree.def,v retrieving revision 1.103.14.5 diff -c -3 -p -r1.103.14.5 tree.def *** tree.def 8 Mar 2005 08:30:03 -0000 1.103.14.5 --- tree.def 3 Apr 2005 11:53:09 -0000 *************** DEFTREECODE (REALIGN_LOAD_EXPR, "realign *** 936,955 **** */ DEFTREECODE (SAT_MINUS_EXPR, "sat_minus_expr", tcc_binary, 2) - /* Reduction operations. Operations that takes a vector of elements and "reduce" it to a scalar result (e.g. summing the elements of the vector, finding the minimum over the vector elements, etc). Operand 0 is a vector; the first element in the vector has the result. Operand 1 is a vector. 
*/ - - #if 0 /* TODO */ - DEFTREECODE (REDUC_BIT_AND_EXPR, "reduc_and_expr", tcc_unary, 1) - DEFTREECODE (REDUC_BIT_IOR_EXPR, "reduc_ior_expr", tcc_unary, 1) - DEFTREECODE (REDUC_BIT_XOR_EXPR, "reduc_xor_expr", tcc_unary, 1) - DEFTREECODE (REDUC_MULT_EXPR, "reduc_mult_expr", tcc_unary, 1) - #endif DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1) DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1) DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1) --- 936,947 ---- Index: config/rs6000/rs6000.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v retrieving revision 1.739.2.5 diff -c -3 -p -r1.739.2.5 rs6000.c *** config/rs6000/rs6000.c 11 Feb 2005 01:10:37 -0000 1.739.2.5 --- config/rs6000/rs6000.c 3 Apr 2005 11:53:15 -0000 *************** *** 35,40 **** --- 35,41 ---- #include "recog.h" #include "obstack.h" #include "tree.h" + #include "tree-flow.h" #include "expr.h" #include "optabs.h" #include "except.h" *************** *** 53,58 **** --- 54,61 ---- #include "cfglayout.h" #include "sched-int.h" #include "tree-gimple.h" + #include "tree-data-ref.h" + #include "tree-vectorizer.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif *************** enum rs6000_nop_insertion rs6000_sched_i *** 148,153 **** --- 151,164 ---- /* Support targetm.vectorize.builtin_mask_for_load. 
*/ static GTY(()) tree altivec_builtin_mask_for_load; + /* Pattern recognition functions */ + static GTY(()) tree altivec_builtin_widening_summation; + static tree target_vect_recog_widening_summation_pattern (tree, tree *, + varray_type *); + static _recog_func_ptr + target_vect_pattern_recog_funcs[TARGET_VECT_NUM_PATTERNS] = { + target_vect_recog_widening_summation_pattern}; + /* Size of long double */ const char *rs6000_long_double_size_string; int rs6000_long_double_type_size; *************** static int pad_groups (FILE *, int, rtx, *** 691,696 **** --- 702,708 ---- static void rs6000_sched_finish (FILE *, int); static int rs6000_use_sched_lookahead (void); static tree rs6000_builtin_mask_for_load (void); + static void rs6000_builtin_vect_pattern_recog (tree); static void rs6000_init_builtins (void); static rtx rs6000_expand_unop_builtin (enum insn_code, tree, rtx); *************** static const char alt_reg_names[][8] = *** 933,938 **** --- 945,953 ---- #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load + #undef TARGET_VECTORIZE_BUILTIN_VECT_PATTERN_RECOG + #define TARGET_VECTORIZE_BUILTIN_VECT_PATTERN_RECOG rs6000_builtin_vect_pattern_recog + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins *************** rs6000_builtin_mask_for_load (void) *** 1602,1607 **** --- 1617,1795 ---- return 0; } + /* Implement targetm.vectorize.builtin_vect_pattern_recog. */ + static void + rs6000_builtin_vect_pattern_recog (tree stmt) + { + int j; + block_stmt_iterator si = bsi_for_stmt (stmt); + tree (* pattern_recog_func) (tree, tree *, varray_type *); + + /* Scan over all target_vect_recog_xxx_pattern functions. 
+ for (j = 0; j < TARGET_VECT_NUM_PATTERNS; j++) + { + pattern_recog_func = target_vect_pattern_recog_funcs[j]; + vect_pattern_recog_1 (pattern_recog_func, si); + } + } + + /* Function target_vect_recog_widening_summation_pattern + + Try to find the following pattern: + + t x_t; + dt x_dt, sum = init; + loop: + sum_0 = phi <init, sum_1> + + S1 x_t = *p; + S2 x_dt = (dt) x_t; + S3 sum_1 = x_dt + sum_0; + + where type 'dt' is at least double the size of type 't', i.e. we're summing + elements of type 't' into an accumulator of type 'dt'. + + For example: + + unsigned short *data, X; + unsigned int DX, sum = init; + loop: + sum_0 = phi <init, sum_1> + + S1 X = *data.1_20; + S2 DX = (unsigned int) X; + S3 sum_1 = DX + sum_0; + + + Input: + LAST_STMT: A stmt from which the pattern search begins. In the example, + when this function is called with S3, the pattern {S2,S3} will be detected. + + Output: + STMT_LIST: If this pattern is detected, STMT_LIST will hold the stmts that + are part of the pattern. In the example, STMT_LIST will consist of {S2,S3}. + + PATTERN_TYPE: The vector type to be used for the returned new stmt. + For the example above, we want to return PATTERN_TYPE=V8HI, because + it handles 8 short elements at a time. (Note that the result it + produces contains 4 ints, but it doesn't change the fact that the + vectorization factor is 8). + + Return value: A new stmt that will be used to replace the sequence of + stmts in STMT_LIST.
In this case it will be: + CALL_EXPR < altivec.builtin.widening_summation (X,sum_0) > + */ + + static tree + target_vect_recog_widening_summation_pattern (tree last_stmt, + tree *pattern_type, + varray_type *stmt_list) + { + tree stmt, expr; + tree oprnd0, oprnd1; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree type, half_type; + tree pattern_expr; + tree builtin_decl; + tree params; + tree dummy; + enum vect_def_type dt; + + if (!TARGET_ALTIVEC) + return NULL; + + if (TREE_CODE (last_stmt) != MODIFY_EXPR) + return NULL; + + expr = TREE_OPERAND (last_stmt, 1); + type = TREE_TYPE (expr); + + /* Look for the following pattern + DX = (unsigned int) X; + sum_1 = DX + sum_0; + In which DX is at least double the size of X, and sum_1 has been + recognized as a reduction. + */ + + /* Starting from LAST_STMT, follow the defs of its uses in search + of the above pattern. */ + + if (TREE_CODE (expr) != PLUS_EXPR) + return NULL; + + if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) + return NULL; + + oprnd0 = TREE_OPERAND (expr, 0); + oprnd1 = TREE_OPERAND (expr, 1); + if (TREE_TYPE (oprnd0) != type || TREE_TYPE (oprnd1) != type) + return NULL; + + VARRAY_PUSH_TREE (*stmt_list, last_stmt); + + /* So far so good. + Assumption: oprnd1 is the reduction variable (defined by a loop-header + phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. + Left to check that oprnd0 is defined by a cast from type 'half_type' + to type 'type'.
+ + DX = (unsigned int) X; + sum_1 = DX + sum_0; + */ + + stmt = SSA_NAME_DEF_STMT (oprnd0); + gcc_assert (stmt); + stmt_vinfo = vinfo_for_stmt (stmt); + #ifdef ENABLE_CHECKING + gcc_assert (stmt_vinfo); + gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_loop_def); + gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); + #endif + + expr = TREE_OPERAND (stmt, 1); + if (TREE_CODE (expr) != NOP_EXPR) + return NULL; + + oprnd0 = TREE_OPERAND (expr, 0); + if (!vect_is_simple_use (oprnd0, STMT_VINFO_LOOP_VINFO (stmt_vinfo), + &dummy, &dummy, &dt)) + return NULL; + if (dt != vect_loop_def) + return NULL; + + half_type = TREE_TYPE (oprnd0); + if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (half_type) + || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type) + || TYPE_PRECISION (type) < TYPE_PRECISION (half_type) * 2) + return NULL; + + /* We add support for summation of QIs that is being widened to SI, + and support for summation of HIs that is being widened to SI. + TODO: Support also QI->HI summation. */ + + if (TYPE_MODE (half_type) != HImode && TYPE_MODE (half_type) != QImode) + return NULL; + + /* Supporting HI->SI. */ + if (TYPE_MODE (half_type) == HImode && TYPE_MODE (type) != SImode) + return NULL; + + /* Supporting QI->SI. */ + if (TYPE_MODE (half_type) == QImode && TYPE_MODE (type) != SImode) + return NULL; + + VARRAY_PUSH_TREE (*stmt_list, stmt); + + /* Pattern detected. Create a stmt to be used to replace the pattern: */ + params = build_tree_list (NULL_TREE, oprnd1); + params = tree_cons (NULL_TREE, oprnd0, params); + builtin_decl = altivec_builtin_widening_summation; + pattern_expr = build_function_call_expr (builtin_decl, params); + *pattern_type = get_vectype_for_scalar_type (half_type); + + return pattern_expr; + } + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. 
*************** rs6000_expand_builtin (tree exp, rtx tar *** 7784,7789 **** --- 7972,8072 ---- return target; } + if (fcode == ALTIVEC_BUILTIN_WIDENING_SUMMATION) + { + /* we have: + altivec_builtin_widening_summation (t, op0, op1) + such that type(t) == type(op1) + and type(op0) is half the size of type(t). + + generate: + ones = gen_reg_rtx (V8HI) + gen_altivec_vspltish (ones, const1_rtx) + gen_altivec_vmsumuhm (t, op0, ones, op1) + */ + int icode; + enum machine_mode tmode; + enum machine_mode mode0; + enum machine_mode mode1; + enum machine_mode mode2; + enum machine_mode mode, half_mode; + tree arg0, arg1; + tree type, half_type; + rtx op0, op1, ones, pat; + + gcc_assert (TARGET_ALTIVEC); + + /* prepare first argument: */ + arg0 = TREE_VALUE (arglist); + half_type = TREE_TYPE (arg0); + half_mode = TYPE_MODE (TREE_TYPE (half_type)); + + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + type = TREE_TYPE (arg1); + mode = TYPE_MODE (TREE_TYPE (type)); + + gcc_assert (TYPE_PRECISION (TREE_TYPE (type)) >= + TYPE_PRECISION (TREE_TYPE (half_type)) * 2); + + /* Supporting QI->SI and HI->SI. + TODO: Support QI to HI. */ + + if (half_mode == QImode && mode == SImode) + icode = TYPE_UNSIGNED (half_type) ? + (int) CODE_FOR_altivec_vmsumubm : + (int) CODE_FOR_altivec_vmsummbm; + else if (half_mode == HImode && mode == SImode) + icode = TYPE_UNSIGNED (half_type) ? + (int) CODE_FOR_altivec_vmsumuhm : + (int) CODE_FOR_altivec_vmsumshm; + else + gcc_unreachable (); + + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + op0 = expand_expr (arg0, NULL_RTX, mode0, EXPAND_NORMAL); + if (!
(*insn_data[icode].operand[1].predicate) (op0, mode0) + && mode0 != VOIDmode) + op0 = copy_to_mode_reg (mode0, op0); + + /* prepare second argument: */ + ones = gen_reg_rtx (mode1); + if (mode1 == V16QImode) + emit_insn (gen_altivec_vspltisb (ones, const1_rtx)); + else if (mode1 == V8HImode) + emit_insn (gen_altivec_vspltish (ones, const1_rtx)); + else + gcc_unreachable (); + gcc_assert (mode0 == mode1); + + /* prepare third argument: */ + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + type = TREE_TYPE (arg1); + op1 = expand_expr (arg1, NULL_RTX, mode2, EXPAND_NORMAL); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode2) + && mode1 != VOIDmode) + op1 = copy_to_mode_reg (mode2, op1); + + /* prepare target: */ + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert (tmode == mode2); + + pat = GEN_FCN (icode) (target, op0, ones, op1); + if (!pat) + return 0; + emit_insn (pat); + + return target; + } + + + if (TARGET_ALTIVEC) { ret = altivec_expand_builtin (exp, target, &success); *************** altivec_init_builtins (void) *** 8265,8270 **** --- 8548,8556 ---- = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); tree v4sf_ftype_v4sf = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); + tree v4si_ftype_v8hi_v4si + = build_function_type_list (V4SI_type_node, + V8HI_type_node, V4SI_type_node, NULL_TREE); tree void_ftype_pcvoid_int_int = build_function_type_list (void_type_node, pcvoid_type_node, integer_type_node, *************** altivec_init_builtins (void) *** 8390,8395 **** --- 8676,8693 ---- NULL_TREE, NULL_TREE)); /* Record the decl. Will be used by rs6000_builtin_mask_for_load.
*/ altivec_builtin_mask_for_load = decl; + + /* Initialize target builtin */ + + decl = + lang_hooks.builtin_function ("__builtin_altivec_widening_summation", + v4si_ftype_v8hi_v4si, + ALTIVEC_BUILTIN_WIDENING_SUMMATION, + BUILT_IN_MD, NULL, + tree_cons (get_identifier ("const"), + NULL_TREE, NULL_TREE)); + /* Record the decl. Will be used by target_vect_recog_widening_summation_pattern. */ + altivec_builtin_widening_summation = decl; } } Index: config/rs6000/rs6000.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.h,v retrieving revision 1.345.4.2 diff -c -3 -p -r1.345.4.2 rs6000.h *** config/rs6000/rs6000.h 30 Jan 2005 18:11:53 -0000 1.345.4.2 --- config/rs6000/rs6000.h 3 Apr 2005 11:53:16 -0000 *************** extern int optimize; *** 2630,2635 **** --- 2630,2638 ---- extern int flag_expensive_optimizations; extern int frame_pointer_needed; + /* Number of target-specific patterns that can be vectorized. */ + #define TARGET_VECT_NUM_PATTERNS 1 + enum rs6000_builtins { /* AltiVec builtins. */ *************** enum rs6000_builtins *** 2837,2842 **** --- 2840,2846 ---- ALTIVEC_BUILTIN_COMPILETIME_ERROR, ALTIVEC_BUILTIN_MASK_FOR_LOAD, ALTIVEC_BUILTIN_MASK_FOR_STORE, + ALTIVEC_BUILTIN_WIDENING_SUMMATION, /* SPE builtins. */ SPE_BUILTIN_EVADDW, Index: config/rs6000/t-rs6000 =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/rs6000/t-rs6000,v retrieving revision 1.12 diff -c -3 -p -r1.12 t-rs6000 *** config/rs6000/t-rs6000 23 May 2004 12:25:57 -0000 1.12 --- config/rs6000/t-rs6000 3 Apr 2005 11:53:16 -0000 *************** rs6000.o: $(CONFIG_H) $(SYSTEM_H) corety *** 8,14 **** $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \ output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) toplev.h $(GGC_H) $(HASHTAB_H) \ $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h gt-rs6000.h \ !
cfglayout.h rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c \ $(srcdir)/config/rs6000/rs6000-protos.h \ --- 8,14 ---- $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \ output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) toplev.h $(GGC_H) $(HASHTAB_H) \ $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h gt-rs6000.h \ ! cfglayout.h tree-flow.h tree-data-ref.h tree-vectorizer.h rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c \ $(srcdir)/config/rs6000/rs6000-protos.h \ Index: testsuite/ChangeLog.autovect =================================================================== RCS file: /cvs/gcc/gcc/gcc/testsuite/Attic/ChangeLog.autovect,v retrieving revision 1.1.2.22 diff -c -3 -p -r1.1.2.22 ChangeLog.autovect *** testsuite/ChangeLog.autovect 16 Mar 2005 17:18:52 -0000 1.1.2.22 --- testsuite/ChangeLog.autovect 3 Apr 2005 11:53:16 -0000 *************** *** 1,3 **** --- 1,7 ---- + 2005-04-03 Dorit Naishlos + + * gcc.dg/vect/vect-reduc-pattern-1.c: New. + 2005-03-16 Dorit Naishlos * gcc.dg/vect/vect-reduc-5.c: Use -ffast-math. Now vectorized. 
Index: testsuite/gcc.dg/vect/vect-reduc-pattern-1.c =================================================================== RCS file: testsuite/gcc.dg/vect/vect-reduc-pattern-1.c diff -N testsuite/gcc.dg/vect/vect-reduc-pattern-1.c *** /dev/null 1 Jan 1970 00:00:00 -0000 --- testsuite/gcc.dg/vect/vect-reduc-pattern-1.c 3 Apr 2005 11:53:16 -0000 *************** *** 0 **** --- 1,45 ---- + /* { dg-require-effective-target vect_int } */ + + #include + #include + #include "tree-vect.h" + + #define N 16 + #define SH_SUM 210 + #define CH_SUM 120 + + int main1 () + { + int i; + unsigned short udata_sh[N] = {0,2,4,6,8,10,12,14,16,18,20,22,24,26,28}; + unsigned char udata_ch[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + unsigned int sum = 0; + + for (i = 0; i < N; i++){ + sum += udata_sh[i]; + } + + /* check results: */ + if (sum != SH_SUM) + abort (); + + sum = 0; + + for (i = 0; i < N; i++){ + sum += udata_ch[i]; + } + + /* check results: */ + if (sum != CH_SUM) + abort (); + return 0; + } + + int main (void) + { + check_vect (); + + return main1 (); + } + + /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target powerpc*-*-* } } } */