Index: optabs.c =================================================================== --- optabs.c (revision 108944) +++ optabs.c (working copy) @@ -390,20 +390,20 @@ There are two different classes of operations handled here: 1) Operations whose result is wider than all the arguments to the operation. Examples: VEC_UNPACK_HI/LO_EXPR, VEC_WIDEN_MULT_HI/LO_EXPR - In this case OP0 and optionally OP1 will be initialized, + In this case OP0 and optionally OP1 would be initialized, but WIDE_OP wouldn't (not relevant for this case). - 2) Operations whose result is of the same size as the lase argument to the + 2) Operations whose result is of the same size as the last argument to the operations, but wider than all the other arguments to the operation. Examples: WIDEN_SUM_EXPR, VEC_DOT_PROD_EXPR. - In the case WIDE_OP, OP0 and optionally OP1 will be initialized. + In the case WIDE_OP, OP0 and optionally OP1 would be initialized. - E.g, when called to expand the following operations, thes is how + E.g, when called to expand the following operations, this is how the arguments will be initialized: nops OP0 OP1 WIDE_OP widening-sum 2 oprnd0 - oprnd1 widening-dot-product 3 oprnd0 oprnd1 oprnd2 widening-mult 2 oprnd0 oprnd1 - - type-promotion (vec-unpack) 1 oprmd0 - - */ + type-promotion (vec-unpack) 1 oprnd0 - - */ rtx expand_widen_pattern_expr (tree exp, rtx op0, rtx op1, rtx wide_op, rtx target, @@ -434,8 +434,7 @@ xmode1 = insn_data[icode].operand[2].mode; } - /* The last operand is of a wider mode than the rest - of the operands. */ + /* The last operand is of a wider mode than the rest of the operands. */ if (nops == 2) { wmode = tmode1; @@ -485,14 +484,12 @@ xop1, unsignedp); if (wide_op) - { - if (GET_MODE (wide_op) != wxmode && wxmode != VOIDmode) - wxop = convert_modes (wxmode, - GET_MODE (wide_op) != VOIDmode - ? 
GET_MODE (wide_op) - : wmode, - wxop, unsignedp); - } + if (GET_MODE (wide_op) != wxmode && wxmode != VOIDmode) + wxop = convert_modes (wxmode, + GET_MODE (wide_op) != VOIDmode + ? GET_MODE (wide_op) + : wmode, + wxop, unsignedp); /* Now, if insn's predicates don't allow our operands, put them into pseudo regs. */ Index: tree-vect-patterns.c =================================================================== --- tree-vect-patterns.c (revision 109037) +++ tree-vect-patterns.c (working copy) @@ -425,7 +425,7 @@ * Return value: A new stmt that will be used to replace the sequence of stmts that constitute the pattern. In this case it will be: - WIDEN_DOT_PRODUCT + DOT_PRODUCT */ tree @@ -477,7 +477,6 @@ /* Has been detected as widening-summation? */ stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); - gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); expr = TREE_OPERAND (stmt, 1); type = TREE_TYPE (expr); if (TREE_CODE (expr) != WIDEN_SUM_EXPR) @@ -513,14 +512,13 @@ we know that oprnd1 is the reduction variable (defined by a loop-header phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. Left to check that oprnd0 is defined by a (widen_)mult_expr */ - prod_type = type; + prod_type = half_type; stmt = SSA_NAME_DEF_STMT (oprnd0); gcc_assert (stmt); stmt_vinfo = vinfo_for_stmt (stmt); gcc_assert (stmt_vinfo); gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_loop_def); - gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); expr = TREE_OPERAND (stmt, 1); if (TREE_CODE (expr) != MULT_EXPR) return NULL; @@ -529,7 +527,6 @@ /* Has been detected as a widening multiplication? 
*/ stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); - gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); expr = TREE_OPERAND (stmt, 1); if (TREE_CODE (expr) != WIDEN_MULT_EXPR) return NULL; @@ -543,11 +540,14 @@ { tree half_type0, half_type1; tree def_stmt; + tree oprnd0, oprnd1; oprnd0 = TREE_OPERAND (expr, 0); oprnd1 = TREE_OPERAND (expr, 1); - if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type) - || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type)) + if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) + != TYPE_MAIN_VARIANT (prod_type) + || TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) + != TYPE_MAIN_VARIANT (prod_type)) return NULL; if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt)) return NULL; @@ -557,6 +557,8 @@ oprnd01 = TREE_OPERAND (TREE_OPERAND (def_stmt, 1), 0); if (TYPE_MAIN_VARIANT (half_type0) != TYPE_MAIN_VARIANT (half_type1)) return NULL; + if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) + return NULL; } half_type = TREE_TYPE (oprnd00); Index: tree-vect-transform.c =================================================================== --- tree-vect-transform.c (revision 108944) +++ tree-vect-transform.c (working copy) @@ -59,8 +59,7 @@ (tree stmt, tree vec_stmt, block_stmt_iterator *bsi); static void update_vuses_to_preheader (tree, struct loop*); static bool vect_is_simple_cond (tree, loop_vec_info); -static void vect_create_epilog_for_reduction - (tree, tree, tree, enum tree_code, tree); +static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree); static tree get_initial_def_for_reduction (tree, tree, tree *); static bool vect_permute_store_chain (VEC(tree,heap) *, unsigned int, tree, block_stmt_iterator *, VEC(tree,heap) **); @@ -821,61 +820,61 @@ } -/* Function vect_create_epilog_for_reduction: +/* Function vect_create_epilog_for_reduction Create code at the loop-epilog to finalize the result of a reduction - computation. + computation. 
- LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it - into a single result, by applying the operation REDUC_CODE on the - partial-results-vector. For this, we need to create a new phi node at the - loop exit to preserve loop-closed form, as illustrated below. - - STMT is the original scalar reduction stmt that is being vectorized. - REDUCTION_OP is the scalar reduction-variable. + VECT_DEF is a vector of partial results. + REDUC_CODE is the tree-code for the epilog reduction. + STMT is the scalar reduction stmt that is being vectorized. REDUCTION_PHI is the phi-node that carries the reduction computation. - This function also sets the arguments for the REDUCTION_PHI: - The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP. - The loop-latch argument is VECT_DEF - the vector of partial sums. - This function transforms this: + This function: + 1. Creates the reduction def-use cycle: sets the arguments for + REDUCTION_PHI: + The loop-entry argument is the vectorized initial-value of the reduction. + The loop-latch argument is VECT_DEF - the vector of partial sums. + 2. "Reduces" the vector of partial results VECT_DEF into a single result, + by applying the operation specified by REDUC_CODE if available, or by + other means (whole-vector shifts or a scalar loop). + The function also creates a new phi node at the loop exit to preserve + loop-closed form, as illustrated below. + + The flow at the entry to this function: loop: - vec_def = phi # REDUCTION_PHI - .... - VECT_DEF = ... 
- + vec_def = phi # REDUCTION_PHI + VECT_DEF = vector_stmt # vectorized form of STMT + s_loop = scalar_stmt # (scalar) STMT loop_exit: - s_out0 = phi # EXIT_PHI - v_out1 = phi # NEW_EXIT_PHI - - v_out2 = reduc_expr + s_out0 = phi # (scalar) EXIT_PHI + v_out1 = phi # NEW_EXIT_PHI + v_out2 = reduce s_out3 = extract_field - - use - use + s_out4 = adjust_result + use + use */ static void -vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op, - enum tree_code reduc_code, tree reduction_phi) +vect_create_epilog_for_reduction (tree vect_def, tree stmt, + enum tree_code reduc_code, tree reduction_phi) { stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); - enum machine_mode mode = TYPE_MODE (vectype); + tree vectype; + enum machine_mode mode; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); basic_block exit_bb; @@ -896,25 +895,17 @@ imm_use_iterator imm_iter; use_operand_p use_p; bool extract_scalar_result; + tree reduction_op; tree orig_stmt; + tree operation = TREE_OPERAND (stmt, 1); + int op_type; - /* Get the relevant tree-code to use: - 1) in the epilog, if reduc_code is not avalilable. - 2) in the final adjusment code. - In case STMT represents a reduction pattern, the tree-code is taken from - the original stmt that STMT replaces. - Otherwise, it is a regular reduction, take the tree-code from STMT. */ - - orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); - gcc_assert (! STMT_VINFO_IN_PATTERN_P (stmt_info)); - if (!orig_stmt) - orig_stmt = stmt; - code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); - scalar_dest = TREE_OPERAND (orig_stmt, 0); - scalar_type = TREE_TYPE (scalar_dest); + op_type = TREE_CODE_LENGTH (TREE_CODE (operation)); + reduction_op = TREE_OPERAND (operation, op_type-1); + vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op)); + mode = TYPE_MODE (vectype); - - /*** 1. 
Create the reduction def-use cycle ***/ + /*** 1. Create the reduction def-use cycle. ***/ /* 1.1 set the loop-entry arg of the reduction-phi: */ /* For the case of reduction, vect_get_vec_def_for_operand returns @@ -924,7 +915,6 @@ &scalar_initial_def); add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop)); - /* 1.2 set the loop-latch arg for the reduction-phi: */ add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop)); @@ -937,23 +927,72 @@ } - /*** 2. Create epilog code ***/ + /*** 2. Create the reduction epilog code. + The reduction epilog code operates across the elements of the vector + of partial results computed by the vectorized loop. + The reduction epilog code consists of: + step 1: compute the scalar result in a vector (v_out2) + step 2: extract the scalar result (s_out3) from the vector (v_out2) + step 3: adjust the scalar result (s_out3) if needed. + Step 1 can be accomplished using one of the following three schemes: + (scheme 1) using reduc_code, if available. + (scheme 2) using whole-vector shifts, if available. + (scheme 3) using a scalar loop. In this case steps 1+2 above are + combined. + + The overall epilog code looks like this: + + s_out0 = phi # original EXIT_PHI + v_out1 = phi # NEW_EXIT_PHI + v_out2 = reduce # step 1 + s_out3 = extract_field # step 2 + s_out4 = adjust_result # step 3 + + (step 3 is optional, and steps 1 and 2 may be combined). + Lastly, the uses of s_out0 are replaced by s_out4. + + ***/ + /* 2.1 Create new loop-exit-phi to preserve loop-closed form: v_out1 = phi */ exit_bb = loop->single_exit->dest; new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb); SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def); - exit_bsi = bsi_start (exit_bb); + /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 + (i.e. when reduc_code is not available) and in the final adjustment code + (if needed). Also get the original scalar reduction variable as + defined in the loop. 
In case STMT is a "pattern-stmt" (i.e. - it + represents a reduction pattern), the tree-code and scalar-def are + taken from the original stmt that the pattern-stmt (STMT) replaces. + Otherwise (it is a regular reduction) - the tree-code and scalar-def + are taken from STMT. */ + orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); + if (!orig_stmt) + { + /* Regular reduction */ + orig_stmt = stmt; + } + else + { + /* Reduction pattern */ + stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt); + gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)); + gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); + } + code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); + scalar_dest = TREE_OPERAND (orig_stmt, 0); + scalar_type = TREE_TYPE (scalar_dest); new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); bitsize = TYPE_SIZE (scalar_type); bytesize = TYPE_SIZE_UNIT (scalar_type); - /* 2.2 Create the reduction code. */ + /* 2.3 Create the reduction code, using one of the three schemes described + above. */ if (reduc_code < NUM_TREE_CODES) { @@ -981,10 +1020,6 @@ int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); tree vec_temp; - /* The result of the reduction is expected to be at the least - significant bits of the vector. This is merely convention, - as it's the extraction later that really matters, and that - is also under our control. 
*/ if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing) shift_code = VEC_RSHIFT_EXPR; else @@ -1007,7 +1042,7 @@ if (have_whole_vector_shift) { - /*** Case 2: + /*** Case 2: Create: for (offset = VS/2; offset >= element_size; offset/=2) { Create: va' = vec_shift @@ -1031,17 +1066,12 @@ new_name = make_ssa_name (vec_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_name; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); - epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, build2 (code, vectype, new_name, new_temp)); new_temp = make_ssa_name (vec_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_temp; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); } extract_scalar_result = true; @@ -1050,10 +1080,11 @@ { tree rhs; - /*** Case 3: - Create: + /*** Case 3: Create: s = extract_field - for (offset=element_size; offset Create: s = op @@ -1064,18 +1095,13 @@ vec_temp = PHI_RESULT (new_phi); vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1); - rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, bitsize_zero_node); - BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type); - epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, - rhs); + epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs); new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_temp; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); for (bit_offset = element_bitsize; bit_offset < vec_size_in_bits; @@ -1091,25 +1117,19 @@ new_name = make_ssa_name (new_scalar_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_name; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - 
if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); - epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, build2 (code, scalar_type, new_name, new_temp)); new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_temp; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); } extract_scalar_result = false; } } - - /* 2.3 Extract the final scalar result. Create: + /* 2.4 Extract the final scalar result. Create: s_out3 = extract_field */ if (extract_scalar_result) @@ -1117,9 +1137,8 @@ tree rhs; if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "extract scalar result"); + fprintf (vect_dump, "extract scalar result"); - /* The result is in the low order bits. */ if (BYTES_BIG_ENDIAN) bitpos = size_binop (MULT_EXPR, bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1), @@ -1133,17 +1152,14 @@ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_temp; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); } - - /* 2.4 Adjust the final result by the initial value of the reduction - variable. (when such adjustment is not needed, then + /* 2.5 Adjust the final result by the initial value of the reduction + variable. (When such adjustment is not needed, then 'scalar_initial_def' is zero). 
Create: - s_out = scalar_expr */ + s_out4 = scalar_expr */ if (scalar_initial_def) { @@ -1152,18 +1168,13 @@ new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); TREE_OPERAND (epilog_stmt, 0) = new_temp; bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT); - - if (vect_print_dump_info (REPORT_DETAILS)) - print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM); } - - /* 2.5 Replace uses of s_out0 with uses of s_out3 */ + /* 2.6 Replace uses of s_out0 with uses of s_out4 (or s_out3) */ - /* Find the loop-closed-use at the loop exit of the original - scalar result. (The reduction result is expected to have - two immediate uses - one at the latch block, and one at the - loop exit). */ + /* Find the loop-closed-use at the loop exit of the original scalar result. + (The reduction result is expected to have two immediate uses - one at the + latch block, and one at the loop exit). */ exit_phi = NULL; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) { @@ -1173,12 +1184,10 @@ break; } } - /* We expect to have found an exit_phi because of loop-closed-ssa form. */ gcc_assert (exit_phi); - + /* Replace the uses: */ orig_name = PHI_RESULT (exit_phi); - FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name) SET_USE (use_p, new_temp); } @@ -1200,7 +1209,6 @@ In some cases of reduction patterns, the type of the reduction variable X is different than the type of the other arguments of STMT. - In such cases, the vectype that is used when transforming STMT into a vector stmt is different than the vectype that is used to determine the vectorization factor, because it consists of a different number of elements @@ -1215,15 +1223,15 @@ Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that indicates what is the actual level of parallelism (V8HI in the example), so that the right vectorization factor would be derived. This vectype - corresponds to the type of the reduction arguments. This vectype should - *NOT* be used to create the vectorized stmt. 
The right vectype is obtained - from the type of the result X: get_vectype_for_scalar_type (TREE_TYPE (X)) + corresponds to the type of arguments to the reduction stmt, and should *NOT* + be used to create the vectorized stmt. The right vectype for the vectorized + stmt is obtained from the type of the result X: + get_vectype_for_scalar_type (TREE_TYPE (X)) - This means that, - contrary to "regular" reductions (or "regular" stmts in general), - the following: + This means that, contrary to "regular" reductions (or "regular" stmts in + general), the following equation: STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) - does *NOT* hold for reduction patterns. + does *NOT* necessarily hold for reduction patterns. */ bool @@ -1272,7 +1280,7 @@ if (!STMT_VINFO_LIVE_P (stmt_info)) return false; - /* Make sure it was already recognized as a reduction pattern. */ + /* Make sure it was already recognized as a reduction computation. */ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) return false; @@ -1382,32 +1390,33 @@ stmt from the pattern that STMT is replacing. I.e, in the example above we want to use 'widen_sum' in the loop, but 'plus' in the epilog. - 2. The type (mode) we use to check the relevant optab for support - for the vector operation to be created in the epilog, is + 2. The type (mode) we use to check available target support + for the vector operation to be created in the *epilog*, is determined by the type of the reduction variable (in the example - above: plus_optab[vect_int_mode]). - However the type (mode) we use to check the relevant optab for - support for the vector operation to be created inside the loop, - is determined by the type of the other arguments of STMT (in the - example: widen_sum_optab[vect_short_mode]). + above we'd check this: plus_optab[vect_int_mode]). 
+ However the type (mode) we use to check available target support + for the vector operation to be created *inside the loop*, is + determined by the type of the other arguments to STMT (in the + example we'd check this: widen_sum_optab[vect_short_mode]). - - Regular reductions have the types of all the arguments the same - as the type of the reduction variable. Therefore, we use the same - vector type when generating the epilog code as the vector type we - use when generating the code inside the loop. */ + This is contrary to "regular" reductions, in which the types of all + the arguments are the same as the type of the reduction variable. + For "regular" reductions we can therefore use the same vector type + (and also the same tree-code) when generating the epilog code and + when generating the code inside the loop. */ if (orig_stmt) { - /* This is a reduction pattern. */ - if (vect_print_dump_info (REPORT_DETAILS)) - fprintf (vect_dump, "reduction pattern."); + /* This is a reduction pattern: get the vectype from the type of the + reduction variable, and get the tree-code from orig_stmt. */ orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1)); vectype = get_vectype_for_scalar_type (TREE_TYPE (def)); vec_mode = TYPE_MODE (vectype); } else { - /* Regular reduction. */ + /* Regular reduction: the same vectype and tree-code as used for + the vector code inside the loop can be used for the epilog code. */ orig_code = code; } @@ -1517,9 +1526,7 @@ /* Finalize the reduction-phi (set it's arguments) and create the epilog reduction code. 
*/ - op = TREE_OPERAND (operation, op_type-1); - vect_create_epilog_for_reduction (new_temp, stmt, op, - epilog_reduc_code, new_phi); + vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi); *vec_stmt = NULL; return true; @@ -1653,7 +1660,6 @@ optab optab; int icode; enum machine_mode optab_op2_mode; - tree orig_stmt_in_pattern; tree def, def_stmt; enum vect_def_type dt0, dt1; tree new_stmt; @@ -1895,7 +1901,7 @@ /* Do nothing; can reuse same def. */ ; else { - /* Similarly for opernad1 */ + /* Similarly for operand1 */ vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd1); def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand); gcc_assert (def_stmt_info); @@ -1928,21 +1934,7 @@ prev_stmt_info = vinfo_for_stmt (new_stmt); } - orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); - if (orig_stmt_in_pattern - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (orig_stmt_in_pattern))) - { - /* STMT is a new stmt that was inserted by the vectorizer to replace a - computation idiom. ORIG_STMT_IN_PATTERN is a stmt in the original - sequence that computed this idiom. We need to record a pointer to - VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more detail in - the documentation of vect_pattern_recog. 
- */ - STMT_VINFO_VEC_STMT (vinfo_for_stmt (orig_stmt_in_pattern)) = - STMT_VINFO_VEC_STMT (stmt_info); - } - - *vec_stmt = NULL_TREE; + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); return true; } @@ -2124,7 +2116,7 @@ prev_stmt_info = vinfo_for_stmt (new_stmt); } - *vec_stmt = NULL_TREE; + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); return true; } @@ -2152,7 +2144,6 @@ tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree new_temp; int op_type; - tree orig_stmt_in_pattern = NULL_TREE; tree def, def_stmt; enum vect_def_type dt0, dt1; tree new_stmt; @@ -2386,21 +2377,7 @@ prev_stmt_info = vinfo_for_stmt (new_stmt); } - orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); - if (orig_stmt_in_pattern - && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (orig_stmt_in_pattern))) - { - /* STMT is a new stmt that was inserted by the vectorizer to replace a - computation idiom. ORIG_STMT_IN_PATTERN is a stmt in the original - sequence that computed this idiom. We need to record a pointer to - VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more detail in - the documentation of vect_pattern_recog. - */ - STMT_VINFO_VEC_STMT (vinfo_for_stmt (orig_stmt_in_pattern)) = - STMT_VINFO_VEC_STMT (stmt_info); - } - - *vec_stmt = NULL_TREE; + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); return true; } @@ -4000,8 +3977,10 @@ vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *interleaving) { bool is_store = false; + bool is_load = false; tree vec_stmt = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + tree orig_stmt_in_pattern; bool done; *interleaving = false; @@ -4033,6 +4012,7 @@ case load_vec_info_type: done = vectorizable_load (stmt, bsi, &vec_stmt); gcc_assert (done); + is_load = true; break; case store_vec_info_type: @@ -4058,8 +4038,29 @@ gcc_unreachable (); } + /* If STMT was inserted by the vectorizer to replace a computation idiom + (i.e. 
it is a "pattern stmt"), we need to record a pointer to VEC_STMT + in the stmt_info of ORIG_STMT_IN_PATTERN (the stmt in the original + sequence that computed this idiom). See more detail in the + documentation of vect_pattern_recog. */ + /* CHECKME */ + gcc_assert (vec_stmt || is_load || is_store + || (*interleaving && !DR_GROUP_VECTORIZED (stmt_info))); if (vec_stmt) - STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; + { + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; + orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info); + if (orig_stmt_in_pattern) + { + stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern); + + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) + { + gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); + STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt; + } + } + } } if (*interleaving)