This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion
- From: Ira Rosen <IRAR at il dot ibm dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Kirill Yukhin <kirill dot yukhin at gmail dot com>, Richard Guenther <rguenther at suse dot de>
- Date: Thu, 15 Dec 2011 08:32:26 +0200
- Subject: Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion
- References: <CAGs3RfvOFgfVQ=PkYM+CtsgBL99k_gF_R-Yet3oFLHU7b-k5jQ@mail.gmail.com> <alpine.LNX.2.00.1112131406430.4527@zhemvz.fhfr.qr> <20111213132128.GZ1957@tyan-ft48-01.lab.bos.redhat.com> <CAGs3Rfucj9C7DqcjjJOzor0X=Yf_DT1av1T6cZdfZkmOsS+VNw@mail.gmail.com> <20111213134741.GA1957@tyan-ft48-01.lab.bos.redhat.com> <CAGs3RfvkZW+k-NmqHkNx7V42zOV4SCL-OXVLUr7M-iHs_HVAYA@mail.gmail.com> <20111214122513.GD1957@tyan-ft48-01.lab.bos.redhat.com>
Jakub Jelinek <jakub@redhat.com> wrote on 14/12/2011 02:25:13 PM:
>
> @@ -1573,6 +1576,211 @@ vect_recog_vector_vector_shift_pattern (
> return pattern_stmt;
> }
>
> +/* Detect a signed division by power of two constant that wouldn't be
> + otherwise vectorized:
> +
> + type a_t, b_t;
> +
> + S1 a_t = b_t / N;
> +
> + where type 'type' is a signed integral type and N is a constant positive
> + power of two.
> +
> + Similarly handle signed modulo by power of two constant:
> +
> + S4 a_t = b_t % N;
> +
> + Input/Output:
> +
> + * STMTS: Contains a stmt from which the pattern search begins,
> + i.e. the division stmt. S1 is replaced by:
> + S3 y_t = b_t < 0 ? N - 1 : 0;
> + S2 x_t = b_t + y_t;
> + S1' a_t = x_t >> log2 (N);
> +
> + S4 is replaced by (where *_T temporaries have unsigned type):
> + S9 y_T = b_t < 0 ? -1U : 0U;
> + S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
> + S7 z_t = (type) z_T;
> + S6 w_t = b_t + z_t;
> + S5 x_t = w_t & (N - 1);
> + S4' a_t = x_t - z_t;
> +
> + Output:
> +
> + * TYPE_IN: The type of the input arguments to the pattern.
> +
> + * TYPE_OUT: The type of the output of this pattern.
> +
> + * Return value: A new stmt that will be used to replace the division
> + S1 or modulo S4 stmt. */
> +
> +static gimple
> +vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
> + tree *type_in, tree *type_out)
> +{
> + gimple last_stmt = VEC_pop (gimple, *stmts);
> + gimple_stmt_iterator gsi;
> + tree oprnd0, oprnd1, vectype, itype, cond;
> + gimple pattern_stmt, def_stmt;
> + enum tree_code rhs_code;
> + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
> + optab optab;
> +
> + if (!is_gimple_assign (last_stmt))
> + return NULL;
> +
> + rhs_code = gimple_assign_rhs_code (last_stmt);
> + switch (rhs_code)
> + {
> + case TRUNC_DIV_EXPR:
> + case TRUNC_MOD_EXPR:
> + break;
> + default:
> + return NULL;
> + }
> +
> + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
> + return NULL;
> +
> + oprnd0 = gimple_assign_rhs1 (last_stmt);
> + oprnd1 = gimple_assign_rhs2 (last_stmt);
> + itype = TREE_TYPE (oprnd0);
> + if (TREE_CODE (oprnd0) != SSA_NAME
> + || TREE_CODE (oprnd1) != INTEGER_CST
> + || TREE_CODE (itype) != INTEGER_TYPE
> + || TYPE_UNSIGNED (itype)
> + || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
> + || !integer_pow2p (oprnd1)
> + || tree_int_cst_sgn (oprnd1) != 1)
> + return NULL;
> +
> + vectype = get_vectype_for_scalar_type (itype);
> + if (vectype == NULL_TREE)
> + return NULL;
> +
> + /* If the target can handle vectorized division or modulo natively,
> + don't attempt to optimize this. */
> + optab = optab_for_tree_code (rhs_code, vectype, optab_default);
> + if (optab != NULL)
> + {
> + enum machine_mode vec_mode = TYPE_MODE (vectype);
> + int icode = (int) optab_handler (optab, vec_mode);
> + if (icode != CODE_FOR_nothing
> + || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
> + return NULL;
> + }
> +
> + /* Pattern detected. */
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
> +
> + cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
> + gsi = gsi_for_stmt (last_stmt);
> + if (rhs_code == TRUNC_DIV_EXPR)
> + {
> + tree var = vect_recog_temp_ssa_var (itype, NULL);
> + def_stmt
> + = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
> + fold_build2 (MINUS_EXPR, itype,
> + oprnd1,
> + build_int_cst (itype,
> + 1)),
> + build_int_cst (itype, 0));
> + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
Hmm, you are inserting pattern stmts. This was causing some mess in the
past as explained here
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00801.html. Maybe you can use
STMT_VINFO_PATTERN_DEF_STMT to keep a chain of def_stmts?
Ira
> + set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + var = vect_recog_temp_ssa_var (itype, NULL);
> + def_stmt
> + = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
> + gimple_assign_lhs (def_stmt));
> + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
> +
> + pattern_stmt
> + = gimple_build_assign_with_ops (RSHIFT_EXPR,
> + vect_recog_temp_ssa_var (itype, NULL),
> + var,
> + build_int_cst (itype,
> + tree_log2 (oprnd1)));
> + }
> + else
> + {
> + tree signmask;
> + tree utype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
> + tree shift = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
> + - tree_log2 (oprnd1));
> + if (compare_tree_int (oprnd1, 2) == 0)
> + {
> + signmask = vect_recog_temp_ssa_var (itype, NULL);
> + def_stmt
> + = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
> + build_int_cst (itype, 1),
> + build_int_cst (itype, 0));
> + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + set_vinfo_for_stmt (def_stmt,
> + new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + }
> + else
> + {
> + tree var = vect_recog_temp_ssa_var (utype, NULL);
> + def_stmt
> + = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
> + build_int_cst (utype, -1),
> + build_int_cst (utype, 0));
> + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + set_vinfo_for_stmt (def_stmt,
> + new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + var = vect_recog_temp_ssa_var (utype, NULL);
> + def_stmt
> + = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
> + gimple_assign_lhs (def_stmt),
> + shift);
> + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + set_vinfo_for_stmt (def_stmt,
> + new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + signmask = vect_recog_temp_ssa_var (itype, NULL);
> + def_stmt
> + = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
> + NULL_TREE);
> + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + set_vinfo_for_stmt (def_stmt,
> + new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + }
> + def_stmt
> + = gimple_build_assign_with_ops (PLUS_EXPR,
> + vect_recog_temp_ssa_var (itype, NULL),
> + oprnd0, signmask);
> + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + def_stmt
> + = gimple_build_assign_with_ops (BIT_AND_EXPR,
> + vect_recog_temp_ssa_var (itype, NULL),
> + gimple_assign_lhs (def_stmt),
> + fold_build2 (MINUS_EXPR, itype,
> + oprnd1,
> + build_int_cst (itype,
> + 1)));
> + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
> +
> + pattern_stmt
> + = gimple_build_assign_with_ops (MINUS_EXPR,
> + vect_recog_temp_ssa_var (itype, NULL),
> + gimple_assign_lhs (def_stmt),
> + signmask);
> + }
> +
> + if (vect_print_dump_info (REPORT_DETAILS))
> + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
> +
> + VEC_safe_push (gimple, heap, *stmts, last_stmt);
> +
> + *type_in = vectype;
> + *type_out = vectype;
> + return pattern_stmt;
> +}
> +
> /* Function vect_recog_mixed_size_cond_pattern
>
> Try to find the following pattern: