This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Re: Vectorizer question: DIV to RSHIFT conversion


Hi Jakub,
For 401.bzip2 it looks perfect. This is loop is vectorized:
.L6:
        vmovdqa (%rsi,%rax), %ymm0
        addl    $1, %ecx
        vpsrad  $8, %ymm0, %ymm0
        vpsrld  $31, %ymm0, %ymm1
        vpaddd  %ymm1, %ymm0, %ymm0
        vpsrad  $1, %ymm0, %ymm0
        vpaddd  %ymm2, %ymm0, %ymm0
        vpslld  $8, %ymm0, %ymm0
        vmovdqa %ymm0, (%rsi,%rax)
        addq    $32, %rax
        cmpl    %r8d, %ecx
        jne     .L3

Unfortunatelly, I have no perf. simulator of AVX2. But I believe it
will benefit since, in absence of that optimization we got (same
options):
.L3:
        movl    (%rax), %edx
        sarl    $8, %edx
        movl    %edx, %ecx
        shrl    $31, %ecx
        addl    %ecx, %edx
        sarl    %edx
        addl    $1, %edx
        sall    $8, %edx
        movl    %edx, (%rax)
        addq    $4, %rax
        cmpq    %rsi, %rax
        jne     .L3

Thanks, K

On Wed, Dec 14, 2011 at 4:25 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Dec 13, 2011 at 05:57:40PM +0400, Kirill Yukhin wrote:
>> > Let me hack up a quick pattern recognizer for this...
>
> Here it is, untested so far.
> On the testcase doing 2000000 f1+f2+f3+f4 calls in the loop with -O3 -mavx
> on Sandybridge (so, vectorized just with 16 byte vectors) gives:
> vanilla ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 0m34.571s
> the tree-vect* parts of this patch only 0m9.013s
> the whole patch ? ? ? ? ? ? ? ? ? ? ? ? 0m8.824s
> The i386 parts are just a small optimization, I guess it could be
> done in the vectorizer too (but then we'd have to check whether the
> arithmetic/logical right shifts are supported and check costs?), or
> perhaps in the generic vcond expander (again, we'd need to check some
> costs).
>
> Can you see if it gives a measurable speedup on SPEC bzip2?
>
> 2011-12-14 ?Jakub Jelinek ?<jakub@redhat.com>
>
> ? ? ? ?* tree-vectorizer.h (NUM_PATTERNS): Bump to 10.
> ? ? ? ?* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern): New
> ? ? ? ?function.
> ? ? ? ?(vect_vect_recog_func_ptrs): Add it.
>
> ? ? ? ?* config/i386/sse.md (vcond<V_256:mode><VI_256:mode>,
> ? ? ? ?vcond<V_128:mode><VI124_128:mode>, vcond<VI8F_128:mode>v2di):
> ? ? ? ?Use general_operand instead of nonimmediate_operand for
> ? ? ? ?operand 5 and no predicate for operands 1 and 2.
> ? ? ? ?* config/i386/i386.c (ix86_expand_int_vcond): Optimize
> ? ? ? ?x < 0 ? -1 : 0 and x < 0 ? 1 : 0 into vector arithmetic
> ? ? ? ?resp. logical shift.
>
> ? ? ? ?* gcc.dg/vect/vect-sdivmod-1.c: New test.
>
> --- gcc/tree-vectorizer.h.jj ? ?2011-12-14 08:11:03.592010101 +0100
> +++ gcc/tree-vectorizer.h ? ? ? 2011-12-14 08:28:57.006693371 +0100
> @@ -929,7 +929,7 @@ extern void vect_slp_transform_bb (basic
> ? ?Additional pattern recognition functions can (and will) be added
> ? ?in the future. ?*/
> ?typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
> -#define NUM_PATTERNS 9
> +#define NUM_PATTERNS 10
> ?void vect_pattern_recog (loop_vec_info);
>
> ?/* In tree-vectorizer.c. ?*/
> --- gcc/tree-vect-patterns.c.jj 2011-12-02 01:52:27.918883940 +0100
> +++ gcc/tree-vect-patterns.c ? ?2011-12-14 11:50:10.439763740 +0100
> @@ -53,6 +53,8 @@ static gimple vect_recog_widen_shift_pat
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *, tree *);
> ?static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *, tree *);
> +static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *, tree *);
> ?static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *, tree *);
> ?static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
> @@ -64,6 +66,7 @@ static vect_recog_func_ptr vect_vect_rec
> ? ? ? ?vect_recog_over_widening_pattern,
> ? ? ? ?vect_recog_widen_shift_pattern,
> ? ? ? ?vect_recog_vector_vector_shift_pattern,
> + ? ? ? vect_recog_sdivmod_pow2_pattern,
> ? ? ? ?vect_recog_mixed_size_cond_pattern,
> ? ? ? ?vect_recog_bool_pattern};
>
> @@ -1573,6 +1576,211 @@ vect_recog_vector_vector_shift_pattern (
> ? return pattern_stmt;
> ?}
>
> +/* Detect a signed division by power of two constant that wouldn't be
> + ? otherwise vectorized:
> +
> + ? type a_t, b_t;
> +
> + ? S1 a_t = b_t / N;
> +
> + ?where type 'type' is a signed integral type and N is a constant positive
> + ?power of two.
> +
> + ?Similarly handle signed modulo by power of two constant:
> +
> + ? S4 a_t = b_t % N;
> +
> + ?Input/Output:
> +
> + ?* STMTS: Contains a stmt from which the pattern search begins,
> + ? ?i.e. the division stmt. ?S1 is replaced by:
> + ?S3 ?y_t = b_t < 0 ? N - 1 : 0;
> + ?S2 ?x_t = b_t + y_t;
> + ?S1' a_t = x_t >> log2 (N);
> +
> + ? ?S4 is replaced by (where *_T temporaries have unsigned type):
> + ?S9 ?y_T = b_t < 0 ? -1U : 0U;
> + ?S8 ?z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
> + ?S7 ?z_t = (type) z_T;
> + ?S6 ?w_t = b_t + z_t;
> + ?S5 ?x_t = w_t & (N - 1);
> + ?S4' a_t = x_t - z_t;
> +
> + ?Output:
> +
> + ?* TYPE_IN: The type of the input arguments to the pattern.
> +
> + ?* TYPE_OUT: The type of the output of this pattern.
> +
> + ?* Return value: A new stmt that will be used to replace the division
> + ? ?S1 or modulo S4 stmt. ?*/
> +
> +static gimple
> +vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree *type_in, tree *type_out)
> +{
> + ?gimple last_stmt = VEC_pop (gimple, *stmts);
> + ?gimple_stmt_iterator gsi;
> + ?tree oprnd0, oprnd1, vectype, itype, cond;
> + ?gimple pattern_stmt, def_stmt;
> + ?enum tree_code rhs_code;
> + ?stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> + ?loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
> + ?optab optab;
> +
> + ?if (!is_gimple_assign (last_stmt))
> + ? ?return NULL;
> +
> + ?rhs_code = gimple_assign_rhs_code (last_stmt);
> + ?switch (rhs_code)
> + ? ?{
> + ? ?case TRUNC_DIV_EXPR:
> + ? ?case TRUNC_MOD_EXPR:
> + ? ? ?break;
> + ? ?default:
> + ? ? ?return NULL;
> + ? ?}
> +
> + ?if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
> + ? ?return NULL;
> +
> + ?oprnd0 = gimple_assign_rhs1 (last_stmt);
> + ?oprnd1 = gimple_assign_rhs2 (last_stmt);
> + ?itype = TREE_TYPE (oprnd0);
> + ?if (TREE_CODE (oprnd0) != SSA_NAME
> + ? ? ?|| TREE_CODE (oprnd1) != INTEGER_CST
> + ? ? ?|| TREE_CODE (itype) != INTEGER_TYPE
> + ? ? ?|| TYPE_UNSIGNED (itype)
> + ? ? ?|| TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
> + ? ? ?|| !integer_pow2p (oprnd1)
> + ? ? ?|| tree_int_cst_sgn (oprnd1) != 1)
> + ? ?return NULL;
> +
> + ?vectype = get_vectype_for_scalar_type (itype);
> + ?if (vectype == NULL_TREE)
> + ? ?return NULL;
> +
> + ?/* If the target can handle vectorized division or modulo natively,
> + ? ? don't attempt to optimize this. ?*/
> + ?optab = optab_for_tree_code (rhs_code, vectype, optab_default);
> + ?if (optab != NULL)
> + ? ?{
> + ? ? ?enum machine_mode vec_mode = TYPE_MODE (vectype);
> + ? ? ?int icode = (int) optab_handler (optab, vec_mode);
> + ? ? ?if (icode != CODE_FOR_nothing
> + ? ? ? ? || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
> + ? ? ? return NULL;
> + ? ?}
> +
> + ?/* Pattern detected. ?*/
> + ?if (vect_print_dump_info (REPORT_DETAILS))
> + ? ?fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
> +
> + ?cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
> + ?gsi = gsi_for_stmt (last_stmt);
> + ?if (rhs_code == TRUNC_DIV_EXPR)
> + ? ?{
> + ? ? ?tree var = vect_recog_temp_ssa_var (itype, NULL);
> + ? ? ?def_stmt
> + ? ? ? = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?fold_build2 (MINUS_EXPR, itype,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? oprnd1,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? build_int_cst (itype,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?1)),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?build_int_cst (itype, 0));
> + ? ? ?gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + ? ? ?set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt, loop_vinfo,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NULL));
> + ? ? ?var = vect_recog_temp_ssa_var (itype, NULL);
> + ? ? ?def_stmt
> + ? ? ? = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? gimple_assign_lhs (def_stmt));
> + ? ? ?STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
> +
> + ? ? ?pattern_stmt
> + ? ? ? = gimple_build_assign_with_ops (RSHIFT_EXPR,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vect_recog_temp_ssa_var (itype, NULL),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? var,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? build_int_cst (itype,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?tree_log2 (oprnd1)));
> + ? ?}
> + ?else
> + ? ?{
> + ? ? ?tree signmask;
> + ? ? ?tree utype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
> + ? ? ?tree shift = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?- tree_log2 (oprnd1));
> + ? ? ?if (compare_tree_int (oprnd1, 2) == 0)
> + ? ? ? {
> + ? ? ? ? signmask = vect_recog_temp_ssa_var (itype, NULL);
> + ? ? ? ? def_stmt
> + ? ? ? ? ? = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?build_int_cst (itype, 1),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?build_int_cst (itype, 0));
> + ? ? ? ? gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + ? ? ? ? set_vinfo_for_stmt (def_stmt,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + ? ? ? }
> + ? ? ?else
> + ? ? ? {
> + ? ? ? ? tree var = vect_recog_temp_ssa_var (utype, NULL);
> + ? ? ? ? def_stmt
> + ? ? ? ? ? = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?build_int_cst (utype, -1),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?build_int_cst (utype, 0));
> + ? ? ? ? gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + ? ? ? ? set_vinfo_for_stmt (def_stmt,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + ? ? ? ? var = vect_recog_temp_ssa_var (utype, NULL);
> + ? ? ? ? def_stmt
> + ? ? ? ? ? = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? gimple_assign_lhs (def_stmt),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? shift);
> + ? ? ? ? gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + ? ? ? ? set_vinfo_for_stmt (def_stmt,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + ? ? ? ? signmask = vect_recog_temp_ssa_var (itype, NULL);
> + ? ? ? ? def_stmt
> + ? ? ? ? ? = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NULL_TREE);
> + ? ? ? ? gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + ? ? ? ? set_vinfo_for_stmt (def_stmt,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
> + ? ? ? }
> + ? ? ?def_stmt
> + ? ? ? = gimple_build_assign_with_ops (PLUS_EXPR,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vect_recog_temp_ssa_var (itype, NULL),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? oprnd0, signmask);
> + ? ? ?gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);
> + ? ? ?set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt, loop_vinfo,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?NULL));
> + ? ? ?def_stmt
> + ? ? ? = gimple_build_assign_with_ops (BIT_AND_EXPR,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vect_recog_temp_ssa_var (itype, NULL),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? gimple_assign_lhs (def_stmt),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fold_build2 (MINUS_EXPR, itype,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?oprnd1,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?build_int_cst (itype,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 1)));
> + ? ? ?STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt;
> +
> + ? ? ?pattern_stmt
> + ? ? ? = gimple_build_assign_with_ops (MINUS_EXPR,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? vect_recog_temp_ssa_var (itype, NULL),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? gimple_assign_lhs (def_stmt),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? signmask);
> + ? ?}
> +
> + ?if (vect_print_dump_info (REPORT_DETAILS))
> + ? ?print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
> +
> + ?VEC_safe_push (gimple, heap, *stmts, last_stmt);
> +
> + ?*type_in = vectype;
> + ?*type_out = vectype;
> + ?return pattern_stmt;
> +}
> +
> ?/* Function vect_recog_mixed_size_cond_pattern
>
> ? ?Try to find the following pattern:
> --- gcc/config/i386/sse.md.jj ? 2011-12-03 12:22:33.000000000 +0100
> +++ gcc/config/i386/sse.md ? ? ?2011-12-14 13:00:47.695788256 +0100
> @@ -6340,9 +6340,9 @@ (define_expand "vcond<V_256:mode><VI_256
> ? ? ? ?(if_then_else:V_256
> ? ? ? ? ?(match_operator 3 ""
> ? ? ? ? ? ?[(match_operand:VI_256 4 "nonimmediate_operand" "")
> - ? ? ? ? ? ?(match_operand:VI_256 5 "nonimmediate_operand" "")])
> - ? ? ? ? (match_operand:V_256 1 "general_operand" "")
> - ? ? ? ? (match_operand:V_256 2 "general_operand" "")))]
> + ? ? ? ? ? ?(match_operand:VI_256 5 "general_operand" "")])
> + ? ? ? ? (match_operand:V_256 1 "" "")
> + ? ? ? ? (match_operand:V_256 2 "" "")))]
> ? "TARGET_AVX2
> ? ?&& (GET_MODE_NUNITS (<V_256:MODE>mode)
> ? ? ? ?== GET_MODE_NUNITS (<VI_256:MODE>mode))"
> @@ -6357,9 +6357,9 @@ (define_expand "vcond<V_128:mode><VI124_
> ? ? ? ?(if_then_else:V_128
> ? ? ? ? ?(match_operator 3 ""
> ? ? ? ? ? ?[(match_operand:VI124_128 4 "nonimmediate_operand" "")
> - ? ? ? ? ? ?(match_operand:VI124_128 5 "nonimmediate_operand" "")])
> - ? ? ? ? (match_operand:V_128 1 "general_operand" "")
> - ? ? ? ? (match_operand:V_128 2 "general_operand" "")))]
> + ? ? ? ? ? ?(match_operand:VI124_128 5 "general_operand" "")])
> + ? ? ? ? (match_operand:V_128 1 "" "")
> + ? ? ? ? (match_operand:V_128 2 "" "")))]
> ? "TARGET_SSE2
> ? ?&& (GET_MODE_NUNITS (<V_128:MODE>mode)
> ? ? ? ?== GET_MODE_NUNITS (<VI124_128:MODE>mode))"
> @@ -6374,9 +6374,9 @@ (define_expand "vcond<VI8F_128:mode>v2di
> ? ? ? ?(if_then_else:VI8F_128
> ? ? ? ? ?(match_operator 3 ""
> ? ? ? ? ? ?[(match_operand:V2DI 4 "nonimmediate_operand" "")
> - ? ? ? ? ? ?(match_operand:V2DI 5 "nonimmediate_operand" "")])
> - ? ? ? ? (match_operand:VI8F_128 1 "general_operand" "")
> - ? ? ? ? (match_operand:VI8F_128 2 "general_operand" "")))]
> + ? ? ? ? ? ?(match_operand:V2DI 5 "general_operand" "")])
> + ? ? ? ? (match_operand:VI8F_128 1 "" "")
> + ? ? ? ? (match_operand:VI8F_128 2 "" "")))]
> ? "TARGET_SSE4_2"
> ?{
> ? bool ok = ix86_expand_int_vcond (operands);
> --- gcc/config/i386/i386.c.jj ? 2011-12-14 08:11:01.000000000 +0100
> +++ gcc/config/i386/i386.c ? ? ?2011-12-14 13:01:30.207538465 +0100
> @@ -19434,6 +19434,45 @@ ix86_expand_int_vcond (rtx operands[])
> ? cop0 = operands[4];
> ? cop1 = operands[5];
>
> + ?/* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
> + ? ? and x < 0 ? 1 : 0 into (unsigned) x >> 31. ?*/
> + ?if ((code == LT || code == GE)
> + ? ? ?&& data_mode == mode
> + ? ? ?&& cop1 == CONST0_RTX (mode)
> + ? ? ?&& operands[1 + (code == LT)] == CONST0_RTX (data_mode)
> + ? ? ?&& GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
> + ? ? ?&& GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
> + ? ? ?&& (GET_MODE_SIZE (data_mode) == 16
> + ? ? ? ? || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
> + ? ?{
> + ? ? ?rtx negop = operands[2 - (code == LT)];
> + ? ? ?int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
> + ? ? ?if (negop == CONST1_RTX (data_mode))
> + ? ? ? {
> + ? ? ? ? rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?operands[0], 1, OPTAB_DIRECT);
> + ? ? ? ? if (res != operands[0])
> + ? ? ? ? ? emit_move_insn (operands[0], res);
> + ? ? ? ? return true;
> + ? ? ? }
> + ? ? ?else if (GET_MODE_INNER (data_mode) != DImode
> + ? ? ? ? ? ? ?&& vector_all_ones_operand (negop, data_mode))
> + ? ? ? {
> + ? ? ? ? rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?operands[0], 0, OPTAB_DIRECT);
> + ? ? ? ? if (res != operands[0])
> + ? ? ? ? ? emit_move_insn (operands[0], res);
> + ? ? ? ? return true;
> + ? ? ? }
> + ? ?}
> +
> + ?if (!nonimmediate_operand (cop1, mode))
> + ? ?cop1 = force_reg (mode, cop1);
> + ?if (!general_operand (operands[1], data_mode))
> + ? ?operands[1] = force_reg (data_mode, operands[1]);
> + ?if (!general_operand (operands[2], data_mode))
> + ? ?operands[2] = force_reg (data_mode, operands[2]);
> +
> ? /* XOP supports all of the comparisons on all 128-bit vector int types. ?*/
> ? if (TARGET_XOP
> ? ? ? && (mode == V16QImode || mode == V8HImode
> --- gcc/testsuite/gcc.dg/vect/vect-sdivmod-1.c.jj ? ? ? 2011-12-14 13:05:36.240133846 +0100
> +++ gcc/testsuite/gcc.dg/vect/vect-sdivmod-1.c ?2011-12-14 13:08:10.288228745 +0100
> @@ -0,0 +1,98 @@
> +#include "tree-vect.h"
> +
> +extern void abort (void);
> +int a[4096];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (int x)
> +{
> + ?int i, j;
> + ?for (i = 1; i <= x; i++)
> + ? ?{
> + ? ? ?j = a[i] >> 8;
> + ? ? ?j = 1 + (j / 2);
> + ? ? ?a[i] = j << 8;
> + ? ?}
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (int x)
> +{
> + ?int i, j;
> + ?for (i = 1; i <= x; i++)
> + ? ?{
> + ? ? ?j = a[i] >> 8;
> + ? ? ?j = 1 + (j / 16);
> + ? ? ?a[i] = j << 8;
> + ? ?}
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (int x)
> +{
> + ?int i, j;
> + ?for (i = 1; i <= x; i++)
> + ? ?{
> + ? ? ?j = a[i] >> 8;
> + ? ? ?j = 1 + (j % 2);
> + ? ? ?a[i] = j << 8;
> + ? ?}
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (int x)
> +{
> + ?int i, j;
> + ?for (i = 1; i <= x; i++)
> + ? ?{
> + ? ? ?j = a[i] >> 8;
> + ? ? ?j = 1 + (j % 16);
> + ? ? ?a[i] = j << 8;
> + ? ?}
> +}
> +
> +int
> +main ()
> +{
> + ?int i;
> + ?check_vect ();
> + ?for (i = 0; i < 4096; i++)
> + ? ?{
> + ? ? ?asm ("");
> + ? ? ?a[i] = (i - 2048) << 8;
> + ? ?}
> + ?f1 (4095);
> + ?if (a[0] != (-2048 << 8))
> + ? ?abort ();
> + ?for (i = 1; i < 4096; i++)
> + ? ?if (a[i] != ((1 + ((i - 2048) / 2)) << 8))
> + ? ? ?abort ();
> + ? ?else
> + ? ? ?a[i] = (i - 2048) << 8;
> + ?f2 (4095);
> + ?if (a[0] != (-2048 << 8))
> + ? ?abort ();
> + ?for (i = 1; i < 4096; i++)
> + ? ?if (a[i] != ((1 + ((i - 2048) / 16)) << 8))
> + ? ? ?abort ();
> + ? ?else
> + ? ? ?a[i] = (i - 2048) << 8;
> + ?f3 (4095);
> + ?if (a[0] != (-2048 << 8))
> + ? ?abort ();
> + ?for (i = 1; i < 4096; i++)
> + ? ?if (a[i] != ((1 + ((i - 2048) % 2)) << 8))
> + ? ? ?abort ();
> + ? ?else
> + ? ? ?a[i] = (i - 2048) << 8;
> + ?f4 (4095);
> + ?if (a[0] != (-2048 << 8))
> + ? ?abort ();
> + ?for (i = 1; i < 4096; i++)
> + ? ?if (a[i] != ((1 + ((i - 2048) % 16)) << 8))
> + ? ? ?abort ();
> + ?return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target vect_condition } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>
> ? ? ? ?Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]