diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index bf033e31c1c..dc2756f83e9 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5637,7 +5637,8 @@ (define_expand "len_load_v16qi" [(match_operand:V16QI 0 "vlogical_operand") (match_operand:V16QI 1 "memory_operand") - (match_operand:QI 2 "gpc_reg_operand")] + (match_operand:QI 2 "gpc_reg_operand") + (match_operand:QI 3 "zero_constant")] "TARGET_P9_VECTOR && TARGET_64BIT" { rtx mem = XEXP (operands[1], 0); @@ -5651,6 +5652,7 @@ [(match_operand:V16QI 0 "memory_operand") (match_operand:V16QI 1 "vlogical_operand") (match_operand:QI 2 "gpc_reg_operand") + (match_operand:QI 3 "zero_constant") ] "TARGET_P9_VECTOR && TARGET_64BIT" { diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 2b41cb7fb7b..8df61f578bd 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5213,7 +5213,10 @@ which must be a vector mode. Operand 2 has whichever integer mode the target prefers. If operand 2 exceeds the number of elements in mode @var{m}, the behavior is undefined. If the target prefers the length to be measured in bytes rather than elements, it should only implement -this pattern for vectors of @code{QI} elements. +this pattern for vectors of @code{QI} elements. Operand 3 specifies +a bias predicate that determines whether a length of zero is permitted +or not. If permitted, the predicate should only allow a zero immediate, +otherwise it should only allow an immediate value of -1. This pattern is not allowed to @code{FAIL}. @@ -5226,7 +5229,10 @@ a vector mode. Operand 2 has whichever integer mode the target prefers. If operand 2 exceeds the number of elements in mode @var{m}, the behavior is undefined. If the target prefers the length to be measured in bytes rather than elements, it should only implement this pattern for vectors -of @code{QI} elements. +of @code{QI} elements. Operand 3 specifies a bias predicate that +determines whether a length of zero is permitted or not. 
If permitted, +the predicate should only allow a zero constant, otherwise it should +only allow an immediate value of -1. This pattern is not allowed to @code{FAIL}. diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 8312d08aab2..d45f080c06f 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index) static void expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) { - class expand_operand ops[3]; - tree type, lhs, rhs, maskt; - rtx mem, target, mask; + class expand_operand ops[4]; + tree type, lhs, rhs, maskt, biast; + rtx mem, target, mask, bias; insn_code icode; maskt = gimple_call_arg (stmt, 2); @@ -2723,11 +2723,20 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) create_output_operand (&ops[0], target, TYPE_MODE (type)); create_fixed_operand (&ops[1], mem); if (optab == len_load_optab) - create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), - TYPE_UNSIGNED (TREE_TYPE (maskt))); + { + create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), + TYPE_UNSIGNED (TREE_TYPE (maskt))); + biast = gimple_call_arg (stmt, 3); + bias = expand_normal (biast); + create_input_operand (&ops[3], bias, QImode); + expand_insn (icode, 4, ops); + } else + { create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); - expand_insn (icode, 3, ops); + expand_insn (icode, 3, ops); + } + if (!rtx_equal_p (target, ops[0].value)) emit_move_insn (target, ops[0].value); } @@ -2741,9 +2750,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab) static void expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) { - class expand_operand ops[3]; - tree type, lhs, rhs, maskt; - rtx mem, reg, mask; + class expand_operand ops[4]; + tree type, lhs, rhs, maskt, biast; + rtx mem, reg, mask, bias; insn_code icode; maskt = gimple_call_arg (stmt, 2); @@ -2766,11 +2775,19 @@ 
expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) create_fixed_operand (&ops[0], mem); create_input_operand (&ops[1], reg, TYPE_MODE (type)); if (optab == len_store_optab) - create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), - TYPE_UNSIGNED (TREE_TYPE (maskt))); + { + create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)), + TYPE_UNSIGNED (TREE_TYPE (maskt))); + biast = gimple_call_arg (stmt, 4); + bias = expand_normal (biast); + create_input_operand (&ops[3], bias, QImode); + expand_insn (icode, 4, ops); + } else - create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); - expand_insn (icode, 3, ops); + { + create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt))); + expand_insn (icode, 3, ops); + } } #define expand_mask_store_optab_fn expand_partial_store_optab_fn @@ -4172,6 +4189,29 @@ internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type, && insn_operand_matches (icode, 4, GEN_INT (align))); } +/* Return the supported bias for the len_load IFN. For now we only support + the biases of 0 and -1 (in case 0 is not an allowable length for len_load). + If none of the biases match what the backend provides, return + VECT_PARTIAL_BIAS_UNSUPPORTED. */ + +signed char +internal_len_load_store_bias (internal_fn ifn, machine_mode mode) +{ + optab optab = direct_internal_fn_optab (ifn); + insn_code icode = direct_optab_handler (optab, mode); + + if (icode != CODE_FOR_nothing) + { + /* For now we only support biases of 0 or -1. Try both of them. */ + if (insn_operand_matches (icode, 3, GEN_INT (0))) + return 0; + if (insn_operand_matches (icode, 3, GEN_INT (-1))) + return -1; + } + + return VECT_PARTIAL_BIAS_UNSUPPORTED; +} + /* Expand STMT as though it were a call to internal function FN. 
*/ void diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 19d0f849a5a..d46aa4ebf33 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -227,6 +227,10 @@ extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, tree, tree, int); extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree, poly_uint64, unsigned int); +#define VECT_PARTIAL_BIAS_UNSUPPORTED 127 + +extern signed char internal_len_load_store_bias (internal_fn ifn, + machine_mode); extern void expand_addsub_overflow (location_t, tree_code, tree, tree, tree, bool, bool, bool, bool, tree *); diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index a94490373c3..c4a6492b50d 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2723,16 +2723,7 @@ convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi) from_unsigned1 = from_unsigned2 = false; } else - { - /* Expand can synthesize smul_widen_optab if the target - supports umul_widen_optab. */ - op = umul_widen_optab; - handler = find_widening_optab_handler_and_mode (op, to_mode, - from_mode, - &actual_mode); - if (handler == CODE_FOR_nothing) - return false; - } + return false; } /* Ensure that the inputs to the handler are in the correct precison diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 4988c93fdb6..4fd078d535a 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -421,6 +421,7 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_controls *dest_rgm, static tree vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, gimple_seq *preheader_seq, + gimple_seq *header_seq, gimple_stmt_iterator loop_cond_gsi, rgroup_controls *rgc, tree niters, tree niters_skip, bool might_wrap_p) @@ -664,6 +665,19 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo, vect_set_loop_control (loop, ctrl, init_ctrl, next_ctrl); } + + int partial_load_bias = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); + 
if (partial_load_bias != 0) + { + tree adjusted_len = rgc->bias_adjusted_ctrl; + gassign *minus = gimple_build_assign (adjusted_len, MINUS_EXPR, + rgc->controls[0], + build_int_cst + (TREE_TYPE (rgc->controls[0]), + -partial_load_bias)); + gimple_seq_add_stmt (header_seq, minus); + } + return next_ctrl; } @@ -744,6 +758,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop, /* Set up all controls for this group. */ test_ctrl = vect_set_loop_controls_directly (loop, loop_vinfo, &preheader_seq, + &header_seq, loop_cond_gsi, rgc, niters, niters_skip, might_wrap_p); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index e94356d76e9..8eb1726b4f5 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1163,6 +1163,31 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo) if (LOOP_VINFO_LENS (loop_vinfo).is_empty ()) return false; + machine_mode len_load_mode = get_len_load_store_mode + (loop_vinfo->vector_mode, true).require (); + machine_mode len_store_mode = get_len_load_store_mode + (loop_vinfo->vector_mode, false).require (); + + signed char partial_load_bias = internal_len_load_store_bias + (IFN_LEN_LOAD, len_load_mode); + + signed char partial_store_bias = internal_len_load_store_bias + (IFN_LEN_STORE, len_store_mode); + + gcc_assert (partial_load_bias == partial_store_bias); + + if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED) + return false; + + /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit + len_loads with a length of zero. In order to avoid that we prohibit + more than one loop length here. */ + if (partial_load_bias == -1 + && LOOP_VINFO_LENS (loop_vinfo).length () > 1) + return false; + + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias; + unsigned int max_nitems_per_iter = 1; unsigned int i; rgroup_controls *rgl; @@ -4125,6 +4150,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, here. 
*/ bool niters_known_p = LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo); + signed char partial_load_store_bias + = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); bool need_iterate_p = (!LOOP_VINFO_EPILOGUE_P (loop_vinfo) && !vect_known_niters_smaller_than_vf (loop_vinfo)); @@ -4157,6 +4184,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, for each since start index is zero. */ prologue_stmts += num_vectors; + /* If we have a non-zero partial load bias, we need one MINUS + to adjust the load length. */ + if (partial_load_store_bias != 0) + body_stmts += 1; + /* Each may need two MINs and one MINUS to update lengths in body for next iteration. */ if (need_iterate_p) @@ -9226,6 +9258,11 @@ vect_get_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens, { rgroup_controls *rgl = &(*lens)[nvectors - 1]; + signed char partial_load_store_bias = + LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); + + bool use_bias_adjusted_len = partial_load_store_bias != 0; + /* Populate the rgroup's len array, if this is the first time we've used it. */ if (rgl->controls.is_empty ()) @@ -9235,6 +9272,15 @@ vect_get_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens, { tree len_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo); gcc_assert (len_type != NULL_TREE); + + if (i == 0 && use_bias_adjusted_len) + { + tree adjusted_len = + make_temp_ssa_name (len_type, NULL, "adjusted_loop_len"); + SSA_NAME_DEF_STMT (adjusted_len) = gimple_build_nop (); + rgl->bias_adjusted_ctrl = adjusted_len; + } + tree len = make_temp_ssa_name (len_type, NULL, "loop_len"); /* Provide a dummy definition until the real one is available. 
*/ @@ -9243,7 +9289,10 @@ vect_get_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens, } } - return rgl->controls[index]; + if (use_bias_adjusted_len) + return rgl->bias_adjusted_ctrl; + else + return rgl->controls[index]; } /* Scale profiling counters by estimation for LOOP which is vectorized diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 17849b575b7..006f4c31217 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -8289,9 +8289,16 @@ vectorizable_store (vec_info *vinfo, gsi); vec_oprnd = var; } + + /* Check which bias value to use. */ + signed char biasval = internal_len_load_store_bias + (IFN_LEN_STORE, new_vmode); + + tree bias = build_int_cst (intQI_type_node, biasval); gcall *call - = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr, - ptr, final_len, vec_oprnd); + = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr, + ptr, final_len, vec_oprnd, + bias); gimple_call_set_nothrow (call, true); vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); new_stmt = call; @@ -9588,22 +9595,30 @@ vectorizable_load (vec_info *vinfo, vec_num * j + i); tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); + + machine_mode vmode = TYPE_MODE (vectype); + opt_machine_mode new_ovmode + = get_len_load_store_mode (vmode, true); + machine_mode new_vmode = new_ovmode.require (); + tree qi_type = unsigned_intQI_type_node; + + /* Check which bias value to use. */ + signed char biasval = internal_len_load_store_bias + (IFN_LEN_LOAD, new_vmode); + + tree bias = build_int_cst (intQI_type_node, biasval); + gcall *call - = gimple_build_call_internal (IFN_LEN_LOAD, 3, + = gimple_build_call_internal (IFN_LEN_LOAD, 4, dataref_ptr, ptr, - final_len); + final_len, bias); gimple_call_set_nothrow (call, true); new_stmt = call; data_ref = NULL_TREE; /* Need conversion if it's wrapped with VnQI. 
*/ - machine_mode vmode = TYPE_MODE (vectype); - opt_machine_mode new_ovmode - = get_len_load_store_mode (vmode, true); - machine_mode new_vmode = new_ovmode.require (); if (vmode != new_vmode) { - tree qi_type = unsigned_intQI_type_node; tree new_vtype = build_vector_type_for_mode (qi_type, new_vmode); tree var = vect_get_new_ssa_name (new_vtype, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index c4c5678e7f1..75fdfa31405 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -544,6 +544,10 @@ struct rgroup_controls { /* A vector of nV controls, in iteration order. */ vec controls; + + /* In case of len_load and len_store with a bias there is only one + rgroup. This holds the adjusted loop length for this rgroup. */ + tree bias_adjusted_ctrl; }; typedef auto_vec vec_loop_masks; @@ -749,6 +753,11 @@ public: epilogue of loop. */ bool epil_using_partial_vectors_p; + /* The bias for len_load and len_store. For now, only 0 and -1 are + supported. -1 must be used when a backend does not support + len_load/len_store with a length of zero. */ + signed char partial_load_store_bias; + /* When we have grouped data accesses with gaps, we may introduce invalid memory accesses. We peel the last iteration of the loop to prevent this. */ @@ -814,6 +823,7 @@ public: #define LOOP_VINFO_USING_PARTIAL_VECTORS_P(L) (L)->using_partial_vectors_p #define LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P(L) \ (L)->epil_using_partial_vectors_p +#define LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS(L) (L)->partial_load_store_bias #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor #define LOOP_VINFO_MASKS(L) (L)->masks