This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH, rs6000] Folding of vector loads in GIMPLE
- From: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>
- To: will_schmidt at vnet dot ibm dot com
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>, Segher Boessenkool <segher at kernel dot crashing dot org>, Richard Biener <richard dot guenther at gmail dot com>, David Edelsohn <dje dot gcc at gmail dot com>
- Date: Tue, 12 Sep 2017 10:22:37 -0500
- Subject: Re: [PATCH, rs6000] Folding of vector loads in GIMPLE
- Authentication-results: sourceware.org; auth=none
- References: <1505227262.14827.155.camel@brimstone.rchland.ibm.com>
> On Sep 12, 2017, at 9:41 AM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>
> Hi
>
> [PATCH, rs6000] Folding of vector loads in GIMPLE
>
> Folding of vector loads in GIMPLE.
>
> - Add code to handle gimple folding for the vec_ld builtins.
> - Remove the now-obsolete folding code for vec_ld from rs6000-c.c. The surrounding
> comments have been adjusted slightly so that they still read correctly for the
> vec_st code that remains.
>
> The resulting code is specifically verified by the powerpc/fold-vec-ld-*.c
> tests, which were posted separately a few minutes ago.
>
> Regtest completed successfully on power6 and newer (p6, p7, p8le, p8be, p9).
>
> OK for trunk?
>
> Thanks,
> -Will
>
> [gcc]
>
> 2017-09-12 Will Schmidt <will_schmidt@vnet.ibm.com>
>
> * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
> for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
> Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
>
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index 897306c..73e14d9 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -6459,92 +6459,19 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
> convert (TREE_TYPE (stmt), arg0));
> stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
> return stmt;
> }
>
> - /* Expand vec_ld into an expression that masks the address and
> - performs the load. We need to expand this early to allow
> + /* Expand vec_st into an expression that masks the address and
> + performs the store. We need to expand this early to allow
> the best aliasing, as by the time we get into RTL we no longer
> are able to honor __restrict__, for example. We may want to
> consider this for all memory access built-ins.
>
> When -maltivec=be is specified, or the wrong number of arguments
> is provided, simply punt to existing built-in processing. */
> - if (fcode == ALTIVEC_BUILTIN_VEC_LD
> - && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
> - && nargs == 2)
> - {
> - tree arg0 = (*arglist)[0];
> - tree arg1 = (*arglist)[1];
> -
> - /* Strip qualifiers like "const" from the pointer arg. */
> - tree arg1_type = TREE_TYPE (arg1);
> - if (!POINTER_TYPE_P (arg1_type) && TREE_CODE (arg1_type) != ARRAY_TYPE)
> - goto bad;
> -
> - tree inner_type = TREE_TYPE (arg1_type);
> - if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
> - {
> - arg1_type = build_pointer_type (build_qualified_type (inner_type,
> - 0));
> - arg1 = fold_convert (arg1_type, arg1);
> - }
> -
> - /* Construct the masked address. Let existing error handling take
> - over if we don't have a constant offset. */
> - arg0 = fold (arg0);
> -
> - if (TREE_CODE (arg0) == INTEGER_CST)
> - {
> - if (!ptrofftype_p (TREE_TYPE (arg0)))
> - arg0 = build1 (NOP_EXPR, sizetype, arg0);
> -
> - tree arg1_type = TREE_TYPE (arg1);
> - if (TREE_CODE (arg1_type) == ARRAY_TYPE)
> - {
> - arg1_type = TYPE_POINTER_TO (TREE_TYPE (arg1_type));
> - tree const0 = build_int_cstu (sizetype, 0);
> - tree arg1_elt0 = build_array_ref (loc, arg1, const0);
> - arg1 = build1 (ADDR_EXPR, arg1_type, arg1_elt0);
> - }
> -
> - tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
> - arg1, arg0);
> - tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
> - build_int_cst (arg1_type, -16));
> -
> - /* Find the built-in to get the return type so we can convert
> - the result properly (or fall back to default handling if the
> - arguments aren't compatible). */
> - for (desc = altivec_overloaded_builtins;
> - desc->code && desc->code != fcode; desc++)
> - continue;
> -
> - for (; desc->code == fcode; desc++)
> - if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
> - && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
> - desc->op2)))
> - {
> - tree ret_type = rs6000_builtin_type (desc->ret_type);
> - if (TYPE_MODE (ret_type) == V2DImode)
> - /* Type-based aliasing analysis thinks vector long
> - and vector long long are different and will put them
> - in distinct alias classes. Force our return type
> - to be a may-alias type to avoid this. */
> - ret_type
> - = build_pointer_type_for_mode (ret_type, Pmode,
> - true/*can_alias_all*/);
> - else
> - ret_type = build_pointer_type (ret_type);
> - aligned = build1 (NOP_EXPR, ret_type, aligned);
> - tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
> - return ret_val;
> - }
> - }
> - }
>
> - /* Similarly for stvx. */
> if (fcode == ALTIVEC_BUILTIN_VEC_ST
> && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
> && nargs == 3)
> {
> tree arg0 = (*arglist)[0];
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index cf744d8..5b14789 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16473,10 +16473,65 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
> res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
> gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> update_call_from_tree (gsi, res);
> return true;
> }
> + /* Vector loads. */
> + case ALTIVEC_BUILTIN_LVX_V16QI:
> + case ALTIVEC_BUILTIN_LVX_V8HI:
> + case ALTIVEC_BUILTIN_LVX_V4SI:
> + case ALTIVEC_BUILTIN_LVX_V4SF:
> + case ALTIVEC_BUILTIN_LVX_V2DI:
> + case ALTIVEC_BUILTIN_LVX_V2DF:
> + {
> + gimple *g;
> + arg0 = gimple_call_arg (stmt, 0); // offset
> + arg1 = gimple_call_arg (stmt, 1); // address
> +
> + /* Limit folding of loads to LE targets. */
> + if (BYTES_BIG_ENDIAN || VECTOR_ELT_ORDER_BIG)
> + return false;
Why? This transformation shouldn't be endian-dependent.
Thanks,
Bill
> +
> + lhs = gimple_call_lhs (stmt);
> + location_t loc = gimple_location (stmt);
> +
> + tree arg1_type = TREE_TYPE (arg1);
> + tree lhs_type = TREE_TYPE (lhs);
> +
> + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
> + the tree using the value from arg0. The resulting type will match
> + the type of arg1. */
> + tree temp_offset = create_tmp_reg_or_ssa_name (sizetype);
> + g = gimple_build_assign (temp_offset, NOP_EXPR, arg0);
> + gimple_set_location (g, loc);
> + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> + tree temp_addr = create_tmp_reg_or_ssa_name (arg1_type);
> + g = gimple_build_assign (temp_addr, POINTER_PLUS_EXPR, arg1,
> + temp_offset);
> + gimple_set_location (g, loc);
> + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +
> + /* Mask off any lower bits from the address. */
> + tree alignment_mask = build_int_cst (arg1_type, -16);
> + tree aligned_addr = create_tmp_reg_or_ssa_name (arg1_type);
> + g = gimple_build_assign (aligned_addr, BIT_AND_EXPR,
> + temp_addr, alignment_mask);
> + gimple_set_location (g, loc);
> + gsi_insert_before (gsi, g, GSI_SAME_STMT);
> +
> + /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
> + take an offset, but since we've already incorporated the offset
> + above, here we just pass in a zero. */
> + g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
> + build_int_cst (arg1_type, 0)));
> + gimple_set_location (g, loc);
> + gsi_replace (gsi, g, true);
> +
> + return true;
> +
> + }
> +
> default:
> if (TARGET_DEBUG_BUILTIN)
> fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> fn_code, fn_name1, fn_name2);
> break;
>
>