[PATCH, rs6000] gimple folding of vec_msum()
Richard Biener
richard.guenther@gmail.com
Fri Dec 1 17:46:00 GMT 2017
On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>Hi,
>Add support for folding of vec_msum in GIMPLE.
>
>This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
>mismatches:
> error: type mismatch in dot product reduction
> __vector signed int
> __vector signed char
> __vector unsigned char
> D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
>So for those cases with a signed/unsigned mismatch in the arguments,
>this
>converts those arguments to their signed type.
>
>This also adds a define_expand for sdot_prodv16qi. This is based on a
>similar
>existing entry.
>
>Testing coverage is handled by the existing
>gcc.target/powerpc/fold-vec-msum*.c tests.
>
>Sniff-tests have passed on P8. Full regtests are currently running on
>other assorted
>Power systems.
>OK for trunk with successful results?
Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant.
This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors. Does vec_msum specify that?
That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE.
Richard.
>Thanks
>-Will
>
>[gcc]
>
>2017-12-01 Will Schmidt <will_schmidt@vnet.ibm.com>
>
> * config/rs6000/altivec.md (sdot_prodv16qi): New.
> * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
> gimple-folding of vec_msum.
> (builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
>
>diff --git a/gcc/config/rs6000/altivec.md
>b/gcc/config/rs6000/altivec.md
>index 7122f99..fa9e121 100644
>--- a/gcc/config/rs6000/altivec.md
>+++ b/gcc/config/rs6000/altivec.md
>@@ -3349,11 +3349,26 @@
> (match_operand:V8HI 2 "register_operand" "v")]
> UNSPEC_VMSUMSHM)))]
> "TARGET_ALTIVEC"
> "
> {
>- emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>operands[2], operands[3]));
>+ emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>+ operands[2], operands[3]));
>+ DONE;
>+}")
>+
>+(define_expand "sdot_prodv16qi"
>+ [(set (match_operand:V4SI 0 "register_operand" "=v")
>+ (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
>+ (unspec:V4SI [(match_operand:V16QI 1
>"register_operand" "v")
>+ (match_operand:V16QI 2
>"register_operand" "v")]
>+ UNSPEC_VMSUMM)))]
>+ "TARGET_ALTIVEC"
>+ "
>+{
>+ emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
>+ operands[2], operands[3]));
> DONE;
> }")
>
> (define_expand "widen_usum<mode>3"
> [(set (match_operand:V4SI 0 "register_operand" "=v")
>diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>index 551d9c4..552fcdd 100644
>--- a/gcc/config/rs6000/rs6000.c
>+++ b/gcc/config/rs6000/rs6000.c
>@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin
>(gimple_stmt_iterator *gsi)
> case VSX_BUILTIN_CMPLE_2DI:
> case VSX_BUILTIN_CMPLE_U2DI:
> fold_compare_helper (gsi, LE_EXPR, stmt);
> return true;
>
>+ /* vec_msum. */
>+ case ALTIVEC_BUILTIN_VMSUMUHM:
>+ case ALTIVEC_BUILTIN_VMSUMSHM:
>+ case ALTIVEC_BUILTIN_VMSUMUBM:
>+ case ALTIVEC_BUILTIN_VMSUMMBM:
>+ {
>+ arg0 = gimple_call_arg (stmt, 0);
>+ arg1 = gimple_call_arg (stmt, 1);
>+ tree arg2 = gimple_call_arg (stmt, 2);
>+ lhs = gimple_call_lhs (stmt);
>+ if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
>+ g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
>+ else
>+ {
>+ // For the case where we have a mix of signed/unsigned
>+ // arguments, convert both multiply args to their signed type.
>+ gimple_seq stmts = NULL;
>+ location_t loc = gimple_location (stmt);
>+ tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
>+ tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type,
>arg0);
>+ tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type,
>arg1);
>+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>+ g = gimple_build_assign (lhs, DOT_PROD_EXPR,
>+ signed_arg0, signed_arg1, arg2);
>+ }
>+ gimple_set_location (g, gimple_location (stmt));
>+ gsi_replace (gsi, g, true);
>+ return true;
>+ }
>+
> default:
> if (TARGET_DEBUG_BUILTIN)
> fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> fn_code, fn_name1, fn_name2);
> break;
>@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret,
>machine_mode mode_arg0,
> case CRYPTO_BUILTIN_VPERMXOR_V8HI:
> case CRYPTO_BUILTIN_VPERMXOR_V16QI:
> case CRYPTO_BUILTIN_VSHASIGMAW:
> case CRYPTO_BUILTIN_VSHASIGMAD:
> case CRYPTO_BUILTIN_VSHASIGMA:
>+ case ALTIVEC_BUILTIN_VMSUMUHM:
>+ case ALTIVEC_BUILTIN_VMSUMUBM:
> h.uns_p[0] = 1;
> h.uns_p[1] = 1;
> h.uns_p[2] = 1;
> h.uns_p[3] = 1;
> break;
>
>+ /* The second parm to this vec_msum variant is unsigned. */
>+ case ALTIVEC_BUILTIN_VMSUMMBM:
>+ h.uns_p[2] = 1;
>+ break;
>+
> /* signed permute functions with unsigned char mask. */
> case ALTIVEC_BUILTIN_VPERM_16QI:
> case ALTIVEC_BUILTIN_VPERM_8HI:
> case ALTIVEC_BUILTIN_VPERM_4SI:
> case ALTIVEC_BUILTIN_VPERM_4SF:
More information about the Gcc-patches
mailing list