[PATCH, rs6000] gimple folding of vec_msum()

Richard Biener richard.guenther@gmail.com
Fri Dec 1 17:46:00 GMT 2017


On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>Hi,
>Add support for folding of vec_msum in GIMPLE.
>    
>This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
>mismatches:
>	error: type mismatch in dot product reduction
>	__vector signed int
>	__vector signed char
>	__vector unsigned char
>	D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
>So for those cases with a signed/unsigned mismatch in the arguments,
>this converts those arguments to their signed type.
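>
>(Illustrative sketch, reusing the SSA names from the error above: for
>the vmsummbm case the fold converts both multiply operands to the
>signed type -- the conversion of the already-signed operand is a
>no-op -- so the resulting GIMPLE looks roughly like
>
>	vuc3.0 = (__vector signed char) vuc3;
>	D.2798 = DOT_PROD_EXPR <vsc2, vuc3.0, vsi2>;
>
>rather than feeding the mismatched unsigned operand in directly.)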
>    
>This also adds a define_expand for sdot_prodv16qi.  This is based on
>a similar existing entry.
>    
>Testing coverage is handled by the existing
>gcc.target/powerpc/fold-vec-msum*.c tests.
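>
>A sketch of what that coverage boils down to (not the literal test
>source), compiled with -maltivec and with the dumps scanned for the
>folded result:
>
>	#include <altivec.h>
>
>	vector signed int
>	test_msum (vector signed short vss0, vector signed short vss1,
>		   vector signed int vsi)
>	{
>	  return vec_msum (vss0, vss1, vsi);
>	}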
>    
>Sniff-tests have passed on P8.  Full regtests are currently running
>on other assorted Power systems.
>OK for trunk with successful results?

Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant. 

This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors.  Does vec_msum specify that? 
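
To make that concrete: for a 2x-width dot product, both of these lane
mappings are valid implementations of the same DOT_PROD_EXPR (a sketch,
with n the number of input lanes):

	/* even/odd pairs */
	res[i] = arg3[i] + arg1[2*i] * arg2[2*i] + arg1[2*i+1] * arg2[2*i+1];
	/* high/low halves */
	res[i] = arg3[i] + arg1[i] * arg2[i] + arg1[i + n/2] * arg2[i + n/2];

Only after res is fully reduced to a scalar do the two agree.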

That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE. 

Richard. 

>Thanks
>-Will
>    
>[gcc]
>
>2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>
>
>	* config/rs6000/altivec.md (sdot_prodv16qi): New.
>	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>	gimple-folding of vec_msum.
>	(builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
>
>diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>index 7122f99..fa9e121 100644
>--- a/gcc/config/rs6000/altivec.md
>+++ b/gcc/config/rs6000/altivec.md
>@@ -3349,11 +3349,26 @@
>                         (match_operand:V8HI 2 "register_operand" "v")]
>                                 UNSPEC_VMSUMSHM)))]
>   "TARGET_ALTIVEC"
>   "
> {
>-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], operands[2], operands[3]));
>+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>+				   operands[2], operands[3]));
>+  DONE;
>+}")
>+
>+(define_expand "sdot_prodv16qi"
>+  [(set (match_operand:V4SI 0 "register_operand" "=v")
>+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
>+                   (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
>+                                 (match_operand:V16QI 2 "register_operand" "v")]
>+                                UNSPEC_VMSUMM)))]
>+  "TARGET_ALTIVEC"
>+  "
>+{
>+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
>+				   operands[2], operands[3]));
>   DONE;
> }")
> 
> (define_expand "widen_usum<mode>3"
>   [(set (match_operand:V4SI 0 "register_operand" "=v")
>diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>index 551d9c4..552fcdd 100644
>--- a/gcc/config/rs6000/rs6000.c
>+++ b/gcc/config/rs6000/rs6000.c
>@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
>     case VSX_BUILTIN_CMPLE_2DI:
>     case VSX_BUILTIN_CMPLE_U2DI:
>       fold_compare_helper (gsi, LE_EXPR, stmt);
>       return true;
> 
>+    /* vec_msum.  */
>+    case ALTIVEC_BUILTIN_VMSUMUHM:
>+    case ALTIVEC_BUILTIN_VMSUMSHM:
>+    case ALTIVEC_BUILTIN_VMSUMUBM:
>+    case ALTIVEC_BUILTIN_VMSUMMBM:
>+      {
>+	arg0 = gimple_call_arg (stmt, 0);
>+	arg1 = gimple_call_arg (stmt, 1);
>+	tree arg2 = gimple_call_arg (stmt, 2);
>+	lhs = gimple_call_lhs (stmt);
>+	if (TREE_TYPE (arg0) == TREE_TYPE (arg1))
>+	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
>+	else
>+	  {
>+	    /* For the case where we have a mix of signed/unsigned
>+	       arguments, convert both multiply args to their signed type.  */
>+	    gimple_seq stmts = NULL;
>+	    location_t loc = gimple_location (stmt);
>+	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
>+	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type, arg0);
>+	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type, arg1);
>+	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>+	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
>+				     signed_arg0, signed_arg1, arg2);
>+	  }
>+	gimple_set_location (g, gimple_location (stmt));
>+	gsi_replace (gsi, g, true);
>+	return true;
>+      }
>+
>     default:
>       if (TARGET_DEBUG_BUILTIN)
> 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> 		 fn_code, fn_name1, fn_name2);
>       break;
>@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
>     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
>     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
>     case CRYPTO_BUILTIN_VSHASIGMAW:
>     case CRYPTO_BUILTIN_VSHASIGMAD:
>     case CRYPTO_BUILTIN_VSHASIGMA:
>+    case ALTIVEC_BUILTIN_VMSUMUHM:
>+    case ALTIVEC_BUILTIN_VMSUMUBM:
>       h.uns_p[0] = 1;
>       h.uns_p[1] = 1;
>       h.uns_p[2] = 1;
>       h.uns_p[3] = 1;
>       break;
> 
>+    /* The second parm to this vec_msum variant is unsigned.  */
>+    case ALTIVEC_BUILTIN_VMSUMMBM:
>+      h.uns_p[2] = 1;
>+      break;
>+
>     /* signed permute functions with unsigned char mask.  */
>     case ALTIVEC_BUILTIN_VPERM_16QI:
>     case ALTIVEC_BUILTIN_VPERM_8HI:
>     case ALTIVEC_BUILTIN_VPERM_4SI:
>     case ALTIVEC_BUILTIN_VPERM_4SF:


