[PATCH, rs6000] gimple folding of vec_msum()

Fri Dec 1 21:43:00 GMT 2017

On Fri, 2017-12-01 at 18:46 +0100, Richard Biener wrote:
> On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> >Hi,
> >Add support for folding of vec_msum in GIMPLE.
> >    
> >This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
> >mismatches:
> >	error: type mismatch in dot product reduction
> >	__vector signed int
> >	__vector signed char
> >	__vector unsigned char
> >	D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
> >So for those cases with a signed/unsigned mismatch in the arguments,
> >this
> >converts those arguments to their signed type.
> >    
> >This also adds a define_expand for sdot_prodv16qi. This is based on a
> >similar
> >existing entry.
> >    
> >Testing coverage is handled by the existing
> >gcc.target/powerpc/fold-vec-msum*.c tests.
> >    
> >Sniff-tests have passed on P8.  full regtests currently running on
> >other assorted
> >power systems.
> >OK for trunk with successful results?
> 
> Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant. 
> 
> This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors.  Does vec_msum specify that? 

Not that I see, but there may be an implied intent here that just isn't
obvious to me.   I'll defer to ... someone. :-)

> That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE.

OK.  With that in mind, should I just try to split this out into
separate multiply and add steps?

Thanks,
-Will

>  
> 
> Richard. 
> 
> >Thanks
> >-Will
> >    
> >[gcc]
> >
> >2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>
> >
> >	* config/rs6000/altivec.md (sdot_prodv16qi): New.
> >	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
> >	gimple-folding of vec_msum.
> >	(builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
> >
> >diff --git a/gcc/config/rs6000/altivec.md
> >b/gcc/config/rs6000/altivec.md
> >index 7122f99..fa9e121 100644
> >--- a/gcc/config/rs6000/altivec.md
> >+++ b/gcc/config/rs6000/altivec.md
> >@@ -3349,11 +3349,26 @@
> >                         (match_operand:V8HI 2 "register_operand" "v")]
> >                                 UNSPEC_VMSUMSHM)))]
> >   "TARGET_ALTIVEC"
> >   "
> > {
> >-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
> >operands[2], operands[3]));
> >+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
> >+				   operands[2], operands[3]));
> >+  DONE;
> >+}")
> >+
> >+(define_expand "sdot_prodv16qi"
> >+  [(set (match_operand:V4SI 0 "register_operand" "=v")
> >+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
> >+                   (unspec:V4SI [(match_operand:V16QI 1
> >"register_operand" "v")
> >+                                 (match_operand:V16QI 2
> >"register_operand" "v")]
> >+                                UNSPEC_VMSUMM)))]
> >+  "TARGET_ALTIVEC"
> >+  "
> >+{
> >+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
> >+				   operands[2], operands[3]));
> >   DONE;
> > }")
> > 
> > (define_expand "widen_usum<mode>3"
> >   [(set (match_operand:V4SI 0 "register_operand" "=v")
> >diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> >index 551d9c4..552fcdd 100644
> >--- a/gcc/config/rs6000/rs6000.c
> >+++ b/gcc/config/rs6000/rs6000.c
> >@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin
> >(gimple_stmt_iterator *gsi)
> >     case VSX_BUILTIN_CMPLE_2DI:
> >     case VSX_BUILTIN_CMPLE_U2DI:
> >       fold_compare_helper (gsi, LE_EXPR, stmt);
> >       return true;
> > 
> >+    /* vec_msum.  */
> >+    case ALTIVEC_BUILTIN_VMSUMUHM:
> >+    case ALTIVEC_BUILTIN_VMSUMSHM:
> >+    case ALTIVEC_BUILTIN_VMSUMUBM:
> >+    case ALTIVEC_BUILTIN_VMSUMMBM:
> >+      {
> >+	arg0 = gimple_call_arg (stmt, 0);
> >+	arg1 = gimple_call_arg (stmt, 1);
> >+	tree arg2 = gimple_call_arg (stmt, 2);
> >+	lhs = gimple_call_lhs (stmt);
> >+	if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
> >+	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
> >+	else
> >+	  {
> >+	    // For the case where we have a mix of signed/unsigned
> >+	    // arguments, convert both multiply args to their signed type.
> >+	    gimple_seq stmts = NULL;
> >+	    location_t loc = gimple_location (stmt);
> >+	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
> >+	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type,
> >arg0);
> >+	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type,
> >arg1);
> >+	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >+	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
> >+				     signed_arg0, signed_arg1, arg2);
> >+	  }
> >+	gimple_set_location (g, gimple_location (stmt));
> >+	gsi_replace (gsi, g, true);
> >+	return true;
> >+      }
> >+
> >     default:
> >       if (TARGET_DEBUG_BUILTIN)
> > 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> > 		 fn_code, fn_name1, fn_name2);
> >       break;
> >@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret,
> >machine_mode mode_arg0,
> >     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
> >     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
> >     case CRYPTO_BUILTIN_VSHASIGMAW:
> >     case CRYPTO_BUILTIN_VSHASIGMAD:
> >     case CRYPTO_BUILTIN_VSHASIGMA:
> >+    case ALTIVEC_BUILTIN_VMSUMUHM:
> >+    case ALTIVEC_BUILTIN_VMSUMUBM:
> >       h.uns_p[0] = 1;
> >       h.uns_p[1] = 1;
> >       h.uns_p[2] = 1;
> >       h.uns_p[3] = 1;
> >       break;
> > 
> >+    /* The second parm to this vec_msum variant is unsigned.  */
> >+    case ALTIVEC_BUILTIN_VMSUMMBM:
> >+      h.uns_p[2] = 1;
> >+      break;
> >+
> >     /* signed permute functions with unsigned char mask.  */
> >     case ALTIVEC_BUILTIN_VPERM_16QI:
> >     case ALTIVEC_BUILTIN_VPERM_8HI:
> >     case ALTIVEC_BUILTIN_VPERM_4SI:
> >     case ALTIVEC_BUILTIN_VPERM_4SF:
>