[PATCH, rs6000] Fix vec_mulo and vec_mule instruction generation

Carl Love cel@us.ibm.com
Fri Jun 16 21:19:00 GMT 2017


GCC Maintainers:

The support for the vec_mulo and vec_mule has yet another bug.  For the
case of signed/unsigned integer arguments the builtin generates the half
word instruction not the word instruction.  This patch fixes the issue.
The fix  has been tested and verified on powerpc64le-unknown-linux-gnu
(Power 8 LE)

Is the patch OK for gcc mainline?

                  Carl Love


---------------------------------------------
>From 3127a3f9c8480fde428c4a13bc37d6eaefd0edfe Mon Sep 17 00:00:00 2001
From: Carl Love <carll@us.ibm.com>
Date: Fri, 16 Jun 2017 16:10:56 -0500
Subject: [PATCH] vec_mule, vec_mulo fix 2

gcc/ChangeLog:

2017-06-17  Carl Love  <cel@us.ibm.com>

        * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
	ALTIVEC_BUILTIN_VMULESW, ALTIVEC_BUILTIN_VMULEUW,
	ALTIVEC_BUILTIN_VMULOSW, ALTIVEC_BUILTIN_VMULOUW enties.
	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin(),
	builtin_function_type()): Add needed ALTIVEC_BUILTIN_* case
	statements.
	* config/rs6000/altivec.md (define_c_enum "unspec",
	define_expand "vec_widen_umult_even_v4si",
	define_expand "vec_widen_smult_even_v4si",
	define_expand "vec_widen_umult_odd_v4si",
	define_expand "vec_widen_smult_odd_v4si",
	define_insn "altivec_vmuleuw", define_insn "altivec_vmulesw",
	define_insn "altivec_vmulouw",	define_insn "altivec_vmulosw"): Add
	support to generate vmuleuw, vmulesw, vmulouw, vmulosw instructions.
	* config/rs6000/rs6000-builtin.def (VMLEUW, VMULESW, VMULOUW,
	VMULOSW): Add definitions.
---
 gcc/config/rs6000/altivec.md         | 91
++++++++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-builtin.def |  8 ++++
 gcc/config/rs6000/rs6000-c.c         | 12 +++--
 gcc/config/rs6000/rs6000.c           |  6 +++
 4 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 487b9a4..142300a 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -36,10 +36,14 @@
    UNSPEC_VMULESB
    UNSPEC_VMULEUH
    UNSPEC_VMULESH
+   UNSPEC_VMULEUW
+   UNSPEC_VMULESW
    UNSPEC_VMULOUB
    UNSPEC_VMULOSB
    UNSPEC_VMULOUH
    UNSPEC_VMULOSH
+   UNSPEC_VMULOUW
+   UNSPEC_VMULOSW
    UNSPEC_VPKPX
    UNSPEC_VPACK_SIGN_SIGN_SAT
    UNSPEC_VPACK_SIGN_UNS_SAT
@@ -1412,6 +1416,32 @@
   DONE;
 })
 
+(define_expand "vec_widen_umult_even_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmuleuw (operands[0], operands[1],
operands[2]));
+  else
+    emit_insn (gen_altivec_vmulouw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
+(define_expand "vec_widen_smult_even_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
operands[2]));
+  else
+    emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
 (define_expand "vec_widen_umult_odd_v16qi"
   [(use (match_operand:V8HI 0 "register_operand" ""))
    (use (match_operand:V16QI 1 "register_operand" ""))
@@ -1464,6 +1494,32 @@
   DONE;
 })
 
+(define_expand "vec_widen_umult_odd_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmulouw (operands[0], operands[1],
operands[2]));
+  else
+    emit_insn (gen_altivec_vmuleuw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
+(define_expand "vec_widen_smult_odd_v4si"
+  [(use (match_operand:V2DI 0 "register_operand" ""))
+   (use (match_operand:V4SI 1 "register_operand" ""))
+   (use (match_operand:V4SI 2 "register_operand" ""))]
+  "TARGET_ALTIVEC"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
operands[2]));
+  else
+    emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
operands[2]));
+  DONE;
+})
+
 (define_insn "altivec_vmuleub"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1536,6 +1592,41 @@
   "vmulosh %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
+(define_insn "altivec_vmuleuw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+		     UNSPEC_VMULEUW))]
+  "TARGET_ALTIVEC"
+  "vmuleuw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulouw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+		     UNSPEC_VMULOUW))]
+  "TARGET_ALTIVEC"
+  "vmulouw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulesw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+		     UNSPEC_VMULESW))]
+  "TARGET_ALTIVEC"
+  "vmulesw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulosw"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+        (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+                      (match_operand:V4SI 2 "register_operand" "v")]
+		     UNSPEC_VMULOSW))]
+  "TARGET_ALTIVEC"
+  "vmulosw %0,%1,%2"
+  [(set_attr "type" "veccomplex")])
 
 ;; Vector pack/unpack
 (define_insn "altivec_vpkpx"
diff --git a/gcc/config/rs6000/rs6000-builtin.def
b/gcc/config/rs6000/rs6000-builtin.def
index 241c439..780d452 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1031,10 +1031,14 @@ BU_ALTIVEC_2 (VMULEUB,	      "vmuleub",	CONST,
vec_widen_umult_even_v16qi)
 BU_ALTIVEC_2 (VMULESB,	      "vmulesb",	CONST,
vec_widen_smult_even_v16qi)
 BU_ALTIVEC_2 (VMULEUH,	      "vmuleuh",	CONST,
vec_widen_umult_even_v8hi)
 BU_ALTIVEC_2 (VMULESH,	      "vmulesh",	CONST,
vec_widen_smult_even_v8hi)
+BU_ALTIVEC_2 (VMULEUW,	      "vmuleuw",	CONST,
vec_widen_umult_even_v4si)
+BU_ALTIVEC_2 (VMULESW,	      "vmulesw",	CONST,
vec_widen_smult_even_v4si)
 BU_ALTIVEC_2 (VMULOUB,	      "vmuloub",	CONST,
vec_widen_umult_odd_v16qi)
 BU_ALTIVEC_2 (VMULOSB,	      "vmulosb",	CONST,
vec_widen_smult_odd_v16qi)
 BU_ALTIVEC_2 (VMULOUH,	      "vmulouh",	CONST,
vec_widen_umult_odd_v8hi)
 BU_ALTIVEC_2 (VMULOSH,	      "vmulosh",	CONST,
vec_widen_smult_odd_v8hi)
+BU_ALTIVEC_2 (VMULOUW,	      "vmulouw",	CONST,
vec_widen_umult_odd_v4si)
+BU_ALTIVEC_2 (VMULOSW,	      "vmulosw",	CONST,
vec_widen_smult_odd_v4si)
 BU_ALTIVEC_2 (VNOR,	      "vnor",		CONST,	norv4si3)
 BU_ALTIVEC_2 (VOR,	      "vor",		CONST,	iorv4si3)
 BU_ALTIVEC_2 (VPKUHUM,	      "vpkuhum",	CONST,	altivec_vpkuhum)
@@ -1346,12 +1350,16 @@ BU_ALTIVEC_OVERLOAD_2 (VMRGLH,	   "vmrglh")
 BU_ALTIVEC_OVERLOAD_2 (VMRGLW,	   "vmrglw")
 BU_ALTIVEC_OVERLOAD_2 (VMULESB,	   "vmulesb")
 BU_ALTIVEC_OVERLOAD_2 (VMULESH,	   "vmulesh")
+BU_ALTIVEC_OVERLOAD_2 (VMULESW,	   "vmulesw")
 BU_ALTIVEC_OVERLOAD_2 (VMULEUB,	   "vmuleub")
 BU_ALTIVEC_OVERLOAD_2 (VMULEUH,	   "vmuleuh")
+BU_ALTIVEC_OVERLOAD_2 (VMULEUW,	   "vmuleuw")
 BU_ALTIVEC_OVERLOAD_2 (VMULOSB,	   "vmulosb")
 BU_ALTIVEC_OVERLOAD_2 (VMULOSH,	   "vmulosh")
+BU_ALTIVEC_OVERLOAD_2 (VMULOSW,	   "vmulosw")
 BU_ALTIVEC_OVERLOAD_2 (VMULOUB,	   "vmuloub")
 BU_ALTIVEC_OVERLOAD_2 (VMULOUH,	   "vmulouh")
+BU_ALTIVEC_OVERLOAD_2 (VMULOUW,	   "vmulouw")
 BU_ALTIVEC_OVERLOAD_2 (VPKSHSS,	   "vpkshss")
 BU_ALTIVEC_OVERLOAD_2 (VPKSHUS,	   "vpkshus")
 BU_ALTIVEC_OVERLOAD_2 (VPKSWSS,	   "vpkswss")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index f1e8d3d..af0bafd 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2207,11 +2207,13 @@ const struct altivec_builtin_types
altivec_overloaded_builtins[] = {
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
RS6000_BTI_unsigned_V8HI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
+
+  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESW,
     RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH,
+  { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUW,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
+
   { ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB,
     RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI,
RS6000_BTI_unsigned_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB,
@@ -2226,11 +2228,13 @@ const struct altivec_builtin_types
altivec_overloaded_builtins[] = {
     RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
     RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
RS6000_BTI_unsigned_V8HI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
+
+  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSW,
     RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
-  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
+  { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUW,
     RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
     RS6000_BTI_unsigned_V4SI, 0 },
+
   { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
     RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
   { ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6b28658..d1a4937 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16332,9 +16332,11 @@ rs6000_gimple_fold_builtin
(gimple_stmt_iterator *gsi)
     /* Even element flavors of vec_mul (signed). */
     case ALTIVEC_BUILTIN_VMULESB:
     case ALTIVEC_BUILTIN_VMULESH:
+    case ALTIVEC_BUILTIN_VMULESW:
     /* Even element flavors of vec_mul (unsigned).  */
     case ALTIVEC_BUILTIN_VMULEUB:
     case ALTIVEC_BUILTIN_VMULEUH:
+    case ALTIVEC_BUILTIN_VMULEUW:
       {
 	arg0 = gimple_call_arg (stmt, 0);
 	arg1 = gimple_call_arg (stmt, 1);
@@ -16347,9 +16349,11 @@ rs6000_gimple_fold_builtin
(gimple_stmt_iterator *gsi)
     /* Odd element flavors of vec_mul (signed).  */
     case ALTIVEC_BUILTIN_VMULOSB:
     case ALTIVEC_BUILTIN_VMULOSH:
+    case ALTIVEC_BUILTIN_VMULOSW:
     /* Odd element flavors of vec_mul (unsigned). */
     case ALTIVEC_BUILTIN_VMULOUB:
     case ALTIVEC_BUILTIN_VMULOUH:
+    case ALTIVEC_BUILTIN_VMULOUW:
       {
 	arg0 = gimple_call_arg (stmt, 0);
 	arg1 = gimple_call_arg (stmt, 1);
@@ -17965,8 +17969,10 @@ builtin_function_type (machine_mode mode_ret,
machine_mode mode_arg0,
       /* unsigned 2 argument functions.  */
     case ALTIVEC_BUILTIN_VMULEUB:
     case ALTIVEC_BUILTIN_VMULEUH:
+    case ALTIVEC_BUILTIN_VMULEUW:
     case ALTIVEC_BUILTIN_VMULOUB:
     case ALTIVEC_BUILTIN_VMULOUH:
+    case ALTIVEC_BUILTIN_VMULOUW:
     case CRYPTO_BUILTIN_VCIPHER:
     case CRYPTO_BUILTIN_VCIPHERLAST:
     case CRYPTO_BUILTIN_VNCIPHER:
-- 
1.9.1





More information about the Gcc-patches mailing list