This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, rs6000] Fix vec_mulo and vec_mule instruction generation
- From: Carl Love <cel at us dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org, David Edelsohn <dje dot gcc at gmail dot com>, Segher Boessenkool <segher at kernel dot crashing dot org>
- Cc: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>, cel at us dot ibm dot com
- Date: Fri, 16 Jun 2017 14:19:05 -0700
- Subject: [PATCH, rs6000] Fix vec_mulo and vec_mule instruction generation
- Authentication-results: sourceware.org; auth=none
GCC Maintainers:
The support for the vec_mulo and vec_mule has yet another bug. For the
case of signed/unsigned integer arguments the builtin generates the half
word instruction not the word instruction. This patch fixes the issue.
The fix has been tested and verified on powerpc64le-unknown-linux-gnu
(Power 8 LE)
Is the patch OK for gcc mainline?
Carl Love
---------------------------------------------
>From 3127a3f9c8480fde428c4a13bc37d6eaefd0edfe Mon Sep 17 00:00:00 2001
From: Carl Love <carll@us.ibm.com>
Date: Fri, 16 Jun 2017 16:10:56 -0500
Subject: [PATCH] vec_mule, vec_mulo fix 2
gcc/ChangeLog:
2017-06-17 Carl Love <cel@us.ibm.com>
* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
ALTIVEC_BUILTIN_VMULESW, ALTIVEC_BUILTIN_VMULEUW,
ALTIVEC_BUILTIN_VMULOSW, ALTIVEC_BUILTIN_VMULOUW enties.
* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin(),
builtin_function_type()): Add needed ALTIVEC_BUILTIN_* case
statements.
* config/rs6000/altivec.md (define_c_enum "unspec",
define_expand "vec_widen_umult_even_v4si",
define_expand "vec_widen_smult_even_v4si",
define_expand "vec_widen_umult_odd_v4si",
define_expand "vec_widen_smult_odd_v4si",
define_insn "altivec_vmuleuw", define_insn "altivec_vmulesw",
define_insn "altivec_vmulouw", define_insn "altivec_vmulosw"): Add
support to generate vmuleuw, vmulesw, vmulouw, vmulosw instructions.
* config/rs6000/rs6000-builtin.def (VMLEUW, VMULESW, VMULOUW,
VMULOSW): Add definitions.
---
gcc/config/rs6000/altivec.md | 91
++++++++++++++++++++++++++++++++++++
gcc/config/rs6000/rs6000-builtin.def | 8 ++++
gcc/config/rs6000/rs6000-c.c | 12 +++--
gcc/config/rs6000/rs6000.c | 6 +++
4 files changed, 113 insertions(+), 4 deletions(-)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 487b9a4..142300a 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -36,10 +36,14 @@
UNSPEC_VMULESB
UNSPEC_VMULEUH
UNSPEC_VMULESH
+ UNSPEC_VMULEUW
+ UNSPEC_VMULESW
UNSPEC_VMULOUB
UNSPEC_VMULOSB
UNSPEC_VMULOUH
UNSPEC_VMULOSH
+ UNSPEC_VMULOUW
+ UNSPEC_VMULOSW
UNSPEC_VPKPX
UNSPEC_VPACK_SIGN_SIGN_SAT
UNSPEC_VPACK_SIGN_UNS_SAT
@@ -1412,6 +1416,32 @@
DONE;
})
+(define_expand "vec_widen_umult_even_v4si"
+ [(use (match_operand:V2DI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmuleuw (operands[0], operands[1],
operands[2]));
+ else
+ emit_insn (gen_altivec_vmulouw (operands[0], operands[1],
operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_even_v4si"
+ [(use (match_operand:V2DI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
operands[2]));
+ else
+ emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
operands[2]));
+ DONE;
+})
+
(define_expand "vec_widen_umult_odd_v16qi"
[(use (match_operand:V8HI 0 "register_operand" ""))
(use (match_operand:V16QI 1 "register_operand" ""))
@@ -1464,6 +1494,32 @@
DONE;
})
+(define_expand "vec_widen_umult_odd_v4si"
+ [(use (match_operand:V2DI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulouw (operands[0], operands[1],
operands[2]));
+ else
+ emit_insn (gen_altivec_vmuleuw (operands[0], operands[1],
operands[2]));
+ DONE;
+})
+
+(define_expand "vec_widen_smult_odd_v4si"
+ [(use (match_operand:V2DI 0 "register_operand" ""))
+ (use (match_operand:V4SI 1 "register_operand" ""))
+ (use (match_operand:V4SI 2 "register_operand" ""))]
+ "TARGET_ALTIVEC"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_altivec_vmulosw (operands[0], operands[1],
operands[2]));
+ else
+ emit_insn (gen_altivec_vmulesw (operands[0], operands[1],
operands[2]));
+ DONE;
+})
+
(define_insn "altivec_vmuleub"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1536,6 +1592,41 @@
"vmulosh %0,%1,%2"
[(set_attr "type" "veccomplex")])
+(define_insn "altivec_vmuleuw"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULEUW))]
+ "TARGET_ALTIVEC"
+ "vmuleuw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulouw"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULOUW))]
+ "TARGET_ALTIVEC"
+ "vmulouw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulesw"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULESW))]
+ "TARGET_ALTIVEC"
+ "vmulesw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmulosw"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULOSW))]
+ "TARGET_ALTIVEC"
+ "vmulosw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
;; Vector pack/unpack
(define_insn "altivec_vpkpx"
diff --git a/gcc/config/rs6000/rs6000-builtin.def
b/gcc/config/rs6000/rs6000-builtin.def
index 241c439..780d452 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1031,10 +1031,14 @@ BU_ALTIVEC_2 (VMULEUB, "vmuleub", CONST,
vec_widen_umult_even_v16qi)
BU_ALTIVEC_2 (VMULESB, "vmulesb", CONST,
vec_widen_smult_even_v16qi)
BU_ALTIVEC_2 (VMULEUH, "vmuleuh", CONST,
vec_widen_umult_even_v8hi)
BU_ALTIVEC_2 (VMULESH, "vmulesh", CONST,
vec_widen_smult_even_v8hi)
+BU_ALTIVEC_2 (VMULEUW, "vmuleuw", CONST,
vec_widen_umult_even_v4si)
+BU_ALTIVEC_2 (VMULESW, "vmulesw", CONST,
vec_widen_smult_even_v4si)
BU_ALTIVEC_2 (VMULOUB, "vmuloub", CONST,
vec_widen_umult_odd_v16qi)
BU_ALTIVEC_2 (VMULOSB, "vmulosb", CONST,
vec_widen_smult_odd_v16qi)
BU_ALTIVEC_2 (VMULOUH, "vmulouh", CONST,
vec_widen_umult_odd_v8hi)
BU_ALTIVEC_2 (VMULOSH, "vmulosh", CONST,
vec_widen_smult_odd_v8hi)
+BU_ALTIVEC_2 (VMULOUW, "vmulouw", CONST,
vec_widen_umult_odd_v4si)
+BU_ALTIVEC_2 (VMULOSW, "vmulosw", CONST,
vec_widen_smult_odd_v4si)
BU_ALTIVEC_2 (VNOR, "vnor", CONST, norv4si3)
BU_ALTIVEC_2 (VOR, "vor", CONST, iorv4si3)
BU_ALTIVEC_2 (VPKUHUM, "vpkuhum", CONST, altivec_vpkuhum)
@@ -1346,12 +1350,16 @@ BU_ALTIVEC_OVERLOAD_2 (VMRGLH, "vmrglh")
BU_ALTIVEC_OVERLOAD_2 (VMRGLW, "vmrglw")
BU_ALTIVEC_OVERLOAD_2 (VMULESB, "vmulesb")
BU_ALTIVEC_OVERLOAD_2 (VMULESH, "vmulesh")
+BU_ALTIVEC_OVERLOAD_2 (VMULESW, "vmulesw")
BU_ALTIVEC_OVERLOAD_2 (VMULEUB, "vmuleub")
BU_ALTIVEC_OVERLOAD_2 (VMULEUH, "vmuleuh")
+BU_ALTIVEC_OVERLOAD_2 (VMULEUW, "vmuleuw")
BU_ALTIVEC_OVERLOAD_2 (VMULOSB, "vmulosb")
BU_ALTIVEC_OVERLOAD_2 (VMULOSH, "vmulosh")
+BU_ALTIVEC_OVERLOAD_2 (VMULOSW, "vmulosw")
BU_ALTIVEC_OVERLOAD_2 (VMULOUB, "vmuloub")
BU_ALTIVEC_OVERLOAD_2 (VMULOUH, "vmulouh")
+BU_ALTIVEC_OVERLOAD_2 (VMULOUW, "vmulouw")
BU_ALTIVEC_OVERLOAD_2 (VPKSHSS, "vpkshss")
BU_ALTIVEC_OVERLOAD_2 (VPKSHUS, "vpkshus")
BU_ALTIVEC_OVERLOAD_2 (VPKSWSS, "vpkswss")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index f1e8d3d..af0bafd 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2207,11 +2207,13 @@ const struct altivec_builtin_types
altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
RS6000_BTI_unsigned_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESH,
+
+ { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESW,
RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUH,
+ { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUW,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V4SI, 0 },
+
{ ALTIVEC_BUILTIN_VEC_VMULEUB, ALTIVEC_BUILTIN_VMULEUB,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI,
RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMULESB, ALTIVEC_BUILTIN_VMULESB,
@@ -2226,11 +2228,13 @@ const struct altivec_builtin_types
altivec_overloaded_builtins[] = {
RS6000_BTI_V8HI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
RS6000_BTI_unsigned_V8HI, 0 },
- { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
+
+ { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSW,
RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
- { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUH,
+ { ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUW,
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
RS6000_BTI_unsigned_V4SI, 0 },
+
{ ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6b28658..d1a4937 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16332,9 +16332,11 @@ rs6000_gimple_fold_builtin
(gimple_stmt_iterator *gsi)
/* Even element flavors of vec_mul (signed). */
case ALTIVEC_BUILTIN_VMULESB:
case ALTIVEC_BUILTIN_VMULESH:
+ case ALTIVEC_BUILTIN_VMULESW:
/* Even element flavors of vec_mul (unsigned). */
case ALTIVEC_BUILTIN_VMULEUB:
case ALTIVEC_BUILTIN_VMULEUH:
+ case ALTIVEC_BUILTIN_VMULEUW:
{
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
@@ -16347,9 +16349,11 @@ rs6000_gimple_fold_builtin
(gimple_stmt_iterator *gsi)
/* Odd element flavors of vec_mul (signed). */
case ALTIVEC_BUILTIN_VMULOSB:
case ALTIVEC_BUILTIN_VMULOSH:
+ case ALTIVEC_BUILTIN_VMULOSW:
/* Odd element flavors of vec_mul (unsigned). */
case ALTIVEC_BUILTIN_VMULOUB:
case ALTIVEC_BUILTIN_VMULOUH:
+ case ALTIVEC_BUILTIN_VMULOUW:
{
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
@@ -17965,8 +17969,10 @@ builtin_function_type (machine_mode mode_ret,
machine_mode mode_arg0,
/* unsigned 2 argument functions. */
case ALTIVEC_BUILTIN_VMULEUB:
case ALTIVEC_BUILTIN_VMULEUH:
+ case ALTIVEC_BUILTIN_VMULEUW:
case ALTIVEC_BUILTIN_VMULOUB:
case ALTIVEC_BUILTIN_VMULOUH:
+ case ALTIVEC_BUILTIN_VMULOUW:
case CRYPTO_BUILTIN_VCIPHER:
case CRYPTO_BUILTIN_VCIPHERLAST:
case CRYPTO_BUILTIN_VNCIPHER:
--
1.9.1