This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, rs6000] Add builtin support for power8 32-bit Altivec multiply insns
- From: Peter Bergner <bergner at vnet dot ibm dot com>
- To: David Edelsohn <dje dot gcc at gmail dot com>
- Cc: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Thu, 01 Aug 2013 13:21:00 -0500
- Subject: [PATCH, rs6000] Add builtin support for power8 32-bit Altivec multiply insns
This patch adds builtin support for the new 32-bit Altivec multiply
instructions that were added in ISA 2.07 (i.e., POWER8).
This passed bootstrap and regtesting with no errors. Ok for mainline?
P.S. I will be working on a followup patch sometime later that will attempt
to generate these new instructions automatically.
Peter
gcc/
* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add support
for new power8 P8V_BUILTIN_VEC_VMULESW builtin.
(P8V_BUILTIN_VEC_VMULEUW): Likewise.
(P8V_BUILTIN_VEC_VMULOSW): Likewise.
(P8V_BUILTIN_VEC_VMULOUW): Likewise.
(P8V_BUILTIN_VEC_VMULUWM): Likewise.
* config/rs6000/rs6000-builtin.def (vmulesw): Add support for new
power8 builtin.
(vmuleuw): Likewise.
(vmulosw): Likewise.
(vmulouw): Likewise.
(vmuluwm): Likewise.
* config/rs6000/altivec.md (UNSPEC_VMULEUW): New unspec enum value.
(UNSPEC_VMULESW): Likewise.
(UNSPEC_VMULOUW): Likewise.
(UNSPEC_VMULOSW): Likewise.
(UNSPEC_VMULUWM): Likewise.
(vec_widen_umult_even_v4si): New define_insn.
(vec_widen_smult_even_v4si): Likewise.
(vec_widen_umult_odd_v4si): Likewise.
(vec_widen_smult_odd_v4si): Likewise.
(altivec_vmuluwm): Likewise.
* config/rs6000/altivec.h (vec_vmulesw): Add define to export power8
builtin function.
(vec_vmuleuw): Likewise.
(vec_vmulosw): Likewise.
(vec_vmulouw): Likewise.
(vec_vmuluwm): Likewise.
gcc/testsuite/
* gcc.target/powerpc/p8vector-builtin-8.c: New.
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c (revision 201409)
+++ gcc/config/rs6000/rs6000-c.c (working copy)
@@ -1752,6 +1752,10 @@ const struct altivec_builtin_types altiv
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMULESH, ALTIVEC_BUILTIN_VMULESH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_VMULESW, P8V_BUILTIN_VMULESW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMULEUW, P8V_BUILTIN_VMULEUW,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOUB,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSB,
@@ -1760,6 +1764,14 @@ const struct altivec_builtin_types altiv
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_MULO, ALTIVEC_BUILTIN_VMULOSH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_VMULOSW, P8V_BUILTIN_VMULOSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMULOUW, P8V_BUILTIN_VMULOUW,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMULUWM, P8V_BUILTIN_VMULUWM,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMULUWM, P8V_BUILTIN_VMULUWM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMULOSH, ALTIVEC_BUILTIN_VMULOSH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_VMULOUH, ALTIVEC_BUILTIN_VMULOUH,
Index: gcc/config/rs6000/rs6000-builtin.def
===================================================================
--- gcc/config/rs6000/rs6000-builtin.def (revision 201409)
+++ gcc/config/rs6000/rs6000-builtin.def (working copy)
@@ -1315,6 +1315,11 @@ BU_P8V_AV_2 (VMINUD, "vminud", CONST, u
BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
+BU_P8V_AV_2 (VMULESW, "vmulesw", CONST, vec_widen_smult_even_v4si)
+BU_P8V_AV_2 (VMULEUW, "vmuleuw", CONST, vec_widen_umult_even_v4si)
+BU_P8V_AV_2 (VMULOSW, "vmulosw", CONST, vec_widen_smult_odd_v4si)
+BU_P8V_AV_2 (VMULOUW, "vmulouw", CONST, vec_widen_umult_odd_v4si)
+BU_P8V_AV_2 (VMULUWM, "vmuluwm", CONST, altivec_vmuluwm)
BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
@@ -1382,6 +1387,11 @@ BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
+BU_P8V_OVERLOAD_2 (VMULESW, "vmulesw")
+BU_P8V_OVERLOAD_2 (VMULEUW, "vmuleuw")
+BU_P8V_OVERLOAD_2 (VMULOSW, "vmulosw")
+BU_P8V_OVERLOAD_2 (VMULOUW, "vmulouw")
+BU_P8V_OVERLOAD_2 (VMULUWM, "vmuluwm")
BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
Index: gcc/config/rs6000/altivec.md
===================================================================
--- gcc/config/rs6000/altivec.md (revision 201409)
+++ gcc/config/rs6000/altivec.md (working copy)
@@ -37,10 +37,15 @@ (define_c_enum "unspec"
UNSPEC_VMULESB
UNSPEC_VMULEUH
UNSPEC_VMULESH
+ UNSPEC_VMULEUW
+ UNSPEC_VMULESW
UNSPEC_VMULOUB
UNSPEC_VMULOSB
UNSPEC_VMULOUH
UNSPEC_VMULOSH
+ UNSPEC_VMULOUW
+ UNSPEC_VMULOSW
+ UNSPEC_VMULUWM
UNSPEC_VPKPX
UNSPEC_VPACK_SIGN_SIGN_SAT
UNSPEC_VPACK_SIGN_UNS_SAT
@@ -1003,6 +1008,24 @@ (define_insn "vec_widen_smult_even_v8hi"
"vmulesh %0,%1,%2"
[(set_attr "type" "veccomplex")])
+(define_insn "vec_widen_umult_even_v4si"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULEUW))]
+ "TARGET_P8_VECTOR"
+ "vmuleuw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "vec_widen_smult_even_v4si"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULESW))]
+ "TARGET_P8_VECTOR"
+ "vmulesw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
(define_insn "vec_widen_umult_odd_v16qi"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1039,6 +1062,32 @@ (define_insn "vec_widen_smult_odd_v8hi"
"vmulosh %0,%1,%2"
[(set_attr "type" "veccomplex")])
+(define_insn "vec_widen_umult_odd_v4si"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULOUW))]
+ "TARGET_P8_VECTOR"
+ "vmulouw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "vec_widen_smult_odd_v4si"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULOSW))]
+ "TARGET_P8_VECTOR"
+ "vmulosw %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
+
+(define_insn "altivec_vmuluwm"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v")]
+ UNSPEC_VMULUWM))]
+ "TARGET_P8_VECTOR"
+ "vmuluwm %0,%1,%2"
+ [(set_attr "type" "veccomplex")])
;; Vector pack/unpack
(define_insn "altivec_vpkpx"
Index: gcc/config/rs6000/altivec.h
===================================================================
--- gcc/config/rs6000/altivec.h (revision 201409)
+++ gcc/config/rs6000/altivec.h (working copy)
@@ -149,10 +149,15 @@
#define vec_vmulesh __builtin_vec_vmulesh
#define vec_vmuleuh __builtin_vec_vmuleuh
#define vec_vmuleub __builtin_vec_vmuleub
+#define vec_vmulesw __builtin_vec_vmulesw
+#define vec_vmuleuw __builtin_vec_vmuleuw
#define vec_vmulosh __builtin_vec_vmulosh
#define vec_vmulouh __builtin_vec_vmulouh
#define vec_vmulosb __builtin_vec_vmulosb
#define vec_vmuloub __builtin_vec_vmuloub
+#define vec_vmulosw __builtin_vec_vmulosw
+#define vec_vmulouw __builtin_vec_vmulouw
+#define vec_vmuluwm __builtin_vec_vmuluwm
#define vec_nmsub __builtin_vec_nmsub
#define vec_packpx __builtin_vec_packpx
#define vec_vpkswss __builtin_vec_vpkswss
Index: gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c (revision 0)
@@ -0,0 +1,78 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+#include <altivec.h>
+
+typedef vector long long vll_sign;
+typedef vector unsigned long long vll_uns;
+
+typedef vector int vi_sign;
+typedef vector unsigned int vi_uns;
+
+vll_sign vec_mul_esw_1 (vi_sign a, vi_sign b)
+{
+ return __builtin_vec_vmulesw (a, b);
+}
+
+vll_sign vec_mul_esw_2 (vi_sign a, vi_sign b)
+{
+ return vec_vmulesw (a, b);
+}
+
+vll_uns vec_mul_euw_1 (vi_uns a, vi_uns b)
+{
+ return __builtin_vec_vmuleuw (a, b);
+}
+
+vll_uns vec_mul_euw_2 (vi_uns a, vi_uns b)
+{
+ return vec_vmuleuw (a, b);
+}
+
+vll_sign vec_mul_osw_1 (vi_sign a, vi_sign b)
+{
+ return __builtin_vec_vmulosw (a, b);
+}
+
+vll_sign vec_mul_osw_2 (vi_sign a, vi_sign b)
+{
+ return vec_vmulosw (a, b);
+}
+
+vll_uns vec_mul_ouw_1 (vi_uns a, vi_uns b)
+{
+ return __builtin_vec_vmulouw (a, b);
+}
+
+vll_uns vec_mul_ouw_2 (vi_uns a, vi_uns b)
+{
+ return vec_vmulouw (a, b);
+}
+
+vi_sign vec_mul_uwm_1 (vi_sign a, vi_sign b)
+{
+ return __builtin_vec_vmuluwm (a, b);
+}
+
+vi_sign vec_mul_uwm_2 (vi_sign a, vi_sign b)
+{
+ return vec_vmuluwm (a, b);
+}
+
+vi_uns vec_mul_uwm_3 (vi_uns a, vi_uns b)
+{
+ return __builtin_vec_vmuluwm (a, b);
+}
+
+vi_uns vec_mul_uwm_4 (vi_uns a, vi_uns b)
+{
+ return vec_vmuluwm (a, b);
+}
+
+/* { dg-final { scan-assembler-times "vmulesw" 2 } } */
+/* { dg-final { scan-assembler-times "vmuleuw" 2 } } */
+/* { dg-final { scan-assembler-times "vmulosw" 2 } } */
+/* { dg-final { scan-assembler-times "vmulouw" 2 } } */
+/* { dg-final { scan-assembler-times "vmuluwm" 4 } } */