This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, rs6000] Add support for vec_extract_fp_from_shorth() and vec_extract_fp_from_short
- From: Carl Love <cel at us dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org, David Edelsohn <dje dot gcc at gmail dot com>, Segher Boessenkool <segher at kernel dot crashing dot org>
- Cc: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>, cel at us dot ibm dot com
- Date: Wed, 12 Jul 2017 16:08:20 -0700
- Subject: [PATCH, rs6000] Add support for vec_extract_fp_from_shorth() and vec_extract_fp_from_short
- Authentication-results: sourceware.org; auth=none
GCC Maintainers:
The following patch adds support for the vec_extract_fp_from_shorth()
and vec_extract_fp_from_shortl() built-in functions. The patch has been
tested on powerpc64le-unknown-linux-gnu (Power 8 LE) and
powerpc64le-unknown-linux-gnu (Power 9 LE). The new test is reported as
1 unsupported test on Power 8 and produces 2 test passes on Power 9.
Please let me know if the following patch is acceptable. Thanks.
Carl Love
----------------------------------------------------
gcc/ChangeLog:
2017-07-12 Carl Love <cel@us.ibm.com>
* config/rs6000/rs6000-c.c: Add support for built-in functions
vector float vec_extract_fp_from_shorth (vector unsigned short);
vector float vec_extract_fp_from_shortl (vector unsigned short);
* config/rs6000/altivec.h (vec_extract_fp_from_shorth,
vec_extract_fp_from_shortl): Add defines for the two builtins.
* config/rs6000/rs6000-builtin.def (VEXTRACT_FP_FROM_SHORTH,
VEXTRACT_FP_FROM_SHORTL): Add BU_P9V_OVERLOAD_1 and BU_P9V_VSX_1
new builtins.
* config/rs6000/vsx.md (vsx_xvcvhpsp): Add define_insn.
(vextract_fp_from_shorth, vextract_fp_from_shortl): Add define_expands.
* doc/extend.texi: Update the built-in documentation file for the
new built-in functions.
gcc/testsuite/ChangeLog:
2017-07-12 Carl Love <cel@us.ibm.com>
* gcc.target/powerpc/builtins-3-p9-runnable.c: Add new test file for
the new built-ins.
---
gcc/config/rs6000/altivec.h | 3 +
gcc/config/rs6000/rs6000-builtin.def | 5 ++
gcc/config/rs6000/rs6000-c.c | 5 ++
gcc/config/rs6000/vsx.md | 70 +++++++++++++++++++++-
gcc/doc/extend.texi | 3 +
.../gcc.target/powerpc/builtins-3-p9-runnable.c | 36 +++++++++++
6 files changed, 121 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 71cdca5..4d34a97 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -449,6 +449,9 @@
#define vec_insert_exp __builtin_vec_insert_exp
#define vec_test_data_class __builtin_vec_test_data_class
+#define vec_extract_fp_from_shorth __builtin_vec_vextract_fp_from_shorth
+#define vec_extract_fp_from_shortl __builtin_vec_vextract_fp_from_shortl
+
#define scalar_extract_exp __builtin_vec_scalar_extract_exp
#define scalar_extract_sig __builtin_vec_scalar_extract_sig
#define scalar_insert_exp __builtin_vec_scalar_insert_exp
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index e098e1c..400189e 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2057,6 +2057,9 @@ BU_P9V_OVERLOAD_1 (VSTDCNSP, "scalar_test_neg_sp")
BU_P9V_OVERLOAD_1 (REVB, "revb")
+BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth")
+BU_P9V_OVERLOAD_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl")
+
/* ISA 3.0 vector scalar overloaded 2 argument functions. */
BU_P9V_OVERLOAD_2 (VSIEDP, "scalar_insert_exp")
@@ -2074,6 +2077,8 @@ BU_P9V_VSX_1 (VEEDP, "extract_exp_dp", CONST, xvxexpdp)
BU_P9V_VSX_1 (VEESP, "extract_exp_sp", CONST, xvxexpsp)
BU_P9V_VSX_1 (VESDP, "extract_sig_dp", CONST, xvxsigdp)
BU_P9V_VSX_1 (VESSP, "extract_sig_sp", CONST, xvxsigsp)
+BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTH, "vextract_fp_from_shorth", CONST, vextract_fp_from_shorth)
+BU_P9V_VSX_1 (VEXTRACT_FP_FROM_SHORTL, "vextract_fp_from_shortl", CONST, vextract_fp_from_shortl)
/* 2 argument vsx vector functions added in ISA 3.0 (power9). */
BU_P9V_VSX_2 (VIEDP, "insert_exp_dp", CONST, xviexpdp)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index c769442..a1d09ba 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -5164,6 +5164,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B,
RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 },
+ { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTH, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTH,
+ RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P9V_BUILTIN_VEC_VEXTRACT_FP_FROM_SHORTL, P9V_BUILTIN_VEXTRACT_FP_FROM_SHORTL,
+ RS6000_BTI_V4SF, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
{ P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX,
RS6000_BTI_INTQI, RS6000_BTI_UINTSI,
RS6000_BTI_V16QI, 0 },
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 2ddfae5..573eb3f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -326,6 +326,7 @@
UNSPEC_VSX_CVDPSXWS
UNSPEC_VSX_CVDPUXWS
UNSPEC_VSX_CVSPDP
+ UNSPEC_VSX_CVHPSP
UNSPEC_VSX_CVSPDPN
UNSPEC_VSX_CVDPSPN
UNSPEC_VSX_CVSXWDP
@@ -367,6 +368,8 @@
UNSPEC_VSX_SIEXPDP
UNSPEC_VSX_SCMPEXPDP
UNSPEC_VSX_STSTDC
+ UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
+ UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
UNSPEC_VSX_VXEXP
UNSPEC_VSX_VXSIG
UNSPEC_VSX_VIEXP
@@ -1745,6 +1748,15 @@
"xscvspdp %x0,%x1"
[(set_attr "type" "fp")])
+;; Generate xvcvhpsp instruction (ISA 3.0): convert F16 inputs to V4SF.
+(define_insn "vsx_xvcvhpsp"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:V8HI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVHPSP))]
+ "TARGET_P9_VECTOR"
+ "xvcvhpsp %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
@@ -4419,7 +4431,63 @@
"xxinsertw %x0,%x1,%3"
[(set_attr "type" "vecperm")])
-
+;; Generate vector extract four float 32 values from left four elements
+;; of eight element vector of float 16 values.  The expansion builds a
+;; byte permute mask, permutes operand 1 into a temporary and converts
+;; the temporary with vsx_xvcvhpsp.
+(define_expand "vextract_fp_from_shorth"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
+ "TARGET_P9_VECTOR"
+{
+ /* Byte selectors for the permute mask: source bytes 0-7 are placed in
+ pairs, each pair followed by two filler selectors.
+ NOTE(review): the last selector is 8 while every other filler
+ selector is 0 (and vextract_fp_from_shortl ends in 0) -- confirm
+ whether this is intentional. */
+ int vals[16] = {0, 1, 0 ,0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7, 0, 8};
+ int i;
+
+ rtx rtx_tmp = gen_reg_rtx (V8HImode);
+ rtx rvals[16];
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v;
+
+ /* Wrap each selector in a constant rtx so the mask can be built with
+ vec_init. */
+ for (i = 0; i < 16; i++)
+ rvals[i] = GEN_INT (vals[i]);
+
+ /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
+ inputs in half words 1,3,5,7 (IBM numbering). Use a vector permute
+ (gen_altivec_vperm_v8hi) to move src half words 0,1,2,3 for the
+ conversion instruction. */
+ v = gen_rtvec_v (16, rvals);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v8hi (rtx_tmp, operands[1], operands[1], mask));
+ emit_insn (gen_vsx_xvcvhpsp (operands[0], rtx_tmp));
+ DONE;
+})
+
+;; Generate vector extract four float 32 values from right four elements
+;; of eight element vector of float 16 values.  The expansion builds a
+;; byte permute mask, permutes operand 1 into a temporary and converts
+;; the temporary with vsx_xvcvhpsp.
+(define_expand "vextract_fp_from_shortl"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (unspec:V4SF [(match_operand:V8HI 1 "register_operand" "v")]
+ UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
+ "TARGET_P9_VECTOR"
+{
+ /* Byte selectors for the permute mask: source bytes 8-15 are placed in
+ pairs, each pair followed by two filler selectors of 0. */
+ int vals[16] = {8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15, 0, 0};
+ int i;
+ rtx rtx_tmp = gen_reg_rtx (V8HImode);
+ rtx rvals[16];
+ rtx mask = gen_reg_rtx (V16QImode);
+ rtvec v;
+
+ /* Wrap each selector in a constant rtx so the mask can be built with
+ vec_init. */
+ for (i = 0; i < 16; i++)
+ rvals[i] = GEN_INT (vals[i]);
+
+ /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
+ inputs in half words 1,3,5,7 (IBM numbering). Use a vector permute
+ (gen_altivec_vperm_v8hi) to move src half words 4,5,6,7 for the
+ conversion instruction. */
+ v = gen_rtvec_v (16, rvals);
+ emit_insn (gen_vec_initv16qi (mask, gen_rtx_PARALLEL (V16QImode, v)));
+ emit_insn (gen_altivec_vperm_v8hi (rtx_tmp, operands[1], operands[1], mask));
+ emit_insn (gen_vsx_xvcvhpsp (operands[0], rtx_tmp));
+ DONE;
+})
+
;; Support for ISA 3.0 vector byte reverse
;; Swap all bytes with in a vector
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 530a82d..0135fc7 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18258,6 +18258,9 @@ vector bool short vec_cmpne (vector bool short, vector bool short);
vector bool int vec_cmpne (vector bool int, vector bool int);
vector bool long long vec_cmpne (vector bool long long, vector bool long long);
+vector float vec_extract_fp_from_shorth (vector unsigned short);
+vector float vec_extract_fp_from_shortl (vector unsigned short);
+
vector long long vec_vctz (vector long long);
vector unsigned long long vec_vctz (vector unsigned long long);
vector int vec_vctz (vector int);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c
new file mode 100644
index 0000000..ce1a2ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9-runnable.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { powerpc64*-*-* && { lp64 && p9vector_hw } } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2 -mupper-regs-di" } */
+
+#include <altivec.h> // vector
+
+void abort (void);
+
+/* Run-time check of vec_extract_fp_from_shorth and
+ vec_extract_fp_from_shortl: each call must turn four of the eight
+ float 16 inputs into the expected float 32 values. Aborts on the
+ first mismatching element. */
+int main() {
+ int i;
+ vector float vfr, vfexpt;
+ vector unsigned short vusha;
+
+ /* IEEE half-precision bit patterns for
+ 1.0, -2.0, 0.0, 8.5, 1.5, 0.5, 1.25, -0.25
+ i.e. 0x3C00, 0xC000, 0x0000, 0x4840, 0x3E00, 0x3800, 0x3D00, 0xB400.
+ Several literals are written with only 15 binary digits; the
+ omitted leading digit is the zero sign bit. */
+ vusha = (vector unsigned short){0B011110000000000, 0B1100000000000000,
+ 0B000000000000000, 0B0100100001000000,
+ 0B011111000000000, 0B0011100000000000,
+ 0B011110100000000, 0B1011010000000000};
+
+ /* The "h" variant is expected to yield elements 0-3 of vusha. */
+ vfexpt = (vector float){1.0, -2.0, 0.0, 8.5};
+ vfr = vec_extract_fp_from_shorth(vusha);
+
+ for (i=0; i<4; i++) {
+ if (vfr[i] != vfexpt[i])
+ abort();
+ }
+
+ /* The "l" variant is expected to yield elements 4-7 of vusha. */
+ vfexpt = (vector float){1.5, 0.5, 1.25, -0.25};
+ vfr = vec_extract_fp_from_shortl(vusha);
+
+ for (i=0; i<4; i++) {
+ if (vfr[i] != vfexpt[i])
+ abort();
+ }
+}
--
1.9.1