[gcc(refs/users/meissner/heads/work043)] Load V2DF vector constants with XXSPLTIDP

Tue Mar 30 20:28:57 GMT 2021

https://gcc.gnu.org/g:0f1494f41ae272ef267c0de2a5b9382b2ca0aacd

commit 0f1494f41ae272ef267c0de2a5b9382b2ca0aacd
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Tue Mar 30 16:28:19 2021 -0400

    Load V2DF vector constants with XXSPLTIDP
    
    This patch adds support for loading a V2DF vector constant in which both
    elements are the same DF constant, when that constant can be loaded with
    the XXSPLTIDP instruction.
    
    gcc/
    2021-03-30  Michael Meissner  <meissner@linux.ibm.com>
    
            * config/rs6000/predicates.md (xxspltidp_operand): Add V2DF
            support.
            * config/rs6000/rs6000.c (rs6000_expand_vector_init): If we can,
            load up a V2DF vector constant with VEC_DUPLICATE.
            * config/rs6000/vsx.md (V2DF constant splitter): New splitter.
            (vsx_mov<mode>_64bit): Support loading V2DF constants via the
            XXSPLTIDP instruction.
            (*vsx_mov<mode>_32bit): Support loading V2DF constants via the
            XXSPLTIDP instruction.
            (*vsx_splat_v2df_const): New insn.

Diff:
---
 gcc/config/rs6000/predicates.md | 42 ++++++++++++++++-----
 gcc/config/rs6000/rs6000.c      |  9 +++++
 gcc/config/rs6000/vsx.md        | 81 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 109 insertions(+), 23 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 089a2597ce7..b5b17a8b7c6 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -565,25 +565,47 @@
   (ior (match_operand 0 "vsx_register_operand")
        (match_operand 0 "reg_or_logical_cint_operand")))
 
-;; Return 1 if operand is a SF/DF CONST_DOUBLE that can be loaded via the ISA
-;; 3.1 XXSPLTIDP instruction.  This function has to check, if the immediate
-;; specifies a single-precision denormal value (i.e., bits 1:8 equal to 0 and
-;; bits 9:31 not equal to 0), since the result is undefined in the hardware.
+;; Return 1 if operand is a SF/DF CONST_DOUBLE, or a V2DF CONST_VECTOR or
+;; VEC_DUPLICATE, that can be loaded via the ISA 3.1 XXSPLTIDP instruction.
+;; This predicate has to check whether the immediate specifies a
+;; single-precision denormal value (i.e., bits 1:8 equal to 0 and bits 9:31
+;; not equal to 0), since the result is undefined in the hardware.
 (define_predicate "xxspltidp_operand"
-  (match_code "const_double")
+  (match_code "const_double,const_vector,vec_duplicate")
 {
   long value;
+  rtx element;
 
   if (!TARGET_POWER10 || !TARGET_VSX)
     return 0;
 
-  if (mode != SFmode && mode != DFmode)
+  if (mode == V2DFmode)
+    {
+      /* Handle VEC_DUPLICATE and CONST_VECTOR.  */
+      if (GET_CODE (op) == VEC_DUPLICATE)
+	element = XEXP (op, 0);
+
+      else if (GET_CODE (op) == CONST_VECTOR)
+	{
+	  element = CONST_VECTOR_ELT (op, 0);
+	  if (!rtx_equal_p (element, CONST_VECTOR_ELT (op, 1)))
+	    return 0;
+	}
+
+      else
+	return 0;
+    }
+
+  else if (mode == SFmode || mode == DFmode)
+    element = op;
+
+  else
     return 0;
 
-  if (!CONST_DOUBLE_P (op))
+  if (!CONST_DOUBLE_P (element))
     return 0;
 
-  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (element);
   if (!exact_real_truncate (SFmode, rv))
     return 0;
 
@@ -690,7 +712,9 @@
       if (zero_constant (op, mode) || all_ones_constant (op, mode))
 	return true;
 
-      if (TARGET_POWER10 && xxspltiw_constant_p (op, mode, &constant))
+      if (TARGET_POWER10
+	  && (xxspltiw_constant_p (op, mode, &constant)
+	      || xxspltidp_operand (op, mode)))
 	return true;
 
       if (TARGET_P9_VECTOR
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9d5fe4f2bd4..7ce1990b883 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6694,6 +6694,15 @@ rs6000_expand_vector_init (rtx target, rtx vals)
 	  return;
 	}
 
+      /* Emit a VEC_DUPLICATE for a DF constant XXSPLTIDP can load.  */
+      if (TARGET_POWER10 && all_same && mode == V2DFmode
+	  && xxspltidp_operand (XVECEXP (vals, 0, 0), DFmode))
+	{
+	  rtx dup = gen_rtx_VEC_DUPLICATE (mode, XVECEXP (vals, 0, 0));
+	  emit_insn (gen_rtx_SET (target, dup));
+	  return;
+	}
+
       rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
       bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
       if ((int_vector_p || TARGET_VSX) && all_const_zero)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4404407d330..5acd12d7fb4 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1158,6 +1158,20 @@
   [(set_attr "type" "vecperm")
    (set_attr "length" "8")])
 
+;; Split V2DF CONST_VECTOR constants that can be loaded with XXSPLTIDP into a
+;; VEC_DUPLICATE of element 0.  The xxspltidp_operand predicate matches either
+;; VEC_DUPLICATE or CONST_VECTOR, but the VEC_DUPLICATE case needs no split.
+;; The all-zero vector is excluded; the move pattern handles setting it to 0.
+(define_split
+  [(set (match_operand:V2DF 0 "vsx_register_operand")
+	(match_operand:V2DF 1 "xxspltidp_operand"))]
+  "TARGET_POWER10 && TARGET_VSX && GET_CODE (operands[1]) == CONST_VECTOR
+   && operands[1] != CONST0_RTX (V2DFmode)"
+  [(set (match_dup 0)
+	(vec_duplicate:V2DF (match_dup 2)))]
+{
+  operands[2] = CONST_VECTOR_ELT (operands[1], 0);
+})
 
 ;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
 ;; or Altivec VSPLITW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
@@ -1167,17 +1181,17 @@
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
-;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
+;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)  XXSPLTIDP
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                 ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
-                ?wa,       v,         <??r>,     wZ,        v")
+                ?wa,       v,         <??r>,     wZ,        v,         wa")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        we,        r,         r,
                 wQ,        Y,         r,         r,         wE,        jwM,
-                ?jwM,      W,         <nW>,      v,         wZ"))]
+                ?jwM,      W,         <nW>,      v,         wZ,        eF"))]
 
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
    && (register_operand (operands[0], <MODE>mode) 
@@ -1188,36 +1202,44 @@
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
                 store,     load,      store,     *,         vecsimple, vecsimple,
-                vecsimple, *,         *,         vecstore,  vecload")
+                vecsimple, *,         *,         vecstore,  vecload,   vecperm")
    (set_attr "num_insns"
                "*,         *,         *,         2,         *,         2,
                 2,         2,         2,         2,         *,         *,
-                *,         5,         2,         *,         *")
+                *,         5,         2,         *,         *,         *")
    (set_attr "max_prefixed_insns"
                "*,         *,         *,         *,         *,         2,
                 2,         2,         2,         2,         *,         *,
-                *,         *,         *,         *,         *")
+                *,         *,         *,         *,         *,         *")
    (set_attr "length"
                "*,         *,         *,         8,         *,         8,
                 8,         8,         8,         8,         *,         *,
-                *,         20,        8,         *,         *")
+                *,         20,        8,         *,         *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                 *,         *,         *,         *,         p9v,       *,
-                <VSisa>,   *,         *,         *,         *")])
+                <VSisa>,   *,         *,         *,         *,         p10")
+   (set_attr "prefixed"
+               "*,         *,         *,         *,         *,         *,
+                *,         *,         *,         *,         *,         *,
+                *,         *,         *,         *,         *,         yes")
+   (set_attr "prefixed_prepend_p"
+               "*,         *,         *,         *,         *,         *,
+                *,         *,         *,         *,         *,         *,
+                *,         *,         *,         *,         *,         no")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
-;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
+;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const  XXSPLTIDP
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
-                wa,        v,         ?wa,       v,         <??r>,
+                wa,        v,         ?wa,       v,         <??r>,     wa,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        Y,         r,         r,
-                wE,        jwM,       ?jwM,      W,         <nW>,
+                wE,        jwM,       ?jwM,      W,         <nW>,      eF,
                 v,         wZ"))]
 
   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1228,15 +1250,23 @@
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
-                vecsimple, vecsimple, vecsimple, *,         *,
+                vecsimple, vecsimple, vecsimple, *,         *,        vecperm,
                 vecstore,  vecload")
    (set_attr "length"
                "*,         *,         *,         16,        16,        16,
-                *,         *,         *,         20,        16,
+                *,         *,         *,         20,        16,        *,
                 *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
-                p9v,       *,         <VSisa>,   *,         *,
+                p9v,       *,         <VSisa>,   *,         *,         p10,
+                *,         *")
+   (set_attr "prefixed"
+               "*,         *,         *,         *,         *,         *,
+                *,         *,         *,         *,         *,         yes,
+                *,         *")
+   (set_attr "prefixed_prepend_p"
+               "*,         *,         *,         *,         *,         *,
+                *,         *,         *,         *,         *,         no,
                 *,         *")])
 
 ;; Explicit  load/store expanders for the builtin functions
@@ -4471,6 +4501,9 @@
   rtx op1 = operands[1];
   if (MEM_P (op1))
     operands[1] = rs6000_force_indexed_or_indirect_mem (op1);
+  else if (TARGET_POWER10 && <MODE>mode == V2DFmode
+	   && xxspltidp_operand (op1, DFmode))
+    ;  /* Keep the constant operand unchanged.  */
   else if (!REG_P (op1))
     op1 = force_reg (<VSX_D:VS_scalar>mode, op1);
 })
@@ -4493,6 +4526,26 @@
   "lxvdsx %x0,%y1"
   [(set_attr "type" "vecload")])
 
+;; Load a V2DF splat constant via ISA 3.1 XXSPLTIDP, or XXSPLTIB for 0.0
+(define_insn "*vsx_splat_v2df_const"
+  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,wa")
+	(vec_duplicate:V2DF (match_operand:DF 1 "xxspltidp_operand" "j,eF")))]
+  "TARGET_POWER10"
+{
+  rtx op1 = operands[1];
+  if (op1 == CONST0_RTX (DFmode))
+    return "xxspltib %x0,0";
+
+  long value;
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op1);
+  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
+  operands[2] = GEN_INT (value);
+  return "xxspltidp %x0,%2";
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,yes")
+   (set_attr "prefixed_prepend_p" "*,no")])
+
 ;; V4SI splat support
 (define_insn "vsx_splat_v4si"
   [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we")