[gcc(refs/users/meissner/heads/work054)] Generate LXVKQ on power10.
Michael Meissner
meissner@gcc.gnu.org
Fri Jun 4 01:25:51 GMT 2021
https://gcc.gnu.org/g:ac51d8244f28d7c94edb5ee5595a62cd9de9ea7d
commit ac51d8244f28d7c94edb5ee5595a62cd9de9ea7d
Author: Michael Meissner <meissner@linux.ibm.com>
Date: Thu Jun 3 21:25:35 2021 -0400
Generate LXVKQ on power10.
This patch generates the LXVKQ instruction to load certain IEEE 128-bit
constants.
gcc/
2021-06-03 Michael Meissner <meissner@linux.ibm.com>
* config/rs6000/constraints.md (eQ): New constraint.
* config/rs6000/predicates.md (easy_fp_constant): If the constant
can be loaded with LXVKQ, it is easy.
(lxvkq_operand): New predicate.
* config/rs6000/rs6000-protos.h (lxvkq_constant_p): New
declaration.
* config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add -mlxvkq.
(POWERPC_MASKS): Add -mlxvkq.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
support for -mlxvkq.
(lxvkq_constant_p): New function.
(output_vec_const_move): Add support for generating lxvkq.
(rs6000_output_move_128bit): Add support for generating lxvkq.
(rs6000_opt_masks): Add -mlxvkq.
* config/rs6000/rs6000.opt (-mlxvkq): New option.
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support to
generate lxvkq.
(vsx_mov<mode>_32bit): Add support to generate lxvkq.
gcc/testsuite/
2021-06-03 Michael Meissner <meissner@linux.ibm.com>
* gcc.target/powerpc/float128-constant.c: New test.
Diff:
---
gcc/config/rs6000/constraints.md | 5 +
gcc/config/rs6000/predicates.md | 14 ++
gcc/config/rs6000/rs6000-cpus.def | 2 +
gcc/config/rs6000/rs6000-protos.h | 1 +
gcc/config/rs6000/rs6000.c | 103 ++++++++++++++-
gcc/config/rs6000/rs6000.opt | 4 +
gcc/config/rs6000/vsx.md | 28 ++--
.../gcc.target/powerpc/float128-constant.c | 144 +++++++++++++++++++++
8 files changed, 286 insertions(+), 15 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index d665e2a94db..d14ce98e9ac 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -224,6 +224,11 @@
"A signed 34-bit integer constant if prefixed instructions are supported."
(match_operand 0 "cint34_operand"))
+;; KF/TF scalar that can be loaded with LXVKQ
+(define_constraint "eQ"
+ "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+ (match_operand 0 "lxvkq_operand"))
+
;; Floating-point constraints. These two are defined so that insn
;; length attributes can be calculated exactly.
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index a81a26cca74..9584663ce16 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -611,6 +611,11 @@
if (xxsplti32dx_operand (op, mode))
return 1;
+ /* If we have the ISA 3.1 LXVKQ instruction, see if the constant can be loaded
+ with that instruction. */
+ if (lxvkq_operand (op, mode))
+ return 1;
+
/* Otherwise consider floating point constants hard, so that the
constant gets pushed to memory during the early RTL phases. This
has the advantage that double precision constants that can be
@@ -699,6 +704,15 @@
return xxsplti32dx_constant_p (op, mode, &high, &low);
})
+;; Return 1 if the operand is an IEEE 128-bit special constant that can be
+;; loaded with the LXVKQ instruction.
+(define_predicate "lxvkq_operand"
+ (match_code "const_double")
+{
+ int immediate = 0;
+ return lxvkq_constant_p (op, mode, &immediate);
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index b8e787714e3..c0d89434fcd 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -83,6 +83,7 @@
#define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \
| OPTION_MASK_POWER10 \
| OTHER_POWER10_MASKS \
+ | OPTION_MASK_LXVKQ \
| OPTION_MASK_P10_FUSION \
| OPTION_MASK_P10_FUSION_LD_CMPI \
| OPTION_MASK_P10_FUSION_2LOGICAL \
@@ -146,6 +147,7 @@
| OPTION_MASK_P10_FUSION_2ADD \
| OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
+ | OPTION_MASK_LXVKQ \
| OPTION_MASK_MFCRF \
| OPTION_MASK_MMA \
| OPTION_MASK_MODULO \
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 9e60f027bd5..600d64424f5 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -35,6 +35,7 @@ extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
HOST_WIDE_INT *);
+extern bool lxvkq_constant_p (rtx, machine_mode, int *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 80b63e12013..9db2d595230 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4511,9 +4511,13 @@ rs6000_option_override_internal (bool global_init_p)
if ((rs6000_isa_flags_explicit & OPTION_MASK_XXSPLTIDP) == 0)
rs6000_isa_flags |= OPTION_MASK_XXSPLTIDP;
+
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_LXVKQ) == 0)
+ rs6000_isa_flags |= OPTION_MASK_LXVKQ;
}
else
- rs6000_isa_flags &= ~(OPTION_MASK_XXSPLTIW
+ rs6000_isa_flags &= ~(OPTION_MASK_LXVKQ
+ | OPTION_MASK_XXSPLTIW
| OPTION_MASK_XXSPLTIDP
| OPTION_MASK_XXSPLTI32DX);
@@ -6722,6 +6726,86 @@ xxsplti32dx_constant_p (rtx op,
return false;
}
+/* Return true if OP, interpreted in MODE, is one of the 18 special values that
+ can be generated with the LXVKQ instruction.
+
+ On success, store the LXVKQ immediate in *IMM_P; otherwise leave it as -1.
+
+ The LXVKQ immediates are:
+ 1 - 7: 1.0 .. 7.0.
+ 8: Positive infinity.
+ 9: Default quiet NaN.
+ 16: -0.0.
+ 17 - 23: -1.0 .. -7.0.
+ 24: Negative infinity. */
+
+bool
+lxvkq_constant_p (rtx op,
+ machine_mode mode,
+ int *imm_p)
+{
+ *imm_p = -1;
+
+ if (!TARGET_LXVKQ)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ if (!FLOAT128_IEEE_P (mode))
+ return false;
+
+ if (!CONST_DOUBLE_P (op))
+ return false;
+
+ /* All of the values generated can be expressed as SFmode values, so if it
+ doesn't fit in SFmode, exit. */
+ const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+ if (!exact_real_truncate (SFmode, rv))
+ return 0;
+
+ /* +/- Infinity is 8/24. */
+ if (REAL_VALUE_ISINF (*rv))
+ {
+ *imm_p = real_isneg (rv) ? 24 : 8;
+ return true;
+ }
+
+ /* A NaN that is not negative is 9. */
+ if (REAL_VALUE_ISNAN (*rv) && !REAL_VALUE_NEGATIVE (*rv))
+ {
+ *imm_p = 9;
+ return true;
+ }
+
+ /* -0.0 is 16. */
+ if (REAL_VALUE_MINUS_ZERO (*rv))
+ {
+ *imm_p = 16;
+ return true;
+ }
+
+ /* The other values are all integers 1..7, and -1..-7. */
+ if (!real_isinteger (rv, mode))
+ return false;
+
+ HOST_WIDE_INT value = real_to_integer (rv);
+ if (value >= 1 && value <= 7)
+ {
+ *imm_p = value;
+ return true;
+ }
+ else if (value >= -7 && value <= -1)
+ {
+ /* Subtraction is used because value is negative. */
+ *imm_p = 16 - value;
+ return true;
+ }
+
+ /* We can't load the value with LXVKQ. */
+ return false;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6738,6 +6822,7 @@ output_vec_const_move (rtx *operands)
bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
int xxspltib_value = 256;
int num_insns = -1;
+ int lxvkq_immediate = 0;
if (zero_constant (vec, mode))
{
@@ -6773,6 +6858,12 @@ output_vec_const_move (rtx *operands)
if (xxsplti32dx_operand (vec, mode))
return "#";
+ if (lxvkq_constant_p (vec, mode, &lxvkq_immediate))
+ {
+ operands[2] = GEN_INT (lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
if (TARGET_P9_VECTOR
&& xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
{
@@ -13457,6 +13548,7 @@ rs6000_output_move_128bit (rtx operands[])
int src_regno;
bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
+ int lxvkq_immediate = 0;
if (REG_P (dest))
{
@@ -13601,6 +13693,14 @@ rs6000_output_move_128bit (rtx operands[])
}
/* Constants. */
+ else if (dest_vmx_p
+ && CONST_DOUBLE_P (src)
+ && lxvkq_constant_p (src, mode, &lxvkq_immediate))
+ {
+ operands[2] = GEN_INT (lxvkq_immediate);
+ return "lxvkq %x0,%2";
+ }
+
else if (dest_regno >= 0
&& (CONST_INT_P (src)
|| CONST_WIDE_INT_P (src)
@@ -24300,6 +24400,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
{ "isel", OPTION_MASK_ISEL, false, true },
+ { "lxvkq", OPTION_MASK_LXVKQ, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", 0, false, true },
{ "mma", OPTION_MASK_MMA, false, true },
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index e28a81d5fdd..e65dd8762a4 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -651,3 +651,7 @@ Generate (do not generate) XXSPLTIDP instructions.
mxxsplti32dx
Target Undocumented Mask(XXSPLTI32DX) Var(rs6000_isa_flags)
Generate (do not generate) XXSPLTI32DX instructions.
+
+mlxvkq
+Target Undocumented Mask(LXVKQ) Var(rs6000_isa_flags)
+Generate (do not generate) LXVKQ instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 254653d31d5..b49d5b44573 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1196,17 +1196,17 @@
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
-;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
+;; VSX 0/-1 VMX const GPR const LVX (VMX) STVX (VMX) LXVKQ
(define_insn "vsx_mov<mode>_64bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, r, we, ?wQ,
?&r, ??r, ??Y, <??r>, wa, v,
- ?wa, v, <??r>, wZ, v")
+ ?wa, v, <??r>, wZ, v, wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, we, r, r,
wQ, Y, r, r, wE, jwM,
- ?jwM, W, <nW>, v, wZ"))]
+ ?jwM, W, <nW>, v, wZ, eQ"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1217,37 +1217,37 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, mtvsr, mfvsr, load,
store, load, store, *, vecsimple, vecsimple,
- vecsimple, *, *, vecstore, vecload")
+ vecsimple, *, *, vecstore, vecload, vecsimple")
(set_attr "num_insns"
"*, *, *, 2, *, 2,
2, 2, 2, 2, *, *,
- *, 5, 2, *, *")
+ *, 5, 2, *, *, *")
(set_attr "max_prefixed_insns"
"*, *, *, *, *, 2,
2, 2, 2, 2, *, *,
- *, *, *, *, *")
+ *, *, *, *, *, *")
(set_attr "length"
"*, *, *, 8, *, 8,
8, 8, 8, 8, *, *,
- *, 20, 8, *, *")
+ *, 20, 8, *, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
*, *, *, *, p9v, *,
- <VSisa>, *, *, *, *")])
+ <VSisa>, *, *, *, *, p10")])
;; VSX store VSX load VSX move GPR load GPR store GPR move
;; XXSPLTIB VSPLTISW VSX 0/-1 VMX const GPR const
-;; LVX (VMX) STVX (VMX)
+;; LVX (VMX) STVX (VMX) LXVKQ
(define_insn "*vsx_mov<mode>_32bit"
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
"=ZwO, wa, wa, ??r, ??Y, <??r>,
wa, v, ?wa, v, <??r>,
- wZ, v")
+ wZ, v, wa")
(match_operand:VSX_M 1 "input_operand"
"wa, ZwO, wa, Y, r, r,
wE, jwM, ?jwM, W, <nW>,
- v, wZ"))]
+ v, wZ, eQ"))]
"!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
@@ -1258,15 +1258,15 @@
[(set_attr "type"
"vecstore, vecload, vecsimple, load, store, *,
vecsimple, vecsimple, vecsimple, *, *,
- vecstore, vecload")
+ vecstore, vecload, vecsimple")
(set_attr "length"
"*, *, *, 16, 16, 16,
*, *, *, 20, 16,
- *, *")
+ *, *, *")
(set_attr "isa"
"<VSisa>, <VSisa>, <VSisa>, *, *, *,
p9v, *, <VSisa>, *, *,
- *, *")])
+ *, *, p10")])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..a5cbe0b477f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,144 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+ constants. */
+
+_Float128
+return_0 (void)
+{
+ return 0.0f128; /* XXSPLTIB 34,0. */
+}
+
+_Float128
+return_1 (void)
+{
+ return 1.0f128; /* LXVKQ 34,1. */
+}
+
+_Float128
+return_2 (void)
+{
+ return 2.0f128; /* LXVKQ 34,2. */
+}
+
+_Float128
+return_3 (void)
+{
+ return 3.0f128; /* LXVKQ 34,3. */
+}
+
+_Float128
+return_4 (void)
+{
+ return 4.0f128; /* LXVKQ 34,4. */
+}
+
+_Float128
+return_5 (void)
+{
+ return 5.0f128; /* LXVKQ 34,5. */
+}
+
+_Float128
+return_6 (void)
+{
+ return 6.0f128; /* LXVKQ 34,6. */
+}
+
+_Float128
+return_7 (void)
+{
+ return 7.0f128; /* LXVKQ 34,7. */
+}
+
+_Float128
+return_m0 (void)
+{
+ return -0.0f128; /* LXVKQ 34,16. */
+}
+
+_Float128
+return_m1 (void)
+{
+ return -1.0f128; /* LXVKQ 34,17. */
+}
+
+_Float128
+return_m2 (void)
+{
+ return -2.0f128; /* LXVKQ 34,18. */
+}
+
+_Float128
+return_m3 (void)
+{
+ return -3.0f128; /* LXVKQ 34,19. */
+}
+
+_Float128
+return_m4 (void)
+{
+ return -4.0f128; /* LXVKQ 34,20. */
+}
+
+_Float128
+return_m5 (void)
+{
+ return -5.0f128; /* LXVKQ 34,21. */
+}
+
+_Float128
+return_m6 (void)
+{
+ return -6.0f128; /* LXVKQ 34,22. */
+}
+
+_Float128
+return_m7 (void)
+{
+ return -7.0f128; /* LXVKQ 34,23. */
+}
+
+_Float128
+return_inf (void)
+{
+ return __builtin_inff128 (); /* LXVKQ 34,8. */
+}
+
+_Float128
+return_minf (void)
+{
+ return - __builtin_inff128 (); /* LXVKQ 34,24. */
+}
+
+_Float128
+return_nan (void)
+{
+ return __builtin_nanf128 (""); /* LXVKQ 34,9. */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction. */
+_Float128
+return_mnan (void)
+{
+ return - __builtin_nanf128 (""); /* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+ return __builtin_nanf128 ("1"); /* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+ return __builtin_nansf128 (""); /* PLXV 34,... */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M} 18 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M} 3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M} 1 } } */
+
More information about the Gcc-cvs
mailing list