[PATCH], PR target/81593, Optimize PowerPC vector sets coming from a vector extracts
Michael Meissner
meissner@linux.vnet.ibm.com
Thu Jul 27 23:21:00 GMT 2017
This patch optimizes the PowerPC vector set operation for 64-bit doubles and
longs where the elements in the vector set may have been extracted from another
vector (PR target/81593):
Here is an example:
vector double
test_vpasted (vector double high, vector double low)
{
vector double res;
res[1] = high[1];
res[0] = low[0];
return res;
}
Previously it would generate:
xxpermdi 12,34,34,2
vspltisw 2,0
xxlor 0,35,35
xxpermdi 34,34,12,0
xxpermdi 34,0,34,1
and with these patches, it now generates:
xxpermdi 34,35,34,1
I have tested it on a little endian power8 system and a big endian power7
system with the usual bootstrap and make checks with no regressions. Can I
check this into the trunk?
I also built Spec 2006 with the compiler, and saw no changes in the code
generated. This isn't surprising because it isn't something that auto
vectorization might generate by default.
[gcc]
2017-07-27 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/81593
* config/rs6000/rs6000-protos.h (rs6000_emit_xxpermdi): New
declaration.
* config/rs6000/rs6000.c (rs6000_emit_xxpermdi): New function to
emit XXPERMDI accessing either double word in either vector
register inputs.
* config/rs6000/vsx.md (vsx_concat_<mode>, VSX_D iterator):
Rewrite VEC_CONCAT insn to call rs6000_emit_xxpermdi. Simplify
the constraints with the removal of the -mupper-regs-* switches.
(vsx_concat_<mode>_1): New combiner insns to optimize CONCATs
where either register might have come from VEC_SELECT.
(vsx_concat_<mode>_2): Likewise.
(vsx_concat_<mode>_3): Likewise.
(vsx_set_<mode>, VSX_D iterator): Rewrite insn to generate a
VEC_CONCAT rather than use an UNSPEC to specify the option.
[gcc/testsuite]
2017-07-27 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/81593
* gcc.target/powerpc/vsx-extract-6.c: New test.
* gcc.target/powerpc/vsx-extract-7.c: Likewise.
--
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.vnet.ibm.com, phone: +1 (978) 899-4797
-------------- next part --------------
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h (svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000/rs6000-protos.h) (revision 250577)
+++ gcc/config/rs6000/rs6000-protos.h (.../gcc/config/rs6000/rs6000-protos.h) (working copy)
@@ -233,6 +233,7 @@ extern void rs6000_asm_output_dwarf_pcre
const char *label);
extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size,
const char *label);
+extern const char *rs6000_emit_xxpermdi (rtx[], rtx, rtx);
/* Declare functions in rs6000-c.c */
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000/rs6000.c) (revision 250577)
+++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000/rs6000.c) (working copy)
@@ -39167,6 +39167,38 @@ rs6000_optab_supported_p (int op, machin
return true;
}
}
+
+
+/* Emit a XXPERMDI instruction that can extract from either double word of the
+ two arguments. ELEMENT1 and ELEMENT2 are either NULL or they are 0/1 giving
+ which double word to be used for the operand. */
+
+const char *
+rs6000_emit_xxpermdi (rtx operands[], rtx element1, rtx element2)
+{
+ int op1_dword = (!element1) ? 0 : INTVAL (element1);
+ int op2_dword = (!element2) ? 0 : INTVAL (element2);
+
+ gcc_assert (IN_RANGE (op1_dword | op2_dword, 0, 1));
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ operands[3] = GEN_INT (2*op1_dword + op2_dword);
+ return "xxpermdi %x0,%x1,%x2,%3";
+ }
+ else
+ {
+ if (element1)
+ op1_dword = 1 - op1_dword;
+
+ if (element2)
+ op2_dword = 1 - op2_dword;
+
+ operands[3] = GEN_INT (op1_dword + 2*op2_dword);
+ return "xxpermdi %x0,%x2,%x1,%3";
+ }
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md (svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000/vsx.md) (revision 250577)
+++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000/vsx.md) (working copy)
@@ -2366,19 +2366,17 @@ (define_insn "*vsx_float_fix_v2df2"
;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
- [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we")
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
(vec_concat:VSX_D
- (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b")
- (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))]
+ (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
+ (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
if (which_alternative == 0)
- return (BYTES_BIG_ENDIAN
- ? "xxpermdi %x0,%x1,%x2,0"
- : "xxpermdi %x0,%x2,%x1,0");
+ return rs6000_emit_xxpermdi (operands, NULL_RTX, NULL_RTX);
else if (which_alternative == 1)
- return (BYTES_BIG_ENDIAN
+ return (VECTOR_ELT_ORDER_BIG
? "mtvsrdd %x0,%1,%2"
: "mtvsrdd %x0,%2,%1");
@@ -2387,6 +2385,47 @@ (define_insn "vsx_concat_<mode>"
}
[(set_attr "type" "vecperm")])
+;; Combiner patterns to allow creating XXPERMDI's to access either double
+;; register in a vector register. Note, rs6000_emit_xxpermdi expects
+;; operands[0..2] to be the vector registers.
+(define_insn "*vsx_concat_<mode>_1"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_concat:VSX_D
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))
+ (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ return rs6000_emit_xxpermdi (operands, operands[3], NULL_RTX);
+})
+
+(define_insn "*vsx_concat_<mode>_2"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_concat:VSX_D
+ (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ return rs6000_emit_xxpermdi (operands, NULL_RTX, operands[3]);
+})
+
+(define_insn "*vsx_concat_<mode>_3"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_concat:VSX_D
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ return rs6000_emit_xxpermdi (operands, operands[3], operands[4]);
+})
+
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -2587,25 +2626,35 @@ (define_expand "vsx_set_v1ti"
DONE;
})
-;; Set the element of a V2DI/VD2F mode
-(define_insn "vsx_set_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
- (unspec:VSX_D
- [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
- (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
- UNSPEC_VSX_SET))]
+;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
+(define_expand "vsx_set_<mode>"
+ [(use (match_operand:VSX_D 0 "vsx_register_operand"))
+ (use (match_operand:VSX_D 1 "vsx_register_operand"))
+ (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
+ (use (match_operand:QI 3 "const_0_to_1_operand"))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
- if (INTVAL (operands[3]) == idx_first)
- return \"xxpermdi %x0,%x2,%x1,1\";
- else if (INTVAL (operands[3]) == 1 - idx_first)
- return \"xxpermdi %x0,%x1,%x2,0\";
+ rtx dest = operands[0];
+ rtx vec_reg = operands[1];
+ rtx value = operands[2];
+ rtx ele = operands[3];
+ rtx tmp = gen_reg_rtx (<VS_scalar>mode);
+
+ if (ele == const0_rtx)
+ {
+ emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
+ emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
+ DONE;
+ }
+ else if (ele == const1_rtx)
+ {
+ emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
+ emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
+ DONE;
+ }
else
gcc_unreachable ();
-}
- [(set_attr "type" "vecperm")])
+})
;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases were we can do a simple or direct move.
Index: gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c (svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c) (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c (.../gcc/testsuite/gcc.target/powerpc/vsx-extract-6.c) (revision 250640)
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+vector unsigned long
+test_vpasted (vector unsigned long high, vector unsigned long low)
+{
+ vector unsigned long res;
+ res[1] = high[1];
+ res[0] = low[0];
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
Index: gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c (svn+ssh://meissner@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c) (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c (.../gcc/testsuite/gcc.target/powerpc/vsx-extract-7.c) (revision 250640)
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+vector double
+test_vpasted (vector double high, vector double low)
+{
+ vector double res;
+ res[1] = high[1];
+ res[0] = low[0];
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
More information about the Gcc-patches
mailing list