[PATCH][RS6000] Fix PR87870: ppc64 generates poor code when loading constants into TImode vars

Peter Bergner bergner@linux.ibm.com
Fri Nov 16 22:26:00 GMT 2018


On 11/16/18 11:06 AM, Segher Boessenkool wrote:
> On Tue, Nov 13, 2018 at 11:29:20AM -0600, Peter Bergner wrote:
>> I'll note I didn't change the vsx_mov<mode>_32bit pattern, since TImode
>> isn't supported with -m32.  However, if you want, I could remove the
>> redundant "*r" <- "jwM" alternative there too?
> 
> Yeah, please keep the patterns in synch.

Ok, here is the patch with the vsx_mov<mod>_32bit pattern changed too.

However, when I made the change below, the length attribute seems a
little off.  For *_64bit, we have a length of 4, but for *_32bit, we
have a length of 32.  The "4" looks correct for both *_64bit and *_32bit
if we're loading an easy_vector_constant into one of the vector regs.
For loading a TImode constant into a GPR, then it could be anything from
8 bytes to 40bytes (loading 0xdeadbeefcafebabefacefeedbaaaaaad) for
-m64.  Since TImode isn't supposrted in -m32 (yet?), who knows, probably
it would be 16 bytes to ??? bytes.

Should those sizes be updated too?  If so, what should they be?
The smallest, average or worst case lengths?  I assume we could use
another iterator to separate the vector lengths from the gpr lengths
if we need to.

Peter


gcc/
	PR target/87870
	* config/rs6000/vsx.md (nW): New mode iterator.
	(vsx_mov<mode>_64bit): Use it.  Remove redundant GPR 0/-1 alternative.

gcc/testsuite/
	PR target/87870
	* gcc.target/powerpc/pr87870.c: New test.

Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 265971)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -183,6 +183,18 @@ (define_mode_attr ??r	[(V16QI	"??r")
 			 (TF	"??r")
 			 (TI	"r")])
 
+;; A mode attribute used for 128-bit constant values.
+(define_mode_attr nW	[(V16QI	"W")
+			 (V8HI	"W")
+			 (V4SI	"W")
+			 (V4SF	"W")
+			 (V2DI	"W")
+			 (V2DF	"W")
+			 (V1TI	"W")
+			 (KF	"W")
+			 (TF	"W")
+			 (TI	"n")])
+
 ;; Same size integer type for floating point data
 (define_mode_attr VSi [(V4SF  "v4si")
 		       (V2DF  "v2di")
@@ -1193,17 +1205,17 @@ (define_insn_and_split "*xxspltib_<mode>
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
-;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
+;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
                 ?&r,       ??r,       ??Y,       <??r>,     wo,        v,
-                ?<VSa>,    *r,        v,         ??r,       wZ,        v")
+                ?<VSa>,    v,         <??r>,     wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
                 wQ,        Y,         r,         r,         wE,        jwM,
-                ?jwM,      jwM,       W,         W,         v,         wZ"))]
+                ?jwM,      W,         <nW>,      v,         wZ"))]
 
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
    && (register_operand (operands[0], <MODE>mode) 
@@ -1214,25 +1226,25 @@ (define_insn "vsx_mov<mode>_64bit"
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
                 store,     load,      store,     *,         vecsimple, vecsimple,
-                vecsimple, *,         *,         *,         vecstore,  vecload")
+                vecsimple, *,         *,         vecstore,  vecload")
 
    (set_attr "length"
                "4,         4,         4,         8,         4,         8,
                 8,         8,         8,         8,         4,         4,
-                4,         8,         20,        20,        4,         4")])
+                4,         20,        20,        4,         4")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
-;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
+;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
-                wo,        v,         ?<VSa>,    *r,        v,         ??r,
+                wo,        v,         ?<VSa>,    v,         <??r>,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
-                wE,        jwM,       ?jwM,      jwM,       W,         W,
+                wE,        jwM,       ?jwM,      W,         <nW>,
                 v,         wZ"))]
 
   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1243,12 +1255,12 @@ (define_insn "*vsx_mov<mode>_32bit"
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
-                vecsimple, vecsimple, vecsimple, *,         *,        *,
+                vecsimple, vecsimple, vecsimple, *,         *,
                 vecstore,  vecload")
 
    (set_attr "length"
                "4,         4,         4,         16,        16,        16,
-                4,         4,         4,         16,        20,        32,
+                4,         4,         4,         20,        32,
                 4,         4")])
 
 ;; Explicit  load/store expanders for the builtin functions
Index: gcc/testsuite/gcc.target/powerpc/pr87870.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr87870.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/pr87870.c	(working copy)
@@ -0,0 +1,28 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-options "-O2" } */
+
+__int128
+test0 (void)
+{
+  return 0;
+}
+
+__int128
+test1 (void)
+{
+  return 1;
+}
+
+__int128
+test2 (void)
+{
+  return -1;
+}
+
+__int128
+test3 (void)
+{
+  return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
+}
+
+/* { dg-final { scan-assembler-not {\mld\M} } } */



More information about the Gcc-patches mailing list