This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][RS6000] Fix PR87870: ppc64 generates poor code when loading constants into TImode vars


On 11/16/18 5:29 PM, Segher Boessenkool wrote:
> On Fri, Nov 16, 2018 at 04:26:18PM -0600, Peter Bergner wrote:
>> However, when I made the change below, the length attribute seems a
>> little off.  For *_64bit, we have a length of 4, but for *_32bit, we
>> have a length of 32.  The "4" looks correct for both *_64bit and *_32bit
>> if we're loading an easy_vector_constant into one of the vector regs.
>> For loading a TImode constant into a GPR, then it could be anything from
>> 8 bytes to 40bytes (loading 0xdeadbeefcafebabefacefeedbaaaaaad) for
>> -m64.  Since TImode isn't supposrted in -m32 (yet?), who knows, probably
>> it would be 16 bytes to ??? bytes.
>>
>> Should those sizes be updated too?  If so, what should they be?
>> The smallest, average or worst case lengths?  I assume we could use
>> another iterator to separate the vector lengths from the gpr lengths
>> if we need to.
> 
> Worst case.  This is required for correctness.

Ok, I looked into this and the point where we must have correct length info
is in final assembly generation, so very very late.  For the alternatives
I'm changing, we're loading into GPR regs and these alternatives are always
split (split2), so these length values are never used/seen at final assembly
time.

Given the above, I'm guessing we should probably go with the most common
length value (ie, 8 for 64-bit and 16 for 32-bit)?  The following patch
implements that.  Does this seem reasonable to you?

Peter

gcc/
	PR target/87870
	* config/rs6000/vsx.md (nW): New mode iterator.
	(vsx_mov<mode>_64bit): Use it.  Remove redundant GPR 0/-1 alternative.
	Update length attribute for (<??r>, <nW>)  alternative.
	(vsx_mov<mode>_32bit): Likewise.

gcc/testsuite/
	PR target/87870
	* gcc.target/powerpc/pr87870.c: New test.


Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 265971)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -183,6 +183,18 @@ (define_mode_attr ??r	[(V16QI	"??r")
 			 (TF	"??r")
 			 (TI	"r")])
 
+;; A mode attribute used for 128-bit constant values.
+(define_mode_attr nW	[(V16QI	"W")
+			 (V8HI	"W")
+			 (V4SI	"W")
+			 (V4SF	"W")
+			 (V2DI	"W")
+			 (V2DF	"W")
+			 (V1TI	"W")
+			 (KF	"W")
+			 (TF	"W")
+			 (TI	"n")])
+
 ;; Same size integer type for floating point data
 (define_mode_attr VSi [(V4SF  "v4si")
 		       (V2DF  "v2di")
@@ -1193,17 +1205,17 @@ (define_insn_and_split "*xxspltib_<mode>
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
-;;              VSX 0/-1   GPR 0/-1   VMX const GPR const  LVX (VMX)   STVX (VMX)
+;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      <VSa>,     <VSa>,     r,         we,        ?wQ,
                 ?&r,       ??r,       ??Y,       <??r>,     wo,        v,
-                ?<VSa>,    *r,        v,         ??r,       wZ,        v")
+                ?<VSa>,    v,         <??r>,     wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "<VSa>,     ZwO,       <VSa>,     we,        r,         r,
                 wQ,        Y,         r,         r,         wE,        jwM,
-                ?jwM,      jwM,       W,         W,         v,         wZ"))]
+                ?jwM,      W,         <nW>,      v,         wZ"))]
 
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
    && (register_operand (operands[0], <MODE>mode) 
@@ -1214,25 +1226,25 @@ (define_insn "vsx_mov<mode>_64bit"
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
                 store,     load,      store,     *,         vecsimple, vecsimple,
-                vecsimple, *,         *,         *,         vecstore,  vecload")
+                vecsimple, *,         *,         vecstore,  vecload")
 
    (set_attr "length"
                "4,         4,         4,         8,         4,         8,
                 8,         8,         8,         8,         4,         4,
-                4,         8,         20,        20,        4,         4")])
+                4,         20,        8,         4,         4")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
-;;              XXSPLTIB   VSPLTISW   VSX 0/-1   GPR 0/-1   VMX const  GPR const
+;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      <VSa>,     <VSa>,     ??r,       ??Y,       <??r>,
-                wo,        v,         ?<VSa>,    *r,        v,         ??r,
+                wo,        v,         ?<VSa>,    v,         <??r>,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "<VSa>,     ZwO,       <VSa>,     Y,         r,         r,
-                wE,        jwM,       ?jwM,      jwM,       W,         W,
+                wE,        jwM,       ?jwM,      W,         <nW>,
                 v,         wZ"))]
 
   "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1243,12 +1255,12 @@ (define_insn "*vsx_mov<mode>_32bit"
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
-                vecsimple, vecsimple, vecsimple, *,         *,        *,
+                vecsimple, vecsimple, vecsimple, *,         *,
                 vecstore,  vecload")
 
    (set_attr "length"
                "4,         4,         4,         16,        16,        16,
-                4,         4,         4,         16,        20,        32,
+                4,         4,         4,         20,        16,
                 4,         4")])
 
 ;; Explicit  load/store expanders for the builtin functions
Index: gcc/testsuite/gcc.target/powerpc/pr87870.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr87870.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/pr87870.c	(working copy)
@@ -0,0 +1,28 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-options "-O2" } */
+
+__int128
+test0 (void)
+{
+  return 0;
+}
+
+__int128
+test1 (void)
+{
+  return 1;
+}
+
+__int128
+test2 (void)
+{
+  return -1;
+}
+
+__int128
+test3 (void)
+{
+  return ((__int128)0xdeadbeefcafebabe << 64) | 0xfacefeedbaaaaaad;
+}
+
+/* { dg-final { scan-assembler-not {\mld\M} } } */



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]