[PATCH #2, rs6000][GCC6] Fix PR78543, ICE in push_reload on powerpc64le-linux-gnu
Michael Meissner
meissner@linux.vnet.ibm.com
Mon Mar 27 19:58:00 GMT 2017
On Fri, Mar 24, 2017 at 07:16:32PM -0500, Segher Boessenkool wrote:
> Hi Mike,
>
> This patch is okay for trunk. Also for 6, but what is the hurry there?
I applied the patch (svn id 246508 for trunk, svn id 246509 for gcc 6.0). The
reason for the hurry on GCC 6 is that the bug does not show up by default on
trunk (it only fails with RELOAD), but it does show up in GCC 6 (since RELOAD
is the default there), and it affects some of the Linux distributions.
[gcc]
2017-03-27 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/78543
* config/rs6000/rs6000.md (bswaphi2_extenddi): Combine bswap
HImode and SImode with zero extend to DImode to one insn.
(bswap<mode>2_extenddi): Likewise.
(bswapsi2_extenddi): Likewise.
(bswaphi2_extendsi): Likewise.
(bswaphi2): Combine bswap HImode and SImode into one insn.
Separate memory insns from swapping register.
(bswapsi2): Likewise.
(bswap<mode>2): Likewise.
(bswaphi2_internal): Delete, no longer used.
(bswapsi2_internal): Likewise.
(bswap<mode>2_load): Split bswap HImode/SImode into separate load,
store, and gpr<-gpr swap insns.
(bswap<mode>2_store): Likewise.
(bswaphi2_reg): New register-only insn, combined with its splitter.
(bswaphi2 splitter): Likewise.
(bswapsi2_reg): Likewise.
(bswapsi2 splitter): Likewise.
(bswapdi2): If we have the LDBRX and STDBRX instructions, split
the insns into load, store, and register/register insns.
(bswapdi2_ldbrx): Likewise.
(bswapdi2_load): Likewise.
(bswapdi2_store): Likewise.
(bswapdi2_reg): Likewise.
[gcc/testsuite]
2017-03-27 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/78543
* gcc.target/powerpc/pr78543.c: New test.
--
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meissner@linux.vnet.ibm.com, phone: +1 (978) 899-4797
-------------- next part --------------
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 246507)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -2350,12 +2350,12 @@ (define_insn "cmpb<mode>3"
;; Since the hardware zeros the upper part of the register, save generating the
;; AND immediate if we are converting to unsigned
-(define_insn "*bswaphi2_extenddi"
+(define_insn "*bswap<mode>2_extenddi"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(zero_extend:DI
- (bswap:HI (match_operand:HI 1 "memory_operand" "Z"))))]
+ (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z"))))]
"TARGET_POWERPC64"
- "lhbrx %0,%y1"
+ "l<wd>brx %0,%y1"
[(set_attr "length" "4")
(set_attr "type" "load")])
@@ -2368,34 +2368,52 @@ (define_insn "*bswaphi2_extendsi"
[(set_attr "length" "4")
(set_attr "type" "load")])
-(define_expand "bswaphi2"
- [(parallel [(set (match_operand:HI 0 "reg_or_mem_operand" "")
- (bswap:HI
- (match_operand:HI 1 "reg_or_mem_operand" "")))
- (clobber (match_scratch:SI 2 ""))])]
+;; Separate the bswap patterns into load, store, and gpr<-gpr. This prevents
+;; the register allocator from converting a gpr<-gpr swap into a store and then
+;; load with byte swap, which can be slower than doing it in the registers. It
+;; also prevents certain failures with the RELOAD register allocator.
+
+(define_expand "bswap<mode>2"
+ [(use (match_operand:HSI 0 "reg_or_mem_operand"))
+ (use (match_operand:HSI 1 "reg_or_mem_operand"))]
""
{
- if (!REG_P (operands[0]) && !REG_P (operands[1]))
- operands[1] = force_reg (HImode, operands[1]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ if (!REG_P (dest) && !REG_P (src))
+ src = force_reg (<MODE>mode, src);
+
+ if (MEM_P (src))
+ emit_insn (gen_bswap<mode>2_load (dest, src));
+ else if (MEM_P (dest))
+ emit_insn (gen_bswap<mode>2_store (dest, src));
+ else
+ emit_insn (gen_bswap<mode>2_reg (dest, src));
+ DONE;
})
-(define_insn "bswaphi2_internal"
- [(set (match_operand:HI 0 "reg_or_mem_operand" "=r,Z,&r")
+(define_insn "bswap<mode>2_load"
+ [(set (match_operand:HSI 0 "gpc_reg_operand" "=r")
+ (bswap:HSI (match_operand:HSI 1 "memory_operand" "Z")))]
+ ""
+ "l<wd>brx %0,%y1"
+ [(set_attr "type" "load")])
+
+(define_insn "bswap<mode>2_store"
+ [(set (match_operand:HSI 0 "memory_operand" "=Z")
+ (bswap:HSI (match_operand:HSI 1 "gpc_reg_operand" "r")))]
+ ""
+ "st<wd>brx %1,%y0"
+ [(set_attr "type" "store")])
+
+(define_insn_and_split "bswaphi2_reg"
+ [(set (match_operand:HI 0 "gpc_reg_operand" "=&r")
(bswap:HI
- (match_operand:HI 1 "reg_or_mem_operand" "Z,r,r")))
- (clobber (match_scratch:SI 2 "=X,X,&r"))]
+ (match_operand:HI 1 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:SI 2 "=&r"))]
""
- "@
- lhbrx %0,%y1
- sthbrx %1,%y0
- #"
- [(set_attr "length" "4,4,12")
- (set_attr "type" "load,store,*")])
-
-(define_split
- [(set (match_operand:HI 0 "gpc_reg_operand" "")
- (bswap:HI (match_operand:HI 1 "gpc_reg_operand" "")))
- (clobber (match_operand:SI 2 "gpc_reg_operand" ""))]
+ "#"
"reload_completed"
[(set (match_dup 3)
(and:SI (lshiftrt:SI (match_dup 4)
@@ -2408,48 +2426,21 @@ (define_split
(set (match_dup 3)
(ior:SI (match_dup 3)
(match_dup 2)))]
- "
{
operands[3] = simplify_gen_subreg (SImode, operands[0], HImode, 0);
operands[4] = simplify_gen_subreg (SImode, operands[1], HImode, 0);
-}")
-
-(define_insn "*bswapsi2_extenddi"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
- (zero_extend:DI
- (bswap:SI (match_operand:SI 1 "memory_operand" "Z"))))]
- "TARGET_POWERPC64"
- "lwbrx %0,%y1"
- [(set_attr "length" "4")
- (set_attr "type" "load")])
-
-(define_expand "bswapsi2"
- [(set (match_operand:SI 0 "reg_or_mem_operand" "")
- (bswap:SI
- (match_operand:SI 1 "reg_or_mem_operand" "")))]
- ""
-{
- if (!REG_P (operands[0]) && !REG_P (operands[1]))
- operands[1] = force_reg (SImode, operands[1]);
-})
-
-(define_insn "*bswapsi2_internal"
- [(set (match_operand:SI 0 "reg_or_mem_operand" "=r,Z,&r")
- (bswap:SI
- (match_operand:SI 1 "reg_or_mem_operand" "Z,r,r")))]
- ""
- "@
- lwbrx %0,%y1
- stwbrx %1,%y0
- #"
- [(set_attr "length" "4,4,12")
- (set_attr "type" "load,store,*")])
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "*")])
;; We are always BITS_BIG_ENDIAN, so the bit positions below in
;; zero_extract insns do not change for -mlittle.
-(define_split
- [(set (match_operand:SI 0 "gpc_reg_operand" "")
- (bswap:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
+(define_insn_and_split "bswapsi2_reg"
+ [(set (match_operand:SI 0 "gpc_reg_operand" "=&r")
+ (bswap:SI
+ (match_operand:SI 1 "gpc_reg_operand" "r")))]
+ ""
+ "#"
"reload_completed"
[(set (match_dup 0) ; DABC
(rotate:SI (match_dup 1)
@@ -2465,11 +2456,13 @@ (define_split
(const_int 24))
(const_int 255))
(and:SI (match_dup 0)
- (const_int -256))))
-
- ]
+ (const_int -256))))]
"")
+;; On systems with LDBRX/STDBRX generate the loads/stores directly, just like
+;; we do for L{H,W}BRX and ST{H,W}BRX above. If not, we have to generate more
+;; complex code.
+
(define_expand "bswapdi2"
[(parallel [(set (match_operand:DI 0 "reg_or_mem_operand" "")
(bswap:DI
@@ -2478,33 +2471,56 @@ (define_expand "bswapdi2"
(clobber (match_scratch:DI 3 ""))])]
""
{
- if (!REG_P (operands[0]) && !REG_P (operands[1]))
- operands[1] = force_reg (DImode, operands[1]);
+ rtx dest = operands[0];
+ rtx src = operands[1];
+
+ if (!REG_P (dest) && !REG_P (src))
+ operands[1] = src = force_reg (DImode, src);
+
+ if (TARGET_POWERPC64 && TARGET_LDBRX)
+ {
+ if (MEM_P (src))
+ emit_insn (gen_bswapdi2_load (dest, src));
+ else if (MEM_P (dest))
+ emit_insn (gen_bswapdi2_store (dest, src));
+ else
+ emit_insn (gen_bswapdi2_reg (dest, src));
+ DONE;
+ }
if (!TARGET_POWERPC64)
{
/* 32-bit mode needs fewer scratch registers, but 32-bit addressing mode
that uses 64-bit registers needs the same scratch registers as 64-bit
mode. */
- emit_insn (gen_bswapdi2_32bit (operands[0], operands[1]));
+ emit_insn (gen_bswapdi2_32bit (dest, src));
DONE;
}
})
;; Power7/cell has ldbrx/stdbrx, so use it directly
-(define_insn "*bswapdi2_ldbrx"
- [(set (match_operand:DI 0 "reg_or_mem_operand" "=r,Z,&r")
- (bswap:DI (match_operand:DI 1 "reg_or_mem_operand" "Z,r,r")))
- (clobber (match_scratch:DI 2 "=X,X,&r"))
- (clobber (match_scratch:DI 3 "=X,X,&r"))]
- "TARGET_POWERPC64 && TARGET_LDBRX
- && (REG_P (operands[0]) || REG_P (operands[1]))"
- "@
- ldbrx %0,%y1
- stdbrx %1,%y0
- #"
- [(set_attr "length" "4,4,36")
- (set_attr "type" "load,store,*")])
+(define_insn "bswapdi2_load"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+ (bswap:DI (match_operand:DI 1 "memory_operand" "Z")))]
+ "TARGET_POWERPC64 && TARGET_LDBRX"
+ "ldbrx %0,%y1"
+ [(set_attr "type" "load")])
+
+(define_insn "bswapdi2_store"
+ [(set (match_operand:DI 0 "memory_operand" "=Z")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))]
+ "TARGET_POWERPC64 && TARGET_LDBRX"
+ "stdbrx %1,%y0"
+ [(set_attr "type" "store")])
+
+(define_insn "bswapdi2_reg"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=&r")
+ (bswap:DI (match_operand:DI 1 "gpc_reg_operand" "r")))
+ (clobber (match_scratch:DI 2 "=&r"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_POWERPC64 && TARGET_LDBRX"
+ "#"
+ [(set_attr "length" "36")])
;; Non-power7/cell, fall back to use lwbrx/stwbrx
(define_insn "*bswapdi2_64bit"
Index: gcc/testsuite/gcc.target/powerpc/pr78543.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/pr78543.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr78543.c (working copy)
@@ -0,0 +1,60 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O1 -mno-lra" } */
+
+typedef long a;
+enum c { e, f, g, h, i, ab } j();
+int l, n, o, p;
+a q, r;
+void *memcpy();
+void b();
+static int k(int *s) {
+ int m;
+ if (j(&m))
+ *s = m;
+ return !0;
+}
+void d(char s) {
+ int af[4];
+ int ag;
+ enum c ah;
+ char ai[24 << 11];
+ unsigned aj;
+ if (!k(&aj))
+ goto ak;
+ for (;;) {
+ if (!k(&ag))
+ goto ak;
+ switch (ah) {
+ case e:
+ b("");
+ b("bad length %d for GUID in fileinfo v%u for \"%s\"");
+ case i:
+ b("bad length %d for TTH in fileinfo v%u for \"%s\"", aj);
+ case ab:
+ if (ag % 24)
+ b("for \"%s\"", s);
+ case f:
+ if (20 == ag)
+ case h:
+ if (20 == ag)
+ o = 0;
+ break;
+ case g:
+ memcpy(af, ai, sizeof af);
+ b();
+ if (p) {
+ a al, am;
+ r = al << 2 | am;
+ n = af[2];
+ al = n;
+ l = __builtin_bswap32(af[3]);
+ am = q = n | l;
+ }
+ default:
+ b("%s0 unhandled field ID %u 0", __func__);
+ }
+ }
+ak:;
+}
More information about the Gcc-patches
mailing list