This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Biener <rguenther at suse dot de>, Eric Botcazou <ebotcazou at adacore dot com>, Jeff Law <law at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Segher Boessenkool <segher at kernel dot crashing dot org>
- Date: Tue, 13 Jan 2015 17:18:19 +0100
- Subject: [PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
My PR60663 fix unfortunately stopped CSE of all inline-asms, even when
they e.g. only have the clobbers added by default.
This patch attempts to restore the old behavior, with the exceptions:
1) as always, asm volatile is not CSEd
2) inline-asm with multiple outputs are not CSEd
3) on request from Richard (which Segher on IRC argues against), "memory"
clobber also prevents CSE; this can be removed by removing the
int j, lim = XVECLEN (x, 0); and loop below it
4) inline-asm with clobbers is never copied into an insn that wasn't
inline-asm before, so if there are clobbers, we allow CSEing of
e.g. two same inline-asms, but only by reusing results of one
of those
Bootstrapped/regtested on x86_64-linux and i686-linux, tested also
with arm cross after reverting the PR60663 arm cost fix.
Ok for trunk this way, or with 3) removed?
2015-01-13 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/63637
PR rtl-optimization/60663
* cse.c (merge_equiv_classes): Set new_elt->cost to MAX_COST
if elt->cost is MAX_COST for ASM_OPERANDS.
(find_sets_in_insn): Fix up comment typo.
(cse_insn): Don't set src_volatile for all non-volatile
ASM_OPERANDS in PARALLELs, but just those with multiple outputs
or with "memory" clobber. Set elt->cost to MAX_COST
for ASM_OPERANDS in PARALLEL. Set src_elt->cost to MAX_COST
if new_src is ASM_OPERANDS and elt->cost is MAX_COST.
* gcc.dg/pr63637-1.c: New test.
* gcc.dg/pr63637-2.c: New test.
* gcc.dg/pr63637-3.c: New test.
* gcc.dg/pr63637-4.c: New test.
* gcc.dg/pr63637-5.c: New test.
* gcc.dg/pr63637-6.c: New test.
* gcc.target/i386/pr63637-1.c: New test.
* gcc.target/i386/pr63637-2.c: New test.
* gcc.target/i386/pr63637-3.c: New test.
* gcc.target/i386/pr63637-4.c: New test.
* gcc.target/i386/pr63637-5.c: New test.
* gcc.target/i386/pr63637-6.c: New test.
--- gcc/cse.c.jj 2015-01-09 21:59:44.000000000 +0100
+++ gcc/cse.c 2015-01-13 13:26:23.391216064 +0100
@@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c
}
new_elt = insert (exp, class1, hash, mode);
new_elt->in_memory = hash_arg_in_memory;
+ if (GET_CODE (exp) == ASM_OPERANDS && elt->cost == MAX_COST)
+ new_elt->cost = MAX_COST;
}
}
}
@@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc
{
int i, lim = XVECLEN (x, 0);
- /* Go over the epressions of the PARALLEL in forward order, to
+ /* Go over the expressions of the PARALLEL in forward order, to
put them in the same order in the SETS array. */
for (i = 0; i < lim; i++)
{
@@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn)
&& REGNO (dest) >= FIRST_PSEUDO_REGISTER)
sets[i].src_volatile = 1;
- /* Also do not record result of a non-volatile inline asm with
- more than one result or with clobbers, we do not want CSE to
- break the inline asm apart. */
else if (GET_CODE (src) == ASM_OPERANDS
&& GET_CODE (x) == PARALLEL)
- sets[i].src_volatile = 1;
+ {
+ /* Do not record result of a non-volatile inline asm with
+ more than one result. */
+ if (n_sets > 1)
+ sets[i].src_volatile = 1;
+
+ int j, lim = XVECLEN (x, 0);
+ for (j = 0; j < lim; j++)
+ {
+ rtx y = XVECEXP (x, 0, j);
+ /* And do not record result of a non-volatile inline asm
+ with "memory" clobber. */
+ if (GET_CODE (y) == CLOBBER && MEM_P (XEXP (y, 0)))
+ {
+ sets[i].src_volatile = 1;
+ break;
+ }
+ }
+ }
#if 0
/* It is no longer clear why we used to do this, but it doesn't
@@ -5230,8 +5247,8 @@ cse_insn (rtx_insn *insn)
;
/* Look for a substitution that makes a valid insn. */
- else if (validate_unshare_change
- (insn, &SET_SRC (sets[i].rtl), trial, 0))
+ else if (validate_unshare_change (insn, &SET_SRC (sets[i].rtl),
+ trial, 0))
{
rtx new_rtx = canon_reg (SET_SRC (sets[i].rtl), insn);
@@ -5593,6 +5610,12 @@ cse_insn (rtx_insn *insn)
}
elt = insert (src, classp, sets[i].src_hash, mode);
elt->in_memory = sets[i].src_in_memory;
+ /* If inline asm has any clobbers, ensure we only reuse
+ existing inline asms and never try to put the ASM_OPERANDS
+ into an insn that isn't inline asm. */
+ if (GET_CODE (src) == ASM_OPERANDS
+ && GET_CODE (x) == PARALLEL)
+ elt->cost = MAX_COST;
sets[i].src_elt = classp = elt;
}
if (sets[i].src_const && sets[i].src_const_elt == 0
@@ -5906,6 +5929,9 @@ cse_insn (rtx_insn *insn)
}
src_elt = insert (new_src, classp, src_hash, new_mode);
src_elt->in_memory = elt->in_memory;
+ if (GET_CODE (new_src) == ASM_OPERANDS
+ && elt->cost == MAX_COST)
+ src_elt->cost = MAX_COST;
}
else if (classp && classp != src_elt->first_same_value)
/* Show that two things that we've seen before are
--- gcc/testsuite/gcc.dg/pr63637-1.c.jj 2015-01-13 13:40:56.385782037 +0100
+++ gcc/testsuite/gcc.dg/pr63637-1.c 2015-01-13 13:41:08.931559978 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a));
+ asm ("# Magic instruction" : "=r" (b));
+ asm ("# Magic instruction" : "=r" (c));
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.dg/pr63637-2.c.jj 2015-01-13 13:41:36.967063752 +0100
+++ gcc/testsuite/gcc.dg/pr63637-2.c 2015-01-13 13:42:04.758571844 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : "r" (0));
+ asm ("# Magic instruction" : "=r" (b) : "r" (0));
+ asm ("# Magic instruction" : "=r" (c) : "r" (0));
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.dg/pr63637-3.c.jj 2015-01-13 13:43:58.820552956 +0100
+++ gcc/testsuite/gcc.dg/pr63637-3.c 2015-01-13 13:44:21.702147954 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : : "memory");
+ asm ("# Magic instruction" : "=r" (b) : : "memory");
+ asm ("# Magic instruction" : "=r" (c) : : "memory");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-4.c.jj 2015-01-13 13:44:01.624503326 +0100
+++ gcc/testsuite/gcc.dg/pr63637-4.c 2015-01-13 13:44:44.220749376 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : "r" (0) : "memory");
+ asm ("# Magic instruction" : "=r" (b) : "r" (0) : "memory");
+ asm ("# Magic instruction" : "=r" (c) : "r" (0) : "memory");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-5.c.jj 2015-01-13 13:46:36.837756064 +0100
+++ gcc/testsuite/gcc.dg/pr63637-5.c 2015-01-13 13:47:01.461320229 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c, d, e, f;
+ asm ("# Magic instruction" : "=r" (a), "=r" (d));
+ asm ("# Magic instruction" : "=r" (b), "=r" (e));
+ asm ("# Magic instruction" : "=r" (c), "=r" (f));
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-6.c.jj 2015-01-13 13:46:39.834703018 +0100
+++ gcc/testsuite/gcc.dg/pr63637-6.c 2015-01-13 13:47:27.915851986 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c, d, e, f;
+ asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0));
+ asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0));
+ asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0));
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-1.c.jj 2015-01-13 13:40:13.996531691 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-1.c 2015-01-13 13:42:37.945984430 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : : "eax");
+ asm ("# Magic instruction" : "=r" (b) : : "edx");
+ asm ("# Magic instruction" : "=r" (c) : : "ecx");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-2.c.jj 2015-01-13 13:42:12.557433805 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-2.c 2015-01-13 13:42:30.656113460 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax");
+ asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx");
+ asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-3.c.jj 2015-01-13 13:43:06.407480663 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-3.c 2015-01-13 13:43:28.600087856 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : : "eax", "memory");
+ asm ("# Magic instruction" : "=r" (b) : : "edx", "memory");
+ asm ("# Magic instruction" : "=r" (c) : : "ecx", "memory");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-4.c.jj 2015-01-13 13:43:09.505425830 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-4.c 2015-01-13 13:43:44.769801653 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c;
+ asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax", "memory");
+ asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx", "memory");
+ asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx", "memory");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-5.c.jj 2015-01-13 13:45:38.747784252 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-5.c 2015-01-13 13:45:34.350862077 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c, d, e, f;
+ asm ("# Magic instruction" : "=r" (a), "=r" (d) : : "eax");
+ asm ("# Magic instruction" : "=r" (b), "=r" (e) : : "edx");
+ asm ("# Magic instruction" : "=r" (c), "=r" (f) : : "ecx");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-6.c.jj 2015-01-13 13:45:54.923497943 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-6.c 2015-01-13 13:46:23.965983893 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+ int a, b, c, d, e, f;
+ asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0) : "eax");
+ asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0) : "edx");
+ asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0) : "ecx");
+ return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
Jakub