This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Reenable CSE of non-volatile inline asm (PR rtl-optimization/63637)


Hi!

My PR60663 fix unfortunately stopped CSE of all inline asms, even when
their only clobbers are the ones added by default.

This patch attempts to restore the old behavior, with the following exceptions:
1) as always, asm volatile is not CSEd
2) inline asms with multiple outputs are not CSEd
3) on request from Richard (which Segher on IRC argues against), a "memory"
   clobber also prevents CSE; this restriction can be dropped by removing
   the "int j, lim = XVECLEN (x, 0);" declaration and the loop below it
4) an inline asm with clobbers is never copied into an insn that wasn't
   an inline asm before, so if there are clobbers, we still allow CSE of
   e.g. two identical inline asms, but only by reusing the result of one
   of them

Bootstrapped/regtested on x86_64-linux and i686-linux, tested also
with arm cross after reverting the PR60663 arm cost fix.

Ok for trunk this way, or with 3) removed?

2015-01-13  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/63637
	PR rtl-optimization/60663
	* cse.c (merge_equiv_classes): Set new_elt->cost to MAX_COST
	if elt->cost is MAX_COST for ASM_OPERANDS.
	(find_sets_in_insn): Fix up comment typo.
	(cse_insn): Don't set src_volatile for all non-volatile
	ASM_OPERANDS in PARALLELs, but just those with multiple outputs
	or with "memory" clobber.  Set elt->cost to MAX_COST
	for ASM_OPERANDS in PARALLEL.  Set src_elt->cost to MAX_COST
	if new_src is ASM_OPERANDS and elt->cost is MAX_COST.

	* gcc.dg/pr63637-1.c: New test.
	* gcc.dg/pr63637-2.c: New test.
	* gcc.dg/pr63637-3.c: New test.
	* gcc.dg/pr63637-4.c: New test.
	* gcc.dg/pr63637-5.c: New test.
	* gcc.dg/pr63637-6.c: New test.
	* gcc.target/i386/pr63637-1.c: New test.
	* gcc.target/i386/pr63637-2.c: New test.
	* gcc.target/i386/pr63637-3.c: New test.
	* gcc.target/i386/pr63637-4.c: New test.
	* gcc.target/i386/pr63637-5.c: New test.
	* gcc.target/i386/pr63637-6.c: New test.

--- gcc/cse.c.jj	2015-01-09 21:59:44.000000000 +0100
+++ gcc/cse.c	2015-01-13 13:26:23.391216064 +0100
@@ -1792,6 +1792,8 @@ merge_equiv_classes (struct table_elt *c
 	    }
 	  new_elt = insert (exp, class1, hash, mode);
 	  new_elt->in_memory = hash_arg_in_memory;
+	  if (GET_CODE (exp) == ASM_OPERANDS && elt->cost == MAX_COST)
+	    new_elt->cost = MAX_COST;
 	}
     }
 }
@@ -4258,7 +4260,7 @@ find_sets_in_insn (rtx_insn *insn, struc
     {
       int i, lim = XVECLEN (x, 0);
 
-      /* Go over the epressions of the PARALLEL in forward order, to
+      /* Go over the expressions of the PARALLEL in forward order, to
 	 put them in the same order in the SETS array.  */
       for (i = 0; i < lim; i++)
 	{
@@ -4634,12 +4636,27 @@ cse_insn (rtx_insn *insn)
 	  && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
 	sets[i].src_volatile = 1;
 
-      /* Also do not record result of a non-volatile inline asm with
-	 more than one result or with clobbers, we do not want CSE to
-	 break the inline asm apart.  */
       else if (GET_CODE (src) == ASM_OPERANDS
 	       && GET_CODE (x) == PARALLEL)
-	sets[i].src_volatile = 1;
+	{
+	  /* Do not record result of a non-volatile inline asm with
+	     more than one result.  */
+	  if (n_sets > 1)
+	    sets[i].src_volatile = 1;
+
+	  int j, lim = XVECLEN (x, 0);
+	  for (j = 0; j < lim; j++)
+	    {
+	      rtx y = XVECEXP (x, 0, j);
+	      /* And do not record result of a non-volatile inline asm
+		 with "memory" clobber.  */
+	      if (GET_CODE (y) == CLOBBER && MEM_P (XEXP (y, 0)))
+		{
+		  sets[i].src_volatile = 1;
+		  break;
+		}
+	    }
+	}
 
 #if 0
       /* It is no longer clear why we used to do this, but it doesn't
@@ -5230,8 +5247,8 @@ cse_insn (rtx_insn *insn)
 	    ;
 
 	  /* Look for a substitution that makes a valid insn.  */
-	  else if (validate_unshare_change
-		     (insn, &SET_SRC (sets[i].rtl), trial, 0))
+	  else if (validate_unshare_change (insn, &SET_SRC (sets[i].rtl),
+					    trial, 0))
 	    {
 	      rtx new_rtx = canon_reg (SET_SRC (sets[i].rtl), insn);
 
@@ -5593,6 +5610,12 @@ cse_insn (rtx_insn *insn)
 		  }
 		elt = insert (src, classp, sets[i].src_hash, mode);
 		elt->in_memory = sets[i].src_in_memory;
+		/* If inline asm has any clobbers, ensure we only reuse
+		   existing inline asms and never try to put the ASM_OPERANDS
+		   into an insn that isn't inline asm.  */
+		if (GET_CODE (src) == ASM_OPERANDS
+		    && GET_CODE (x) == PARALLEL)
+		  elt->cost = MAX_COST;
 		sets[i].src_elt = classp = elt;
 	      }
 	    if (sets[i].src_const && sets[i].src_const_elt == 0
@@ -5906,6 +5929,9 @@ cse_insn (rtx_insn *insn)
 		      }
 		    src_elt = insert (new_src, classp, src_hash, new_mode);
 		    src_elt->in_memory = elt->in_memory;
+		    if (GET_CODE (new_src) == ASM_OPERANDS
+			&& elt->cost == MAX_COST)
+		      src_elt->cost = MAX_COST;
 		  }
 		else if (classp && classp != src_elt->first_same_value)
 		  /* Show that two things that we've seen before are
--- gcc/testsuite/gcc.dg/pr63637-1.c.jj	2015-01-13 13:40:56.385782037 +0100
+++ gcc/testsuite/gcc.dg/pr63637-1.c	2015-01-13 13:41:08.931559978 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a));
+  asm ("# Magic instruction" : "=r" (b));
+  asm ("# Magic instruction" : "=r" (c));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.dg/pr63637-2.c.jj	2015-01-13 13:41:36.967063752 +0100
+++ gcc/testsuite/gcc.dg/pr63637-2.c	2015-01-13 13:42:04.758571844 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0));
+  asm ("# Magic instruction" : "=r" (b) : "r" (0));
+  asm ("# Magic instruction" : "=r" (c) : "r" (0));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.dg/pr63637-3.c.jj	2015-01-13 13:43:58.820552956 +0100
+++ gcc/testsuite/gcc.dg/pr63637-3.c	2015-01-13 13:44:21.702147954 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : : "memory");
+  asm ("# Magic instruction" : "=r" (b) : : "memory");
+  asm ("# Magic instruction" : "=r" (c) : : "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-4.c.jj	2015-01-13 13:44:01.624503326 +0100
+++ gcc/testsuite/gcc.dg/pr63637-4.c	2015-01-13 13:44:44.220749376 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0) : "memory");
+  asm ("# Magic instruction" : "=r" (b) : "r" (0) : "memory");
+  asm ("# Magic instruction" : "=r" (c) : "r" (0) : "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-5.c.jj	2015-01-13 13:46:36.837756064 +0100
+++ gcc/testsuite/gcc.dg/pr63637-5.c	2015-01-13 13:47:01.461320229 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d));
+  asm ("# Magic instruction" : "=r" (b), "=r" (e));
+  asm ("# Magic instruction" : "=r" (c), "=r" (f));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.dg/pr63637-6.c.jj	2015-01-13 13:46:39.834703018 +0100
+++ gcc/testsuite/gcc.dg/pr63637-6.c	2015-01-13 13:47:27.915851986 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0));
+  asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0));
+  asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0));
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-1.c.jj	2015-01-13 13:40:13.996531691 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-1.c	2015-01-13 13:42:37.945984430 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : : "eax");
+  asm ("# Magic instruction" : "=r" (b) : : "edx");
+  asm ("# Magic instruction" : "=r" (c) : : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-2.c.jj	2015-01-13 13:42:12.557433805 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-2.c	2015-01-13 13:42:30.656113460 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax");
+  asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx");
+  asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 1 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-3.c.jj	2015-01-13 13:43:06.407480663 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-3.c	2015-01-13 13:43:28.600087856 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : : "eax", "memory");
+  asm ("# Magic instruction" : "=r" (b) : : "edx", "memory");
+  asm ("# Magic instruction" : "=r" (c) : : "ecx", "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-4.c.jj	2015-01-13 13:43:09.505425830 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-4.c	2015-01-13 13:43:44.769801653 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c;
+  asm ("# Magic instruction" : "=r" (a) : "r" (0) : "eax", "memory");
+  asm ("# Magic instruction" : "=r" (b) : "r" (0) : "edx", "memory");
+  asm ("# Magic instruction" : "=r" (c) : "r" (0) : "ecx", "memory");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-5.c.jj	2015-01-13 13:45:38.747784252 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-5.c	2015-01-13 13:45:34.350862077 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d) : : "eax");
+  asm ("# Magic instruction" : "=r" (b), "=r" (e) : : "edx");
+  asm ("# Magic instruction" : "=r" (c), "=r" (f) : : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */
--- gcc/testsuite/gcc.target/i386/pr63637-6.c.jj	2015-01-13 13:45:54.923497943 +0100
+++ gcc/testsuite/gcc.target/i386/pr63637-6.c	2015-01-13 13:46:23.965983893 +0100
@@ -0,0 +1,15 @@
+/* PR rtl-optimization/63637 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+foo (void)
+{
+  int a, b, c, d, e, f;
+  asm ("# Magic instruction" : "=r" (a), "=r" (d) : "r" (0) : "eax");
+  asm ("# Magic instruction" : "=r" (b), "=r" (e) : "r" (0) : "edx");
+  asm ("# Magic instruction" : "=r" (c), "=r" (f) : "r" (0) : "ecx");
+  return a + b + c;
+}
+
+/* { dg-final { scan-assembler-times "# Magic instruction" 3 } } */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]