[PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737]

Jakub Jelinek <jakub@redhat.com>
Wed Jan 27 09:20:38 GMT 2021


Hi!

This patch adds a peephole2 for the optimization requested in the PR:
currently we emit awful code for __atomic_sub_fetch (x, y, z) == 0
or __atomic_sub_fetch (x, y, z) != 0 when y is not constant.
This can't be done in the combiner, which punts on combining
UNSPEC_VOLATILE insns into other insns.
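
For reference, here is roughly the shape of the code involved; the
register choices and exact insn sequences are my illustration, not
copied compiler output:

  int b;

  int
  bar (int x)
  {
    /* Before the patch, roughly (neg; mov; lock xadd; add, then setcc):
	 negl	%edi
	 movl	%edi, %eax
	 lock xaddl	%eax, b(%rip)
	 addl	%edi, %eax
	 sete	%al
	 movzbl	%al, %eax
       After the peephole2 below, roughly:
	 lock subl	%edi, b(%rip)
	 sete	%al
	 movzbl	%al, %eax  */
    return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
  }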

Other ops would need different peephole2s; this one is specific to sub
because of the comparison instruction and negation that have to be matched.
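
For instance, for __atomic_add_fetch there is no initial negation to
match, so the insn window looks different (again an illustration under
that assumption, not actual compiler output):

  int v;

  int
  addbaz (int x)
  {
    /* Hypothetical pre-peephole2 shape for add: mov; lock xadd; add,
       with no leading neg, so the sub peephole2 below cannot match;
       a separate pattern would be needed to turn this into lock add
       plus a flags test.  */
    return __atomic_add_fetch (&v, x, __ATOMIC_RELEASE) == 0;
  }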

Bootstrapped/regtested on x86_64-linux and i686-linux.  Is this ok for
trunk (as an exception), or should it wait for GCC 12?

2021-01-27  Jakub Jelinek  <jakub@redhat.com>

	PR target/98737
	* config/i386/sync.md (neg; mov; lock xadd; add peephole2): New
	define_peephole2.
	(*atomic_fetch_sub_cmp<mode>): New define_insn.

	* gcc.target/i386/pr98737.c: New test.

--- gcc/config/i386/sync.md.jj	2021-01-04 10:25:45.392159555 +0100
+++ gcc/config/i386/sync.md	2021-01-26 16:03:13.911100510 +0100
@@ -777,6 +777,63 @@ (define_insn "*atomic_fetch_add_cmp<mode
   return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
 })
 
+;; Similarly, peephole for __sync_sub_fetch (x, b) == 0 into just
+;; lock sub followed by testing of flags instead of lock xadd, negation and
+;; comparison.
+(define_peephole2
+  [(parallel [(set (match_operand 0 "register_operand")
+		   (neg (match_dup 0)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand:SWI 1 "register_operand")
+	(match_operand:SWI 2 "register_operand"))
+   (parallel [(set (match_operand:SWI 3 "register_operand")
+		   (unspec_volatile:SWI
+		     [(match_operand:SWI 4 "memory_operand")
+		      (match_operand:SI 5 "const_int_operand")]
+		     UNSPECV_XCHG))
+	      (set (match_dup 4)
+		   (plus:SWI (match_dup 4)
+			     (match_dup 3)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ (neg:SWI
+				  (match_operand:SWI 6 "register_operand"))
+				(match_dup 3)))
+	      (clobber (match_dup 3))])]
+  "(GET_MODE (operands[0]) == <LEAMODE>mode
+    || GET_MODE (operands[0]) == <MODE>mode)
+   && reg_or_subregno (operands[0]) == reg_or_subregno (operands[2])
+   && (rtx_equal_p (operands[2], operands[3])
+       ? rtx_equal_p (operands[1], operands[6])
+       : (rtx_equal_p (operands[2], operands[6])
+	  && rtx_equal_p (operands[1], operands[3])))
+   && peep2_reg_dead_p (4, operands[6])
+   && peep2_reg_dead_p (4, operands[3])
+   && !reg_overlap_mentioned_p (operands[1], operands[4])
+   && !reg_overlap_mentioned_p (operands[2], operands[4])"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+		   (compare:CCZ
+		     (unspec_volatile:SWI [(match_dup 4) (match_dup 5)]
+					  UNSPECV_XCHG)
+		     (match_dup 2)))
+	      (set (match_dup 4)
+		   (minus:SWI (match_dup 4)
+			      (match_dup 2)))])])
+
+(define_insn "*atomic_fetch_sub_cmp<mode>"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (unspec_volatile:SWI
+	    [(match_operand:SWI 0 "memory_operand" "+m")
+	     (match_operand:SI 2 "const_int_operand")]		;; model
+	    UNSPECV_XCHG)
+	  (match_operand:SWI 1 "register_operand" "r")))
+   (set (match_dup 0)
+	(minus:SWI (match_dup 0)
+		   (match_dup 1)))]
+  ""
+  "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}")
+
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
 ;; In addition, it is always a full barrier, so we can ignore the memory model.
 (define_insn "atomic_exchange<mode>"
--- gcc/testsuite/gcc.target/i386/pr98737.c.jj	2021-01-26 15:59:24.640620178 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737.c	2021-01-26 16:00:02.898205888 +0100
@@ -0,0 +1,38 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+foo (long x)
+{
+  return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+bar (int x)
+{
+  return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+baz (short x)
+{
+  return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+qux (char x)
+{
+  return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}

	Jakub


