[PATCH] i386: Add peephole2 for __atomic_sub_fetch (x, y, z) == 0 [PR98737]
Jakub Jelinek
jakub@redhat.com
Wed Jan 27 09:20:38 GMT 2021
Hi!
This patch adds a peephole2 for the optimization requested in the PR,
namely that we emit awful code for __atomic_sub_fetch (x, y, z) == 0
or __atomic_sub_fetch (x, y, z) != 0 when y is not constant.
This can't be done in the combiner which punts on combining UNSPEC_VOLATILE
into other insns.
For other ops we'd need different peephole2s; this one is specific because of the
comparison instruction and negation that need to be matched.
Bootstrapped/regtested on x86_64-linux and i686-linux. Is this ok for trunk
(as an exception), or for GCC 12?
2021-01-27 Jakub Jelinek <jakub@redhat.com>
PR target/98737
* config/i386/sync.md (neg; mov; lock xadd; add peephole2): New
define_peephole2.
(*atomic_fetch_sub_cmp<mode>): New define_insn.
* gcc.target/i386/pr98737.c: New test.
--- gcc/config/i386/sync.md.jj 2021-01-04 10:25:45.392159555 +0100
+++ gcc/config/i386/sync.md 2021-01-26 16:03:13.911100510 +0100
@@ -777,6 +777,63 @@ (define_insn "*atomic_fetch_add_cmp<mode
return "lock{%;} %K3add{<imodesuffix>}\t{%1, %0|%0, %1}";
})
+;; Similarly, peephole for __sync_sub_fetch (x, b) == 0 into just
+;; lock sub followed by testing of flags instead of lock xadd, negation and
+;; comparison.
+(define_peephole2
+ [(parallel [(set (match_operand 0 "register_operand")
+ (neg (match_dup 0)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_operand:SWI 1 "register_operand")
+ (match_operand:SWI 2 "register_operand"))
+ (parallel [(set (match_operand:SWI 3 "register_operand")
+ (unspec_volatile:SWI
+ [(match_operand:SWI 4 "memory_operand")
+ (match_operand:SI 5 "const_int_operand")]
+ UNSPECV_XCHG))
+ (set (match_dup 4)
+ (plus:SWI (match_dup 4)
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:SWI
+ (match_operand:SWI 6 "register_operand"))
+ (match_dup 3)))
+ (clobber (match_dup 3))])]
+ "(GET_MODE (operands[0]) == <LEAMODE>mode
+ || GET_MODE (operands[0]) == <MODE>mode)
+ && reg_or_subregno (operands[0]) == reg_or_subregno (operands[2])
+ && (rtx_equal_p (operands[2], operands[3])
+ ? rtx_equal_p (operands[1], operands[6])
+ : (rtx_equal_p (operands[2], operands[6])
+ && rtx_equal_p (operands[1], operands[3])))
+ && peep2_reg_dead_p (4, operands[6])
+ && peep2_reg_dead_p (4, operands[3])
+ && !reg_overlap_mentioned_p (operands[1], operands[4])
+ && !reg_overlap_mentioned_p (operands[2], operands[4])"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:SWI [(match_dup 4) (match_dup 5)]
+ UNSPECV_XCHG)
+ (match_dup 2)))
+ (set (match_dup 4)
+ (minus:SWI (match_dup 4)
+ (match_dup 2)))])])
+
+(define_insn "*atomic_fetch_sub_cmp<mode>"
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec_volatile:SWI
+ [(match_operand:SWI 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (match_operand:SWI 1 "register_operand" "r")))
+ (set (match_dup 0)
+ (minus:SWI (match_dup 0)
+ (match_dup 1)))]
+ ""
+ "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}")
+
;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
;; In addition, it is always a full barrier, so we can ignore the memory model.
(define_insn "atomic_exchange<mode>"
--- gcc/testsuite/gcc.target/i386/pr98737.c.jj 2021-01-26 15:59:24.640620178 +0100
+++ gcc/testsuite/gcc.target/i386/pr98737.c 2021-01-26 16:00:02.898205888 +0100
@@ -0,0 +1,38 @@
+/* PR target/98737 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-additional-options "-march=i686" { target ia32 } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subq\t" { target lp64 } } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subl\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subw\t" } } */
+/* { dg-final { scan-assembler "lock\[^\n\r]\*subb\t" } } */
+/* { dg-final { scan-assembler-not "lock\[^\n\r]\*xadd" } } */
+
+long a;
+int b;
+short c;
+char d;
+
+int
+foo (long x)
+{
+ return __atomic_sub_fetch (&a, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+bar (int x)
+{
+ return __atomic_sub_fetch (&b, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+baz (short x)
+{
+ return __atomic_sub_fetch (&c, x, __ATOMIC_RELEASE) == 0;
+}
+
+int
+qux (char x)
+{
+ return __atomic_sub_fetch (&d, x, __ATOMIC_RELEASE) == 0;
+}
Jakub
More information about the Gcc-patches
mailing list