[gcc r13-740] Optimize double word negation of zero extended values on x86.

Roger Sayle sayle@gcc.gnu.org
Tue May 24 14:20:32 GMT 2022


https://gcc.gnu.org/g:e8a25550dac458a2afd8d456540e94e060fa2384

commit r13-740-ge8a25550dac458a2afd8d456540e94e060fa2384
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Tue May 24 15:18:56 2022 +0100

    Optimize double word negation of zero extended values on x86.
    
    It's not uncommon for GCC to convert between a (zero or one) Boolean
    value and a (zero or all ones) mask value, possibly of a wider type,
    using negation.
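
    For example (an illustrative C snippet, not part of the patch), a
    Boolean comparison result widened into a mask:

    long long mask(int a, int b) { return -(long long)(a > b); }

    Here (a > b) is 0 or 1, so the negation yields 0 or all ones.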
    
    Currently on x86_64, the following simple test case:
    __int128 foo(unsigned long x) { return -(__int128)x; }
    
    compiles with -O2 to:
    
            movq    %rdi, %rax
            xorl    %edx, %edx
            negq    %rax
            adcq    $0, %rdx
            negq    %rdx
            ret
    
    with this patch, which adds an additional peephole2 to i386.md, we
    instead generate the improved sequence:
    
            movq    %rdi, %rax
            negq    %rax
            sbbq    %rdx, %rdx
            ret
    
    [and likewise for the (DImode) long long version using -m32.]
    A peephole2 is appropriate as the double word negation and the
    operation providing the xor are typically only split after combine.
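
    To see why the shorter sequence is equivalent, note that neg sets the
    carry flag exactly when its operand is nonzero, and "sbb %rdx, %rdx"
    computes %rdx - %rdx - CF, i.e. -CF.  Hence, for a zero extended x, the
    two words of -(__int128)x are (an illustrative reference model, not
    part of the patch):

    unsigned long neg_lo(unsigned long x) { return -x; }
    unsigned long neg_hi(unsigned long x) { return -(unsigned long)(x != 0); }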
    
    In fact, the new peephole2 sequence:
    ;; Convert:
    ;;   xorl %edx, %edx
    ;;   negl %eax
    ;;   adcl $0, %edx
    ;;   negl %edx
    ;; to:
    ;;   negl %eax
    ;;   sbbl %edx, %edx    // *x86_mov<mode>cc_0_m1
    
    is nearly identical to (and placed immediately after) the existing:
    ;; Convert:
    ;;   mov %esi, %edx
    ;;   negl %eax
    ;;   adcl $0, %edx
    ;;   negl %edx
    ;; to:
    ;;   xorl %edx, %edx
    ;;   negl %eax
    ;;   sbbl %esi, %edx
    
    One potential objection/concern is that "sbb? %reg,%reg" may be treated
    by older hardware as depending on the previous contents of %reg (a false
    register dependency), much like "xor? %reg,%reg" was treated as a false
    dependency on really old hardware.  This doesn't currently appear to be
    a concern for the i386 backend's *x86_mov<mode>cc_0_m1, as shown by the
    following test code:
    
    int bar(unsigned int x, unsigned int y) {
      return x > y ? -1 : 0;
    }
    
    which currently generates a "naked" sbb:
            cmp     esi, edi
            sbb     eax, eax
            ret
    
    If anyone does encounter a stall, it would be easy to add a splitter or
    peephole2, controlled by a tuning flag, that inserts an additional xor
    to break the false dependency chain (when not optimizing for size), but
    I don't believe this is required on recent microarchitectures.
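
    For reference, such a dependency-breaking variant (purely hypothetical,
    not generated by this patch) would simply zero the destination first:

            xor     eax, eax
            cmp     esi, edi
            sbb     eax, eax
            ret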
    
    2022-05-24 Roger Sayle  <roger@nextmovesoftware.com>
    
    gcc/ChangeLog
            * config/i386/i386.md (peephole2): Convert xor;neg;adc;neg,
            i.e. a double word negation of a zero extended operand, to
            neg;sbb.
    
    gcc/testsuite/ChangeLog
            * gcc.target/i386/neg-zext-1.c: New test case for -m32.
            * gcc.target/i386/neg-zext-2.c: New test case for -m64.

Diff:
---
 gcc/config/i386/i386.md                    | 40 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/neg-zext-1.c |  7 ++++++
 gcc/testsuite/gcc.target/i386/neg-zext-2.c |  7 ++++++
 3 files changed, 54 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 050dee7d43a..b9b8f78dc89 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11040,6 +11040,46 @@
      (clobber (reg:CC FLAGS_REG))])]
   "ix86_expand_clear (operands[0]);")
 
+;; Convert:
+;;   xorl %edx, %edx
+;;   negl %eax
+;;   adcl $0, %edx
+;;   negl %edx
+;; to:
+;;   negl %eax
+;;   sbbl %edx, %edx	// *x86_mov<mode>cc_0_m1
+
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (reg:CCC FLAGS_REG)
+	  (ne:CCC (match_operand:SWI48 1 "general_reg_operand") (const_int 0)))
+     (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
+   (parallel
+    [(set (match_dup 0)
+	  (plus:SWI48 (plus:SWI48
+			(ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
+			(match_dup 0))
+		      (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_dup 0)
+	  (neg:SWI48 (match_dup 0)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "REGNO (operands[0]) != REGNO (operands[1])"
+  [(parallel
+    [(set (reg:CCC FLAGS_REG)
+	  (ne:CCC (match_dup 1) (const_int 0)))
+     (set (match_dup 1) (neg:SWI48 (match_dup 1)))])
+   (parallel
+    [(set (match_dup 0)
+	  (if_then_else:SWI48 (ltu:SWI48 (reg:CC FLAGS_REG) (const_int 0))
+			      (const_int -1)
+			      (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "*neg<mode>_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
 	(neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")))
diff --git a/gcc/testsuite/gcc.target/i386/neg-zext-1.c b/gcc/testsuite/gcc.target/i386/neg-zext-1.c
new file mode 100644
index 00000000000..ec91fb1bc0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/neg-zext-1.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2" } */
+
+long long foo(unsigned int x) { return -(long long)x; }
+
+/* { dg-final { scan-assembler "sbb" } } */
+/* { dg-final { scan-assembler-not "adc" } } */
diff --git a/gcc/testsuite/gcc.target/i386/neg-zext-2.c b/gcc/testsuite/gcc.target/i386/neg-zext-2.c
new file mode 100644
index 00000000000..a6ed077f60c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/neg-zext-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2" } */
+
+__int128 fool(unsigned long x) { return -(__int128)x; }
+
+/* { dg-final { scan-assembler "sbb" } } */
+/* { dg-final { scan-assembler-not "adc" } } */

