This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/71016] [6/7 Regression] Redundant sign extension with conditional __builtin_clzl


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71016

--- Comment #8 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
So, trying:
long int
foo (long int i)
{
  return i == 0 ? 17 : __builtin_clzl (i);
}

long int
bar (long int i)
{
  return i == 0 ? 17 : __builtin_popcountl (i);
}

long int
baz (long int i)
{
  return i == 0 ? 17 : __builtin_ffsl (i);
}
at -O2 to see the effect on various builtins.  On x86_64-linux I see:
-       bsrq    %rdi, %rdx
-       movl    $17, %eax
-       xorq    $63, %rdx
+       bsrq    %rdi, %rax
+       movl    $17, %edx
+       xorq    $63, %rax
        testq   %rdi, %rdi
-       cmovne  %edx, %eax
        cltq
+       cmove   %rdx, %rax
in foo (so neither better nor worse; nonzero_bits isn't able to see through the
(63 - CLZ (x)) ^ 63 expression; but the patched compiler has the potential to do
better: if during expansion we set the promoted flag on the SUBREG to
SRP_SIGNED_AND_UNSIGNED, perhaps we could get rid of the unneeded cltq), in bar:
        movl    $17, %eax
-       cltq
        ret
(better; something in the pro_and_epilogue pass creates a set to a constant
followed by a sign extension, and there is no combiner afterwards to fix things
up),
and in baz:
-       xorl    %edx, %edx
-       movl    $17, %eax
-       rep; bsfq       %rdi, %rdx
-       addq    $1, %rdx
+       xorl    %eax, %eax
+       movl    $17, %edx
+       rep; bsfq       %rdi, %rax
+       addq    $1, %rax
        testq   %rdi, %rdi
-       cmovne  %edx, %eax
        cltq
+       cmove   %rdx, %rax
(different, but not better or worse).
On aarch64-linux, I see:
        clz     x2, x0
        cmp     x0, 0
-       mov     w1, 17
-       csel    w0, w1, w2, eq
-       sxtw    x0, w0
+       mov     x1, 17
+       csel    x0, x2, x1, ne
in foo (better),
-       mov     w0, 17
-       sxtw    x0, w0
+       mov     x0, 17
in bar (better) and
-       rbit    x1, x0
-       cmp     x0, 0
-       clz     x1, x1
-       mov     w2, 17
-       csinc   w0, w2, w1, eq
+       cbz     x0, .L14
+       rbit    x0, x0
+       clz     x0, x0
+       add     x0, x0, 1
        sxtw    x0, w0
        ret
+       .align  3
+.L14:
+       mov     x0, 17
+       ret
in baz (worse).  Of course, only foo (with some more sensible constant instead
of 17) is real-world stuff.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]