[Bug tree-optimization/103797] Clang vectorized LightPixel while GCC does not

ubizjak at gmail dot com gcc-bugzilla@gcc.gnu.org
Thu Dec 23 08:58:48 GMT 2021


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103797

--- Comment #14 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Uroš Bizjak from comment #13)
> Created attachment 52051 [details]
> Patch that implements v2sf division

This patch also enables vectorization of the testcase from Comment #7. With
-ffast-math, it also generates a vectorized reciprocal (the rcpps instruction
marked with "-->" below, followed by a Newton-Raphson refinement step):

        movss   f(%rip), %xmm4
        movss   test+8(%rip), %xmm3
        movq    test(%rip), %xmm2
        mulss   %xmm4, %xmm3
        movaps  %xmm4, %xmm0
        shufps  $0xe0, %xmm0, %xmm0
        mulps   %xmm0, %xmm2
        movhps  .LC0(%rip), %xmm0
-->     rcpps   %xmm0, %xmm1
        sqrtss  %xmm3, %xmm3
        mulps   %xmm1, %xmm0
        sqrtps  %xmm2, %xmm2
        divss   %xmm4, %xmm3
        movaps  %xmm2, %xmm5
        mulps   %xmm1, %xmm0
        addps   %xmm1, %xmm1
        subps   %xmm0, %xmm1
        mulps   %xmm1, %xmm5
        movlps  %xmm5, test(%rip)
        movss   %xmm3, test+8(%rip)
        ret


More information about the Gcc-bugs mailing list