[Bug tree-optimization/103797] Clang vectorized LightPixel while GCC does not
ubizjak at gmail dot com
gcc-bugzilla@gcc.gnu.org
Thu Dec 23 08:58:48 GMT 2021
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103797
--- Comment #14 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Uroš Bizjak from comment #13)
> Created attachment 52051 [details]
> Patch that implements v2sf division
This patch also enables vectorization of the testcase from Comment #7. With
-ffast-math, it also generates a vectorized reciprocal (the rcpps marked with --> below, followed by a Newton-Raphson refinement step):
movss f(%rip), %xmm4
movss test+8(%rip), %xmm3
movq test(%rip), %xmm2
mulss %xmm4, %xmm3
movaps %xmm4, %xmm0
shufps $0xe0, %xmm0, %xmm0
mulps %xmm0, %xmm2
movhps .LC0(%rip), %xmm0
--> rcpps %xmm0, %xmm1
sqrtss %xmm3, %xmm3
mulps %xmm1, %xmm0
sqrtps %xmm2, %xmm2
divss %xmm4, %xmm3
movaps %xmm2, %xmm5
mulps %xmm1, %xmm0
addps %xmm1, %xmm1
subps %xmm0, %xmm1
mulps %xmm1, %xmm5
movlps %xmm5, test(%rip)
movss %xmm3, test+8(%rip)
ret
More information about the Gcc-bugs
mailing list