[Bug target/31667] Integer extensions vectorization could be improved
pinskia at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Sun Aug 15 23:28:18 GMT 2021
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31667
--- Comment #5 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
We produce this now:
movdqa x(%rip), %xmm1
pxor %xmm0, %xmm0
movdqa %xmm1, %xmm2
punpckhbw %xmm0, %xmm1
movaps %xmm1, y+16(%rip)
movdqa x+16(%rip), %xmm1
punpcklbw %xmm0, %xmm2
movaps %xmm2, y(%rip)
movdqa %xmm1, %xmm2
punpckhbw %xmm0, %xmm1
movaps %xmm1, y+48(%rip)
movdqa x+32(%rip), %xmm1
punpcklbw %xmm0, %xmm2
movaps %xmm2, y+32(%rip)
movdqa %xmm1, %xmm2
punpckhbw %xmm0, %xmm1
movaps %xmm1, y+80(%rip)
movdqa x+48(%rip), %xmm1
punpcklbw %xmm0, %xmm2
movaps %xmm2, y+64(%rip)
movdqa %xmm1, %xmm2
punpckhbw %xmm0, %xmm1
punpcklbw %xmm0, %xmm2
movaps %xmm1, y+112(%rip)
movaps %xmm2, y+96(%rip)
And even ICC produces similar code, just scheduled differently.
More information about the Gcc-bugs
mailing list