This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/86557] missed vectorization with std::vector compared to icc 18
- From: "rguenth at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Wed, 18 Jul 2018 09:18:25 +0000
- Subject: [Bug tree-optimization/86557] missed vectorization with std::vector compared to icc 18
- Auto-submitted: auto-generated
- References: <bug-86557-4@http.gcc.gnu.org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86557
--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
ICC seems to emulate this (the 64-bit arithmetic right shift, judging by the psrlq/psrad pairs below) even for SSE2, where I'm not sure this is profitable:
# ICC-generated SSE2 loop body (AT&T syntax) for `x[i] = x[i] / 4` on `long x[1024]`.
# Each iteration processes 16 bytes, i.e. two 8-byte elements (index step 2, scale 8).
# %rax is the element index; its initialisation lies outside this excerpt.
# psrad is a 32-bit arithmetic shift and psrlq a 64-bit logical shift; the code
# ORs the two together to obtain a 64-bit arithmetic shift.
..B1.2: # Preds ..B1.2 ..B1.1
# Execution count [1.02e+03]
# %xmm2 is loaded inside the loop: it is overwritten by the second pand/psrad
# further down, so its value does not survive an iteration.
# NOTE(review): the constant's value is not shown here; presumably it is a mask
# selecting the upper 32 bits of each 64-bit element - confirm.
movdqu .L_2il0floatpacket.0(%rip), %xmm2 #6.19
# rdx = address of x[rax]
lea x(,%rax,8), %rdx #6.12
# xmm1 = the two elements to divide (unaligned load)
movdqu (%rdx), %xmm1 #6.12
# xmm0 = elements ANDed with the constant
movdqa %xmm2, %xmm0 #6.19
pand %xmm1, %xmm0 #6.19
# xmm3 = elements shifted right by 1, logically, per 64-bit element
movdqa %xmm1, %xmm3 #6.19
psrlq $1, %xmm3 #6.19
# 32-bit arithmetic shift of the masked copy supplies the sign-filled top bits
psrad $1, %xmm0 #6.19
# xmm3 = logical | arithmetic parts (a 64-bit arithmetic >> 1 if the constant
# is the presumed upper-half mask)
por %xmm0, %xmm3 #6.19
# Keep only the top two bits of that value: under the same assumption this is
# 3 for negative elements and 0 otherwise, the bias for truncating division by 4
psrlq $62, %xmm3 #6.19
# xmm3 = element + bias
paddq %xmm1, %xmm3 #6.19
# Same shift emulation again, this time by 2; %xmm2 is clobbered as scratch
pand %xmm3, %xmm2 #6.19
psrlq $2, %xmm3 #6.19
psrad $2, %xmm2 #6.19
por %xmm2, %xmm3 #6.19
# Store the two results back over the inputs
movdqu %xmm3, (%rdx) #6.5
# Advance by the two elements handled and loop while index < 1024 (unsigned compare)
addq $2, %rax #5.3
cmpq $1024, %rax #5.3
jb ..B1.2 # Prob 99% #5.3
and for AVX2:
# ICC-generated AVX2 loop body (AT&T syntax) for `x[i] = x[i] / 4` on `long x[1024]`.
# Each iteration processes 32 bytes, i.e. four 8-byte elements (index step 4, scale 8).
# %rax is the element index; its initialisation lies outside this excerpt.
# A 64-bit arithmetic shift is assembled from vpsrlq (64-bit logical shift) and
# vpsrad (32-bit arithmetic shift) merged with vpblendw; no mask constant is loaded.
..B1.2: # Preds ..B1.2 ..B1.1
# Execution count [1.02e+03]
# rdx = address of x[rax]
lea x(,%rax,8), %rdx #6.12
# ymm4 = the four elements to divide (unaligned load)
vmovdqu (%rdx), %ymm4 #6.12
# ymm0 = logical >> 1 per 64-bit element, ymm1 = arithmetic >> 1 per 32-bit half
vpsrlq $1, %ymm4, %ymm0 #6.19
vpsrad $1, %ymm4, %ymm1 #6.19
# 204 = 0xCC: 16-bit words 2,3,6,7 of each 128-bit lane (the upper 32 bits of
# every 64-bit element) come from %ymm1, the remaining words from %ymm0,
# which yields the 64-bit arithmetic >> 1
vpblendw $204, %ymm1, %ymm0, %ymm2 #6.19
# Top two bits of that value: 3 for negative elements, 0 otherwise - the bias
# needed so the final shift truncates toward zero
vpsrlq $62, %ymm2, %ymm3 #6.19
# ymm5 = element + bias
vpaddq %ymm4, %ymm3, %ymm5 #6.19
# Same shift emulation again, this time by 2, giving the quotient in %ymm8
vpsrlq $2, %ymm5, %ymm6 #6.19
vpsrad $2, %ymm5, %ymm7 #6.19
vpblendw $204, %ymm7, %ymm6, %ymm8 #6.19
# Store the four results back over the inputs
vmovdqu %ymm8, (%rdx) #6.5
# Advance by the four elements handled and loop while index < 1024 (unsigned compare)
addq $4, %rax #5.3
cmpq $1024, %rax #5.3
jb ..B1.2 # Prob 99% #5.3
/* File-scope array that foo() updates in place. */
long x[1024];
/* Reproducer: divides every element of x by 4, in place.
   The elements are signed, so the quotient truncates toward zero; for
   negative values that differs from a plain arithmetic right shift by 2. */
void foo()
{
/* The bound 1024 matches the declared length of x. */
for (int i = 0; i < 1024; ++i)
x[i] = x[i] / 4;
}