Bug 46012

Summary: 256-bit vectorizer fails on int->double conversion
Product: gcc Reporter: H.J. Lu <hjl.tools>
Component: tree-optimization Assignee: Not yet assigned to anyone <unassigned>
Status: NEW ---    
Severity: enhancement CC: areg.melikadamyan, rguenth
Priority: P3 Keywords: missed-optimization
Version: 4.6.0   
Target Milestone: ---   
See Also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46011
Host: Target:
Build: Known to work:
Known to fail: Last reconfirmed: 2021-02-23 00:00:00
Bug Depends on:    
Bug Blocks: 53947    

Description H.J. Lu 2010-10-14 08:53:36 UTC
For

---
double a[1024];
float b[1024];
int c[1024];

void dependence_distance_4_mixed_0 (void)
{
  int i;
  for (i = 0; i < 1020; ++i)
    a[i + 4] = a[i] + a[i + 4] + c[i];
}
---

With -O3 -ffast-math -mavx, the vect256 branch generates a 256-bit (%ymm) loop:

.L2:
	vmovapd	a(%rax,%rax), %ymm0
	vcvtdq2pd	c(%rax), %ymm1
	vaddpd	a+32(%rax,%rax), %ymm0, %ymm0
	vaddpd	%ymm1, %ymm0, %ymm0
	vmovapd	%ymm0, a+32(%rax,%rax)
	addq	$16, %rax
	cmpq	$4080, %rax
	jne	.L2

Trunk at revision 165455 generates only 128-bit (%xmm) code for the same loop:

.L2:
	vmovapd	16(%rax), %xmm2
	vaddpd	-16(%rax), %xmm2, %xmm2
	vmovdqa	(%rdx), %xmm0
	addq	$16, %rdx
	vpshufd	$238, %xmm0, %xmm1
	vcvtdq2pd	%xmm0, %xmm0
	vcvtdq2pd	%xmm1, %xmm1
	vaddpd	%xmm1, %xmm2, %xmm1
	vmovapd	(%rax), %xmm2
	vaddpd	-32(%rax), %xmm2, %xmm2
	vmovapd	%xmm1, 16(%rax)
	vaddpd	%xmm0, %xmm2, %xmm0
	vmovapd	%xmm0, (%rax)
	addq	$32, %rax
	cmpq	%rax, %rcx
	jne	.L2
Comment 1 Richard Biener 2010-10-14 09:01:56 UTC
Related to PR46011
Comment 2 Richard Biener 2012-07-13 08:47:11 UTC
Link to vectorizer missed-optimization meta-bug.