This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/79151] New: Missed vectorization with identical formulas
- From: "tkoenig at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Thu, 19 Jan 2017 18:16:11 +0000
- Subject: [Bug tree-optimization/79151] New: Missed vectorization with identical formulas
- Auto-submitted: auto-generated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79151
Bug ID: 79151
Summary: Missed vectorization with identical formulas
Product: gcc
Version: unknown
Status: UNCONFIRMED
Severity: enhancement
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: tkoenig at gcc dot gnu.org
Target Milestone: ---
Consider the following code. The function "scalar" evaluates two formulas
that are identical except for their coefficients (taken from a and from b,
respectively).
This could be vectorized. As an example of how this could be done,
see the function "vector", which does the vectorization by hand using
GCC's generic vector extension (vector_size). You will see that the
assembly for "vector" is much shorter; all the arithmetic is done with
packed (vector) instructions.
This is for x86_64-pc-linux-gnu.
#include <stdio.h>
/*
 * Evaluate  c[0] + c[1]/x - 1.0/(c[0] - c[1])  twice: once with the
 * coefficients in a (result stored to *ar) and once with the coefficients
 * in b (result stored to *br).
 *
 * a, b  - arrays of which elements 0 and 1 are read
 * x     - divisor applied to the second coefficient of each pair
 * ar,br - destinations for the two results
 *
 * No checks are made: x == 0 or c[0] == c[1] simply divides by zero.
 */
void scalar(const double *restrict a, const double *restrict b,
            double x, double *ar, double *br)
{
    double ra, rb;

    /* Both formulas are computed before either result is stored. */
    ra = a[0] + a[1]/x - 1.0/(a[0]-a[1]);
    rb = b[0] + b[1]/x - 1.0/(b[0]-b[1]);

    *ar = ra;
    *br = rb;
}
/*
 * Hand-vectorized evaluation of  c[0] + c[1]/x - 1.0/(c[0] - c[1])  for the
 * two coefficient pairs a and b at once, using a two-lane double vector
 * (GCC vector_size extension).  Lane 0 works on a and is stored to *ar;
 * lane 1 works on b and is stored to *br.
 */
void vector(const double *restrict a, const double *restrict b,
            double x, double *ar, double *br)
{
    typedef double v2do __attribute__((vector_size (16)));

    /* Gather the matching coefficients of a and b into the two lanes. */
    const v2do lead = { a[0], b[0] };
    const v2do tail = { a[1], b[1] };

    /* The scalar operands x and 1.0 are applied to both lanes. */
    const v2do scaled = tail / x;
    const v2do recip = 1.0 / (lead - tail);
    const v2do result = lead + scaled - recip;

    *ar = result[0];
    *br = result[1];
}
/* Sample coefficient pairs that main() passes to scalar() and vector(). */
double a[] = {1.0, -1.5};
double b[] = {1.3, -1.2};
int main()
{
double x = 1.24;
double ar, br;
scalar(a, b, x, &ar, &br);
printf("%f %f\n", ar, br);
vector(a, b, x, &ar, &br);
printf("%f %f\n", ar, br);
return 0;
}
Assembly for the function "scalar":
# Compiler output for scalar().  AT&T syntax: "op src, dst".
# Register roles assume the System V AMD64 calling convention for the
# x86_64-pc-linux-gnu target named in the report:
#   %rdi = a, %rsi = b, %xmm0 = x, %rdx = ar, %rcx = br.
# Every arithmetic instruction is the scalar (sd) form: three divsd for
# each of the two formulas, six in total.
scalar:
.LFB11:
.cfi_startproc
movsd 8(%rdi), %xmm4        # xmm4 = a[1]
movsd 8(%rsi), %xmm5        # xmm5 = b[1]
movapd %xmm4, %xmm1         # xmm1 = a[1]
movsd (%rdi), %xmm2         # xmm2 = a[0]
movapd %xmm5, %xmm7         # xmm7 = b[1]
divsd %xmm0, %xmm1          # xmm1 = a[1]/x
divsd %xmm0, %xmm7          # xmm7 = b[1]/x
addsd %xmm2, %xmm1          # xmm1 = a[0] + a[1]/x
subsd %xmm4, %xmm2          # xmm2 = a[0] - a[1]
movapd %xmm2, %xmm4         # xmm4 = a[0] - a[1]
movsd (%rsi), %xmm3         # xmm3 = b[0]
movsd .LC0(%rip), %xmm2     # xmm2 = constant .LC0 (not shown; presumably 1.0)
movapd %xmm7, %xmm0         # xmm0 = b[1]/x (x is no longer needed)
movapd %xmm2, %xmm6         # xmm6 = .LC0
addsd %xmm3, %xmm0          # xmm0 = b[0] + b[1]/x
subsd %xmm5, %xmm3          # xmm3 = b[0] - b[1]
divsd %xmm4, %xmm6          # xmm6 = .LC0 / (a[0] - a[1])
divsd %xmm3, %xmm2          # xmm2 = .LC0 / (b[0] - b[1])
subsd %xmm6, %xmm1          # xmm1 = ra
movsd %xmm1, (%rdx)         # *ar = ra
subsd %xmm2, %xmm0          # xmm0 = rb
movsd %xmm0, (%rcx)         # *br = rb
ret
Assembly for the function "vector":
# Compiler output for vector().  AT&T syntax: "op src, dst".
# Register roles assume the System V AMD64 calling convention for the
# x86_64-pc-linux-gnu target named in the report:
#   %rdi = a, %rsi = b, %xmm0 = x, %rdx = ar, %rcx = br.
# Vectors are written {low lane, high lane}.  All arithmetic uses the
# packed (pd) forms, so each operation covers both formulas at once.
vector:
.LFB12:
.cfi_startproc
movsd 8(%rsi), %xmm2        # xmm2 low = b[1]
movsd 8(%rdi), %xmm3        # xmm3 low = a[1]
unpcklpd %xmm0, %xmm0       # xmm0 = {x, x}
unpcklpd %xmm2, %xmm3       # xmm3 = {a[1], b[1]}  (c1)
movapd .LC1(%rip), %xmm2    # xmm2 = vector constant .LC1 (not shown)
movsd (%rdi), %xmm1         # xmm1 low = a[0]
movapd %xmm3, %xmm4         # xmm4 = c1
movhpd (%rsi), %xmm1        # xmm1 = {a[0], b[0]}  (c0)
divpd %xmm0, %xmm4          # xmm4 = c1 / x
movapd %xmm4, %xmm0         # xmm0 = c1 / x
addpd %xmm1, %xmm0          # xmm0 = c0 + c1/x
subpd %xmm3, %xmm1          # xmm1 = c0 - c1
divpd %xmm1, %xmm2          # xmm2 = .LC1 / (c0 - c1)
addpd %xmm2, %xmm0          # xmm0 = r; added, not subtracted, so .LC1 is presumably {-1.0, -1.0} - TODO confirm
movlpd %xmm0, (%rdx)        # *ar = r[0] (low lane)
movhpd %xmm0, (%rcx)        # *br = r[1] (high lane)
ret