This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Bug tree-optimization/79151] New: Missed vectorization with identical formulas

From: "tkoenig at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
To: gcc-bugs at gcc dot gnu dot org
Date: Thu, 19 Jan 2017 18:16:11 +0000
Subject: [Bug tree-optimization/79151] New: Missed vectorization with identical formulas
Auto-submitted: auto-generated

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79151

            Bug ID: 79151
           Summary: Missed vectorization with identical formulas
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

Consider the following code. The function "scalar" evaluates two formulas
that are identical except for the coefficients they use.

This could be vectorized.  As an example of how this could be done,
see the function "vector", which is written with GCC's vector extensions.

You will see that the assembly generated for "vector" is much shorter;
all the arithmetic is done with packed (vector) instructions.

This is for x86_64-pc-linux-gnu.

#include <stdio.h>

/*
 * Evaluate one formula for two independent coefficient pairs, using plain
 * scalar arithmetic.
 *
 * For a coefficient pair c the formula is
 *
 *     c[0] + c[1]/x - 1.0/(c[0] - c[1])
 *
 * a, b   coefficient pairs; elements 0 and 1 of each are read.
 * x      divisor applied to the second coefficient of each pair.
 * ar     receives the result for the "a" coefficients.
 * br     receives the result for the "b" coefficients.
 *
 * No check is made for x == 0 or for c[0] == c[1].
 */
void scalar(const double *restrict a, const double *restrict b,
        double x, double *ar, double *br)
{
  double ra, rb;

  /* The two statements are deliberately the same expression with only the
     coefficient array swapped; this is the pattern the report is about.  */
  ra = a[0] + a[1]/x - 1.0/(a[0]-a[1]);
  rb = b[0] + b[1]/x - 1.0/(b[0]-b[1]);

  /* Both results are computed before either output is written.  */
  *ar = ra;
  *br = rb;
}

/*
 * Two-lane counterpart of the scalar formula
 *
 *     c[0] + c[1]/x - 1.0/(c[0] - c[1])
 *
 * written with GCC's vector_size extension.  Lane 0 carries the "a"
 * coefficients and lane 1 the "b" coefficients, so one vector expression
 * produces both results.
 *
 * a, b   coefficient pairs; elements 0 and 1 of each are read.
 * x      scalar divisor, applied to both lanes.
 * ar     receives lane 0 of the result.
 * br     receives lane 1 of the result.
 */
void vector(const double *restrict a, const double *restrict b,
        double x, double *ar, double *br)
{
  typedef double pair_t __attribute__((vector_size (16)));

  /* Gather the leading and trailing coefficients of both inputs.  */
  const pair_t lead = { a[0], b[0] };
  const pair_t trail = { a[1], b[1] };

  /* Same operation order as the one-line expression:
     (lead + trail/x) - 1.0/(lead - trail).  */
  const pair_t scaled = trail / x;
  const pair_t inverse = 1.0 / (lead - trail);
  const pair_t result = lead + scaled - inverse;

  *ar = result[0];
  *br = result[1];
}

/* Sample coefficient pairs passed to scalar() and vector() by main().  */
double a[2] = { 1.0, -1.5 };
double b[2] = { 1.3, -1.2 };

int main()
{
  double x = 1.24;
  double ar, br;

  scalar(a, b, x, &ar, &br);
  printf("%f %f\n", ar, br);
  vector(a, b, x, &ar, &br);
  printf("%f %f\n", ar, br);

  return 0;
}

Assembly for the function "scalar":

# Compiler output for scalar().
# NOTE(review): the annotations assume the System V AMD64 calling convention
# (a in %rdi, b in %rsi, x in %xmm0, ar in %rdx, br in %rcx) -- confirm
# against the actual build.  Every arithmetic instruction is a scalar
# double-precision (*sd) operation, and four divsd instructions are issued.
scalar:
.LFB11:
        .cfi_startproc
        movsd   8(%rdi), %xmm4          # xmm4 = a[1]
        movsd   8(%rsi), %xmm5          # xmm5 = b[1]
        movapd  %xmm4, %xmm1            # xmm1 = a[1]
        movsd   (%rdi), %xmm2           # xmm2 = a[0]
        movapd  %xmm5, %xmm7            # xmm7 = b[1]
        divsd   %xmm0, %xmm1            # xmm1 = a[1]/x
        divsd   %xmm0, %xmm7            # xmm7 = b[1]/x
        addsd   %xmm2, %xmm1            # xmm1 = a[0] + a[1]/x
        subsd   %xmm4, %xmm2            # xmm2 = a[0] - a[1]
        movapd  %xmm2, %xmm4            # xmm4 = a[0] - a[1]
        movsd   (%rsi), %xmm3           # xmm3 = b[0]
        movsd   .LC0(%rip), %xmm2       # xmm2 = .LC0 (presumably 1.0; constant not shown)
        movapd  %xmm7, %xmm0            # xmm0 = b[1]/x (x is no longer needed)
        movapd  %xmm2, %xmm6            # xmm6 = .LC0
        addsd   %xmm3, %xmm0            # xmm0 = b[0] + b[1]/x
        subsd   %xmm5, %xmm3            # xmm3 = b[0] - b[1]
        divsd   %xmm4, %xmm6            # xmm6 = .LC0/(a[0] - a[1])
        divsd   %xmm3, %xmm2            # xmm2 = .LC0/(b[0] - b[1])
        subsd   %xmm6, %xmm1            # xmm1 = ra
        movsd   %xmm1, (%rdx)           # *ar = ra
        subsd   %xmm2, %xmm0            # xmm0 = rb
        movsd   %xmm0, (%rcx)           # *br = rb
        ret

Assembly for the function "vector":

# Compiler output for vector().
# NOTE(review): the annotations assume the System V AMD64 calling convention
# (a in %rdi, b in %rsi, x in %xmm0, ar in %rdx, br in %rcx) -- confirm
# against the actual build.  The arithmetic uses packed double-precision
# (*pd) operations on two lanes, and only two divpd instructions are issued.
vector:
.LFB12:
        .cfi_startproc
        movsd   8(%rsi), %xmm2          # xmm2 low lane = b[1]
        movsd   8(%rdi), %xmm3          # xmm3 low lane = a[1]
        unpcklpd        %xmm0, %xmm0    # xmm0 = {x, x}
        unpcklpd        %xmm2, %xmm3    # xmm3 = {a[1], b[1]}  (c1)
        movapd  .LC1(%rip), %xmm2       # xmm2 = .LC1 (constant vector; definition not shown)
        movsd   (%rdi), %xmm1           # xmm1 low lane = a[0]
        movapd  %xmm3, %xmm4            # xmm4 = c1
        movhpd  (%rsi), %xmm1           # xmm1 = {a[0], b[0]}  (c0)
        divpd   %xmm0, %xmm4            # xmm4 = c1/x
        movapd  %xmm4, %xmm0            # xmm0 = c1/x
        addpd   %xmm1, %xmm0            # xmm0 = c0 + c1/x
        subpd   %xmm3, %xmm1            # xmm1 = c0 - c1
        divpd   %xmm1, %xmm2            # xmm2 = .LC1/(c0 - c1)
        # The source subtracts 1.0/(c0-c1) but an add is emitted here, so
        # .LC1 presumably holds {-1.0, -1.0} -- TODO confirm.
        addpd   %xmm2, %xmm0            # xmm0 = r
        movlpd  %xmm0, (%rdx)           # *ar = low lane of r
        movhpd  %xmm0, (%rcx)           # *br = high lane of r
        ret

Follow-Ups:
- [Bug tree-optimization/79151] Missed BB vectorization with strided/scalar stores
  - From: rguenth at gcc dot gnu.org

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]