This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/50698] New: pretending to create versioning for alias when not required


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50698

             Bug #: 50698
           Summary: pretending to create versioning for alias when not
                    required
    Classification: Unclassified
           Product: gcc
           Version: 4.7.0
            Status: UNCONFIRMED
          Severity: minor
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: vincenzo.innocente@cern.ch


In the example below, the vectorizer reports "versioning for alias" for function
sumS (but not for sum2).  The compiler has all the information it needs to
determine that no aliasing can happen, and indeed the generated code ends up
containing no run-time check at all!
One consequence is that in some similar real-life cases I'm now obliged to set
--param vect-max-version-for-alias-checks to values as large as 100.

c++ -Ofast -ftree-vectorizer-verbose=1 -c Arena.cpp -mtune=corei7  --param
vect-max-version-for-alias-checks=10

Analyzing loop at Arena.cpp:14
Vectorizing loop at Arena.cpp:14
14: LOOP VECTORIZED.
Arena.cpp:18: note: vectorized 1 loops in function.
Analyzing loop at Arena.cpp:14
Vectorizing loop at Arena.cpp:14
14: created 2 versioning for alias checks.
14: LOOP VECTORIZED.
Arena.cpp:22: note: vectorized 1 loops in function.

If I set --param vect-max-version-for-alias-checks=1, sumS is not vectorized
at all.

At the end I do not see any runtime check in the generated code!
otool -v -t -V -X Arena.o
__Z4sum2v:
    leaq    _mem+0x00001000(%rip),%rax
    leaq    _mem+0x00002000(%rip),%rdx
    nop
    movaps    0x00001000(%rax),%xmm0
    addps    (%rax),%xmm0
    movaps    %xmm0,0xfffff000(%rax)
    addq    $0x10,%rax
    cmpq    %rdx,%rax
    jne    0x00000010
    repz/ret
    nopl    __Z4sum2v(%rax)
__Z4sumSv:
    leaq    _mem+0x00001000(%rip),%rax
    leaq    _mem+0x00002000(%rip),%rdx
    nop
    movaps    0x00001000(%rax),%xmm0
    addps    (%rax),%xmm0
    movaps    %xmm0,0xfffff000(%rax)
    addq    $0x10,%rax
    cmpq    %rdx,%rax
    jne    0x00000040
    repz/ret



float mem[4096];  // global backing array; sum2() and sumS() pass pointers into it to sum()
const int N=1024;  // length of each range handed to sum(), and the offset between ranges

struct XYZ {  // exposes three pointers spaced n floats apart, starting at mem
  float * mem;  // base pointer; set by the user of the struct (no constructor)
  int n;  // offset, in floats, between consecutive pointers
  float * x() { return mem;}  // base pointer itself
  float * y() { return x()+n;}  // base + n
  float * z() { return y()+n;}  // base + 2*n
};

inline
void sum(float * x, float * y, float * z, int n) {  // stores y[i]+z[i] into x[i] for i = 0 .. n-1
  for (int i=0;i!=n; ++i)  // exits when i == n; presumably the loop reported as Arena.cpp:14
    x[i]=y[i]+z[i];
}

void sum2() {  // direct-pointer variant: the report shows no alias versioning for this one
  sum(mem,mem+N,mem+2*N,N);  // x = mem, y = mem+N, z = mem+2*N, each N floats long
}

void sumS() {  // same pointers as sum2(), but obtained through XYZ's accessors
  XYZ xyz; xyz.mem=mem; xyz.n=N;  // local XYZ over the global array with spacing N
  sum(xyz.x(),xyz.y(),xyz.z(),xyz.n);  // the report shows "versioning for alias" for this function
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]