This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/53346] [4.6/4.7/4.8 Regression] Bad vectorization in the proc cptrf2 of rnflow.f90
- From: "ubizjak at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Thu, 17 May 2012 20:09:42 +0000
- Subject: [Bug tree-optimization/53346] [4.6/4.7/4.8 Regression] Bad vectorization in the proc cptrf2 of rnflow.f90
- Auto-submitted: auto-generated
- References: <bug-53346-4@http.gcc.gnu.org/bugzilla/>
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53346
--- Comment #4 from Uros Bizjak <ubizjak at gmail dot com> 2012-05-17 20:09:42 UTC ---
Instead of this:
.L228:
movl $0, -4(%rdx,%rax,4)
addq $1, %rax
cmpq %rax, %rsi
jge .L228
the vectorizer generates the following:
movq %rdx, %rax
movq %r9, %r8
andl $15, %eax
shrq $2, %rax
negq %rax
andl $3, %eax
cmpq %r9, %rax
cmovbe %rax, %r8
cmpq $6, %r9
cmovbe %r9, %r8
testq %r8, %r8
je .L233
leaq 1(%r8), %rsi
movl $1, %eax
.p2align 4,,10
.p2align 3
.L176:
movl $0, -4(%rdx,%rax,4)
addq $1, %rax
cmpq %rsi, %rax
jne .L176
cmpq %r9, %r8
je .L182
.L174:
movq %r9, %rbp
subq %r8, %rbp
movq %rbp, %r11
shrq $2, %r11
leaq 0(,%r11,4), %rbx
testq %rbx, %rbx
je .L181
pxor %xmm0, %xmm0
leaq (%rdx,%r8,4), %r8
xorl %esi, %esi
.p2align 4,,10
.p2align 3
.L183:
addq $1, %rsi
movdqa %xmm0, (%r8)
addq $16, %r8
cmpq %rsi, %r11
ja .L183
addq %rbx, %rax
cmpq %rbx, %rbp
je .L182
.p2align 4,,10
.p2align 3
.L181:
movl $0, -4(%rdx,%rax,4)
addq $1, %rax
cmpq %rax, %r9
jge .L181
Whoa.