This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/68501] New: [6 Regression] sqrt builtin is not used anymore
- From: "afomin.mailbox at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Mon, 23 Nov 2015 14:43:11 +0000
- Subject: [Bug tree-optimization/68501] New: [6 Regression] sqrt builtin is not used anymore
- Auto-submitted: auto-generated
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68501
Bug ID: 68501
Summary: [6 Regression] sqrt builtin is not used anymore
Product: gcc
Version: 6.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: afomin.mailbox at gmail dot com
CC: izamyatin at gmail dot com, rsandifo at redhat dot com,
ysrumyan at gmail dot com
Target Milestone: ---
Target: x86_64-*-*
Created attachment 36812
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=36812&action=edit
A reproducer
For the attached reproducer, compiled with g++ -mavx -Ofast, GCC no longer uses
the IA sqrt builtin since r230492 and therefore emits more instructions (25 in
the loop body below, versus 18 with r230491).
r230491
.L8:
vmovaps (%r14,%rax), %ymm0
addl $1, %r12d
vmovups 0(%r13,%rax), %xmm1
vinsertf128 $0x1, 16(%r13,%rax), %ymm1, %ymm1
vmulps %ymm1, %ymm1, %ymm1
vmulps %ymm0, %ymm0, %ymm0
vaddps %ymm1, %ymm0, %ymm1
vrsqrtps %ymm1, %ymm2
vmulps %ymm1, %ymm2, %ymm0
vmulps %ymm2, %ymm0, %ymm0
vaddps %ymm4, %ymm0, %ymm0
vmulps %ymm3, %ymm2, %ymm2
vmulps %ymm2, %ymm0, %ymm0
vmovups %xmm0, (%r10,%rax)
vextractf128 $0x1, %ymm0, 16(%r10,%rax)
addq $32, %rax
cmpl %r12d, %r9d
ja .L8
r230492
.L8:
vmovaps (%r14,%rax), %ymm0
addl $1, %r12d
vmovups 0(%r13,%rax), %xmm1
vinsertf128 $0x1, 16(%r13,%rax), %ymm1, %ymm1
vmulps %ymm1, %ymm1, %ymm1
vmulps %ymm0, %ymm0, %ymm0
vaddps %ymm1, %ymm0, %ymm1
vcmpneqps %ymm1, %ymm2, %ymm5
vrsqrtps %ymm1, %ymm0
vandps %ymm5, %ymm0, %ymm0
vmulps %ymm1, %ymm0, %ymm1
vmulps %ymm0, %ymm1, %ymm0
vaddps %ymm4, %ymm0, %ymm0
vmulps %ymm3, %ymm1, %ymm1
vmulps %ymm1, %ymm0, %ymm0
vrcpps %ymm0, %ymm1
vmulps %ymm0, %ymm1, %ymm0
vmulps %ymm0, %ymm1, %ymm0
vaddps %ymm1, %ymm1, %ymm1
vsubps %ymm0, %ymm1, %ymm0
vmovups %xmm0, (%r10,%rax)
vextractf128 $0x1, %ymm0, 16(%r10,%rax)
addq $32, %rax
cmpl %r12d, %r9d
ja .L8