This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.
[Bug c++/78180] Poor optimization of std::array on gcc 4.8/5.4/6.2 as compared to simple raw array
- From: "trippels at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Wed, 02 Nov 2016 08:50:36 +0000
- Subject: [Bug c++/78180] Poor optimization of std::array on gcc 4.8/5.4/6.2 as compared to simple raw array
- Auto-submitted: auto-generated
- References: <bug-78180-4@http.gcc.gnu.org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78180
--- Comment #3 from Markus Trippelsdorf <trippels at gcc dot gnu.org> ---
void BM_Rolling<long [128]>: | void BM_Rolling<std::array<long, 128ul> >:
.LFB1712: | .LFB1713:
.cfi_startproc .cfi_startproc
pushq %r13 <
.cfi_def_cfa_offset 16 <
.cfi_offset 13, -16 <
pushq %r12 <
.cfi_def_cfa_offset 24 <
.cfi_offset 12, -24 <
movl $128, %ecx <
pushq %rbp pushq %rbp
.cfi_def_cfa_offset 32 | .cfi_def_cfa_offset 16
.cfi_offset 6, -32 | .cfi_offset 6, -16
pushq %rbx pushq %rbx
.cfi_def_cfa_offset 40 | .cfi_def_cfa_offset 24
.cfi_offset 3, -40 | .cfi_offset 3, -24
movq %rdi, %rbp <
xorl %eax, %eax xorl %eax, %eax
xorl %ebx, %ebx | movq %rdi, %rbx
xorl %r12d, %r12d | movl $128, %ecx
> xorl %ebp, %ebp
subq $1048, %rsp subq $1048, %rsp
.cfi_def_cfa_offset 1088 | .cfi_def_cfa_offset 1072
xorl %r13d, %r13d <
movq %rsp, %rdi movq %rsp, %rdi
rep stosq rep stosq
> movabsq $429496729600, %rax
> movq %rax, 1024(%rsp)
.p2align 4,,10 .p2align 4,,10
.p2align 3 .p2align 3
.L28: | .L7:
cmpb $0, 0(%rbp) | cmpb $0, (%rbx)
je .L39 | je .L20
.L23: | .L2:
movq 8(%rbp), %rax | movq 8(%rbx), %rax
cmpq 72(%rbp), %rax | cmpq 72(%rbx), %rax
leaq 1(%rax), %rdx leaq 1(%rax), %rdx
movq %rdx, 8(%rbp) | movq %rdx, 8(%rbx)
jnb .L40 | jnb .L21
movq 24(%rbp), %r8 | movq 24(%rbx), %r8
movq 16(%rbp), %rdi | movq 16(%rbx), %rsi
movq %r8, %rax movq %r8, %rax
subq %rdi, %rax | subq %rsi, %rax
sarq $2, %rax sarq $2, %rax
testq %rax, %rax testq %rax, %rax
je .L25 | je .L4
movl (%rdi), %edx | movl (%rsi), %eax
xorl %eax, %eax | xorl %edx, %edx
xorl %ecx, %ecx xorl %ecx, %ecx
movl %ebx, %esi | testl %eax, %eax
testl %edx, %edx | jg .L5
jg .L26 | jmp .L7
jmp .L28 <
.p2align 4,,10 .p2align 4,,10
.p2align 3 .p2align 3
.L31: | .L11:
addq $1, %rax | addq $1, %rdx
cmpl (%rdi), %ecx | cmpl (%rsi), %ecx
jge .L28 | jge .L7
.L26: | .L5:
addl $1, %ebx | movl 1024(%rsp), %edi
movq %rax, (%rsp,%rsi,8) | movq %rdi, %rax
movq %rax, %rdx | movq %rdx, (%rsp,%rdi,8)
cmpl $100, %ebx | addl $1, %eax
cmove %r13d, %ebx | cmpl 1028(%rsp), %eax
movl %ebx, %esi | movl %eax, 1024(%rsp)
subq (%rsp,%rsi,8), %rdx | jne .L9
cmpq $999999, %rdx | movl $0, 1024(%rsp)
jg .L30 | xorl %eax, %eax
addq $1, %r12 | .L9:
movq 16(%rbp), %rdi | movq %rdx, %rdi
movq 24(%rbp), %r8 | subq (%rsp,%rax,8), %rdi
.L30: | cmpq $999999, %rdi
movq %r8, %rdx | jg .L10
> addq $1, %rbp
> movq 16(%rbx), %rsi
> movq 24(%rbx), %r8
> .L10:
> movq %r8, %rax
addl $1, %ecx addl $1, %ecx
subq %rdi, %rdx | subq %rsi, %rax
sarq $2, %rdx | sarq $2, %rax
testq %rdx, %rdx | testq %rax, %rax
jne .L31 | jne .L11