This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug c++/78180] Poor optimization of std::array on gcc 4.8/5.4/6.2 as compared to simple raw array


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78180

Markus Trippelsdorf <trippels at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2016-11-02
                 CC|                            |trippels at gcc dot gnu.org
     Ever confirmed|0                           |1

--- Comment #2 from Markus Trippelsdorf <trippels at gcc dot gnu.org> ---
        # ---------------------------------------------------------------------
        # GCC-generated x86-64 code (AT&T syntax, demangled symbol names) for
        # the raw-array instantiation BM_Rolling<long [128]>.
        # This is an excerpt: it ends inside the inner loop, and the branch
        # targets .L39, .L40 and .L25 are not part of the quoted listing.
        #
        # Register roles visible in this excerpt:
        #   %rbp  = copy of the incoming %rdi (the benchmark::State& argument,
        #           assuming the SysV AMD64 calling convention)
        #   %rsp  = base of a 128-quadword buffer, zero-filled on entry
        #   %ebx  = current slot index into that buffer, wrapped to 0 at 100
        #   %r13d = constant 0, used as the cmove source for the wrap
        #   %r12  = counter bumped when the slot difference is <= 999999
        #   %rax  = 64-bit per-iteration value written into the buffer
        #   %ecx  = 32-bit inner-loop counter compared against (%rdi)
        # ---------------------------------------------------------------------
        .type   void BM_Rolling<long [128]>(benchmark::State&), @function
void BM_Rolling<long [128]>(benchmark::State&):
.LFB1712:
        .cfi_startproc
        # Prologue: save the callee-saved registers this function uses.
        pushq   %r13
        .cfi_def_cfa_offset 16
        .cfi_offset 13, -16
        pushq   %r12
        .cfi_def_cfa_offset 24
        .cfi_offset 12, -24
        movl    $128, %ecx              # rep stosq count: 128 quadwords
        pushq   %rbp
        .cfi_def_cfa_offset 32
        .cfi_offset 6, -32
        pushq   %rbx
        .cfi_def_cfa_offset 40
        .cfi_offset 3, -40
        movq    %rdi, %rbp              # keep the argument; %rdi is reused below
        xorl    %eax, %eax              # fill value 0 for rep stosq
        xorl    %ebx, %ebx              # slot index = 0 (kept in a register)
        xorl    %r12d, %r12d            # hit counter = 0
        subq    $1048, %rsp             # reserve the stack frame
        .cfi_def_cfa_offset 1088
        xorl    %r13d, %r13d            # constant 0 for the cmove at the wrap
        movq    %rsp, %rdi              # destination = start of the buffer
        rep stosq                       # zero 128 quadwords starting at (%rsp)
        .p2align 4,,10
        .p2align 3
.L28:
        # Outer loop head: test the byte at offset 0 of the State object.
        cmpb    $0, 0(%rbp)
        je      .L39                    # target not shown in this excerpt
.L23:
        # Load the quadword at 8(%rbp), store back value+1, and leave the
        # outer loop once the old value is >= the quadword at 72(%rbp)
        # (unsigned). leaq/movq do not modify flags, so jnb still sees the
        # result of the cmpq.
        movq    8(%rbp), %rax
        cmpq    72(%rbp), %rax
        leaq    1(%rax), %rdx
        movq    %rdx, 8(%rbp)
        jnb     .L40                    # target not shown in this excerpt
        # (24(%rbp) - 16(%rbp)) >> 2: byte distance between two pointers
        # divided by 4 -- presumably the element count of an int range held
        # by the State object; TODO confirm against the benchmark source.
        movq    24(%rbp), %r8
        movq    16(%rbp), %rdi
        movq    %r8, %rax
        subq    %rdi, %rax
        sarq    $2, %rax
        testq   %rax, %rax
        je      .L25                    # empty range; target not shown
        movl    (%rdi), %edx            # first 32-bit element of the range
        xorl    %eax, %eax              # 64-bit iteration value = 0
        xorl    %ecx, %ecx              # 32-bit inner counter = 0
        movl    %ebx, %esi              # %rsi = current slot index (zero-extended)
        testl   %edx, %edx
        jg      .L26                    # enter the inner loop only if element > 0
        jmp     .L28
        .p2align 4,,10
        .p2align 3
.L31:
        # Inner loop continuation: advance the 64-bit value and keep going
        # while %ecx < (%rdi) (signed compare).
        addq    $1, %rax
        cmpl    (%rdi), %ecx
        jge     .L28
.L26:
        # Inner loop body. The index stays in %ebx and the wrap bound is the
        # immediate $100, so no memory traffic is needed for index upkeep.
        addl    $1, %ebx                # next slot index
        movq    %rax, (%rsp,%rsi,8)     # buffer[old index] = current value
        movq    %rax, %rdx
        cmpl    $100, %ebx
        cmove   %r13d, %ebx             # wrap the index to 0 when it reaches 100
        movl    %ebx, %esi
        subq    (%rsp,%rsi,8), %rdx     # current value - buffer[new index]
        cmpq    $999999, %rdx
        jg      .L30                    # difference > 999999: skip the count
        addq    $1, %r12
        # The two pointers are reloaded from the State object on this path only.
        movq    16(%rbp), %rdi
        movq    24(%rbp), %r8
.L30:
        # Recompute (%r8 - %rdi) >> 2 and loop while it is non-zero.
        movq    %r8, %rdx
        addl    $1, %ecx
        subq    %rdi, %rdx
        sarq    $2, %rdx
        testq   %rdx, %rdx
        jne     .L31 

vs.

        .type   void BM_Rolling<std::array<long, 128ul> >(benchmark::State&), @function
# -----------------------------------------------------------------------------
# GCC-generated x86-64 code (AT&T syntax, demangled symbol names) for the
# std::array instantiation BM_Rolling<std::array<long, 128ul> >.
# This is an excerpt: it ends inside the inner loop, and the branch targets
# .L20, .L21 and .L4 are not part of the quoted listing.
#
# Register / stack roles visible in this excerpt:
#   %rbx        = copy of the incoming %rdi (the benchmark::State& argument,
#                 assuming the SysV AMD64 calling convention)
#   (%rsp)      = base of a 128-quadword buffer, zero-filled on entry
#   1024(%rsp)  = 32-bit slot index, kept in memory and updated every iteration
#   1028(%rsp)  = 32-bit wrap bound, initialised to 100 and re-read every
#                 iteration
#   %rbp        = counter bumped when the slot difference is <= 999999
#   %rdx        = 64-bit per-iteration value written into the buffer
#   %ecx        = 32-bit inner-loop counter compared against (%rsi)
#
# What this listing shows: the index and its bound live on the stack, so each
# inner-loop iteration performs a load, a compare against memory and a store
# purely for index upkeep.
# -----------------------------------------------------------------------------
void BM_Rolling<std::array<long, 128ul> >(benchmark::State&):
.LFB1713:
        .cfi_startproc
        # Prologue: save the callee-saved registers this function uses.
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        xorl    %eax, %eax              # fill value 0 for rep stosq
        movq    %rdi, %rbx              # keep the argument; %rdi is reused below
        movl    $128, %ecx              # rep stosq count: 128 quadwords
        xorl    %ebp, %ebp              # hit counter = 0
        subq    $1048, %rsp             # reserve the stack frame
        .cfi_def_cfa_offset 1072
        movq    %rsp, %rdi              # destination = start of the buffer
        rep stosq                       # zero 128 quadwords starting at (%rsp)
        # 429496729600 == 100 << 32. Stored as one little-endian quadword it
        # sets the dword at 1024(%rsp) to 0 (index) and the dword at
        # 1028(%rsp) to 100 (wrap bound).
        movabsq $429496729600, %rax
        movq    %rax, 1024(%rsp)
        .p2align 4,,10
        .p2align 3
.L7:
        # Outer loop head: test the byte at offset 0 of the State object.
        cmpb    $0, (%rbx)
        je      .L20                    # target not shown in this excerpt
.L2:
        # Load the quadword at 8(%rbx), store back value+1, and leave the
        # outer loop once the old value is >= the quadword at 72(%rbx)
        # (unsigned). leaq/movq do not modify flags, so jnb still sees the
        # result of the cmpq.
        movq    8(%rbx), %rax
        cmpq    72(%rbx), %rax
        leaq    1(%rax), %rdx
        movq    %rdx, 8(%rbx)
        jnb     .L21                    # target not shown in this excerpt
        # (24(%rbx) - 16(%rbx)) >> 2: byte distance between two pointers
        # divided by 4 -- presumably the element count of an int range held
        # by the State object; TODO confirm against the benchmark source.
        movq    24(%rbx), %r8
        movq    16(%rbx), %rsi
        movq    %r8, %rax
        subq    %rsi, %rax
        sarq    $2, %rax
        testq   %rax, %rax
        je      .L4                     # empty range; target not shown
        movl    (%rsi), %eax            # first 32-bit element of the range
        xorl    %edx, %edx              # 64-bit iteration value = 0
        xorl    %ecx, %ecx              # 32-bit inner counter = 0
        testl   %eax, %eax
        jg      .L5                     # enter the inner loop only if element > 0
        jmp     .L7
        .p2align 4,,10
        .p2align 3
.L11:
        # Inner loop continuation: advance the 64-bit value and keep going
        # while %ecx < (%rsi) (signed compare).
        addq    $1, %rdx
        cmpl    (%rsi), %ecx
        jge     .L7
.L5:
        # Inner loop body. The slot index is reloaded from the stack, bumped,
        # compared against the in-memory bound and written back each time.
        movl    1024(%rsp), %edi        # load index (zero-extends into %rdi)
        movq    %rdi, %rax
        movq    %rdx, (%rsp,%rdi,8)     # buffer[old index] = current value
        addl    $1, %eax                # next slot index
        cmpl    1028(%rsp), %eax        # compare with the bound held in memory
        movl    %eax, 1024(%rsp)        # store index; movl leaves flags intact
        jne     .L9
        # Index reached the bound: wrap to 0 both in memory and in %eax.
        movl    $0, 1024(%rsp)
        xorl    %eax, %eax
.L9:
        movq    %rdx, %rdi
        subq    (%rsp,%rax,8), %rdi     # current value - buffer[new index]
        cmpq    $999999, %rdi
        jg      .L10                    # difference > 999999: skip the count
        addq    $1, %rbp
        # The two pointers are reloaded from the State object on this path only.
        movq    16(%rbx), %rsi
        movq    24(%rbx), %r8
.L10:
        # Recompute (%r8 - %rsi) >> 2 and loop while it is non-zero.
        movq    %r8, %rax
        addl    $1, %ecx
        subq    %rsi, %rax
        sarq    $2, %rax
        testq   %rax, %rax
        jne     .L11

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]