[Bug ipa/65701] r221530 makes 187.facerec drop with -Ofast -flto

hubicka at ucw dot cz gcc-bugzilla@gcc.gnu.org
Thu Apr 9 17:59:00 GMT 2015


https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65701

--- Comment #8 from Jan Hubicka <hubicka at ucw dot cz> ---
With spaces removed to be readable
>
>   1.11 │3682:   mov    0x60(%rsp),%rdx
>   9.32 │3687:┌─→vmovss (%rax,%r12,2),%xmm5
>   1.44 │     │  vmovss (%rax),%xmm6
>   4.46 │     │  inc    %rdi
>   0.01 │     │  add    $0x10,%rcx
>   1.17 │     │  vinser $0x10,(%rax,%r13,1),%xmm5,%xmm0
>   1.92 │     │  vinser $0x10,(%rax,%r12,1),%xmm6,%xmm1
>   0.28 │     │  add    %r14,%rax
>   0.07 │     │  vmovlh %xmm0,%xmm1,%xmm0
>   2.48 │     │  vfmadd %xmm3,-0x10(%rcx),%xmm0,%xmm3
>   5.15 │     │  cmp    %rdi,%rdx
>   0.01 │     └──ja     3687
>   1.21 │        vhaddp %xmm3,%xmm3,%xmm3
>  10.30 │        mov    0x58(%rsp),%rax
>   0.03 │        mov    %r13,0x10(%rsp)
>   0.00 │        add    %rax,%rsi
>   1.18 │        vhaddp %xmm3,%xmm3,%xmm3
>  10.80 │        vaddss %xmm3,%xmm4,%xmm4
>   4.47 │        cmp    0x68(%rsp),%rax
>
> (the slower variant) to:
>
>   1.38 │        xor    %ecx,%ecx
>   6.04 │17c0:┌─→vmovss (%rax,%r11,2),%xmm3
>   0.18 │     │  mov    0x90(%rsp),%rsi
>   1.43 │     │  inc    %rcx
>   1.42 │     │  vmovss (%rax),%xmm5
>   0.36 │     │  vmovss (%rdx,%rbx,2),%xmm6
>   2.81 │     │  vmovss (%rdx),%xmm7
>   0.90 │     │  vinser $0x10,(%rax,%rsi,1),%xmm3,%xmm2
>   2.96 │     │  mov    0x88(%rsp),%rsi
>   0.04 │     │  vinser $0x10,(%rax,%r11,1),%xmm5,%xmm4
>   2.76 │     │  add    0x70(%rsp),%rax
>   0.07 │     │  vinser $0x10,(%rdx,%rbx,1),%xmm7,%xmm3
>   0.02 │     │  vmovlh %xmm2,%xmm4,%xmm4
>   2.69 │     │  vinser $0x10,(%rdx,%rsi,1),%xmm6,%xmm2
>   1.13 │     │  add    0x78(%rsp),%rdx
>   0.04 │     │  vmovlh %xmm2,%xmm3,%xmm2
>   0.01 │     │  vfmadd %xmm0,%xmm2,%xmm4,%xmm0
>   2.74 │     │  cmp    %rcx,0x80(%rsp)
>   0.07 │     └──ja     17c0
>   1.39 │        vhaddp %xmm0,%xmm0,%xmm0
>   4.45 │        mov    0x48(%rsp),%rsi
>   1.42 │        vhaddp %xmm0,%xmm0,%xmm0
>   7.96 │        vaddss %xmm0,%xmm1,%xmm1
>   4.09 │        cmp    %r15,0x60(%rsp)
>   0.01 │      ↓ je     18b1
>
> (the faster variant, dunno why)



More information about the Gcc-bugs mailing list