[Bug regression/33928] 33% performance slowdown from 4.2.2 to 4.3.0 in floating-point code with computed gotos

rguenth at gcc dot gnu dot org gcc-bugzilla@gcc.gnu.org
Sun Oct 28 16:38:00 GMT 2007



------- Comment #9 from rguenth at gcc dot gnu dot org  2007-10-28 16:38 -------
The main difference I see is that 4.2 avoids re-using %rax as the index register:

.L34:
        movq    %r11, %rdi
        addq    8(%r10), %rdi
        movq    8(%r10), %rsi
        movq    8(%r10), %rdx
        movq    40(%r10), %rax
        leaq    4(%r11), %rbx
        addq    %rdi, %rsi
        leaq    4(%rdi), %r9
        movq    %rdi, -8(%r10)
        addq    %rsi, %rdx
        leaq    4(%rsi), %r8
        movq    %rsi, -24(%r10)
        leaq    4(%rdx), %rcx
        movq    %r9, -16(%r10)
        movq    %rdx, -40(%r10)
        movq    %r8, -32(%r10)
        addq    $7, %rax
        movq    %rcx, -48(%r10)
        movsd   (%rax,%rcx,2), %xmm12
        leaq    (%rbx,%rbx), %rcx
        movsd   (%rax,%rdx,2), %xmm3
        leaq    (%rax,%r11,2), %rdx
        addq    $8, %r11
        movsd   (%rax,%r8,2), %xmm14
        cmpq    %r11, %r13
        movsd   (%rax,%rsi,2), %xmm13
        movsd   (%rax,%r9,2), %xmm11
        movsd   (%rax,%rdi,2), %xmm10
        movsd   (%rax,%rcx), %xmm8
...

while 4.3 always reloads %rax from memory for use as the index:

.L26:
        leaq    4(%rdi), %rdx
        movq    %rdi, %rax
        movq    %rdx, -8(%rsp)
        addq    (%r8), %rax
        movq    %rax, (%r9)
        addq    $4, %rax
        movq    %rax, (%rbp)
        movq    (%r9), %rax
        addq    (%r8), %rax
        movq    %rax, (%r10)
        addq    $4, %rax
        movq    %rax, (%rbx)
        movq    (%r10), %rax
        addq    (%r8), %rax
        movq    %rax, (%r11)
        movq    -64(%rsp), %rcx
        addq    $4, %rax
        movq    %rax, (%rcx)
        movq    (%rsi), %rdx
        movq    -8(%rsp), %rcx
        addq    $7, %rdx
        movsd   (%rdx,%rax,2), %xmm13
        movq    (%r11), %rax
        addq    %rcx, %rcx
        movsd   (%rdx,%rcx), %xmm8
        movsd   (%rdx,%rax,2), %xmm3
        movq    (%rbx), %rax
        movsd   (%rdx,%rax,2), %xmm14
        movq    (%r10), %rax
        movsd   (%rdx,%rax,2), %xmm12
        movq    (%rbp), %rax
        movsd   (%rdx,%rax,2), %xmm11
        movq    (%r9), %rax
        movsd   (%rdx,%rax,2), %xmm10
        movq    (%r12), %rax
        leaq    (%rdx,%rdi,2), %rdx
...

The root cause still needs to be investigated.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33928



More information about the Gcc-bugs mailing list