This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

[Bug rtl-optimization/55342] [4.8/4.9 Regression] [LRA,x86] Non-optimal code for simple loop with LRA

From: "ysrumyan at gmail dot com" <gcc-bugzilla at gcc dot gnu dot org>
To: gcc-bugs at gcc dot gnu dot org
Date: Thu, 05 Sep 2013 14:51:33 +0000
Subject: [Bug rtl-optimization/55342] [4.8/4.9 Regression] [LRA,x86] Non-optimal code for simple loop with LRA
Auto-submitted: auto-generated
References: <bug-55342-4 at http dot gcc dot gnu dot org/bugzilla/>

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55342

--- Comment #9 from Yuri Rumyantsev <ysrumyan at gmail dot com> ---
The issue still exists in the 4.9 compiler, and we got another 30% degradation
after the r202165 fix. It can be reproduced with any 4.9 compiler using the
modified test-case, which is attached; namely, the code produced for the inner
loop looks like:

.L8:
    movl    %esi, %ecx
    movl    %esi, %edi
    movzbl    3(%esp), %edx
    cmpb    %cl, %dl
    movl    %edx, %ecx
    cmovbe    %ecx, %edi
.L4:
    movl    %esi, %edx
    movl    %edi, %ecx
    subl    %ecx, %edx
    movl    28(%esp), %ecx
    movl    28(%esp), %esi
    addl    $4, 28(%esp)
    movb    %dl, (%ecx)
    movl    %edi, %ecx
    subl    %ecx, %ebx
    movl    %edi, %edx
    movzbl    3(%esp), %ecx
    movb    %bl, 1(%esi)
    subl    %edx, %ecx
    movl    %edi, %ebx
    movb    %cl, 2(%esi)
    movl    28(%esp), %esi
    cmpl    %ebp, %eax
    movb    %bl, -1(%esi)
    je    .L1
.L5:
    movzbl    (%eax), %esi
    leal    3(%eax), %eax
    movzbl    -2(%eax), %ebx
    notl    %esi
    notl    %ebx
    movl    %esi, %edx
    movzbl    -1(%eax), %ecx
    cmpb    %bl, %dl
    movb    %cl, 3(%esp)
    notb    3(%esp)
    jb    .L8
    movzbl    3(%esp), %edx
    movl    %ebx, %edi
    cmpb    %bl, %dl
    cmovbe    %edx, %edi
    jmp    .L4

and you can see that (1) there are 2 additional moves at the top of the blocks
marked with .L4 and .L8; and (2) there are redundant spills/fills of the 'write'
base (28(%esp)) in the block marked with .L4.
To reproduce, it is sufficient to compile the modified test-case with the '-m32
-march=atom' options.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]