This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug tree-optimization/46306] New: inefficient code generated for array accesses
- From: "davidxl at gcc dot gnu.org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Thu, 4 Nov 2010 19:15:06 +0000
- Subject: [Bug tree-optimization/46306] New: inefficient code generated for array accesses
- Auto-submitted: auto-generated
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46306
Summary: inefficient code generated for array accesses
Product: gcc
Version: 4.6.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: tree-optimization
AssignedTo: unassigned@gcc.gnu.org
ReportedBy: davidxl@gcc.gnu.org
//Example:
/* Minimal reproducer: three final loads share base p with small element
 * indices that are conditionally overwritten.  Ideally each load becomes a
 * single scaled memory operand (disp(base,index,4)) with no separate shift
 * instructions and no per-path duplication of the load sequence. */
int foo (int i, int *p, int t)
{
int p2 = p[i];      /* loaded once; reused as the index base on the taken path */
int temp = 0;       /* default indices: p[0], p[1], p[4] */
int temp2 = 1;
int temp3 = 4;
if (p[i+1] > t)
{
temp = p2;          /* taken path: indices derived from the loaded value */
temp2 = p2 + 2;
temp3 = p2 + 3;
}
return p[temp] + p [temp2] + p[temp3];
}
Two problems are seen in the code generated by trunk gcc at -O2:
1) all the shift operations are redundant and should be folded into the stride of
the memory operand
2) unnecessary code duplication
(may be handled by a pass that converts memory accesses with linear addresses into
target memrefs in straight-line code)
# GCC trunk -O2 output (SysV AMD64: edi = i, rsi = p, edx = t; result in eax).
# Shows both reported problems: on the taken path each index is widened and
# then shifted left by 2 with a separate salq instead of using the *4 scale
# of the addressing mode, and the three-load sequence is duplicated per path.
foo:
.LFB0:
.cfi_startproc
movslq %edi, %rdi               # rdi = (long)i
movl (%rsi,%rdi,4), %eax        # eax = p2 = p[i]
cmpl %edx, 4(%rsi,%rdi,4)       # compare p[i+1] with t
jle .L3                         # p[i+1] <= t -> constant-index path
movslq %eax, %rdi               # rdi = (long)p2              (temp)
leal 2(%rax), %ecx              # ecx = p2 + 2                (temp2)
salq $2, %rdi                   # redundant shift: *4 scale could do this
leal 3(%rax), %edx              # edx = p2 + 3                (temp3)
movslq %ecx, %rcx
movl (%rsi,%rdi), %eax          # eax = p[temp]
salq $2, %rcx                   # redundant shift (problem 1)
movslq %edx, %rdx
addl (%rsi,%rcx), %eax          # eax += p[temp2]
salq $2, %rdx                   # redundant shift (problem 1)
addl (%rsi,%rdx), %eax          # eax += p[temp3]
ret
.p2align 4,,10
.p2align 3
.L3:                            # fall-through values: temp=0, temp2=1, temp3=4
movl $16, %edx                  # byte offset of p[4]
movl $4, %ecx                   # byte offset of p[1]
xorl %edi, %edi                 # byte offset of p[0]
movl (%rsi,%rdi), %eax          # duplicated load sequence (problem 2)
addl (%rsi,%rcx), %eax
addl (%rsi,%rdx), %eax
ret
// The following code is generated by another compiler -- not ideal, but
better:
# The other compiler's output for the same function: it keeps rax/rcx/rdx as
# element indices on both paths and folds the *4 stride into every load's
# addressing mode, so no separate shifts and a single shared load sequence.
# Still not ideal: it sets up a frame pointer this leaf function doesn't need.
foo:
.Leh_func_begin0:
pushq %rbp                      # frame-pointer setup (unnecessary for a leaf)
.Ltmp0:
movq %rsp, %rbp
.Ltmp1:
movslq %edi, %rax               # rax = (long)i
leal 1(%rax), %ecx              # ecx = i + 1
movslq %ecx, %rcx
cmpl %edx, (%rsi,%rcx,4)        # compare p[i+1] with t
jg .LBB0_2                      # taken path: indices derived from p2
movl $1, %eax                   # temp2 = 1
xorl %ecx, %ecx                 # temp  = 0
movl $4, %edx                   # temp3 = 4
jmp .LBB0_3
.LBB0_2:
movslq (%rsi,%rax,4), %rcx      # rcx = p2 = p[i]             (temp)
leal 3(%rcx), %eax              # temp3 = p2 + 3
movslq %eax, %rdx
leal 2(%rcx), %eax              # temp2 = p2 + 2
movslq %eax, %rax
.LBB0_3:                        # shared tail: all loads use the *4 scale
movl (%rsi,%rax,4), %eax        # eax = p[temp2]
addl (%rsi,%rcx,4), %eax        # eax += p[temp]
addl (%rsi,%rdx,4), %eax        # eax += p[temp3]
popq %rbp
ret