This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug c/37233] New: 64-bit product of 32-bit value loses optimisation when inlined


I have noticed that optimization is lost when a 32-bit multiply with a 64-bit
product is inlined. This also happens in gcc 3.4 (Cygwin gcc).

The function:

uint64_t mul2(uint32_t a, uint32_t b) {
  return a * (uint64_t)b;
}

is encoded 'properly' as a single mul instruction:
gcc -O3 -fomit-frame-pointer -S

.globl mul2
        .type   mul2, @function
mul2:
        pushl   %ebx
        movl    8(%esp), %eax
        mull    12(%esp)
        popl    %ebx
        ret
        .size   mul2, .-mul2
        .p2align 4,,15
.globl mul32
        .type   mul32, @function

(NOTE: the "pushl %ebx" and "popl %ebx" seem redundant.)

However, when inlined in the same source file, it is encoded as a mul
instruction plus a redundant imul instruction with a zero operand.

void mul32(uint32_t * a, unsigned n, uint32_t factor) {
  unsigned i;
  uint64_t p = 0;

  for (i = 0; i < n; ++i) {
    p += mul2(a[i], factor);
    a[i] = p;
    p >>= 32;
  }

  a[n] = p;
}

compiles to (I added comments):
gcc -O3 -fomit-frame-pointer -S

.globl mul32
        .type   mul32, @function
mul32:
        pushl   %ebp
        xorl    %eax, %eax
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $12, %esp
        movl    36(%esp), %edx
        movl    32(%esp), %ebp
        testl   %edx, %edx
        je      .L6
        movl    40(%esp), %eax
# redundant make zero
        xorl    %edx, %edx
        xorl    %edi, %edi
# redundant store zero
        movl    %edx, 4(%esp)
        xorl    %ebx, %ebx
        xorl    %esi, %esi
        movl    %eax, (%esp)
        .p2align 4,,7
.L7:
        movl    (%ebp,%edi,4), %eax
# redundant load zero
        movl    4(%esp), %ecx
# redundant multiply of zero
        imull   %eax, %ecx
        mull    (%esp)
        addl    %ebx, %eax
# redundant add zero to high word
        leal    (%ecx,%edx), %edx
        movl    %eax, %ebx
        adcl    %esi, %edx
        movl    %edx, %esi
        movl    %eax, (%ebp,%edi,4)
        movl    %esi, %ebx
        addl    $1, %edi
        xorl    %esi, %esi
        cmpl    36(%esp), %edi
        jne     .L7
        movl    %ebx, %eax
.L6:
        movl    36(%esp), %edx
        movl    %eax, (%ebp,%edx,4)
        addl    $12, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
        .size   mul32, .-mul32

Regards,
Andrew Robb.
mailto:ajrobb@bigfoot.com


-- 
           Summary: 64-bit product of 32-bit value loses optimisation when
                    inlined
           Product: gcc
           Version: 4.2.1
            Status: UNCONFIRMED
          Severity: minor
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: ajrobb at bigfoot dot com
 GCC build triplet: i586-suse-linux
  GCC host triplet: i586-suse-linux
GCC target triplet: i586-suse-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37233


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]