[Bug other/26674] New: missed optimization / 128-bit arithmetic.

pluto at agmk dot net gcc-bugzilla@gcc.gnu.org
Tue Mar 14 12:07:00 GMT 2006


/* Square a 64-bit unsigned value and return the full 128-bit product. */
__uint128_t sqr_1(__uint64_t x)
{
    /* Casting one operand to __uint128_t makes the other operand convert
       too, so the multiplication is carried out in 128 bits and the upper
       half of the product is kept. */
    return (x * (__uint128_t)x);
}

gcc-4.1.1-20060308 produces ugly code:

sqr_1:  xorl    %edx, %edx      # D.1810
        movq    %rdi, %rax      # x, D.1810
        movq    %rdx, %rcx      #, tmp62
        imulq   %rdi, %rcx      # D.1810, tmp62
        mulq    %rdi    # D.1810
        addq    %rcx, %rcx      # tmp62
        addq    %rdx, %rcx      #, tmp62
        movq    %rcx, %rdx      # tmp62,
        ret

the optimal solution is:

        movq    %rdi, %rax
        mulq    %rax        ; or mulq %rdi
        ret


/* Square a 64-bit unsigned value with an explicit x86-64 `mulq` and return
   the 128-bit product. */
__uint128_t sqr_2(__uint64_t x)
{
    /* Overlay the 128-bit result with its two 64-bit halves so the asm
       outputs can be written separately and read back as one value.
       `lo` is declared first, i.e. at the lower address, which matches the
       little-endian layout of the x86-64 target this report is about. */
    union {
        __uint128_t v;
        struct {
            __uint64_t lo;
            __uint64_t hi;
        } q;
    } r;
    /* Input "a" places x in %rax; `mulq %rax` multiplies %rax by itself.
       Output "=d" takes the high half from %rdx, "=a" the low half from
       %rax. */
    asm volatile("mulq %%rax" : "=d" (r.q.hi), "=a" (r.q.lo) : "a" (x));
    return r.v;
}

sqr_2 gives better code, but it is still suboptimal :/

sqr_2:  movq    %rdi, %rax      # x, x
#APP
        mulq %rax
#NO_APP
        movq    %rdx, -16(%rsp) # tmp60, r.q.hi
        movq    %rax, -24(%rsp) # tmp61, r.q.lo
        movq    -16(%rsp), %rdx # r.v, r.v
        movq    -24(%rsp), %rax # r.v, r.v
        ret


-- 
           Summary: missed optimization / 128-bit arithmetic.
           Product: gcc
           Version: 4.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: other
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: pluto at agmk dot net
 GCC build triplet: x86-64-linux
  GCC host triplet: x86-64-linux
GCC target triplet: x86-64-linux


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26674



More information about the Gcc-bugs mailing list