[Bug c/28417] New: suboptimal 'division by constant' optimization

Tue Jul 18 08:34:00 GMT 2006

It looks like expmed.c::choose_multiplier(), which is responsible for finding
parameters needed for replacing division by constant with mul+shift, sometimes
fails to find optimal parameters.

One example: A/1577682821 can be calculated by executing A*365384439 >>
(27+32), for any 32-bit unsigned A. However, gcc-4.1.1 and all previous
versions generate much more elaborate code.

The below program demonstrates tis and also two more similar examples. It also
checks that mul+shift works for any unsigned A, by testing all possibe values
of A.

#include <stdint.h>
#include <stdio.h>

unsigned v;
void f9188_mul365384439_shift27(unsigned A) { v = A/(unsigned)1577682821; }
void f9188_mul365384439_shift27_prime(unsigned A) { v =
(((uint64_t)A)*(unsigned)365384439) >> (27+32); }
void f8399_mul2283243215_shift29(unsigned A) { v = A/(unsigned)1009898111; }
void f8399_mul2283243215_shift29_prime(unsigned A) { v =
(((uint64_t)A)*(unsigned)2283243215) >> (29+32); }
void f8267_mul2482476753_shift30(unsigned A) { v = A/(unsigned)1857695551; }
void f8267_mul2482476753_shift30_prime(unsigned A) { v =
(((uint64_t)A)*(unsigned)2482476753) >> (30+32); }

/* Generated code is suboptimal (gcc 4.1.1):
void f9188_mul365384439_shift27(unsigned A) { v = A/1577682821; }
f9188_mul365384439_shift27:
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        movl    16(%esp), %ebx
        movl    $1551183727, %ecx
        movl    %ebx, %eax
        mull    %ecx
        subl    %edx, %ebx
        shrl    %ebx
        leal    (%ebx,%edx), %eax
        shrl    $30, %eax
        movl    %eax, v
        popl    %ebx
        popl    %esi
        popl    %edi
        ret
void f8399_mul2283243215_shift29(unsigned A) { v = A/1009898111; }
f8399_mul2283243215_shift29:
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        movl    16(%esp), %ebx
        movl    $271519133, %ecx
        movl    %ebx, %eax
        mull    %ecx
        subl    %edx, %ebx
        shrl    %ebx
        leal    (%ebx,%edx), %eax
        shrl    $29, %eax
        movl    %eax, v
        popl    %ebx
        popl    %esi
        popl    %edi
        ret
void f8267_mul2482476753_shift30(unsigned A) { v = A/1857695551; }
f8267_mul2482476753_shift30:
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        movl    16(%esp), %ebx
        movl    $669986209, %ecx
        movl    %ebx, %eax
        mull    %ecx
        subl    %edx, %ebx
        shrl    %ebx
        leal    (%ebx,%edx), %eax
        shrl    $30, %eax
        movl    %eax, v
        popl    %ebx
        popl    %esi
        popl    %edi
        ret

These operations can be done like this:
f9188_mul365384439_shift27_prime:
        movl    $365384439, %eax
        mull    4(%esp)
        movl    %edx, %eax
        shrl    $27, %eax
        movl    %eax, v
        ret
f8399_mul2283243215_shift29_prime:
        movl    $-2011724081, %eax
        mull    4(%esp)
        movl    %edx, %eax
        shrl    $29, %eax
        movl    %eax, v
        ret
f8267_mul2482476753_shift30_prime:
        movl    $-1812490543, %eax
        mull    4(%esp)
        movl    %edx, %eax
        shrl    $30, %eax
        movl    %eax, v
        ret

(Why there is that silly movl %edx, %eax - that's another good question...)

The verification program is below. Was compiled with -Os
(in order to force gcc to use div insn for a/b - we want to do true
division for the purpose of correctness check!)
and didn't report a failure.
*/

int main()
{
        unsigned A=0,u,v;
        while(++A) {
                (A & 0xffff) || printf("A=%u\r",A);

                u = (((uint64_t)A)*(unsigned)365384439) >> (27+32);
                v = A / (unsigned)1577682821;
                if(u!=v) { printf("1: A=%u u=%u v=%u\n", A,u,v); return 0; }

                u = (((uint64_t)A)*(unsigned)2283243215) >> (29+32);
                v = A / (unsigned)1009898111;
                if(u!=v) { printf("2: A=%u u=%u v=%u\n", A,u,v); return 0; }

                u = (((uint64_t)A)*(unsigned)2482476753) >> (30+32);
                v  =A / (unsigned)1857695551;
                if(u!=v) { printf("3: A=%u u=%u v=%u\n", A,u,v); return 0; }
        }
        puts("");
        return 0;
}

-- 
           Summary: suboptimal 'division by constant' optimization
           Product: gcc
           Version: 4.1.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: vda dot linux at googlemail dot com

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28417