Bug 86011 - Inefficient code generated for ldivmod with constant value
Summary: Inefficient code generated for ldivmod with constant value
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: target (show other bugs)
Version: 9.2.0
Importance: P3 normal
Target Milestone: 11.0
Assignee: Not yet assigned to anyone
URL:
Keywords: missed-optimization
Depends on:
Blocks:
 
Reported: 2018-05-31 04:43 UTC by Patrick Oppenlander
Modified: 2021-12-21 11:39 UTC (History)
1 user (show)

See Also:
Host:
Target: arm
Build:
Known to work:
Known to fail: 8.2.0
Last reconfirmed: 2019-03-04 00:00:00


Attachments

Note: You need to log in before you can comment on or make changes to this bug.
Description Patrick Oppenlander 2018-05-31 04:43:49 UTC
Tested with 7.2.0 and 8.1.0.

The following example unnecessarily calls ldivmod twice:

struct foo { long a, b; };
struct foo test(long long x)
{
	return (struct foo){x / 77, x % 77};
}

armv7m-linux-musleabi-gcc -c -O2 test.c
armv7m-linux-musleabi-objdump -d test.o

00000000 <test>:
   0:	b5d0      	push	{r4, r6, r7, lr}
   2:	4616      	mov	r6, r2
   4:	461f      	mov	r7, r3
   6:	4604      	mov	r4, r0
   8:	224d      	movs	r2, #77	; 0x4d
   a:	2300      	movs	r3, #0
   c:	4630      	mov	r0, r6
   e:	4639      	mov	r1, r7
  10:	f7ff fffe 	bl	0 <__aeabi_ldivmod>
  14:	4639      	mov	r1, r7
  16:	6020      	str	r0, [r4, #0]
  18:	224d      	movs	r2, #77	; 0x4d
  1a:	4630      	mov	r0, r6
  1c:	2300      	movs	r3, #0
  1e:	f7ff fffe 	bl	0 <__aeabi_ldivmod>
  22:	4620      	mov	r0, r4
  24:	6062      	str	r2, [r4, #4]
  26:	bdd0      	pop	{r4, r6, r7, pc}

If the test is rearranged so that the denominator is a function argument, the generated code is as expected (a single call to __aeabi_ldivmod, with both the quotient and the remainder taken from it):

struct foo { long a, b; };
struct foo test(long long x, long den)
{
	return (struct foo){x / den, x % den};
}

armv7m-linux-musleabi-gcc -c -O2 test.c
armv7m-linux-musleabi-objdump -d test.o

00000000 <test>:
   0:	b5d0      	push	{r4, r6, r7, lr}
   2:	4616      	mov	r6, r2
   4:	461f      	mov	r7, r3
   6:	9a04      	ldr	r2, [sp, #16]
   8:	4604      	mov	r4, r0
   a:	4639      	mov	r1, r7
   c:	4630      	mov	r0, r6
   e:	17d3      	asrs	r3, r2, #31
  10:	f7ff fffe 	bl	0 <__aeabi_ldivmod>
  14:	e9c4 0200 	strd	r0, r2, [r4]
  18:	4620      	mov	r0, r4
  1a:	bdd0      	pop	{r4, r6, r7, pc}
Comment 1 Richard Biener 2018-06-01 08:04:42 UTC
I think this was fixed with GCC 8.  Can you check?
Comment 2 Patrick Oppenlander 2018-06-01 22:24:41 UTC
Sure,

# cat test.c
struct foo { long a, b; };
struct foo test(long long x)
{
	return (struct foo){x / 77, x % 77};
}
# gcc --version
gcc (GCC) 8.1.0
Copyright (C) 2018 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

# gcc -c -O2 test.c
# objdump -d test.o

test.o:     file format elf32-littlearm


Disassembly of section .text:

00000000 <test>:
   0:	e92d40d0 	push	{r4, r6, r7, lr}
   4:	e1a06002 	mov	r6, r2
   8:	e1a07003 	mov	r7, r3
   c:	e1a04000 	mov	r4, r0
  10:	e3a0204d 	mov	r2, #77	; 0x4d
  14:	e3a03000 	mov	r3, #0
  18:	e1a00006 	mov	r0, r6
  1c:	e1a01007 	mov	r1, r7
  20:	ebfffffe 	bl	0 <__aeabi_ldivmod>
  24:	e1a01007 	mov	r1, r7
  28:	e3a0204d 	mov	r2, #77	; 0x4d
  2c:	e3a03000 	mov	r3, #0
  30:	e5840000 	str	r0, [r4]
  34:	e1a00006 	mov	r0, r6
  38:	ebfffffe 	bl	0 <__aeabi_ldivmod>
  3c:	e1a00004 	mov	r0, r4
  40:	e5842004 	str	r2, [r4, #4]
  44:	e8bd80d0 	pop	{r4, r6, r7, pc}

Looks like the same problem is still there.
Comment 3 Patrick Oppenlander 2019-11-19 23:35:25 UTC
Still present in 9.2.0.
Comment 4 Andrew Pinski 2021-12-21 11:39:23 UTC
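Fixed in GCC 11. The original test case (division by 77) now compiles to inline code with no call to __aeabi_ldivmod: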

        lsrs    r1, r2, #30
        push    {r4, lr}
        orr     r1, r1, r3, lsl #2
        bic     lr, r1, #-1073741824
        bic     r1, r2, #-1073741824
        add     r1, r1, lr
        asrs    r4, r3, #31
        add     r3, r1, r3, lsr #28
        and     r1, r4, #137
        add     r3, r3, r1
        movw    r1, #57025
        movt    r1, 13617
        bic     r4, r4, #75
        umull   lr, r1, r1, r3
        mov     lr, #77
        lsrs    r1, r1, #4
        mls     r3, lr, r1, r3
        movw    r1, #14469
        movt    r1, 16171
        add     r3, r3, r4
        str     r3, [r0, #4]
        subs    r2, r2, r3
        umull   r2, r1, r2, r1
        str     r2, [r0]
        pop     {r4, pc}

For:
#define N 7723
struct foo { long a, b; };
struct foo test(long long x)
{
	return (struct foo){x / N, x % N};
}
GCC 11+ produces:

        push    {r4, lr}
        mov     r4, r0
        mov     r1, r3
        mov     r0, r2
        movs    r3, #0
        movw    r2, #7723
        bl      __aeabi_ldivmod
        strd    r0, r2, [r4]
        mov     r0, r4
        pop     {r4, pc}

This is also exactly what you want: a single call to __aeabi_ldivmod whose quotient and remainder are both used.