This is the mail archive of the gcc-prs@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

optimization/9566: Inline function produces much worse code than manual inlining.

From: osv at javad dot ru
To: gcc-gnats at gcc dot gnu dot org
Date: 4 Feb 2003 10:16:10 -0000
Subject: optimization/9566: Inline function produces much worse code than manual inlining.
Reply-to: osv at javad dot ru

>Number:         9566
>Category:       optimization
>Synopsis:       Inline function produces much worse code than manual inlining.
>Confidential:   no
>Severity:       serious
>Priority:       low
>Responsible:    unassigned
>State:          open
>Class:          pessimizes-code
>Submitter-Id:   net
>Arrival-Date:   Tue Feb 04 10:26:00 UTC 2003
>Closed-Date:
>Last-Modified:
>Originator:     Sergei Organov
>Release:        gcc version 3.3 20030203 (prerelease)
>Organization:
>Environment:
Linux 2.4.20 i686
>Description:
In the code below, function g1() calls the inline function copy(),
and g2() is equivalent to g1() except that the body of copy() is inserted
into the body of g2() manually. The assembly code produced for
g1() is much worse than that produced for g2(). The difference is most visible
on RISC processors (example PowerPC assembly output is shown below),
though it can be seen on CISC processors as well.

The C++ code (note that the code is minimized to demonstrate the problem,
so please ignore the use of uninitialized variables):

// Pair of char pointers plus a one-step copy operation; used by g1() and g2()
// to compare an inline member call against the same statement written by hand.
struct A {
  // Read pointer; copy() pre-increments it before reading through it.
  char const* src;
  // Write pointer; copy() pre-increments it before writing through it.
  char* dest;
  // Pre-increments both pointers, then stores the char found at the new src
  // position into the new dest position.
  void copy() { *++dest = *++src; }
};

// Variant that goes through the inline member function: calls a.copy()
// ten times on a local A.
// NOTE: `a` is intentionally left uninitialized; this is a minimized test
// case meant only for inspecting the generated code, not for running.
void g1() {
  A a;
  for(int i = 0; i < 10; ++i)
    a.copy();
}

// Manually inlined variant: the same ten-iteration loop as g1(), but with the
// body of A::copy() written out directly on the members of `a`.
// NOTE: `a` is intentionally left uninitialized; this is a minimized test
// case meant only for inspecting the generated code, not for running.
void g2() {
  A a;
  for(int i = 0; i < 10; ++i)
    *++a.dest = *++a.src;
}

The resulting assembly for PowerPC (note that the loop body is 8 instructions
for g1() vs. 4 for g2(), not counting the loop branch):

$ ~/try-3.2/tools/bin/ppc-rtems-gcc -c -O4 -save-temps -mregnames struct.cc -o struct.o
$ cat struct.s

	.file	"struct.cc"
	.section	".text"
# _Z2g1v is the mangled name of g1(): the loop that calls the inline A::copy().
# In this version both pointer values stay in the stack frame: each iteration
# reloads them, increments them, and stores them back before copying the byte.
	.align 2
	.globl _Z2g1v
	.type	_Z2g1v, @function
_Z2g1v:
.LFB5:
	# CTR = 10: the loop trip count consumed by bdnz below.
	li %r3,10
	mtctr %r3
	# Allocate a 16-byte stack frame (stwu stores the old r1 and updates r1).
	stwu %r1,-16(%r1)
.LCFI0:
	# r8 = r1 + 8, so 4(%r8) below addresses the word at r1 + 12.
	addi %r8,%r1,8
.L10:
	# Reload both pointers from the frame: r5 from r1+8, r3 from r1+12.
	# r5 is the base of the byte load and r3 the base of the byte store below
	# (presumably a.src and a.dest respectively).
	lwz %r5,8(%r1)
	lwz %r3,4(%r8)
	# Compute the incremented pointers and write them back to the frame.
	addi %r6,%r5,1
	addi %r7,%r3,1
	stw %r7,4(%r8)
	stw %r6,8(%r1)
	# Copy one byte: load from (old r5)+1, store to (old r3)+1.
	lbz %r4,1(%r5)
	stb %r4,1(%r3)
	# Decrement CTR and branch back while it is non-zero.
	bdnz .L10
	# Release the stack frame and return.
	addi %r1,%r1,16
	blr
.LFE5:
	.size	_Z2g1v, .-_Z2g1v
	.align 2
	.globl _Z2g2v
	.type	_Z2g2v, @function
# _Z2g2v is the mangled name of g2(): the manually inlined loop.
# Here both pointers live in registers (r7 for loads, r8 for stores); no stack
# frame is created and the pointers are never spilled to memory.
_Z2g2v:
.LFB6:
	# CTR = 10: the loop trip count consumed by bdnz below.
	li %r3,10
	mtctr %r3
	# Both pointer registers start at 0; the C++ source leaves the
	# corresponding variables uninitialized.
	li %r7,0
	li %r8,0
.L19:
	# Advance the load pointer and fetch one byte through it.
	addi %r7,%r7,1
	lbz %r4,0(%r7)
	# Advance the store pointer and write the byte through it.
	addi %r8,%r8,1
	stb %r4,0(%r8)
	# Decrement CTR and branch back while it is non-zero.
	bdnz .L19
	blr
.LFE6:
	.size	_Z2g2v, .-_Z2g2v
	.ident	"GCC: (GNU) 3.3 20030203 (prerelease)"

>How-To-Repeat:
Compile the provided C++ code with '-O4 -save-temps' and look at the resulting assembly.
>Fix:

>Release-Note:
>Audit-Trail:
>Unformatted:

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]