Bug 2480 - aliasing problem with global structures
aliasing problem with global structures
Status: RESOLVED FIXED
Product: gcc
Classification: Unclassified
Component: tree-optimization
Version: 3.1
Importance: P3 enhancement
Target Milestone: 4.4.0
Assigned To: Not yet assigned to anyone
Keywords: alias, missed-optimization
Depends on:
Blocks: 27799
  Show dependency tree / graph
 
Reported: 2001-04-03 16:55 UTC by Dan Nicolaescu
Modified: 2009-04-03 11:27 UTC (History)
3 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2008-03-14 20:06:36


Attachments

Note: You need to log in before you can comment on or make changes to this bug.
Description Dan Nicolaescu 2001-04-03 16:55:59 UTC
The following code: 

struct example
{
  char a;
  int  b;
  char c;
} *ex1;

void
bar (void)
{
  ex1->a = 1;
  ex1->b = 2;
  ex1->c = 3;
}

void
foo (struct example *ex2)
{
  ex2->a = 1;
  ex2->b = 2;
  ex2->c = 3;
}

When compiled with -O2 -fstrict-aliasing -S on sun-sparc-solaris2.7 
with a GCC mainline snapshot from 2001-04-02 
(but the same problem occurs on the GCC-3.0 branch and 2.95.2)


bar:
	!#PROLOGUE# 0
	!#PROLOGUE# 1
	sethi	%hi(ex1), %o2
	ld	[%o2+%lo(ex1)], %o1
	mov	1, %o0
	stb	%o0, [%o1]
	ld	[%o2+%lo(ex1)], %o3
	^^^^^
	after the store ex1 is reloaded. 
	true_dependence returns true for these last 2 instructions, it
	seems that the ex1->a is put in the alias set 0. That is a
	mistake, but I couldn't find where that is done....
	
	It looks like GCC treats ex1->a as a char*, but that is
	incorrect, a store to ex1->a cannot alias ex1

	mov	3, %o0
	mov	2, %o1
	st	%o0, [%o3+8]
	^^^ 
	After this store ex1 is not reloaded, but in this case the
	struct member is an "int"
	
	retl
	st	%o1, [%o3+4]


foo:
	!#PROLOGUE# 0
	!#PROLOGUE# 1
	mov	3, %o1
	st	%o1, [%o0+8]
	mov	1, %o2
	mov	2, %o1
	stb	%o2, [%o0]
	retl
	st	%o1, [%o0+4]

nothing like that happens here, when the pointer to the structure is
passed as a parameter. 


This is an important pessimization. 
GCC itself contains a lot of global pointers to structures...

Release:
gcc version 3.1 20010402 20011026

Environment:
sun-sparc-solaris2.7 [Also ix86 Linux - Neil]

How-To-Repeat:
Compile the code in the description with -O2 -S -fstrict-aliasing
and look at the resulting assembly
Comment 1 Wolfgang Bangerth 2003-03-07 02:19:07 UTC
State-Changed-From-To: open->analyzed
State-Changed-Why: Indeed. This odd behavior persists in 3.2, 3.3 and present
    mainline. Annoying. As noted, this is even better visible
    in x86 assembler:
    
    bar:
    	pushl %ebp
    	movl %esp,%ebp
    	movl ex1,%eax
    	movb $1,(%eax)
    	movl ex1,%eax    <-- this is the duplicate reload
    	movl $2,4(%eax)
    	movb $3,8(%eax)
    	movl %ebp,%esp
    	popl %ebp
    	ret
    
    foo:
    	pushl %ebp
    	movl %esp,%ebp
    	movl 8(%ebp),%eax
    	movb $1,(%eax)
    	movl $2,4(%eax)
    	movb $3,8(%eax)
    	movl %ebp,%esp
    	popl %ebp
    	ret
    
    W.
Comment 2 Dan Nicolaescu 2003-05-03 16:18:09 UTC
From: Dan Nicolaescu <dann@ics.uci.edu>
To: bangerth@dealii.org
Cc: gcc-gnats@gcc.gnu.org
Subject: Re: c/2480: aliasing problem with global structures
Date: Sat, 03 May 2003 16:18:09 -0700

 With the tweaks below the code in this PR can be added to the
 GCC testsuite in case somebody wants to do that.
 
 /* { dg-do link } */
 
 struct example
 {
   char a;
   int b;
   char c;
 } *ex1;
 
 extern void link_error(void);
 
 void
 bar (void)
 {
   ex1->a = 1;
   ex1->b = 2;
   ex1->c = 3;
   
   if (ex1->a != 1)
     link_error ();
   if (ex1->b != 2)
     link_error ();
   if (ex1->c != 3)
     link_error ();
 
 }
 
 void
 foo (struct example *ex2)
 {
   ex2->a = 1;
   ex2->b = 2;
   ex2->c = 3;
 
   if (ex2->a != 1)
     link_error ();
   if (ex2->b != 2)
     link_error ();
   if (ex2->c != 3)
     link_error ();
 
 }
 
 int main (void)
 {
   bar ();
   foo (ex1);
   return 0;
 }
Comment 3 Andrew Pinski 2003-12-26 02:07:06 UTC
It also happens on the tree-ssa.
Comment 4 Andrew Pinski 2004-01-19 19:28:18 UTC
dann, is this fixed on the tree-ssa with PTA?
Comment 5 Dan Nicolaescu 2004-01-19 19:42:39 UTC
No, tree-ssa does not help.

RTL optimizers can optimize the "foo" function in the testcase, 
the tree-ssa optimizers don't, here is the .optimized dump: 

foo (ex2)
{
<bb 0>:
  ex2->a = 1;
  ex2->b = 2;
  ex2->c = 3;
  if (ex2->a != 1) goto <L0>; else goto <L1>;

<L0>:;
  link_error ();

<L1>:;
  if (ex2->b != 2) goto <L2>; else goto <L3>;

<L2>:;
  link_error ();

<L3>:;
  if (ex2->c != 3) goto <L4>; else goto <L5>;

<L4>:;
  link_error ();

<L5>:;
  return;

}

The x86 assembly: 

foo:
        movl    4(%esp), %eax
        movb    $1, (%eax)
        movl    $2, 4(%eax)
        movb    $3, 8(%eax)
        ret


Comment 6 Steven Bosscher 2005-01-23 14:54:53 UTC
CVS today: 
 
        .text 
        .p2align 4,,15 
.globl bar 
        .type   bar, @function 
bar: 
        movl    ex1, %eax 
        pushl   %ebp 
        movl    %esp, %ebp 
        movb    $1, (%eax) 
        movl    $2, 4(%eax) 
        movb    $3, 8(%eax) 
        leave 
        ret 
        .size   bar, .-bar 
        .p2align 4,,15 
.globl foo 
        .type   foo, @function 
foo: 
        pushl   %ebp 
        movl    %esp, %ebp 
        movl    8(%ebp), %eax 
        movb    $1, (%eax) 
        movl    $2, 4(%eax) 
        movb    $3, 8(%eax) 
        leave 
        ret 
        .size   foo, .-foo 
 
Comment 7 Andrew Pinski 2005-01-23 15:15:14 UTC
This is not fixed, see example #2.
Comment 8 Steven Bosscher 2005-01-23 15:23:26 UTC
Whoops.  I forgot to link.  
 
Comment 9 Andrew Pinski 2005-06-09 16:45:01 UTC
Part of this has been fixed, there is only one loading of ex1 now on the mainline.
Comment 10 Richard Biener 2008-03-14 20:06:36 UTC
This is related to PR27799.  It is also fixed with -fstrict-aliasing on the
tree level:

bar ()
{
  struct example * ex1.0;

<bb 2>:
  ex1.0 = ex1;
  ex1.0->a = 1;
  ex1.0->b = 2;
  ex1.0->c = 3;
  return;

}

Without -fstrict-aliasing we get (see PR27799):

bar ()
{
  struct example * ex1.0;

<bb 2>:
  # VUSE <ex1_4(D)>
  ex1.0_1 = ex1;
  # ex1_6 = VDEF <ex1_4(D)>
  # SMT.5_7 = VDEF <SMT.5_5(D)>
  ex1.0_1->a = 1;
  # VUSE <ex1_6>
  ex1.0_2 = ex1;
  # ex1_8 = VDEF <ex1_6>
  # SMT.5_9 = VDEF <SMT.5_7>
  ex1.0_2->b = 2;
  # VUSE <ex1_8>
  ex1.0_3 = ex1;
  # ex1_10 = VDEF <ex1_8>
  # SMT.5_11 = VDEF <SMT.5_9>
  ex1.0_3->c = 3;
  return;

while with -fstrict-aliasing we have

bar ()
{
  struct example * ex1.0;

<bb 2>:
  # VUSE <ex1_4(D)>
  ex1.0_1 = ex1;
  # SMT.5_6 = VDEF <SMT.5_5(D)>
  ex1.0_1->a = 1;
  # VUSE <ex1_4(D)>
  ex1.0_2 = ex1;
  # SMT.5_7 = VDEF <SMT.5_6>
  ex1.0_2->b = 2;
  # VUSE <ex1_4(D)>
  ex1.0_3 = ex1;
  # SMT.5_8 = VDEF <SMT.5_7>
  ex1.0_3->c = 3;
  return;

}

If you disable all tree optimizations that do the optimization you still
have the missed optimization on the RTL level and get

bar:
        pushl   %ebp
        movl    %esp, %ebp
        movl    ex1, %eax
        movb    $1, (%eax)
        movl    ex1, %eax
        movl    $2, 4(%eax)
        movb    $3, 8(%eax)
        popl    %ebp
        ret

without strict-aliasing it gets even worse:

bar:
        pushl   %ebp
        movl    %esp, %ebp
        movl    ex1, %eax
        movb    $1, (%eax)
        movl    ex1, %eax
        movl    $2, 4(%eax)
        movl    ex1, %eax
        movb    $3, 8(%eax)
        popl    %ebp
        ret
Comment 11 Andrew Pinski 2008-04-07 01:32:57 UTC
Hmm, we do get something different on the trunk:
bar ()
{
  struct example * ex1.0;

<bb 2>:
  ex1.0 = ex1;
  ex1.0->a = 1;
  ex1.0->b = 2;
  ex1.0->c = 3;
  if (ex1.0->b != 2)
    goto <bb 3>;
  else
    goto <bb 5>;

<bb 3>:
  link_error ();
  if (ex1->c != 3)
    goto <bb 4>;
  else
    goto <bb 5>;

<bb 4>:
Invalid sum of incoming frequencies 5123, should be 6216
  link_error () [tail call];

<bb 5>:
Invalid sum of incoming frequencies 11093, should be 10000
  return;

}
Comment 12 Richard Biener 2008-04-07 09:00:40 UTC
This is because the oracle is confused by the link_error () call which clobbers
*ex2 and *ex1.  Note that SCCVN does not consider control-dependence as it
considers all edges executable all the time.
Comment 13 Richard Biener 2009-04-03 11:27:22 UTC
Fixed at -O2 since 4.4.
Comment 14 Richard Biener 2009-04-03 12:38:28 UTC
Subject: Bug 2480

Author: rguenth
Date: Fri Apr  3 12:38:08 2009
New Revision: 145499

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=145499
Log:
2009-04-03  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/2480
	PR tree-optimization/23086
	* gcc.dg/tree-ssa/pr2480.c: New testcase.
	* gcc.dg/tree-ssa/pr23086.c: Likewise.

Added:
    trunk/gcc/testsuite/gcc.dg/tree-ssa/pr23086.c
    trunk/gcc/testsuite/gcc.dg/tree-ssa/pr2480.c
Modified:
    trunk/gcc/testsuite/ChangeLog