c/2462: "restrict" implementation bug

Dan Nicolaescu dann@godzilla.ics.uci.edu
Mon Apr 2 14:36:00 GMT 2001


>Number:         2462
>Category:       c
>Synopsis:       "restrict" implementation bug
>Confidential:   no
>Severity:       serious
>Priority:       medium
>Responsible:    unassigned
>State:          open
>Class:          pessimizes-code
>Submitter-Id:   net
>Arrival-Date:   Mon Apr 02 14:36:01 PDT 2001
>Closed-Date:
>Last-Modified:
>Originator:     Dan Nicolaescu <dann@godzilla.ics.uci.edu>
>Release:        gcc version 3.1 20010402 (experimental)
>Organization:
>Environment:
sparc-sun-solaris2.7
But the problem is architecture independent.
The problem is also present in gcc-2.95.2
>Description:
According to http://wwwold.dkuug.dk/JTC1/SC22/WG14/www/docs/n897.pdf  
(pointed to by a link from http://gcc.gnu.org/readings.html ) Chapter 
6.7.3.1 paragraph 9  global variables with a restrict qualifier should
act "as if it were declared as an array". 
And in Chapter 6.7.3.1 paragraph 4: a pointer returned from a call to
"malloc" is the single initial means of accessing an array. 
The conclusion from these 2 is that a pointer from malloc cannot alias
a global restricted var. 

It seems that there is a bug when using both a restricted global var
and a pointer obtained from "malloc" call. 
As shown in the example below, when using either of them individually
"restrict" works correctly. 

Here is an example:

/* File-scope restrict-qualified pointers used as store targets by the
   functions in this example: d is written by foo, bar and baz, and g is
   written (and returned) by baz.  h is not referenced anywhere in the
   example as shown, and none of the three is assigned in this excerpt.  */
float *  __restrict__ d;
float *  __restrict__ g;
float *  __restrict__ h;

/* Pair of float pointers.  foobar returns one of these by value so that it
   can hand back both of its malloc'ed buffers.  */
struct two_floatp {
  float *first;
  float *second;
};

/* a malloced pointer, local restricted vars and a global restricted var */
/* Test case: for each i in [0, n), stores a[i] + b[i] into a freshly
   malloc'ed buffer f, stores a[i] + c[i] through the global restricted
   pointer d, then adds a[i] + b[i] to f[i] again.  a, b and c are read
   through the block-local restrict-qualified copies p, q and r.

   Returns f.  The malloc result is not checked, and nothing in this
   example frees the buffer.

   NOTE(review): malloc is called with no declaration visible in this
   excerpt; presumably <stdlib.h> is included in the real test file.  */
float* 
foo (float * a, float * b, float * c, int n)
{
  int i;
  float *f;
  f = (float*) malloc (n * sizeof (float));


  {
    /* Restrict-qualified aliases of the (unqualified) parameters, scoped
       to this inner block only.  */
    float * __restrict__ p = a;
    float * __restrict__ q = b;
    float * __restrict__ r = c;
    
    for (i = 0;i < n; i++)
      {
        f[i] = p[i] + q[i];
        /* Store through the global restricted pointer, placed between the
           two accesses to f[i]; the foo listing in this report reloads
           f[i] from memory after this store.  */
        d[i] = p[i] + r[i]; 
        f[i] += p[i] + q[i];
      }

  }
  return f;
}


/* a malloced pointer and a restricted global var */
/* Test case: for each i in [0, n), stores b[i] + c[i] into a freshly
   malloc'ed buffer f and through the global restricted pointer d, then adds
   b[i] + c[i] to f[i] again.  b and c are restrict-qualified parameters.

   Returns f.  The malloc result is not checked, and nothing in this
   example frees the buffer.

   NOTE(review): malloc is called with no declaration visible in this
   excerpt; presumably <stdlib.h> is included in the real test file.  */
float* 
bar (float *  __restrict__ b,
     float *  __restrict__ c, int n)
{
  int i;

  float * f = (float*) malloc (n * sizeof (float));
  
  for (i = 0;i < n; i++)
    {
      f[i] = b[i] + c[i];
      /* Store through the global restricted pointer, placed between the
         two accesses to f[i]; the bar listing in this report reloads f[i]
         from memory after this store (the "one extra load").  */
      d[i] = b[i] + c[i]; 
      f[i] += b[i] + c[i];
    }
  return f;
}


/* 2 malloced pointers */
/* Control case with no global involved: for each i in [0, n), stores
   b[i] + c[i] into two separately malloc'ed buffers f and ff, then adds
   b[i] + c[i] to f[i] again.  b and c are restrict-qualified parameters.

   Returns both buffers in a struct two_floatp (first = f, second = ff).
   Neither malloc result is checked, and nothing in this example frees the
   buffers.  The report describes the generated loop for this function as
   fine.

   NOTE(review): malloc is called with no declaration visible in this
   excerpt; presumably <stdlib.h> is included in the real test file.  */
struct two_floatp
foobar (float *  __restrict__ b,
        float *  __restrict__ c, int n)
{
  int i;

  struct two_floatp retval;

  float * f = (float*) malloc (n * sizeof (float));
  float * ff = (float*) malloc (n * sizeof (float));
  
  for (i = 0;i < n; i++)
    {
      f[i] = b[i] + c[i];
      /* Store to the second malloc'ed buffer, placed between the two
         accesses to f[i].  */
      ff[i] = b[i] + c[i]; 
      f[i] += b[i] + c[i];
    }
  retval.first = f;
  retval.second = ff;
  return retval;
}

/* 2 restricted global vars */
/* Control case with no malloc involved: for each i in [0, n), stores
   b[i] + c[i] through the global restricted pointers g and d, then adds
   b[i] + c[i] to g[i] again.  b and c are restrict-qualified parameters.

   Returns the current value of the global g.  The report describes the
   generated loop for this function as fine.  */
float* 
baz (float *  __restrict__ b,
     float *  __restrict__ c, int n)
{
  int i;
  for (i = 0;i < n; i++)
    {
      g[i] = b[i] + c[i];
      /* Store through the other global restricted pointer, placed between
         the two accesses to g[i].  */
      d[i] = b[i] + c[i]; 
      g[i] += b[i] + c[i];
    }
  return g;
}

Following is the SPARC assembly just for the loops from all the functions.
	

foo:
[snip]	
.LL5:
	sll	%o2, 2, %o0
	ld	[%i0+%o0], %f3
	add	%o2, 1, %o2
	ld	[%i1+%o0], %f4
	cmp	%o2, %i3
	fadds	%f3, %f4, %f4
	ld	[%i2+%o0], %f2
	fadds	%f3, %f2, %f3
	st	%f4, [%o1+%o0]
	st	%f3, [%o3+%o0]
	ld	[%o1+%o0], %f2
	fadds	%f2, %f4, %f2
	bl	.LL5
	st	%f2, [%o1+%o0]
	
Note there are 2 extra loads

		
bar:
[snip]	
.LL12:
	sll	%o2, 2, %o0
	ld	[%i0+%o0], %f3
	add	%o2, 1, %o2
	ld	[%i1+%o0], %f2
	cmp	%o2, %i2
	fadds	%f3, %f2, %f3
	st	%f3, [%o1+%o0]
	st	%f3, [%o3+%o0]
	ld	[%o1+%o0], %f2
	fadds	%f2, %f3, %f2
	bl	.LL12
	st	%f2, [%o1+%o0]
	
Note one extra load
		
foobar:
[snip]	
.LL19:
	sll	%o1, 2, %o0
	ld	[%i0+%o0], %f2
	add	%o1, 1, %o1
	ld	[%i1+%o0], %f3
	cmp	%o1, %i2
	fadds	%f2, %f3, %f2
	fadds	%f2, %f2, %f4
	st	%f2, [%o2+%o0]
	bl	.LL19
	st	%f4, [%l1+%o0]

This one is fine.
		
baz:
[snip]	
.LL26:
	sll	%i3, 2, %i0
	ld	[%o7+%i0], %f2
	add	%i3, 1, %i3
	ld	[%i1+%i0], %f3
	cmp	%i3, %i2
	fadds	%f2, %f3, %f2
	fadds	%f2, %f2, %f4
	st	%f2, [%i5+%i0]
	bl	.LL26
	st	%f4, [%i4+%i0]
	b	.LL30
	ld	[%g1+%lo(g)], %i0
	
As is this one. 


>How-To-Repeat:
Compile with gcc -O2 -fstrict-aliasing -S 
and look at the assembly for the foo and bar functions. 
>Fix:

>Release-Note:
>Audit-Trail:
>Unformatted:



More information about the Gcc-bugs mailing list