This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

"restrict" implementation bug ?



According to http://wwwold.dkuug.dk/JTC1/SC22/WG14/www/docs/n897.pdf  
(pointed to by a link from http://gcc.gnu.org/readings.html)
(and BTW there is an n937.pdf file in the same place that seems to be
a newer version)
a global variable with a restrict qualifier should act "as if it were
declared as an array".

It seems that there is a bug when using both a restricted global var
and a pointer obtained from "malloc" call. 
As shown in the example below, when using either of them individually
"restrict" works correctly.

Here is an example:

#include <stdlib.h>

/* File-scope restrict-qualified pointers used as store targets by the
   test functions in this example: d is written by foo, bar and baz; g is
   written and returned by baz; h is declared but not referenced in the
   code shown.  None of them is assigned in the code shown, so each must
   be pointed at storage of at least n floats before the functions that
   use it are called.  */
float *  __restrict__ d;
float *  __restrict__ g;
float *  __restrict__ h;

/* Pair of float pointers.  foobar returns its two malloc'd buffers to the
   caller by value in one of these.  */
struct two_floatp {
  /* Set to foobar's buffer f.  */
  float *first;
  /* Set to foobar's buffer ff.  */
  float *second;
};

/* a malloced pointer, local restricted vars and a global restricted var

   Allocates a buffer f of n floats with malloc and, for each i in [0, n),
   stores a[i] + b[i] into f[i], stores a[i] + c[i] through the global
   restrict pointer d, then adds a[i] + b[i] to f[i] again.

   a, b, c: input arrays; elements 0 .. n-1 of each are read, via the
            block-scope restrict-qualified copies p, q and r.
   n:       element count.

   Returns f.  The buffer is not freed here, so releasing it is left to
   the caller.  The malloc result is not checked for NULL before it is
   written through.  */
float* 
foo (float * a, float * b, float * c, int n)
{
  int i;
  float *f;
  f = (float*) malloc (n * sizeof (float));


  {
    /* The parameters themselves are unqualified; the restrict
       qualification is introduced only on these inner-block copies.  */
    float * __restrict__ p = a;
    float * __restrict__ q = b;
    float * __restrict__ r = c;
    
    for (i = 0;i < n; i++)
      {
        f[i] = p[i] + q[i];
        /* This store through the global d sits between the write of f[i]
           above and the read-modify-write of f[i] below; the statement
           order is the point of the test case and must be kept.  */
        d[i] = p[i] + r[i]; 
        f[i] += p[i] + q[i];
      }

  }
  return f;
}


/* a malloced pointer and a restricted global var

   Allocates a buffer f of n floats with malloc and, for each i in [0, n),
   stores b[i] + c[i] into f[i], stores the same sum through the global
   restrict pointer d, then adds b[i] + c[i] to f[i] again.

   b, c: restrict-qualified input arrays; elements 0 .. n-1 are read.
   n:    element count.

   Returns f.  The buffer is not freed here, so releasing it is left to
   the caller.  The malloc result is not checked for NULL before it is
   written through.  */
float* 
bar (float *  __restrict__ b,
     float *  __restrict__ c, int n)
{
  int i;

  float * f = (float*) malloc (n * sizeof (float));
  
  for (i = 0;i < n; i++)
    {
      f[i] = b[i] + c[i];
      /* Store through the global d placed between the two accesses to
         f[i]; the statement order is the point of the test case.  */
      d[i] = b[i] + c[i]; 
      f[i] += b[i] + c[i];
    }
  return f;
}


/* 2 malloced pointers

   Allocates two buffers f and ff of n floats each with malloc and, for
   each i in [0, n), stores b[i] + c[i] into f[i], stores the same sum
   into ff[i], then adds b[i] + c[i] to f[i] again.  No global pointer is
   involved in this variant.

   b, c: restrict-qualified input arrays; elements 0 .. n-1 are read.
   n:    element count.

   Returns a struct two_floatp by value with first = f and second = ff.
   Neither buffer is freed here, so releasing them is left to the caller.
   Neither malloc result is checked for NULL before it is written
   through.  */
struct two_floatp
foobar (float *  __restrict__ b,
        float *  __restrict__ c, int n)
{
  int i;

  struct two_floatp retval;

  float * f = (float*) malloc (n * sizeof (float));
  float * ff = (float*) malloc (n * sizeof (float));
  
  for (i = 0;i < n; i++)
    {
      f[i] = b[i] + c[i];
      /* Store to the second malloc'd buffer placed between the two
         accesses to f[i]; the statement order is the point of the test
         case.  */
      ff[i] = b[i] + c[i]; 
      f[i] += b[i] + c[i];
    }
  retval.first = f;
  retval.second = ff;
  return retval;
}

/* 2 restricted global vars

   For each i in [0, n), stores b[i] + c[i] through the global restrict
   pointer g, stores the same sum through the global restrict pointer d,
   then adds b[i] + c[i] to g[i] again.  Nothing is allocated here; g and
   d must already point at storage of at least n floats.

   b, c: restrict-qualified input arrays; elements 0 .. n-1 are read.
   n:    element count.

   Returns the current value of the global pointer g.  */
float* 
baz (float *  __restrict__ b,
     float *  __restrict__ c, int n)
{
  int i;
  for (i = 0;i < n; i++)
    {
      g[i] = b[i] + c[i];
      /* Store through the other global, d, placed between the two
         accesses to g[i]; the statement order is the point of the test
         case.  */
      d[i] = b[i] + c[i]; 
      g[i] += b[i] + c[i];
    }
  return g;
}

The relevant assembly (just the loops) produced when compiling this
with gcc-2.95.2 on sparc-solaris2.7 using -O2 -fstrict-aliasing is:
(something very similar happens with a month old snapshot too, but
that one crashes for me sometimes...)

foo:
[snip]
.LL6:
	ld	[%o1+%l0], %f3
	addcc	%i3, -1, %i3
	ld	[%o1+%i1], %f4
	fadds	%f3, %f4, %f4
	ld	[%o1+%i2], %f2
	fadds	%f3, %f2, %f3
	st	%f4, [%o1+%i0]
	st	%f3, [%o1+%o0]
	ld	[%o1+%i0], %f2
	fadds	%f2, %f4, %f2
	st	%f2, [%o1+%i0]
	bne	.LL6
	add	%o1, 4, %o1

Note there are 2 extra loads


bar:
[snip]
.LL12:
	ld	[%o1+%i1], %f2
	addcc	%i2, -1, %i2
	ld	[%o1+%l0], %f3
	fadds	%f3, %f2, %f3
	st	%f3, [%o1+%i0]
	st	%f3, [%o1+%o0]
	ld	[%o1+%i0], %f2
	fadds	%f2, %f3, %f2
	st	%f2, [%o1+%i0]
	bne	.LL12
	add	%o1, 4, %o1

Note one extra load


foobar:
[snip]
.LL18:
	ld	[%o1+%i0], %f2
	addcc	%i2, -1, %i2
	ld	[%o1+%i1], %f3
	fadds	%f2, %f3, %f2
	fadds	%f2, %f2, %f4
	st	%f2, [%o1+%o0]
	st	%f4, [%o1+%l1]
	bne	.LL18
	add	%o1, 4, %o1
	st	%l1, [%l2]

This one is fine. 

baz:
[snip]
.LL25:
	ld	[%g2+%o0], %f2
	addcc	%o2, -1, %o2
	ld	[%g2+%o1], %f3
	fadds	%f2, %f3, %f2
	fadds	%f2, %f2, %f4
	st	%f2, [%g2+%o3]
	st	%f4, [%g2+%g3]
	bne	.LL25
	add	%g2, 4, %g2

As is this one. 

foo and bar may show the same problem...

Hope this helps.

        --dan



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]