This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
"restrict" implementation bug ?
- To: gcc at gcc dot gnu dot org
- Subject: "restrict" implementation bug ?
- From: Dan Nicolaescu <dann at godzilla dot ICS dot UCI dot EDU>
- Date: Sun, 01 Apr 2001 21:35:23 -0700
According to http://wwwold.dkuug.dk/JTC1/SC22/WG14/www/docs/n897.pdf
(pointed to by a link from http://gcc.gnu.org/readings.html)
(and BTW there is an n937.pdf file in the same place that seems to be
a newer version)
a global variable with a restrict qualifier should act "as if it were
declared as an array".
It seems that there is a bug when using both a restricted global var
and a pointer obtained from a "malloc" call.
As shown in the example below, when using either of them individually,
"restrict" works correctly.
Here is an example:
#include <stdlib.h>
/* File-scope restrict-qualified pointers used as store targets by the
   test functions.  None of them is assigned in the code shown here.
   d is written by foo, bar and baz; g is written and returned by baz;
   h is not referenced in the code shown here.  */
float * __restrict__ d;
float * __restrict__ g;
float * __restrict__ h;
/* A pair of float pointers; foobar uses it to return both of the
   buffers it allocates.  */
struct two_floatp {
float *first;
float *second;
};
/* Test case: a malloc'ed pointer, block-local restrict-qualified
   pointers, and a restrict-qualified global.

   Allocates an n-element float buffer f, then for each i in [0, n)
   stores a[i] + b[i] into f[i], stores a[i] + c[i] through the global
   d, and adds a[i] + b[i] to f[i] again.  Returns f; nothing in this
   function frees it.  The malloc result is not checked before use, and
   d is not assigned in this function.  */
float*
foo (float * a, float * b, float * c, int n)
{
int i;
float *f;
f = (float*) malloc (n * sizeof (float));
{
/* Restrict-qualified copies of the unqualified parameters, scoped to
   this inner block.  */
float * __restrict__ p = a;
float * __restrict__ q = b;
float * __restrict__ r = c;
for (i = 0;i < n; i++)
{
f[i] = p[i] + q[i];
/* Store through the global d, placed between the two accesses to
   f[i].  */
d[i] = p[i] + r[i];
/* Reads f[i] and recomputes p[i] + q[i] after the store to d[i].  */
f[i] += p[i] + q[i];
}
}
return f;
}
/* Test case: a malloc'ed pointer and a restrict-qualified global, with
   restrict-qualified parameters b and c.

   Allocates an n-element float buffer f, then for each i in [0, n)
   stores b[i] + c[i] into f[i], stores the same sum through the global
   d, and adds the sum to f[i] again.  Returns f; nothing in this
   function frees it.  The malloc result is not checked before use, and
   d is not assigned in this function.  */
float*
bar (float * __restrict__ b,
float * __restrict__ c, int n)
{
int i;
float * f = (float*) malloc (n * sizeof (float));
for (i = 0;i < n; i++)
{
f[i] = b[i] + c[i];
/* Store through the global d, placed between the two accesses to
   f[i].  */
d[i] = b[i] + c[i];
/* Reads f[i] again after the store to d[i].  */
f[i] += b[i] + c[i];
}
return f;
}
/* Test case: two malloc'ed pointers, with restrict-qualified
   parameters b and c and no global involved.

   Allocates two n-element float buffers f and ff, then for each i in
   [0, n) stores b[i] + c[i] into f[i], stores the same sum into ff[i],
   and adds the sum to f[i] again.  Returns both buffers in a struct
   two_floatp (first = f, second = ff); nothing in this function frees
   them.  Neither malloc result is checked before use.  */
struct two_floatp
foobar (float * __restrict__ b,
float * __restrict__ c, int n)
{
int i;
struct two_floatp retval;
float * f = (float*) malloc (n * sizeof (float));
float * ff = (float*) malloc (n * sizeof (float));
for (i = 0;i < n; i++)
{
f[i] = b[i] + c[i];
/* Store to the second malloc'ed buffer, placed between the two
   accesses to f[i].  */
ff[i] = b[i] + c[i];
f[i] += b[i] + c[i];
}
retval.first = f;
retval.second = ff;
return retval;
}
/* Test case: two restrict-qualified globals, with restrict-qualified
   parameters b and c and no allocation.

   For each i in [0, n) stores b[i] + c[i] through the global g, stores
   the same sum through the global d, and adds the sum to g[i] again.
   Returns the current value of g.  Neither g nor d is assigned in this
   function.  */
float*
baz (float * __restrict__ b,
float * __restrict__ c, int n)
{
int i;
for (i = 0;i < n; i++)
{
g[i] = b[i] + c[i];
/* Store through the other global, placed between the two accesses to
   g[i].  */
d[i] = b[i] + c[i];
g[i] += b[i] + c[i];
}
return g;
}
The relevant assembly (just the loops) that results when compiling this
with gcc-2.95.2 on sparc-solaris2.7 using -O2 -fstrict-aliasing is:
(something very similar happens with a month old snapshot too, but
that one crashes for me sometimes...)
foo:
[snip]
.LL6:
ld [%o1+%l0], %f3
addcc %i3, -1, %i3
ld [%o1+%i1], %f4
fadds %f3, %f4, %f4
ld [%o1+%i2], %f2
fadds %f3, %f2, %f3
st %f4, [%o1+%i0]
st %f3, [%o1+%o0]
ld [%o1+%i0], %f2
fadds %f2, %f4, %f2
st %f2, [%o1+%i0]
bne .LL6
add %o1, 4, %o1
Note there are 2 extra loads
bar:
[snip]
.LL12:
ld [%o1+%i1], %f2
addcc %i2, -1, %i2
ld [%o1+%l0], %f3
fadds %f3, %f2, %f3
st %f3, [%o1+%i0]
st %f3, [%o1+%o0]
ld [%o1+%i0], %f2
fadds %f2, %f3, %f2
st %f2, [%o1+%i0]
bne .LL12
add %o1, 4, %o1
Note one extra load
foobar:
[snip]
.LL18:
ld [%o1+%i0], %f2
addcc %i2, -1, %i2
ld [%o1+%i1], %f3
fadds %f2, %f3, %f2
fadds %f2, %f2, %f4
st %f2, [%o1+%o0]
st %f4, [%o1+%l1]
bne .LL18
add %o1, 4, %o1
st %l1, [%l2]
This one is fine.
baz:
[snip]
.LL25:
ld [%g2+%o0], %f2
addcc %o2, -1, %o2
ld [%g2+%o1], %f3
fadds %f2, %f3, %f2
fadds %f2, %f2, %f4
st %f2, [%g2+%o3]
st %f4, [%g2+%g3]
bne .LL25
add %g2, 4, %g2
As is this one.
foo and bar may show the same problem...
Hope this helps.
--dan