c/2462: "restrict" implementation bug
Dan Nicolaescu
dann@godzilla.ics.uci.edu
Mon Apr 2 14:36:00 GMT 2001
>Number: 2462
>Category: c
>Synopsis: "restrict" implementation bug
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: unassigned
>State: open
>Class: pessimizes-code
>Submitter-Id: net
>Arrival-Date: Mon Apr 02 14:36:01 PDT 2001
>Closed-Date:
>Last-Modified:
>Originator: Dan Nicolaescu <dann@godzilla.ics.uci.edu>
>Release: gcc version 3.1 20010402 (experimental)
>Organization:
>Environment:
sparc-sun-solaris2.7
But the problem is architecture independent.
The problem is also present in gcc-2.95.2
>Description:
According to http://wwwold.dkuug.dk/JTC1/SC22/WG14/www/docs/n897.pdf
(pointed to by a link from http://gcc.gnu.org/readings.html ) Chapter
6.7.3.1 paragraph 9, a global variable with a restrict qualifier should
act "as if it were declared as an array".
And in Chapter 6.7.3.1 paragraph 4: a pointer returned from a call to
"malloc" is the single initial means of accessing an array.
The conclusion from these 2 is that a pointer from malloc cannot alias
a global restricted var.
It seems that there is a bug when using both a restricted global var
and a pointer obtained from "malloc" call.
As shown in the example below, when either of them is used individually,
"restrict" works correctly.
Here is an example:
/* File-scope restrict-qualified float pointers used by the test functions:
   d is stored through by foo, bar and baz; g is stored through and returned
   by baz; h is not referenced anywhere in the visible snippet. */
float * __restrict__ d;
float * __restrict__ g;
float * __restrict__ h;
/* Pair of float pointers; foobar returns one of these by value so that it
   can hand back both of the buffers it obtains from malloc. */
struct two_floatp {
float *first;
float *second;
};
/* a malloced pointer, local restricted vars and a global restricted var */
/* Test case 1.  Allocates n floats with malloc into f, binds the unqualified
   parameters a, b, c to block-scope restrict pointers p, q, r, and for each
   i in [0, n) stores p[i] + q[i] to f[i], stores p[i] + r[i] through the
   global d, then adds p[i] + q[i] to f[i] again.  Returns f.
   The store to d[i] deliberately sits between the two accesses to f[i]: the
   report argues that d (restrict global) and f (fresh from malloc) cannot
   alias, so f[i] should not have to be reloaded after the store.
   NOTE(review): the malloc result is used unchecked, and no declaration of
   malloc (<stdlib.h>) is visible in this snippet -- confirm when compiling. */
float*
foo (float * a, float * b, float * c, int n)
{
int i;
float *f;
f = (float*) malloc (n * sizeof (float));
{
/* Inner block exists only to give the restrict-qualified copies of
   a, b and c their own scope. */
float * __restrict__ p = a;
float * __restrict__ q = b;
float * __restrict__ r = c;
for (i = 0;i < n; i++)
{
f[i] = p[i] + q[i];
d[i] = p[i] + r[i];
f[i] += p[i] + q[i];
}
}
return f;
}
/* a malloced pointer and a restricted global var */
/* Test case 2.  Same store pattern as foo, but the inputs b and c are
   restrict-qualified parameters instead of local restrict copies.  For each
   i in [0, n): f[i] = b[i] + c[i], then d[i] = b[i] + c[i] through the
   global d, then f[i] += b[i] + c[i].  Returns the malloc'ed buffer f.
   The report states that the compiler emits one extra load for this loop.
   NOTE(review): the malloc result is used unchecked. */
float*
bar (float * __restrict__ b,
float * __restrict__ c, int n)
{
int i;
float * f = (float*) malloc (n * sizeof (float));
for (i = 0;i < n; i++)
{
f[i] = b[i] + c[i];
/* Intervening store through the restrict-qualified global. */
d[i] = b[i] + c[i];
f[i] += b[i] + c[i];
}
return f;
}
/* 2 malloced pointers */
/* Test case 3 (control).  Allocates two n-float buffers, f and ff, with
   malloc.  For each i in [0, n): f[i] = b[i] + c[i], ff[i] = b[i] + c[i],
   then f[i] += b[i] + c[i].  No global is touched.  Both buffers are
   returned to the caller in a struct two_floatp (first = f, second = ff).
   The report states that the code generated for this loop is fine.
   NOTE(review): neither malloc result is checked. */
struct two_floatp
foobar (float * __restrict__ b,
float * __restrict__ c, int n)
{
int i;
struct two_floatp retval;
float * f = (float*) malloc (n * sizeof (float));
float * ff = (float*) malloc (n * sizeof (float));
for (i = 0;i < n; i++)
{
f[i] = b[i] + c[i];
/* Intervening store goes to the second malloc'ed buffer. */
ff[i] = b[i] + c[i];
f[i] += b[i] + c[i];
}
retval.first = f;
retval.second = ff;
return retval;
}
/* 2 restricted global vars */
/* Test case 4 (control).  Uses no malloc: for each i in [0, n) it stores
   b[i] + c[i] through the global g, stores the same sum through the global
   d, then adds b[i] + c[i] to g[i] again.  Returns the global pointer g.
   The report states that the code generated for this loop is fine. */
float*
baz (float * __restrict__ b,
float * __restrict__ c, int n)
{
int i;
for (i = 0;i < n; i++)
{
g[i] = b[i] + c[i];
/* Intervening store goes through the other restrict-qualified global. */
d[i] = b[i] + c[i];
g[i] += b[i] + c[i];
}
return g;
}
Following is the SPARC assembly just for the loops from all the functions.
foo:
[snip]
.LL5:
sll %o2, 2, %o0
ld [%i0+%o0], %f3
add %o2, 1, %o2
ld [%i1+%o0], %f4
cmp %o2, %i3
fadds %f3, %f4, %f4
ld [%i2+%o0], %f2
fadds %f3, %f2, %f3
st %f4, [%o1+%o0]
st %f3, [%o3+%o0]
ld [%o1+%o0], %f2
fadds %f2, %f4, %f2
bl .LL5
st %f2, [%o1+%o0]
Note there are 2 extra loads
bar:
[snip]
.LL12:
sll %o2, 2, %o0
ld [%i0+%o0], %f3
add %o2, 1, %o2
ld [%i1+%o0], %f2
cmp %o2, %i2
fadds %f3, %f2, %f3
st %f3, [%o1+%o0]
st %f3, [%o3+%o0]
ld [%o1+%o0], %f2
fadds %f2, %f3, %f2
bl .LL12
st %f2, [%o1+%o0]
Note one extra load
foobar:
[snip]
.LL19:
sll %o1, 2, %o0
ld [%i0+%o0], %f2
add %o1, 1, %o1
ld [%i1+%o0], %f3
cmp %o1, %i2
fadds %f2, %f3, %f2
fadds %f2, %f2, %f4
st %f2, [%o2+%o0]
bl .LL19
st %f4, [%l1+%o0]
This one is fine.
baz:
[snip]
.LL26:
sll %i3, 2, %i0
ld [%o7+%i0], %f2
add %i3, 1, %i3
ld [%i1+%i0], %f3
cmp %i3, %i2
fadds %f2, %f3, %f2
fadds %f2, %f2, %f4
st %f2, [%i5+%i0]
bl .LL26
st %f4, [%i4+%i0]
b .LL30
ld [%g1+%lo(g)], %i0
As is this one.
>How-To-Repeat:
Compile with gcc -O2 -fstrict-aliasing -S
and look at the assembly for the foo and bar functions.
>Fix:
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the Gcc-bugs
mailing list