This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch, fortran] PR28005 - gfortran: matmul produces wrong result - Ping


Ahem! Ping....

:ADDPATCH fortran:

This patch fixes an error in the MATMUL intrinsic. This has code to treat a frequent construct:

r = matmul (transpose (a), b)

It does this by detecting the stride in the second dimension to be one and then using an appropriately adapted algorithm. Tobias Burnus discovered, however, that the condition is not unique. This

r = matmul (a(2, 1:n), b(1:n, :))

also produces this signature and the optimized code for the first case fails because the stride used for a is one,rather than n.

The patch branches on the rank of a being one to similar code to that of the transpose case but with one loop less and the correct stride in a. The testcase checks this and that the transpose temporary treatment is not broken by the patch.

Regtested on FC5/Athlon1700. OK for trunk and 4.1?

Paul

2006-06-14 Paul Thomas <pault@gcc.gnu.org>

   PR libfortran/28005
   * m4/matmul.m4: aystride = 1 does not uniquely detect the
   presence of a temporary transpose; an array element in the
   first dimension produces the same signature.  Detect this
   using the rank of a and add specific code.
   * generated/matmul_r4.c: Regenerate.
   * generated/matmul_r8.c: Regenerate.
   * generated/matmul_r10.c: Regenerate.
   * generated/matmul_r16.c: Regenerate.
   * generated/matmul_c4.c: Regenerate.
   * generated/matmul_c8.c: Regenerate.
   * generated/matmul_c10.c: Regenerate.
   * generated/matmul_c16.c: Regenerate.
   * generated/matmul_i4.c: Regenerate.
   * generated/matmul_i8.c: Regenerate.
   * generated/matmul_i16.c: Regenerate.

2006-06-14 Paul Thomas <pault@gcc.gnu.org>

   PR fortran/28005
   * gfortran.dg/matmul_3.f90: New test.

------------------------------------------------------------------------

Index: libgfortran/m4/matmul.m4
===================================================================
*** libgfortran/m4/matmul.m4 (revision 114548)
--- libgfortran/m4/matmul.m4 (working copy)
*************** sinclude(`matmul_asm_'rtype_code`.m4')dn
*** 212,233 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const rtype_name *restrict abase_x;
! const rtype_name *restrict bbase_y;
! rtype_name *restrict dest_y;
! rtype_name s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (rtype_name) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 212,250 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const rtype_name *restrict abase_x;
! const rtype_name *restrict bbase_y;
! rtype_name *restrict dest_y;
! rtype_name s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (rtype_name) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const rtype_name *restrict bbase_y;
! rtype_name s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (rtype_name) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_r8.c
===================================================================
*** libgfortran/generated/matmul_r8.c (revision 114548)
--- libgfortran/generated/matmul_r8.c (working copy)
*************** matmul_r8 (gfc_array_r8 * const restrict
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_REAL_8 *restrict abase_x;
! const GFC_REAL_8 *restrict bbase_y;
! GFC_REAL_8 *restrict dest_y;
! GFC_REAL_8 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_REAL_8) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_8 *restrict abase_x;
! const GFC_REAL_8 *restrict bbase_y;
! GFC_REAL_8 *restrict dest_y;
! GFC_REAL_8 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_8) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_REAL_8 *restrict bbase_y;
! GFC_REAL_8 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_REAL_8) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_r16.c
===================================================================
*** libgfortran/generated/matmul_r16.c (revision 114548)
--- libgfortran/generated/matmul_r16.c (working copy)
*************** matmul_r16 (gfc_array_r16 * const restri
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_REAL_16 *restrict abase_x;
! const GFC_REAL_16 *restrict bbase_y;
! GFC_REAL_16 *restrict dest_y;
! GFC_REAL_16 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_REAL_16) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_16 *restrict abase_x;
! const GFC_REAL_16 *restrict bbase_y;
! GFC_REAL_16 *restrict dest_y;
! GFC_REAL_16 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_16) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_REAL_16 *restrict bbase_y;
! GFC_REAL_16 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_REAL_16) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_c8.c
===================================================================
*** libgfortran/generated/matmul_c8.c (revision 114548)
--- libgfortran/generated/matmul_c8.c (working copy)
*************** matmul_c8 (gfc_array_c8 * const restrict
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_COMPLEX_8 *restrict abase_x;
! const GFC_COMPLEX_8 *restrict bbase_y;
! GFC_COMPLEX_8 *restrict dest_y;
! GFC_COMPLEX_8 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_8) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_8 *restrict abase_x;
! const GFC_COMPLEX_8 *restrict bbase_y;
! GFC_COMPLEX_8 *restrict dest_y;
! GFC_COMPLEX_8 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_8) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_COMPLEX_8 *restrict bbase_y;
! GFC_COMPLEX_8 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_8) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_i8.c
===================================================================
*** libgfortran/generated/matmul_i8.c (revision 114548)
--- libgfortran/generated/matmul_i8.c (working copy)
*************** matmul_i8 (gfc_array_i8 * const restrict
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_INTEGER_8 *restrict abase_x;
! const GFC_INTEGER_8 *restrict bbase_y;
! GFC_INTEGER_8 *restrict dest_y;
! GFC_INTEGER_8 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_INTEGER_8) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_INTEGER_8 *restrict abase_x;
! const GFC_INTEGER_8 *restrict bbase_y;
! GFC_INTEGER_8 *restrict dest_y;
! GFC_INTEGER_8 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_INTEGER_8) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_INTEGER_8 *restrict bbase_y;
! GFC_INTEGER_8 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_INTEGER_8) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_c16.c
===================================================================
*** libgfortran/generated/matmul_c16.c (revision 114548)
--- libgfortran/generated/matmul_c16.c (working copy)
*************** matmul_c16 (gfc_array_c16 * const restri
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_COMPLEX_16 *restrict abase_x;
! const GFC_COMPLEX_16 *restrict bbase_y;
! GFC_COMPLEX_16 *restrict dest_y;
! GFC_COMPLEX_16 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_16) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_16 *restrict abase_x;
! const GFC_COMPLEX_16 *restrict bbase_y;
! GFC_COMPLEX_16 *restrict dest_y;
! GFC_COMPLEX_16 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_16) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_COMPLEX_16 *restrict bbase_y;
! GFC_COMPLEX_16 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_16) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_r10.c
===================================================================
*** libgfortran/generated/matmul_r10.c (revision 114548)
--- libgfortran/generated/matmul_r10.c (working copy)
*************** matmul_r10 (gfc_array_r10 * const restri
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_REAL_10 *restrict abase_x;
! const GFC_REAL_10 *restrict bbase_y;
! GFC_REAL_10 *restrict dest_y;
! GFC_REAL_10 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_REAL_10) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_10 *restrict abase_x;
! const GFC_REAL_10 *restrict bbase_y;
! GFC_REAL_10 *restrict dest_y;
! GFC_REAL_10 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_10) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_REAL_10 *restrict bbase_y;
! GFC_REAL_10 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_REAL_10) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_r4.c
===================================================================
*** libgfortran/generated/matmul_r4.c (revision 114548)
--- libgfortran/generated/matmul_r4.c (working copy)
*************** matmul_r4 (gfc_array_r4 * const restrict
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_REAL_4 *restrict abase_x;
! const GFC_REAL_4 *restrict bbase_y;
! GFC_REAL_4 *restrict dest_y;
! GFC_REAL_4 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_REAL_4) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_4 *restrict abase_x;
! const GFC_REAL_4 *restrict bbase_y;
! GFC_REAL_4 *restrict dest_y;
! GFC_REAL_4 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_4) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_REAL_4 *restrict bbase_y;
! GFC_REAL_4 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_REAL_4) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_c10.c
===================================================================
*** libgfortran/generated/matmul_c10.c (revision 114548)
--- libgfortran/generated/matmul_c10.c (working copy)
*************** matmul_c10 (gfc_array_c10 * const restri
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_COMPLEX_10 *restrict abase_x;
! const GFC_COMPLEX_10 *restrict bbase_y;
! GFC_COMPLEX_10 *restrict dest_y;
! GFC_COMPLEX_10 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_10) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_10 *restrict abase_x;
! const GFC_COMPLEX_10 *restrict bbase_y;
! GFC_COMPLEX_10 *restrict dest_y;
! GFC_COMPLEX_10 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_10) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_COMPLEX_10 *restrict bbase_y;
! GFC_COMPLEX_10 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_10) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_c4.c
===================================================================
*** libgfortran/generated/matmul_c4.c (revision 114548)
--- libgfortran/generated/matmul_c4.c (working copy)
*************** matmul_c4 (gfc_array_c4 * const restrict
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_COMPLEX_4 *restrict abase_x;
! const GFC_COMPLEX_4 *restrict bbase_y;
! GFC_COMPLEX_4 *restrict dest_y;
! GFC_COMPLEX_4 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_4) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_4 *restrict abase_x;
! const GFC_COMPLEX_4 *restrict bbase_y;
! GFC_COMPLEX_4 *restrict dest_y;
! GFC_COMPLEX_4 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_4) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_COMPLEX_4 *restrict bbase_y;
! GFC_COMPLEX_4 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_4) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_i4.c
===================================================================
*** libgfortran/generated/matmul_i4.c (revision 114548)
--- libgfortran/generated/matmul_i4.c (working copy)
*************** matmul_i4 (gfc_array_i4 * const restrict
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_INTEGER_4 *restrict abase_x;
! const GFC_INTEGER_4 *restrict bbase_y;
! GFC_INTEGER_4 *restrict dest_y;
! GFC_INTEGER_4 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_INTEGER_4) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_INTEGER_4 *restrict abase_x;
! const GFC_INTEGER_4 *restrict bbase_y;
! GFC_INTEGER_4 *restrict dest_y;
! GFC_INTEGER_4 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_INTEGER_4) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_INTEGER_4 *restrict bbase_y;
! GFC_INTEGER_4 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_INTEGER_4) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: libgfortran/generated/matmul_i16.c
===================================================================
*** libgfortran/generated/matmul_i16.c (revision 114548)
--- libgfortran/generated/matmul_i16.c (working copy)
*************** matmul_i16 (gfc_array_i16 * const restri
*** 210,231 ****
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! const GFC_INTEGER_16 *restrict abase_x;
! const GFC_INTEGER_16 *restrict bbase_y;
! GFC_INTEGER_16 *restrict dest_y;
! GFC_INTEGER_16 s;
! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
{
! abase_x = &abase[x*axstride];
s = (GFC_INTEGER_16) 0;
for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
}
}
}
--- 210,248 ----
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_INTEGER_16 *restrict abase_x;
! const GFC_INTEGER_16 *restrict bbase_y;
! GFC_INTEGER_16 *restrict dest_y;
! GFC_INTEGER_16 s;
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_INTEGER_16) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
{
! const GFC_INTEGER_16 *restrict bbase_y;
! GFC_INTEGER_16 s;
! ! for (y = 0; y < ycount; y++)
{
! bbase_y = &bbase[y*bystride];
s = (GFC_INTEGER_16) 0;
for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
}
}
}
Index: gcc/testsuite/gfortran.dg/matmul_3.f90
===================================================================
*** gcc/testsuite/gfortran.dg/matmul_3.f90 (revision 0)
--- gcc/testsuite/gfortran.dg/matmul_3.f90 (revision 0)
***************
*** 0 ****
--- 1,36 ----
+ ! { dg-do run }
+ ! Check the fix for PR28005, in which the mechanism for dealing
+ ! with matmul (transpose (a), b) would cause wrong results for
+ ! matmul (a(i, 1:n), b(1:n, 1:n)).
+ !
+ ! Based on the original testcase contributed by
+ ! Tobias Burnus <tobias.burnus@physik.fu-berlin.de>
+ ! + implicit none
+ integer, parameter :: nmax = 3
+ integer :: i, n = 2
+ integer, dimension(nmax,nmax) :: iB=0 , iC=1
+ integer, dimension(nmax,nmax) :: iX1=99, iX2=99, iChk
+ iChk = reshape((/30,66,102,36,81,126,42,96,150/),(/3,3/))
+ + ! This would give 3, 3, 99
+ iB = reshape((/1 ,3 ,0 ,2 ,5 ,0 ,0 ,0 ,0 /),(/3,3/))
+ iX1(1:n,1) = matmul( iB(2,1:n),iC(1:n,1:n) )
+ + ! This would give 4, 4, 99
+ ib(3,1) = 1
+ iX2(1:n,1) = matmul( iB(2,1:n),iC(1:n,1:n) )
+ + ! Whereas, we should have 8, 8, 99
+ if (any (iX1(1:n,1) .ne. (/8, 8, 99/))) call abort ()
+ if (any (iX1 .ne. iX2)) call abort ()
+ + ! Make sure that the fix does not break transpose temporaries.
+ iB = reshape((/(i, i = 1, 9)/),(/3,3/))
+ ic = transpose (iB)
+ iX1 = transpose (iB)
+ iX1 = matmul (iX1, iC)
+ iX2 = matmul (transpose (iB), iC)
+ if (any (iX1 .ne. iX2)) call abort ()
+ if (any (iX1 .ne. iChk)) call abort ()
+ end


------------------------------------------------------------------------

2006-06-14 Paul Thomas <pault@gcc.gnu.org>

	PR libfortran/28005
	* m4/matmul.m4: aystride = 1 does not uniquely detect the
	presence of a temporary transpose; an array element in the
	first dimension produces the same signature.  Detect this
	using the rank of a and add specific code.
	* generated/matmul_r4.c: Regenerate.
	* generated/matmul_r8.c: Regenerate.
	* generated/matmul_r10.c: Regenerate.
	* generated/matmul_r16.c: Regenerate.
	* generated/matmul_c4.c: Regenerate.
	* generated/matmul_c8.c: Regenerate.
	* generated/matmul_c10.c: Regenerate.
	* generated/matmul_c16.c: Regenerate.
	* generated/matmul_i4.c: Regenerate.
	* generated/matmul_i8.c: Regenerate.
	* generated/matmul_i16.c: Regenerate.

2006-06-14 Paul Thomas <pault@gcc.gnu.org>

PR fortran/28005
* gfortran.dg/matmul_3.f90: New test.




Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]