This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Bug fortran/28005] [4.1 only] gfortran: matmul produces wrong result
- From: Paul Thomas <paulthomas2 at wanadoo dot fr>
- To: "'fortran at gcc dot gnu dot org'" <fortran at gcc dot gnu dot org>, patch <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 25 Aug 2006 16:08:12 +0200
- Subject: [Bug fortran/28005] [4.1 only] gfortran: matmul produces wrong result
Dear all,
I though this was going to be much more difficult than it turned out to
be, which is why I have been sitting on the port from 4.2. As it
happens, all I had to do was to copy the working part of matmul.m4 from
4.2 to 4.1 and it worked fine.
I will commit to 4.1 on Sunday morning, unless there is any objection.
Paul
2006-08-25 Paul Thomas <pault@gcc.gnu.org>
PR fortran/28005 <http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28005>
* m4/matmul.m4: Working part of function ported from
trunk.
* generated/matmul_r4.c: Regenerate.
* generated/matmul_r8.c: Regenerate.
* generated/matmul_r10.c: Regenerate.
* generated/matmul_r16.c: Regenerate.
* generated/matmul_c4.c: Regenerate.
* generated/matmul_c8.c: Regenerate.
* generated/matmul_c10.c: Regenerate.
* generated/matmul_c16.c: Regenerate.
* generated/matmul_i4.c: Regenerate.
* generated/matmul_i8.c: Regenerate.
* generated/matmul_i16.c: Regenerate.
2006-08-25 Paul Thomas <pault@gcc.gnu.org>
PR fortran/28005 <http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28005>
* gfortran.dg/matmul_3.f90: New test.
Index: libgfortran/m4/matmul.m4
===================================================================
*** libgfortran/m4/matmul.m4 (revision 116368)
--- libgfortran/m4/matmul.m4 (working copy)
*************** sinclude(`matmul_asm_'rtype_code`.m4')dn
*** 206,212 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 206,250 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const rtype_name *restrict abase_x;
! const rtype_name *restrict bbase_y;
! rtype_name *restrict dest_y;
! rtype_name s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (rtype_name) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const rtype_name *restrict bbase_y;
! rtype_name s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (rtype_name) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** sinclude(`matmul_asm_'rtype_code`.m4')dn
*** 218,223 ****
--- 256,282 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const rtype_name *restrict abase_x;
+ const rtype_name *restrict bbase_y;
+ rtype_name *restrict dest_y;
+ rtype_name s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (rtype_name) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_c10.c
===================================================================
*** libgfortran/generated/matmul_c10.c (revision 116368)
--- libgfortran/generated/matmul_c10.c (working copy)
*************** matmul_c10 (gfc_array_c10 * const restri
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_10 *restrict abase_x;
! const GFC_COMPLEX_10 *restrict bbase_y;
! GFC_COMPLEX_10 *restrict dest_y;
! GFC_COMPLEX_10 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_10) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_COMPLEX_10 *restrict bbase_y;
! GFC_COMPLEX_10 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_COMPLEX_10) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_c10 (gfc_array_c10 * const restri
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_COMPLEX_10 *restrict abase_x;
+ const GFC_COMPLEX_10 *restrict bbase_y;
+ GFC_COMPLEX_10 *restrict dest_y;
+ GFC_COMPLEX_10 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_COMPLEX_10) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_c16.c
===================================================================
*** libgfortran/generated/matmul_c16.c (revision 116368)
--- libgfortran/generated/matmul_c16.c (working copy)
*************** matmul_c16 (gfc_array_c16 * const restri
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_16 *restrict abase_x;
! const GFC_COMPLEX_16 *restrict bbase_y;
! GFC_COMPLEX_16 *restrict dest_y;
! GFC_COMPLEX_16 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_16) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_COMPLEX_16 *restrict bbase_y;
! GFC_COMPLEX_16 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_COMPLEX_16) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_c16 (gfc_array_c16 * const restri
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_COMPLEX_16 *restrict abase_x;
+ const GFC_COMPLEX_16 *restrict bbase_y;
+ GFC_COMPLEX_16 *restrict dest_y;
+ GFC_COMPLEX_16 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_COMPLEX_16) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_c4.c
===================================================================
*** libgfortran/generated/matmul_c4.c (revision 116368)
--- libgfortran/generated/matmul_c4.c (working copy)
*************** matmul_c4 (gfc_array_c4 * const restrict
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_4 *restrict abase_x;
! const GFC_COMPLEX_4 *restrict bbase_y;
! GFC_COMPLEX_4 *restrict dest_y;
! GFC_COMPLEX_4 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_4) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_COMPLEX_4 *restrict bbase_y;
! GFC_COMPLEX_4 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_COMPLEX_4) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_c4 (gfc_array_c4 * const restrict
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_COMPLEX_4 *restrict abase_x;
+ const GFC_COMPLEX_4 *restrict bbase_y;
+ GFC_COMPLEX_4 *restrict dest_y;
+ GFC_COMPLEX_4 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_COMPLEX_4) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_c8.c
===================================================================
*** libgfortran/generated/matmul_c8.c (revision 116368)
--- libgfortran/generated/matmul_c8.c (working copy)
*************** matmul_c8 (gfc_array_c8 * const restrict
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_COMPLEX_8 *restrict abase_x;
! const GFC_COMPLEX_8 *restrict bbase_y;
! GFC_COMPLEX_8 *restrict dest_y;
! GFC_COMPLEX_8 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_COMPLEX_8) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_COMPLEX_8 *restrict bbase_y;
! GFC_COMPLEX_8 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_COMPLEX_8) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_c8 (gfc_array_c8 * const restrict
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_COMPLEX_8 *restrict abase_x;
+ const GFC_COMPLEX_8 *restrict bbase_y;
+ GFC_COMPLEX_8 *restrict dest_y;
+ GFC_COMPLEX_8 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_COMPLEX_8) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_i16.c
===================================================================
*** libgfortran/generated/matmul_i16.c (revision 116368)
--- libgfortran/generated/matmul_i16.c (working copy)
*************** matmul_i16 (gfc_array_i16 * const restri
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_INTEGER_16 *restrict abase_x;
! const GFC_INTEGER_16 *restrict bbase_y;
! GFC_INTEGER_16 *restrict dest_y;
! GFC_INTEGER_16 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_INTEGER_16) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_INTEGER_16 *restrict bbase_y;
! GFC_INTEGER_16 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_INTEGER_16) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_i16 (gfc_array_i16 * const restri
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_INTEGER_16 *restrict abase_x;
+ const GFC_INTEGER_16 *restrict bbase_y;
+ GFC_INTEGER_16 *restrict dest_y;
+ GFC_INTEGER_16 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_INTEGER_16) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_i4.c
===================================================================
*** libgfortran/generated/matmul_i4.c (revision 116368)
--- libgfortran/generated/matmul_i4.c (working copy)
*************** matmul_i4 (gfc_array_i4 * const restrict
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_INTEGER_4 *restrict abase_x;
! const GFC_INTEGER_4 *restrict bbase_y;
! GFC_INTEGER_4 *restrict dest_y;
! GFC_INTEGER_4 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_INTEGER_4) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_INTEGER_4 *restrict bbase_y;
! GFC_INTEGER_4 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_INTEGER_4) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_i4 (gfc_array_i4 * const restrict
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_INTEGER_4 *restrict abase_x;
+ const GFC_INTEGER_4 *restrict bbase_y;
+ GFC_INTEGER_4 *restrict dest_y;
+ GFC_INTEGER_4 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_INTEGER_4) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_i8.c
===================================================================
*** libgfortran/generated/matmul_i8.c (revision 116368)
--- libgfortran/generated/matmul_i8.c (working copy)
*************** matmul_i8 (gfc_array_i8 * const restrict
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_INTEGER_8 *restrict abase_x;
! const GFC_INTEGER_8 *restrict bbase_y;
! GFC_INTEGER_8 *restrict dest_y;
! GFC_INTEGER_8 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_INTEGER_8) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_INTEGER_8 *restrict bbase_y;
! GFC_INTEGER_8 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_INTEGER_8) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_i8 (gfc_array_i8 * const restrict
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_INTEGER_8 *restrict abase_x;
+ const GFC_INTEGER_8 *restrict bbase_y;
+ GFC_INTEGER_8 *restrict dest_y;
+ GFC_INTEGER_8 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_INTEGER_8) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_r10.c
===================================================================
*** libgfortran/generated/matmul_r10.c (revision 116368)
--- libgfortran/generated/matmul_r10.c (working copy)
*************** matmul_r10 (gfc_array_r10 * const restri
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_10 *restrict abase_x;
! const GFC_REAL_10 *restrict bbase_y;
! GFC_REAL_10 *restrict dest_y;
! GFC_REAL_10 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_10) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_REAL_10 *restrict bbase_y;
! GFC_REAL_10 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_REAL_10) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_r10 (gfc_array_r10 * const restri
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_REAL_10 *restrict abase_x;
+ const GFC_REAL_10 *restrict bbase_y;
+ GFC_REAL_10 *restrict dest_y;
+ GFC_REAL_10 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_REAL_10) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_r16.c
===================================================================
*** libgfortran/generated/matmul_r16.c (revision 116368)
--- libgfortran/generated/matmul_r16.c (working copy)
*************** matmul_r16 (gfc_array_r16 * const restri
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_16 *restrict abase_x;
! const GFC_REAL_16 *restrict bbase_y;
! GFC_REAL_16 *restrict dest_y;
! GFC_REAL_16 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_16) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_REAL_16 *restrict bbase_y;
! GFC_REAL_16 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_REAL_16) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_r16 (gfc_array_r16 * const restri
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_REAL_16 *restrict abase_x;
+ const GFC_REAL_16 *restrict bbase_y;
+ GFC_REAL_16 *restrict dest_y;
+ GFC_REAL_16 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_REAL_16) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_r4.c
===================================================================
*** libgfortran/generated/matmul_r4.c (revision 116368)
--- libgfortran/generated/matmul_r4.c (working copy)
*************** matmul_r4 (gfc_array_r4 * const restrict
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_4 *restrict abase_x;
! const GFC_REAL_4 *restrict bbase_y;
! GFC_REAL_4 *restrict dest_y;
! GFC_REAL_4 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_4) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_REAL_4 *restrict bbase_y;
! GFC_REAL_4 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_REAL_4) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_r4 (gfc_array_r4 * const restrict
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_REAL_4 *restrict abase_x;
+ const GFC_REAL_4 *restrict bbase_y;
+ GFC_REAL_4 *restrict dest_y;
+ GFC_REAL_4 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_REAL_4) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: libgfortran/generated/matmul_r8.c
===================================================================
*** libgfortran/generated/matmul_r8.c (revision 116368)
--- libgfortran/generated/matmul_r8.c (working copy)
*************** matmul_r8 (gfc_array_r8 * const restrict
*** 204,210 ****
}
}
}
! else
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
--- 204,248 ----
}
}
}
! else if (rxstride == 1 && aystride == 1 && bxstride == 1)
! {
! if (GFC_DESCRIPTOR_RANK (a) != 1)
! {
! const GFC_REAL_8 *restrict abase_x;
! const GFC_REAL_8 *restrict bbase_y;
! GFC_REAL_8 *restrict dest_y;
! GFC_REAL_8 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! dest_y = &dest[y*rystride];
! for (x = 0; x < xcount; x++)
! {
! abase_x = &abase[x*axstride];
! s = (GFC_REAL_8) 0;
! for (n = 0; n < count; n++)
! s += abase_x[n] * bbase_y[n];
! dest_y[x] = s;
! }
! }
! }
! else
! {
! const GFC_REAL_8 *restrict bbase_y;
! GFC_REAL_8 s;
!
! for (y = 0; y < ycount; y++)
! {
! bbase_y = &bbase[y*bystride];
! s = (GFC_REAL_8) 0;
! for (n = 0; n < count; n++)
! s += abase[n*axstride] * bbase_y[n];
! dest[y*rystride] = s;
! }
! }
! }
! else if (axstride < aystride)
{
for (y = 0; y < ycount; y++)
for (x = 0; x < xcount; x++)
*************** matmul_r8 (gfc_array_r8 * const restrict
*** 216,221 ****
--- 254,280 ----
/* dest[x,y] += a[x,n] * b[n,y] */
dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
}
+ else
+ {
+ const GFC_REAL_8 *restrict abase_x;
+ const GFC_REAL_8 *restrict bbase_y;
+ GFC_REAL_8 *restrict dest_y;
+ GFC_REAL_8 s;
+
+ for (y = 0; y < ycount; y++)
+ {
+ bbase_y = &bbase[y*bystride];
+ dest_y = &dest[y*rystride];
+ for (x = 0; x < xcount; x++)
+ {
+ abase_x = &abase[x*axstride];
+ s = (GFC_REAL_8) 0;
+ for (n = 0; n < count; n++)
+ s += abase_x[n*aystride] * bbase_y[n*bxstride];
+ dest_y[x*rxstride] = s;
+ }
+ }
+ }
}
#endif
Index: gcc/testsuite/gfortran.dg/matmul_3.f90
===================================================================
*** gcc/testsuite/gfortran.dg/matmul_3.f90 (revision 0)
--- gcc/testsuite/gfortran.dg/matmul_3.f90 (revision 0)
***************
*** 0 ****
--- 1,36 ----
+ ! { dg-do run }
+ ! Check the fix for PR28005, in which the mechanism for dealing
+ ! with matmul (transpose (a), b) would cause wrong results for
+ ! matmul (a(i, 1:n), b(1:n, 1:n)).
+ !
+ ! Based on the original testcase contributed by
+ ! Tobias Burnus <tobias.burnus@physik.fu-berlin.de>
+ !
+ implicit none
+ integer, parameter :: nmax = 3
+ integer :: i, n = 2
+ integer, dimension(nmax,nmax) :: iB=0 , iC=1
+ integer, dimension(nmax,nmax) :: iX1=99, iX2=99, iChk
+ iChk = reshape((/30,66,102,36,81,126,42,96,150/),(/3,3/))
+
+ ! This would give 3, 3, 99
+ iB = reshape((/1 ,3 ,0 ,2 ,5 ,0 ,0 ,0 ,0 /),(/3,3/))
+ iX1(1:n,1) = matmul( iB(2,1:n),iC(1:n,1:n) )
+
+ ! This would give 4, 4, 99
+ ib(3,1) = 1
+ iX2(1:n,1) = matmul( iB(2,1:n),iC(1:n,1:n) )
+
+ ! Whereas, we should have 8, 8, 99
+ if (any (iX1(1:n,1) .ne. (/8, 8, 99/))) print *, "#1", iX1
+ if (any (iX1 .ne. iX2)) print *,"#2", iX2
+
+ ! Make sure that the fix does not break transpose temporaries.
+ iB = reshape((/(i, i = 1, 9)/),(/3,3/))
+ ic = transpose (iB)
+ iX1 = transpose (iB)
+ iX1 = matmul (iX1, iC)
+ iX2 = matmul (transpose (iB), iC)
+ if (any (iX1 .ne. iX2)) print *,"#3", iX1
+ if (any (iX1 .ne. iChk)) print *,"#4", iChk
+ end