This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch, libfortran] Speed up / orthogonalize in_pack and in_unpack
- From: Thomas Koenig <tkoenig at netcologne dot de>
- To: fortran at gcc dot gnu dot org, gcc-patches at gcc dot gnu dot org
- Date: Wed, 19 Mar 2008 10:19:18 +0100
- Subject: [patch, libfortran] Speed up / orthogonalize in_pack and in_unpack
Hello world,
now that Real World (TM) time constraints have lessened somewhat, I
finally have some time again to hack on gfortran :-)
Here's a patch that adds a few missing types (kind=1 and 2 integer,
kind=10 and kind=16 complex and real) to the internal routines that are
called by internal_pack and internal_unpack, so we don't have to call
memcpy on these. It also provides a routine for each of the real types
(I favored orthogonality over library code size for this).
The test cases check that there are no regressions for any
of the types that were touched.
Regression-tested on i686-pc-linux-gnu (although I couldn't test
the large-integer case). OK?
Thomas
2008-03-19 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/32972
* Makefile.am (in_pack_c): Add in_pack_i1.c, in_pack_i2.c,
in_pack_r4.c, in_pack_r8.c, in_pack_r10.c and in_pack_r16.c.
(in_unpack_c): Add in_unpack_i1.c, in_unpack_i2.c,
in_unpack_r4.c, in_unpack_r8.c, in_unpack_r10.c and
in_unpack_r16.c.
* Makefile.in: Regenerate.
* libgfortran.h: Add prototypes for internal_pack_1,
internal_pack_2, internal_pack_16, internal_pack_r4,
internal_pack_r8, internal_pack_r10, internal_pack_r16,
internal_pack_c10 and internal_pack_c16. Add prototypes for
internal_unpack_1, internal_unpack_2, internal_unpack_16,
internal_unpack_r4, internal_unpack_r8, internal_unpack_r10,
internal_unpack_r16, internal_unpack_c10 and
internal_unpack_c16.
* runtime/in_pack_generic.c (internal_pack): Use sizeof instead
of hardwired sizes.
Add calls to internal_pack_1, internal_pack_2,
internal_pack_16, internal_pack_r4, internal_pack_r8,
internal_pack_r10, internal_pack_r16, internal_pack_c10 and
internal_pack_c16.
* runtime/in_unpack_generic.c (internal_unpack): Use sizeof
instead of hardwired sizes.
Add calls to internal_unpack_1, internal_unpack_2,
internal_unpack_16, internal_unpack_r4, internal_unpack_r8,
internal_unpack_r10, internal_unpack_r16, internal_unpack_c10
and internal_unpack_c16.
* generated/in_pack_r4.c: New file.
* generated/in_pack_i2.c: New file.
* generated/in_unpack_i1.c: New file.
* generated/in_pack_r10.c: New file.
* generated/in_unpack_r4.c: New file.
* generated/in_unpack_i2.c: New file.
* generated/in_unpack_r16.c: New file.
* generated/in_pack_r8.c: New file.
* generated/in_unpack_r10.c: New file.
* generated/in_unpack_r8.c: New file.
* generated/in_pack_r16.c: New file.
* generated/in_pack_i1.c: New file.
2008-03-19 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/32972
* gfortran.dg/internal_pack_1.f90: New test case.
* gfortran.dg/internal_pack_2.f90: New test case.
* gfortran.dg/internal_pack_3.f90: New test case.
! { dg-do run }
! Test that the internal pack and unpack routines work OK
! for different data types
program main
integer(kind=1), dimension(3) :: i1
integer(kind=2), dimension(3) :: i2
integer(kind=4), dimension(3) :: i4
integer(kind=8), dimension(3) :: i8
real(kind=4), dimension(3) :: r4
real(kind=8), dimension(3) :: r8
i1 = (/ -1, 1, -3 /)
call sub_i1(i1(1:3:2))
if (any(i1 /= (/ 3, 1, 2 /))) call abort
i2 = (/ -1, 1, -3 /)
call sub_i2(i2(1:3:2))
if (any(i2 /= (/ 3, 1, 2 /))) call abort
i4 = (/ -1, 1, -3 /)
call sub_i4(i4(1:3:2))
if (any(i4 /= (/ 3, 1, 2 /))) call abort
i8 = (/ -1, 1, -3 /)
call sub_i8(i8(1:3:2))
if (any(i8 /= (/ 3, 1, 2 /))) call abort
r4 = (/ -1.0, 1.0, -3.0 /)
call sub_r4(r4(1:3:2))
if (any(r4 /= (/ 3.0, 1.0, 2.0/))) call abort
r8 = (/ -1.0_8, 1.0_8, -3.0_8 /)
call sub_r8(r8(1:3:2))
if (any(r8 /= (/ 3.0_8, 1.0_8, 2.0_8/))) call abort
end program main
subroutine sub_i1(i)
integer(kind=1), dimension(2) :: i
if (i(1) /= -1) call abort
if (i(2) /= -3) call abort
i(1) = 3
i(2) = 2
end subroutine sub_i1
subroutine sub_i2(i)
integer(kind=2), dimension(2) :: i
if (i(1) /= -1) call abort
if (i(2) /= -3) call abort
i(1) = 3
i(2) = 2
end subroutine sub_i2
subroutine sub_i4(i)
integer(kind=4), dimension(2) :: i
if (i(1) /= -1) call abort
if (i(2) /= -3) call abort
i(1) = 3
i(2) = 2
end subroutine sub_i4
subroutine sub_i8(i)
integer(kind=8), dimension(2) :: i
if (i(1) /= -1) call abort
if (i(2) /= -3) call abort
i(1) = 3
i(2) = 2
end subroutine sub_i8
subroutine sub_r4(r)
real(kind=4), dimension(2) :: r
if (r(1) /= -1.) call abort
if (r(2) /= -3.) call abort
r(1) = 3.
r(2) = 2.
end subroutine sub_r4
subroutine sub_r8(r)
real(kind=8), dimension(2) :: r
if (r(1) /= -1._8) call abort
if (r(2) /= -3._8) call abort
r(1) = 3._8
r(2) = 2._8
end subroutine sub_r8
! { dg-do run }
! { dg-require-effective-target fortran_large_real }
! Test that the internal pack and unpack routines work OK
! for our large real type.
program main
implicit none
integer,parameter :: k = selected_real_kind (precision (0.0_8) + 1)
real(kind=k), dimension(3) :: rk
rk = (/ -1.0_k, 1.0_k, -3.0_k /)
call sub_rk(rk(1:3:2))
if (any(rk /= (/ 3.0_k, 1.0_k, 2.0_k/))) call abort
end program main
subroutine sub_rk(r)
implicit none
integer,parameter :: k = selected_real_kind (precision (0.0_8) + 1)
real(kind=k), dimension(2) :: r
if (r(1) /= -1._k) call abort
if (r(2) /= -3._k) call abort
r(1) = 3._k
r(2) = 2._k
end subroutine sub_rk
! { dg-do run }
! { dg-require-effective-target fortran_large_int }
! Test that the internal pack and unpack routines work OK
! for our large integer type.
program main
integer,parameter :: k = selected_int_kind (range (0_8) + 1)
integer(kind=k), dimension(3) :: ik
ik = (/ -1, 1, -3 /)
call sub_ik(ik(1:3:2))
if (any(ik /= (/ 3, 1, 2 /))) call abort
end program main
subroutine sub_ik(i)
integer(kind=k), dimension(2) :: i
if (i(1) /= -1) call abort
if (i(2) /= -3) call abort
i(1) = 3
i(2) = 2
end subroutine sub_ik
Index: Makefile.am
===================================================================
--- Makefile.am (revision 133308)
+++ Makefile.am (working copy)
@@ -380,18 +380,30 @@ $(srcdir)/generated/cshift1_8.c \
$(srcdir)/generated/cshift1_16.c
in_pack_c = \
+$(srcdir)/generated/in_pack_i1.c \
+$(srcdir)/generated/in_pack_i2.c \
$(srcdir)/generated/in_pack_i4.c \
$(srcdir)/generated/in_pack_i8.c \
$(srcdir)/generated/in_pack_i16.c \
+$(srcdir)/generated/in_pack_r4.c \
+$(srcdir)/generated/in_pack_r8.c \
+$(srcdir)/generated/in_pack_r10.c \
+$(srcdir)/generated/in_pack_r16.c \
$(srcdir)/generated/in_pack_c4.c \
$(srcdir)/generated/in_pack_c8.c \
$(srcdir)/generated/in_pack_c10.c \
$(srcdir)/generated/in_pack_c16.c
in_unpack_c = \
+$(srcdir)/generated/in_unpack_i1.c \
+$(srcdir)/generated/in_unpack_i2.c \
$(srcdir)/generated/in_unpack_i4.c \
$(srcdir)/generated/in_unpack_i8.c \
$(srcdir)/generated/in_unpack_i16.c \
+$(srcdir)/generated/in_unpack_r4.c \
+$(srcdir)/generated/in_unpack_r8.c \
+$(srcdir)/generated/in_unpack_r10.c \
+$(srcdir)/generated/in_unpack_r16.c \
$(srcdir)/generated/in_unpack_c4.c \
$(srcdir)/generated/in_unpack_c8.c \
$(srcdir)/generated/in_unpack_c10.c \
Index: libgfortran.h
===================================================================
--- libgfortran.h (revision 133308)
+++ libgfortran.h (working copy)
@@ -609,10 +609,15 @@ extern void reshape_packed (char *, inde
const char *, index_type);
internal_proto(reshape_packed);
-/* Repacking functions. */
+/* Repacking functions. These are called internally by internal_pack
+ and internal_unpack. */
+
+GFC_INTEGER_1 *internal_pack_1 (gfc_array_i1 *);
+internal_proto(internal_pack_1);
+
+GFC_INTEGER_2 *internal_pack_2 (gfc_array_i2 *);
+internal_proto(internal_pack_2);
-/* ??? These aren't currently used by the compiler, though we
- certainly could do so. */
GFC_INTEGER_4 *internal_pack_4 (gfc_array_i4 *);
internal_proto(internal_pack_4);
@@ -624,6 +629,22 @@ GFC_INTEGER_16 *internal_pack_16 (gfc_ar
internal_proto(internal_pack_16);
#endif
+GFC_REAL_4 *internal_pack_r4 (gfc_array_r4 *);
+internal_proto(internal_pack_r4);
+
+GFC_REAL_8 *internal_pack_r8 (gfc_array_r8 *);
+internal_proto(internal_pack_r8);
+
+#if defined HAVE_GFC_REAL_10
+GFC_REAL_10 *internal_pack_r10 (gfc_array_r10 *);
+internal_proto(internal_pack_r10);
+#endif
+
+#if defined HAVE_GFC_REAL_16
+GFC_REAL_16 *internal_pack_r16 (gfc_array_r16 *);
+internal_proto(internal_pack_r16);
+#endif
+
GFC_COMPLEX_4 *internal_pack_c4 (gfc_array_c4 *);
internal_proto(internal_pack_c4);
@@ -635,6 +656,17 @@ GFC_COMPLEX_10 *internal_pack_c10 (gfc_a
internal_proto(internal_pack_c10);
#endif
+#if defined HAVE_GFC_COMPLEX_16
+GFC_COMPLEX_16 *internal_pack_c16 (gfc_array_c16 *);
+internal_proto(internal_pack_c16);
+#endif
+
+extern void internal_unpack_1 (gfc_array_i1 *, const GFC_INTEGER_1 *);
+internal_proto(internal_unpack_1);
+
+extern void internal_unpack_2 (gfc_array_i2 *, const GFC_INTEGER_2 *);
+internal_proto(internal_unpack_2);
+
extern void internal_unpack_4 (gfc_array_i4 *, const GFC_INTEGER_4 *);
internal_proto(internal_unpack_4);
@@ -646,6 +678,22 @@ extern void internal_unpack_16 (gfc_arra
internal_proto(internal_unpack_16);
#endif
+extern void internal_unpack_r4 (gfc_array_r4 *, const GFC_REAL_4 *);
+internal_proto(internal_unpack_r4);
+
+extern void internal_unpack_r8 (gfc_array_r8 *, const GFC_REAL_8 *);
+internal_proto(internal_unpack_r8);
+
+#if defined HAVE_GFC_REAL_10
+extern void internal_unpack_r10 (gfc_array_r10 *, const GFC_REAL_10 *);
+internal_proto(internal_unpack_r10);
+#endif
+
+#if defined HAVE_GFC_REAL_16
+extern void internal_unpack_r16 (gfc_array_r16 *, const GFC_REAL_16 *);
+internal_proto(internal_unpack_r16);
+#endif
+
extern void internal_unpack_c4 (gfc_array_c4 *, const GFC_COMPLEX_4 *);
internal_proto(internal_unpack_c4);
Index: runtime/in_pack_generic.c
===================================================================
--- runtime/in_pack_generic.c (revision 133308)
+++ runtime/in_pack_generic.c (working copy)
@@ -65,25 +65,65 @@ internal_pack (gfc_array_char * source)
{
case GFC_DTYPE_INTEGER:
case GFC_DTYPE_LOGICAL:
- case GFC_DTYPE_REAL:
switch (size)
{
- case 4:
- return internal_pack_4 ((gfc_array_i4 *)source);
+ case sizeof (GFC_INTEGER_1):
+ return internal_pack_1 ((gfc_array_i1 *) source);
+
+ case sizeof (GFC_INTEGER_2):
+ return internal_pack_2 ((gfc_array_i2 *) source);
+
+ case sizeof (GFC_INTEGER_4):
+ return internal_pack_4 ((gfc_array_i4 *) source);
- case 8:
- return internal_pack_8 ((gfc_array_i8 *)source);
+ case sizeof (GFC_INTEGER_8):
+ return internal_pack_8 ((gfc_array_i8 *) source);
+
+#if defined(HAVE_GFC_INTEGER_16)
+ case sizeof (GFC_INTEGER_16):
+ return internal_pack_16 (gfc_array_i16 *) source);
+#endif
}
break;
+ case GFC_DTYPE_REAL:
+ switch (size)
+ {
+ case sizeof (GFC_REAL_4):
+ return internal_pack_r4 ((gfc_array_r4 *) source);
+
+ case sizeof (GFC_REAL_8):
+ return internal_pack_r8 ((gfc_array_r8 *) source);
+
+#if defined (HAVE_GFC_REAL_10)
+ case sizeof (GFC_REAL_10):
+ return internal_pack_r10 ((gfc_array_r10 *) source);
+#endif
+
+#if defined (HAVE_GFC_REAL_16)
+ case sizeof (GFC_REAL_16):
+ return internal_pack_r16 ((gfc_array_r16 *) source);
+#endif
+ }
case GFC_DTYPE_COMPLEX:
switch (size)
{
- case 8:
- return internal_pack_c4 ((gfc_array_c4 *)source);
+ case sizeof (GFC_COMPLEX_4):
+ return internal_pack_c4 ((gfc_array_c4 *) source);
- case 16:
- return internal_pack_c8 ((gfc_array_c8 *)source);
+ case sizeof (GFC_COMPLEX_8):
+ return internal_pack_c8 ((gfc_array_c8 *) source);
+
+#if defined (HAVE_GFC_COMPLEX_10)
+ case sizeof (GFC_COMPLEX_10):
+ return internal_pack_c10 ((gfc_array_c10 *) source);
+#endif
+
+#if defined (HAVE_GFC_COMPLEX_16)
+ case sizeof (GFC_COMPLEX_16):
+ return internal_pack_c16 ((gfc_array_c16 *) source);
+#endif
+
}
break;
Index: runtime/in_unpack_generic.c
===================================================================
--- runtime/in_unpack_generic.c (revision 133308)
+++ runtime/in_unpack_generic.c (working copy)
@@ -62,29 +62,80 @@ internal_unpack (gfc_array_char * d, con
{
case GFC_DTYPE_INTEGER:
case GFC_DTYPE_LOGICAL:
- case GFC_DTYPE_REAL:
switch (size)
{
- case 4:
- internal_unpack_4 ((gfc_array_i4 *)d, (const GFC_INTEGER_4 *)s);
+ case sizeof (GFC_INTEGER_1):
+ internal_unpack_1 ((gfc_array_i1 *) d, (const GFC_INTEGER_1 *) s);
+ return;
+
+ case sizeof (GFC_INTEGER_2):
+ internal_unpack_2 ((gfc_array_i2 *) d, (const GFC_INTEGER_2 *) s);
+ return;
+
+ case sizeof (GFC_INTEGER_4):
+ internal_unpack_4 ((gfc_array_i4 *) d, (const GFC_INTEGER_4 *) s);
+ return;
+
+ case sizeof (GFC_INTEGER_8):
+ internal_unpack_8 ((gfc_array_i8 *) d, (const GFC_INTEGER_8 *) s);
return;
- case 8:
- internal_unpack_8 ((gfc_array_i8 *)d, (const GFC_INTEGER_8 *)s);
+#if defined (HAVE_GFC_INTEGER_16)
+ case sizeof (GFC_INTEGER_16):
+ internal_unpack_16 ((gfc_array_i16 *) d, (const GFC_INTEGER_16 *) s);
return;
+#endif
}
break;
+ case GFC_DTYPE_REAL:
+ switch (size)
+ {
+ case sizeof (GFC_REAL_4):
+ internal_unpack_r4 ((gfc_array_r4 *) d, (const GFC_REAL_4 *) s);
+ return;
+
+ case sizeof (GFC_REAL_8):
+ internal_unpack_r8 ((gfc_array_r8 *) d, (const GFC_REAL_8 *) s);
+ return;
+
+#if defined(HAVE_GFC_REAL_10)
+ case sizeof (GFC_REAL_10):
+ internal_unpack_r10 ((gfc_array_r10 *) d, (const GFC_REAL_10 *) s);
+ return;
+#endif
+
+#if defined(HAVE_GFC_REAL_16)
+ case sizeof (GFC_REAL_16):
+ internal_unpack_r16 ((gfc_array_r16 *) d, (const GFC_REAL_16 *) s);
+ return;
+#endif
+
+ }
+
case GFC_DTYPE_COMPLEX:
switch (size)
{
- case 8:
+ case sizeof (GFC_COMPLEX_4):
internal_unpack_c4 ((gfc_array_c4 *)d, (const GFC_COMPLEX_4 *)s);
return;
- case 16:
+ case sizeof (GFC_COMPLEX_8):
internal_unpack_c8 ((gfc_array_c8 *)d, (const GFC_COMPLEX_8 *)s);
return;
+
+#if defined(HAVE_GFC_COMPLEX_10)
+ case sizeof (GFC_COMPLEX_10):
+ internal_unpack_c10 ((gfc_array_c10 *) d, (const GFC_COMPLEX_10 *) s);
+ return;
+#endif
+
+#if defined(HAVE_GFC_COMPLEX_16)
+ case sizeof (GFC_COMPLEX_16):
+ internal_unpack_c16 ((gfc_array_c16 *) d, (const GFC_COMPLEX_16 *) s);
+ return;
+#endif
+
}
default:
break;