This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch, libfortran] Speed up / orthogonalize in_pack and in_unpack


Hello world,

now that Real World (TM) time constraints have lessened somewhat, I
finally have some time again to hack on gfortran :-)

Here's a patch that adds a few missing types (kind=1 and 2 integer,
kind=10 and kind=16 complex and real) to the internal routines that are
called by internal_pack and internal_unpack, so we don't have to call
memcpy on these.  It also provides a routine for each of the real types
(I favored orthogonality over library code size for this).

The test cases check that there are no regressions for any
of the types that were touched.

Regression-tested on i686-pc-linux-gnu (although I couldn't test
the large-integer case).  OK?

	Thomas

2008-03-19  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR libfortran/32972
	* Makefile.am (in_pack_c): Add in_pack_i1.c, in_pack_i2.c,
	in_pack_r4.c, in_pack_r8.c, in_pack_r10.c and in_pack_r16.c.
	(in_unpack_c): Add in_unpack_i1.c, in_unpack_i2.c,
	in_unpack_r4.c, in_unpack_r8.c, in_unpack_r10.c and
	in_unpack_r16.c.
	* Makefile.in: Regenerate.
	* libgfortran.h:  Add prototypes for internal_pack_1,
	internal_pack_2, internal_pack_16, internal_pack_r4,
	internal_pack_r8, internal_pack_r10, internal_pack_r16,
	internal_pack_c10 and internal_pack_c16.  Add prototypes for
	internal_unpack_1, internal_unpack_2, internal_unpack_16,
	internal_unpack_r4, internal_unpack_r8, internal_unpack_r10,
	internal_unpack_r16, internal_unpack_c10 and
	internal_unpack_c16.
	* runtime/in_pack_generic.c (internal_pack): Use sizeof instead
	of hardwired sizes.
	Add calls to internal_pack_1, internal_pack_2,
	internal_pack_16, internal_pack_r4, internal_pack_r8,
	internal_pack_r10, internal_pack_r16, internal_pack_c10 and
	internal_pack_c16.
	* runtime/in_unpack_generic.c (internal_unpack):  Use sizeof
	instead of hardwired sizes.
	Add calls to internal_unpack_1, internal_unpack_2,
	internal_unpack_16, internal_unpack_r4, internal_unpack_r8,
	internal_unpack_r10, internal_unpack_r16, internal_unpack_c10
	and internal_unpack_c16.
	* generated/in_pack_r4.c:  New file.
	* generated/in_pack_i2.c:  New file.
	* generated/in_unpack_i1.c:  New file.
	* generated/in_pack_r10.c:  New file.
	* generated/in_unpack_r4.c:  New file.
	* generated/in_unpack_i2.c:  New file.
	* generated/in_unpack_r16.c:  New file.
	* generated/in_pack_r8.c:  New file.
	* generated/in_unpack_r10.c:  New file.
	* generated/in_unpack_r8.c:  New file.
	* generated/in_pack_r16.c:  New file.
	* generated/in_pack_i1.c:  New file.

2008-03-19  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR libfortran/32972
	* gfortran.dg/internal_pack_1.f90:  New test case.
	* gfortran.dg/internal_pack_2.f90:  New test case.
	* gfortran.dg/internal_pack_3.f90:  New test case.


! { dg-do run }
! Test that the internal pack and unpack routines work OK
! for different data types

program main
  integer(kind=1), dimension(3) :: i1
  integer(kind=2), dimension(3) :: i2
  integer(kind=4), dimension(3) :: i4
  integer(kind=8), dimension(3) :: i8
  real(kind=4), dimension(3) :: r4
  real(kind=8), dimension(3) :: r8

  i1 = (/ -1, 1, -3 /)
  call sub_i1(i1(1:3:2))
  if (any(i1 /= (/ 3, 1, 2 /))) call abort

  i2 = (/ -1, 1, -3 /)
  call sub_i2(i2(1:3:2))
  if (any(i2 /= (/ 3, 1, 2 /))) call abort

  i4 = (/ -1, 1, -3 /)
  call sub_i4(i4(1:3:2))
  if (any(i4 /= (/ 3, 1, 2 /))) call abort

  i8 = (/ -1, 1, -3 /)
  call sub_i8(i8(1:3:2))
  if (any(i8 /= (/ 3, 1, 2 /))) call abort

  r4 = (/ -1.0, 1.0, -3.0 /)
  call sub_r4(r4(1:3:2))
  if (any(r4 /= (/ 3.0, 1.0, 2.0/))) call abort

  r8 = (/ -1.0_8, 1.0_8, -3.0_8 /)
  call sub_r8(r8(1:3:2))
  if (any(r8 /= (/ 3.0_8, 1.0_8, 2.0_8/))) call abort

end program main

subroutine sub_i1(i)
  integer(kind=1), dimension(2) :: i
  if (i(1) /= -1) call abort
  if (i(2) /= -3) call abort
  i(1) = 3
  i(2) = 2
end subroutine sub_i1

subroutine sub_i2(i)
  integer(kind=2), dimension(2) :: i
  if (i(1) /= -1) call abort
  if (i(2) /= -3) call abort
  i(1) = 3
  i(2) = 2
end subroutine sub_i2

subroutine sub_i4(i)
  integer(kind=4), dimension(2) :: i
  if (i(1) /= -1) call abort
  if (i(2) /= -3) call abort
  i(1) = 3
  i(2) = 2
end subroutine sub_i4

subroutine sub_i8(i)
  integer(kind=8), dimension(2) :: i
  if (i(1) /= -1) call abort
  if (i(2) /= -3) call abort
  i(1) = 3
  i(2) = 2
end subroutine sub_i8

subroutine sub_r4(r)
  real(kind=4), dimension(2) :: r
  if (r(1) /= -1.) call abort
  if (r(2) /= -3.) call abort
  r(1) = 3.
  r(2) = 2.
end subroutine sub_r4

subroutine sub_r8(r)
  real(kind=8), dimension(2) :: r
  if (r(1) /= -1._8) call abort
  if (r(2) /= -3._8) call abort
  r(1) = 3._8
  r(2) = 2._8
end subroutine sub_r8
! { dg-do run }
! { dg-require-effective-target fortran_large_real }
! Test that the internal pack and unpack routines work OK
! for our large real type.

program main
  implicit none
  integer,parameter :: k = selected_real_kind (precision (0.0_8) + 1)
  real(kind=k), dimension(3) :: rk

  rk = (/ -1.0_k, 1.0_k, -3.0_k /)
  call sub_rk(rk(1:3:2))
  if (any(rk /= (/ 3.0_k, 1.0_k, 2.0_k/))) call abort

end program main

subroutine sub_rk(r)
  implicit none
  integer,parameter :: k = selected_real_kind (precision (0.0_8) + 1)
  real(kind=k), dimension(2) :: r
  if (r(1) /= -1._k) call abort
  if (r(2) /= -3._k) call abort
  r(1) = 3._k
  r(2) = 2._k
end subroutine sub_rk
! { dg-do run }
! { dg-require-effective-target fortran_large_int }
! Test that the internal pack and unpack routines work OK
! for our large integer type.

program main
  integer,parameter :: k = selected_int_kind (range (0_8) + 1)
  integer(kind=k), dimension(3) :: ik

  ik = (/ -1, 1, -3 /)
  call sub_ik(ik(1:3:2))
  if (any(ik /= (/ 3, 1, 2 /))) call abort
end program main

subroutine sub_ik(i)
  integer(kind=k), dimension(2) :: i
  if (i(1) /= -1) call abort
  if (i(2) /= -3) call abort
  i(1) = 3
  i(2) = 2
end subroutine sub_ik
Index: Makefile.am
===================================================================
--- Makefile.am	(revision 133308)
+++ Makefile.am	(working copy)
@@ -380,18 +380,30 @@ $(srcdir)/generated/cshift1_8.c \
 $(srcdir)/generated/cshift1_16.c
 
 in_pack_c = \
+$(srcdir)/generated/in_pack_i1.c \
+$(srcdir)/generated/in_pack_i2.c \
 $(srcdir)/generated/in_pack_i4.c \
 $(srcdir)/generated/in_pack_i8.c \
 $(srcdir)/generated/in_pack_i16.c \
+$(srcdir)/generated/in_pack_r4.c \
+$(srcdir)/generated/in_pack_r8.c \
+$(srcdir)/generated/in_pack_r10.c \
+$(srcdir)/generated/in_pack_r16.c \
 $(srcdir)/generated/in_pack_c4.c \
 $(srcdir)/generated/in_pack_c8.c \
 $(srcdir)/generated/in_pack_c10.c \
 $(srcdir)/generated/in_pack_c16.c
 
 in_unpack_c = \
+$(srcdir)/generated/in_unpack_i1.c \
+$(srcdir)/generated/in_unpack_i2.c \
 $(srcdir)/generated/in_unpack_i4.c \
 $(srcdir)/generated/in_unpack_i8.c \
 $(srcdir)/generated/in_unpack_i16.c \
+$(srcdir)/generated/in_unpack_r4.c \
+$(srcdir)/generated/in_unpack_r8.c \
+$(srcdir)/generated/in_unpack_r10.c \
+$(srcdir)/generated/in_unpack_r16.c \
 $(srcdir)/generated/in_unpack_c4.c \
 $(srcdir)/generated/in_unpack_c8.c \
 $(srcdir)/generated/in_unpack_c10.c \
Index: libgfortran.h
===================================================================
--- libgfortran.h	(revision 133308)
+++ libgfortran.h	(working copy)
@@ -609,10 +609,15 @@ extern void reshape_packed (char *, inde
 			    const char *, index_type);
 internal_proto(reshape_packed);
 
-/* Repacking functions.  */
+/* Repacking functions.  These are called internally by internal_pack
+   and internal_unpack.  */
+
+GFC_INTEGER_1 *internal_pack_1 (gfc_array_i1 *);
+internal_proto(internal_pack_1);
+
+GFC_INTEGER_2 *internal_pack_2 (gfc_array_i2 *);
+internal_proto(internal_pack_2);
 
-/* ??? These aren't currently used by the compiler, though we
-   certainly could do so.  */
 GFC_INTEGER_4 *internal_pack_4 (gfc_array_i4 *);
 internal_proto(internal_pack_4);
 
@@ -624,6 +629,22 @@ GFC_INTEGER_16 *internal_pack_16 (gfc_ar
 internal_proto(internal_pack_16);
 #endif
 
+GFC_REAL_4 *internal_pack_r4 (gfc_array_r4 *);
+internal_proto(internal_pack_r4);
+
+GFC_REAL_8 *internal_pack_r8 (gfc_array_r8 *);
+internal_proto(internal_pack_r8);
+
+#if defined HAVE_GFC_REAL_10
+GFC_REAL_10 *internal_pack_r10 (gfc_array_r10 *);
+internal_proto(internal_pack_r10);
+#endif
+
+#if defined HAVE_GFC_REAL_16
+GFC_REAL_16 *internal_pack_r16 (gfc_array_r16 *);
+internal_proto(internal_pack_r16);
+#endif
+
 GFC_COMPLEX_4 *internal_pack_c4 (gfc_array_c4 *);
 internal_proto(internal_pack_c4);
 
@@ -635,6 +656,17 @@ GFC_COMPLEX_10 *internal_pack_c10 (gfc_a
 internal_proto(internal_pack_c10);
 #endif
 
+#if defined HAVE_GFC_COMPLEX_16
+GFC_COMPLEX_16 *internal_pack_c16 (gfc_array_c16 *);
+internal_proto(internal_pack_c16);
+#endif
+
+extern void internal_unpack_1 (gfc_array_i1 *, const GFC_INTEGER_1 *);
+internal_proto(internal_unpack_1);
+
+extern void internal_unpack_2 (gfc_array_i2 *, const GFC_INTEGER_2 *);
+internal_proto(internal_unpack_2);
+
 extern void internal_unpack_4 (gfc_array_i4 *, const GFC_INTEGER_4 *);
 internal_proto(internal_unpack_4);
 
@@ -646,6 +678,22 @@ extern void internal_unpack_16 (gfc_arra
 internal_proto(internal_unpack_16);
 #endif
 
+extern void internal_unpack_r4 (gfc_array_r4 *, const GFC_REAL_4 *);
+internal_proto(internal_unpack_r4);
+
+extern void internal_unpack_r8 (gfc_array_r8 *, const GFC_REAL_8 *);
+internal_proto(internal_unpack_r8);
+
+#if defined HAVE_GFC_REAL_10
+extern void internal_unpack_r10 (gfc_array_r10 *, const GFC_REAL_10 *);
+internal_proto(internal_unpack_r10);
+#endif
+
+#if defined HAVE_GFC_REAL_16
+extern void internal_unpack_r16 (gfc_array_r16 *, const GFC_REAL_16 *);
+internal_proto(internal_unpack_r16);
+#endif
+
 extern void internal_unpack_c4 (gfc_array_c4 *, const GFC_COMPLEX_4 *);
 internal_proto(internal_unpack_c4);
 
Index: runtime/in_pack_generic.c
===================================================================
--- runtime/in_pack_generic.c	(revision 133308)
+++ runtime/in_pack_generic.c	(working copy)
@@ -65,25 +65,65 @@ internal_pack (gfc_array_char * source)
     {
     case GFC_DTYPE_INTEGER:
     case GFC_DTYPE_LOGICAL:
-    case GFC_DTYPE_REAL:
       switch (size)
 	{
-	case 4:
-	  return internal_pack_4 ((gfc_array_i4 *)source);
+	case sizeof (GFC_INTEGER_1):
+	  return internal_pack_1 ((gfc_array_i1 *) source);
+
+	case sizeof (GFC_INTEGER_2):
+	  return internal_pack_2 ((gfc_array_i2 *) source);
+
+	case sizeof (GFC_INTEGER_4):
+	  return internal_pack_4 ((gfc_array_i4 *) source);
 	  
-	case 8:
-	  return internal_pack_8 ((gfc_array_i8 *)source);
+	case sizeof (GFC_INTEGER_8):
+	  return internal_pack_8 ((gfc_array_i8 *) source);
+
+#if defined(HAVE_GFC_INTEGER_16)
+	case sizeof (GFC_INTEGER_16):
+	  return internal_pack_16 (gfc_array_i16 *) source);
+#endif
 	}
       break;
 
+    case GFC_DTYPE_REAL:
+      switch (size)
+	{
+	case sizeof (GFC_REAL_4):
+	  return internal_pack_r4 ((gfc_array_r4 *) source);
+
+	case sizeof (GFC_REAL_8):
+	  return internal_pack_r8 ((gfc_array_r8 *) source);
+
+#if defined (HAVE_GFC_REAL_10)
+	case sizeof (GFC_REAL_10):
+	  return internal_pack_r10 ((gfc_array_r10 *) source);
+#endif
+
+#if defined (HAVE_GFC_REAL_16)
+	case sizeof (GFC_REAL_16):
+	  return internal_pack_r16 ((gfc_array_r16 *) source);
+#endif
+	}
     case GFC_DTYPE_COMPLEX:
       switch (size)
 	{
-	case 8:
-	  return internal_pack_c4 ((gfc_array_c4 *)source);
+	case sizeof (GFC_COMPLEX_4):
+	  return internal_pack_c4 ((gfc_array_c4 *) source);
 	  
-	case 16:
-	  return internal_pack_c8 ((gfc_array_c8 *)source);
+	case sizeof (GFC_COMPLEX_8):
+	  return internal_pack_c8 ((gfc_array_c8 *) source);
+
+#if defined (HAVE_GFC_COMPLEX_10)
+	case sizeof (GFC_COMPLEX_10):
+	  return internal_pack_c10 ((gfc_array_c10 *) source);
+#endif
+
+#if defined (HAVE_GFC_COMPLEX_16)
+	case sizeof (GFC_COMPLEX_16):
+	  return internal_pack_c16 ((gfc_array_c16 *) source);
+#endif
+
 	}
       break;
 
Index: runtime/in_unpack_generic.c
===================================================================
--- runtime/in_unpack_generic.c	(revision 133308)
+++ runtime/in_unpack_generic.c	(working copy)
@@ -62,29 +62,80 @@ internal_unpack (gfc_array_char * d, con
     {
     case GFC_DTYPE_INTEGER:
     case GFC_DTYPE_LOGICAL:
-    case GFC_DTYPE_REAL:
       switch (size)
 	{
-	case 4:
-	  internal_unpack_4 ((gfc_array_i4 *)d, (const GFC_INTEGER_4 *)s);
+	case sizeof (GFC_INTEGER_1):
+	  internal_unpack_1 ((gfc_array_i1 *) d, (const GFC_INTEGER_1 *) s);
+	  return;
+
+	case sizeof (GFC_INTEGER_2):
+	  internal_unpack_2 ((gfc_array_i2 *) d, (const GFC_INTEGER_2 *) s);
+	  return;
+
+	case sizeof (GFC_INTEGER_4):
+	  internal_unpack_4 ((gfc_array_i4 *) d, (const GFC_INTEGER_4 *) s);
+	  return;
+
+	case sizeof (GFC_INTEGER_8):
+	  internal_unpack_8 ((gfc_array_i8 *) d, (const GFC_INTEGER_8 *) s);
 	  return;
 
-	case 8:
-	  internal_unpack_8 ((gfc_array_i8 *)d, (const GFC_INTEGER_8 *)s);
+#if defined (HAVE_GFC_INTEGER_16)
+	case sizeof (GFC_INTEGER_16):
+	  internal_unpack_16 ((gfc_array_i16 *) d, (const GFC_INTEGER_16 *) s);
 	  return;
+#endif
 	}
       break;
 
+    case GFC_DTYPE_REAL:
+      switch (size)
+	{
+	case sizeof (GFC_REAL_4):
+	  internal_unpack_r4 ((gfc_array_r4 *) d, (const GFC_REAL_4 *) s);
+	  return;
+
+	case sizeof (GFC_REAL_8):
+	  internal_unpack_r8 ((gfc_array_r8 *) d, (const GFC_REAL_8 *) s);
+	  return;
+
+#if defined(HAVE_GFC_REAL_10)
+	case sizeof (GFC_REAL_10):
+	  internal_unpack_r10 ((gfc_array_r10 *) d, (const GFC_REAL_10 *) s);
+	  return;
+#endif
+
+#if defined(HAVE_GFC_REAL_16)
+	case sizeof (GFC_REAL_16):
+	  internal_unpack_r16 ((gfc_array_r16 *) d, (const GFC_REAL_16 *) s);
+	  return;
+#endif
+
+	}
+
     case GFC_DTYPE_COMPLEX:
       switch (size) 
 	{
-	case 8:
+	case sizeof (GFC_COMPLEX_4):
 	  internal_unpack_c4 ((gfc_array_c4 *)d, (const GFC_COMPLEX_4 *)s);
 	  return;
 
-	case 16:
+	case sizeof (GFC_COMPLEX_8):
 	  internal_unpack_c8 ((gfc_array_c8 *)d, (const GFC_COMPLEX_8 *)s);
 	  return;
+
+#if defined(HAVE_GFC_COMPLEX_10)
+	case sizeof (GFC_COMPLEX_10):
+	  internal_unpack_c10 ((gfc_array_c10 *) d, (const GFC_COMPLEX_10 *) s);
+	  return;
+#endif
+
+#if defined(HAVE_GFC_COMPLEX_16)
+	case sizeof (GFC_COMPLEX_16):
+	  internal_unpack_c16 ((gfc_array_c16 *) d, (const GFC_COMPLEX_16 *) s);
+	  return;
+#endif
+
 	}
     default:
       break;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]