This is the mail archive of the fortran@gcc.gnu.org mailing list for the GNU Fortran project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

patch, libfortran: Speedup for unpack intrinsic


Hello world,

here is a speedup patch for the unpack intrinsic, with
a speedup that's quite OK:

$ cat unpack-speed.f90 
program main
  integer, parameter:: n = 100
  integer :: i
  real, dimension(n*n) :: vect
  real, dimension(n,n) :: a,b
  logical(kind=1), dimension(n,n) :: mask
  call random_number(a)
  mask = a > 0.5
  call random_number(b)
  call random_number(vect)
  do i=1,10000
  a = unpack(vect,mask,0.)
  end do
end program main
$ gfortran -static -O3 unpack-speed.f90 
$ time ./a.out

real	0m1.064s
user	0m1.048s
sys	0m0.000s
$ gfortran-4.3 -static -O3 unpack-speed.f90 
$ time ./a.out

real	0m2.772s
user	0m2.760s
sys	0m0.000s

Regression-tested on i686-pc-linux-gnu.  I'll ask Dominique in a
separate E-Mail to cross-check this for me (although I did try
to be more careful this time :-)

OK once this has passed cross-check?

	Thomas

2007-03-23  Thomas Koenig  <tkoenig@gcc.gnu.org

	PR libfortran/32972
	* Makefile.am:  Add new variable, i_unpack_c, containing
	unpack_i1.c, unpack_i2.c, unpack_i4.c, unpack_i8.c,
	unpack_i16.c, unpack_r4.c, unpack_r8.c, unpack_r10.c,
	unpack_r16.c, unpack_c4.c, unpack_c8.c, unpack_c10.c
	and unpack_c16.c
	Add i_unpack_c to gfor_built_src.
	Add rule to generate i_unpack_c from m4/unpack.m4.
	* Makefile.in:  Regenerated.
	* libgfortran.h: Add prototypes for unpack0_i1, unpack0_i2,
	unpack0_i4, unpack0_i8, unpack0_i16, unpack0_r4, unpack0_r8,
	unpack0_r10, unpack0_r16, unpack0_c4, unpack0_c8, unpack0_c10,
	unpack0_c16, unpack1_i1, unpack1_i2, unpack1_i4, unpack1_i8,
	unpack1_i16, unpack1_r4, unpack1_r8, unpack1_r10, unpack1_r16,
	unpack1_c4, unpack1_c8, unpack1_c10 and unpack1_c16.
	* intrinsics/pack_generic.c (unpack1):  Add calls to specific
	unpack1 functions.
	(unpack0):  Add calls to specific unpack0 functions.
	* m4/unpack.m4:  New file.
	* generated/unpack_i1.c:  New file.
	* generated/unpack_i2.c:  New file.
	* generated/unpack_i4.c:  New file.
	* generated/unpack_i8.c:  New file.
	* generated/unpack_i16.c:  New file.
	* generated/unpack_r4.c:  New file.
	* generated/unpack_r8.c:  New file.
	* generated/unpack_r10.c:  New file.
	* generated/unpack_r16.c:  New file.
	* generated/unpack_c4.c:  New file.
	* generated/unpack_c8.c:  New file.
	* generated/unpack_c10.c:  New file.
	* generated/unpack_c16.c:  New file.

2007-03-23  Thomas Koenig  <tkoenig@gcc.gnu.org

	PR libfortran/32972
	* gfortran.dg/intrinsic_unpack_1.f90:  New test case.
	* gfortran.dg/intrinsic_unpack_2.f90:  New test case.
	* gfortran.dg/intrinsic_unpack_3.f90:  New test case.

! { dg-do run }
! Program to test the UNPACK intrinsic for the types usually present.
program intrinsic_unpack
   implicit none
   integer(kind=1), dimension(3, 3) :: a1, b1
   integer(kind=2), dimension(3, 3) :: a2, b2
   integer(kind=4), dimension(3, 3) :: a4, b4
   integer(kind=8), dimension(3, 3) :: a8, b8
   real(kind=4), dimension(3,3) :: ar4, br4
   real(kind=8), dimension(3,3) :: ar8, br8
   logical, dimension(3, 3) :: mask
   character(len=100) line1, line2
   integer i

   mask = reshape ((/.false.,.true.,.false.,.true.,.false.,.false.,&
                    &.false.,.false.,.true./), (/3, 3/));
   a1 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
   b1 = unpack ((/2_1, 3_1, 4_1/), mask, a1)
   if (any (b1 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort
   write (line1,'(10I4)') b1
   write (line2,'(10I4)') unpack((/2_1, 3_1, 4_1/), mask, a1)
   if (line1 .ne. line2) call abort
   b1 = -1
   b1 = unpack ((/2_1, 3_1, 4_1/), mask, 0_1)
   if (any (b1 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort

   a2 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
   b2 = unpack ((/2_2, 3_2, 4_2/), mask, a2)
   if (any (b2 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort
   write (line1,'(10I4)') b2
   write (line2,'(10I4)') unpack((/2_2, 3_2, 4_2/), mask, a2)
   if (line1 .ne. line2) call abort
   b2 = -1
   b2 = unpack ((/2_2, 3_2, 4_2/), mask, 0_2)
   if (any (b2 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort

   a4 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
   b4 = unpack ((/2_4, 3_4, 4_4/), mask, a4)
   if (any (b4 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort
   write (line1,'(10I4)') b4
   write (line2,'(10I4)') unpack((/2_4, 3_4, 4_4/), mask, a4)
   if (line1 .ne. line2) call abort
   b4 = -1
   b4 = unpack ((/2_4, 3_4, 4_4/), mask, 0_4)
   if (any (b4 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort

   a8 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
   b8 = unpack ((/2_8, 3_8, 4_8/), mask, a8)
   if (any (b8 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort
   write (line1,'(10I4)') b8
   write (line2,'(10I4)') unpack((/2_8, 3_8, 4_8/), mask, a8)
   if (line1 .ne. line2) call abort
   b8 = -1
   b8 = unpack ((/2_8, 3_8, 4_8/), mask, 0_8)
   if (any (b8 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort

   ar4 = reshape ((/1._4, 0._4, 0._4, 0._4, 1._4, 0._4, 0._4, 0._4, 1._4/), &
         (/3, 3/));
   br4 = unpack ((/2._4, 3._4, 4._4/), mask, ar4)
   if (any (br4 .ne. reshape ((/1._4, 2._4, 0._4, 3._4, 1._4, 0._4, &
                               0._4, 0._4, 4._4/), (/3, 3/)))) &
      call abort
   write (line1,'(9F9.5)') br4
   write (line2,'(9F9.5)') unpack((/2._4, 3._4, 4._4/), mask, ar4)
   if (line1 .ne. line2) call abort
   br4 = -1._4
   br4 = unpack ((/2._4, 3._4, 4._4/), mask, 0._4)
   if (any (br4 .ne. reshape ((/0._4, 2._4, 0._4, 3._4, 0._4, 0._4, &
      0._4, 0._4, 4._4/), (/3, 3/)))) &
      call abort

   ar8 = reshape ((/1._8, 0._8, 0._8, 0._8, 1._8, 0._8, 0._8, 0._8, 1._8/), &
         (/3, 3/));
   br8 = unpack ((/2._8, 3._8, 4._8/), mask, ar8)
   if (any (br8 .ne. reshape ((/1._8, 2._8, 0._8, 3._8, 1._8, 0._8, &
                               0._8, 0._8, 4._8/), (/3, 3/)))) &
      call abort
   write (line1,'(9F9.5)') br8
   write (line2,'(9F9.5)') unpack((/2._8, 3._8, 4._8/), mask, ar8)
   if (line1 .ne. line2) call abort
   br8 = -1._8
   br8 = unpack ((/2._8, 3._8, 4._8/), mask, 0._8)
   if (any (br8 .ne. reshape ((/0._8, 2._8, 0._8, 3._8, 0._8, 0._8, &
      0._8, 0._8, 4._8/), (/3, 3/)))) &
      call abort

end program
! { dg-do run }
! { dg-require-effective-target fortran_large_real }
! Program to test the UNPACK intrinsic for large real type
program intrinsic_unpack
   implicit none
   integer,parameter :: k = selected_real_kind (precision (0.0_8) + 1)

   real(kind=k), dimension(3,3) :: ark, brk
   logical, dimension(3, 3) :: mask
   character(len=100) line1, line2
   integer i

   mask = reshape ((/.false.,.true.,.false.,.true.,.false.,.false.,&
                    &.false.,.false.,.true./), (/3, 3/));

   ark = reshape ((/1._k, 0._k, 0._k, 0._k, 1._k, 0._k, 0._k, 0._k, 1._k/), &
         (/3, 3/));
   brk = unpack ((/2._k, 3._k, 4._k/), mask, ark)
   if (any (brk .ne. reshape ((/1._k, 2._k, 0._k, 3._k, 1._k, 0._k, &
                               0._k, 0._k, 4._k/), (/3, 3/)))) &
      call abort
   write (line1,'(9F9.5)') brk
   write (line2,'(9F9.5)') unpack((/2._k, 3._k, 4._k/), mask, ark)
   if (line1 .ne. line2) call abort
   brk = -1._k
   brk = unpack ((/2._k, 3._k, 4._k/), mask, 0._k)
   if (any (brk .ne. reshape ((/0._k, 2._k, 0._k, 3._k, 0._k, 0._k, &
      0._k, 0._k, 4._k/), (/3, 3/)))) &
      call abort

end program
! { dg-do run }
! { dg-require-effective-target fortran_large_int }
! Program to test the UNPACK intrinsic for a long integer type
program intrinsic_unpack
   implicit none
   integer,parameter :: k = selected_int_kind (range (0_8) + 1)
   integer(kind=k), dimension(3, 3) :: ak, bk
   logical, dimension(3, 3) :: mask
   character(len=100) line1, line2
   integer i

   mask = reshape ((/.false.,.true.,.false.,.true.,.false.,.false.,&
                    &.false.,.false.,.true./), (/3, 3/));

   ak = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
   bk = unpack ((/2_k, 3_k, 4_k/), mask, ak)
   if (any (bk .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort
   write (line1,'(10I4)') bk
   write (line2,'(10I4)') unpack((/2_k, 3_k, 4_k/), mask, ak)
   if (line1 .ne. line2) call abort
   bk = -1
   bk = unpack ((/2_k, 3_k, 4_k/), mask, 0_k)
   if (any (bk .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
      call abort

end program
Index: libgfortran.h
===================================================================
--- libgfortran.h	(revision 133427)
+++ libgfortran.h	(working copy)
@@ -774,6 +774,142 @@ extern void pack_c16 (gfc_array_c16 *, c
 internal_proto(pack_c16);
 #endif
 
+/* Internal auxiliary functions for the unpack intrinsic.  */
+
+extern void unpack0_i1 (gfc_array_i1 *, const gfc_array_i1 *,
+			const gfc_array_l1 *, const GFC_INTEGER_1 *);
+internal_proto(unpack0_i1);
+
+extern void unpack0_i2 (gfc_array_i2 *, const gfc_array_i2 *,
+			const gfc_array_l1 *, const GFC_INTEGER_2 *);
+internal_proto(unpack0_i2);
+
+extern void unpack0_i4 (gfc_array_i4 *, const gfc_array_i4 *,
+			const gfc_array_l1 *, const GFC_INTEGER_4 *);
+internal_proto(unpack0_i4);
+
+extern void unpack0_i8 (gfc_array_i8 *, const gfc_array_i8 *,
+			const gfc_array_l1 *, const GFC_INTEGER_8 *);
+internal_proto(unpack0_i8);
+
+#ifdef HAVE_GFC_INTEGER_16
+
+extern void unpack0_i16 (gfc_array_i16 *, const gfc_array_i16 *,
+			 const gfc_array_l1 *, const GFC_INTEGER_16 *);
+internal_proto(unpack0_i16);
+
+#endif
+
+extern void unpack0_r4 (gfc_array_r4 *, const gfc_array_r4 *,
+			const gfc_array_l1 *, const GFC_REAL_4 *);
+internal_proto(unpack0_r4);
+
+extern void unpack0_r8 (gfc_array_r8 *, const gfc_array_r8 *,
+			const gfc_array_l1 *, const GFC_REAL_8 *);
+internal_proto(unpack0_r8);
+
+#ifdef HAVE_GFC_REAL_10
+
+extern void unpack0_r10 (gfc_array_r10 *, const gfc_array_r10 *,
+			 const gfc_array_l1 *, const GFC_REAL_10 *);
+internal_proto(unpack0_r10);
+
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+
+extern void unpack0_r16 (gfc_array_r16 *, const gfc_array_r16 *,
+			 const gfc_array_l1 *, const GFC_REAL_16 *);
+internal_proto(unpack0_r16);
+
+#endif
+
+extern void unpack0_c4 (gfc_array_c4 *, const gfc_array_c4 *,
+			const gfc_array_l1 *, const GFC_COMPLEX_4 *);
+internal_proto(unpack0_c4);
+
+extern void unpack0_c8 (gfc_array_c8 *, const gfc_array_c8 *,
+			const gfc_array_l1 *, const GFC_COMPLEX_8 *);
+internal_proto(unpack0_c8);
+
+#ifdef HAVE_GFC_COMPLEX_10
+
+extern void unpack0_c10 (gfc_array_c10 *, const gfc_array_c10 *,
+			 const gfc_array_l1 *mask, const GFC_COMPLEX_10 *);
+internal_proto(unpack0_c10);
+
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+
+extern void unpack0_c16 (gfc_array_c16 *, const gfc_array_c16 *,
+			 const gfc_array_l1 *, const GFC_COMPLEX_16 *;
+internal_proto(unpack0_c16);
+
+#endif
+
+extern void unpack1_i1 (gfc_array_i1 *, const gfc_array_i1 *,
+			const gfc_array_l1 *, const gfc_array_i1 *);
+internal_proto(unpack1_i1);
+
+extern void unpack1_i2 (gfc_array_i2 *, const gfc_array_i2 *,
+			const gfc_array_l1 *, const gfc_array_i2 *);
+internal_proto(unpack1_i2);
+
+extern void unpack1_i4 (gfc_array_i4 *, const gfc_array_i4 *,
+			const gfc_array_l1 *, const gfc_array_i4 *);
+internal_proto(unpack1_i4);
+
+extern void unpack1_i8 (gfc_array_i8 *, const gfc_array_i8 *,
+			const gfc_array_l1 *, const gfc_array_i8 *);
+internal_proto(unpack1_i8);
+
+#ifdef HAVE_GFC_INTEGER_16
+extern void unpack1_i16 (gfc_array_i16 *, const gfc_array_i16 *,
+			 const gfc_array_l1 *, const gfc_array_i16 *);
+internal_proto(unpack1_i16);
+#endif
+
+extern void unpack1_r4 (gfc_array_r4 *, const gfc_array_r4 *,
+			const gfc_array_l1 *, const gfc_array_r4 *);
+internal_proto(unpack1_r4);
+
+extern void unpack1_r8 (gfc_array_r8 *, const gfc_array_r8 *,
+			const gfc_array_l1 *, const gfc_array_r8 *);
+internal_proto(unpack1_r8);
+
+#ifdef HAVE_REAL_10
+extern void unpack1_r10 (gfc_array_r10 *, const gfc_array_r10 *,
+			 const gfc_array_l1 *, const gfc_array_r10 *);
+internal_proto(unpack1_r10);
+#endif
+
+#ifdef HAVE_REAL_16
+extern void unpack1_r16 (gfc_array_r16 *, const gfc_array_r16 *,
+			 const gfc_array_l1 *, const gfc_array_r16 *);
+internal_proto(unpack1_r16);
+#endif
+
+extern void unpack1_c4 (gfc_array_c4 *, const gfc_array_c4 *,
+			const gfc_array_l1 *, const gfc_array_c4 *);
+internal_proto(unpack1_c4);
+
+extern void unpack1_c8 (gfc_array_c8 *, const gfc_array_c8 *,
+			const gfc_array_l1 *, const gfc_array_c8 *);
+internal_proto(unpack1_c8);
+
+#ifdef HAVE_COMPLEX_10
+extern void unpack1_c10 (gfc_array_c10 *, const gfc_array_c10 *,
+			 const gfc_array_l1 *, const gfc_array_c10 *);
+internal_proto(unpack1_c10);
+#endif
+
+#ifdef HAVE_COMPLEX_16
+extern void unpack1_c16 (gfc_array_c16 *, const gfc_array_c16 *,
+			 const gfc_array_l1 *, const gfc_array_c16 *);
+internal_proto(unpack1_c16);
+#endif
+
 /* string_intrinsics.c */
 
 extern int compare_string (GFC_INTEGER_4, const char *,
Index: m4/unpack.m4
===================================================================
--- m4/unpack.m4	(revision 0)
+++ m4/unpack.m4	(revision 0)
@@ -0,0 +1,339 @@
+`/* Specific implementation of the UNPACK intrinsic
+   Copyright 2008 Free Software Foundation, Inc.
+   Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on
+   unpack_generic.c by Paul Brook <paul@nowt.org>.
+
+This file is part of the GNU Fortran 95 runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+Ligbfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public
+License along with libgfortran; see the file COPYING.  If not,
+write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA.  */
+
+#include "libgfortran.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>'
+
+include(iparm.m4)dnl
+
+`#if defined (HAVE_'rtype_name`)
+
+void
+unpack0_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
+		 const gfc_array_l1 *mask, const 'rtype_name` *fptr)
+{
+  /* r.* indicates the return array.  */
+  index_type rstride[GFC_MAX_DIMENSIONS];
+  index_type rstride0;
+  index_type rs;
+  'rtype_name` *rptr;
+  /* v.* indicates the vector array.  */
+  index_type vstride0;
+  'rtype_name` *vptr;
+  /* Value for field, this is constant.  */
+  const 'rtype_name` fval = *fptr;
+  /* m.* indicates the mask array.  */
+  index_type mstride[GFC_MAX_DIMENSIONS];
+  index_type mstride0;
+  const GFC_LOGICAL_1 *mptr;
+
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  index_type n;
+  index_type dim;
+
+  int empty;
+  int mask_kind;
+
+  empty = 0;
+
+  mptr = mask->data;
+
+  /* Use the same loop for all logical types, by using GFC_LOGICAL_1
+     and using shifting to address size and endian issues.  */
+
+  mask_kind = GFC_DESCRIPTOR_SIZE (mask);
+
+  if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
+#ifdef HAVE_GFC_LOGICAL_16
+      || mask_kind == 16
+#endif
+      )
+    {
+      /*  Do not convert a NULL pointer as we use test for NULL below.  */
+      if (mptr)
+	mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind);
+    }
+  else
+    runtime_error ("Funny sized logical array");
+
+  if (ret->data == NULL)
+    {
+      /* The front end has signalled that we need to populate the
+	 return array descriptor.  */
+      dim = GFC_DESCRIPTOR_RANK (mask);
+      rs = 1;
+      for (n = 0; n < dim; n++)
+	{
+	  count[n] = 0;
+	  ret->dim[n].stride = rs;
+	  ret->dim[n].lbound = 0;
+	  ret->dim[n].ubound = mask->dim[n].ubound - mask->dim[n].lbound;
+	  extent[n] = ret->dim[n].ubound + 1;
+	  empty = empty || extent[n] <= 0;
+	  rstride[n] = ret->dim[n].stride;
+	  mstride[n] = mask->dim[n].stride * mask_kind;
+	  rs *= extent[n];
+	}
+      ret->offset = 0;
+      ret->data = internal_malloc_size (rs * sizeof ('rtype_name`));
+    }
+  else
+    {
+      dim = GFC_DESCRIPTOR_RANK (ret);
+      for (n = 0; n < dim; n++)
+	{
+	  count[n] = 0;
+	  extent[n] = ret->dim[n].ubound + 1 - ret->dim[n].lbound;
+	  empty = empty || extent[n] <= 0;
+	  rstride[n] = ret->dim[n].stride;
+	  mstride[n] = mask->dim[n].stride * mask_kind;
+	}
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+    }
+
+  if (empty)
+    return;
+
+  if (mstride[0] == 0)
+    mstride[0] = 1;
+
+  vstride0 = vector->dim[0].stride;
+  if (vstride0 == 0)
+    vstride0 = 1;
+  rstride0 = rstride[0];
+  mstride0 = mstride[0];
+  rptr = ret->data;
+  vptr = vector->data;
+
+  while (rptr)
+    {
+      if (*mptr)
+        {
+	  /* From vector.  */
+	  *rptr = *vptr;
+	  vptr += vstride0;
+        }
+      else
+        {
+	  /* From field.  */
+	  *rptr = fval;
+        }
+      /* Advance to the next element.  */
+      rptr += rstride0;
+      mptr += mstride0;
+      count[0]++;
+      n = 0;
+      while (count[n] == extent[n])
+        {
+          /* When we get to the end of a dimension, reset it and increment
+             the next dimension.  */
+          count[n] = 0;
+          /* We could precalculate these products, but this is a less
+             frequently used path so probably not worth it.  */
+          rptr -= rstride[n] * extent[n];
+          mptr -= mstride[n] * extent[n];
+          n++;
+          if (n >= dim)
+            {
+              /* Break out of the loop.  */
+              rptr = NULL;
+              break;
+            }
+          else
+            {
+              count[n]++;
+              rptr += rstride[n];
+              mptr += mstride[n];
+            }
+        }
+    }
+}
+
+void
+unpack1_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
+		 const gfc_array_l1 *mask, const 'rtype` *field)
+{
+  /* r.* indicates the return array.  */
+  index_type rstride[GFC_MAX_DIMENSIONS];
+  index_type rstride0;
+  index_type rs;
+  'rtype_name` *rptr;
+  /* v.* indicates the vector array.  */
+  index_type vstride0;
+  'rtype_name` *vptr;
+  /* f.* indicates the field array.  */
+  index_type fstride[GFC_MAX_DIMENSIONS];
+  index_type fstride0;
+  const 'rtype_name` *fptr;
+  /* m.* indicates the mask array.  */
+  index_type mstride[GFC_MAX_DIMENSIONS];
+  index_type mstride0;
+  const GFC_LOGICAL_1 *mptr;
+
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  index_type n;
+  index_type dim;
+
+  int empty;
+  int mask_kind;
+
+  empty = 0;
+
+  mptr = mask->data;
+
+  /* Use the same loop for all logical types, by using GFC_LOGICAL_1
+     and using shifting to address size and endian issues.  */
+
+  mask_kind = GFC_DESCRIPTOR_SIZE (mask);
+
+  if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
+#ifdef HAVE_GFC_LOGICAL_16
+      || mask_kind == 16
+#endif
+      )
+    {
+      /*  Do not convert a NULL pointer as we use test for NULL below.  */
+      if (mptr)
+	mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind);
+    }
+  else
+    runtime_error ("Funny sized logical array");
+
+  if (ret->data == NULL)
+    {
+      /* The front end has signalled that we need to populate the
+	 return array descriptor.  */
+      dim = GFC_DESCRIPTOR_RANK (mask);
+      rs = 1;
+      for (n = 0; n < dim; n++)
+	{
+	  count[n] = 0;
+	  ret->dim[n].stride = rs;
+	  ret->dim[n].lbound = 0;
+	  ret->dim[n].ubound = mask->dim[n].ubound - mask->dim[n].lbound;
+	  extent[n] = ret->dim[n].ubound + 1;
+	  empty = empty || extent[n] <= 0;
+	  rstride[n] = ret->dim[n].stride;
+	  fstride[n] = field->dim[n].stride;
+	  mstride[n] = mask->dim[n].stride * mask_kind;
+	  rs *= extent[n];
+	}
+      ret->offset = 0;
+      ret->data = internal_malloc_size (rs * sizeof ('rtype_name`));
+    }
+  else
+    {
+      dim = GFC_DESCRIPTOR_RANK (ret);
+      for (n = 0; n < dim; n++)
+	{
+	  count[n] = 0;
+	  extent[n] = ret->dim[n].ubound + 1 - ret->dim[n].lbound;
+	  empty = empty || extent[n] <= 0;
+	  rstride[n] = ret->dim[n].stride;
+	  fstride[n] = field->dim[n].stride;
+	  mstride[n] = mask->dim[n].stride * mask_kind;
+	}
+      if (rstride[0] == 0)
+	rstride[0] = 1;
+    }
+
+  if (empty)
+    return;
+
+  if (fstride[0] == 0)
+    fstride[0] = 1;
+  if (mstride[0] == 0)
+    mstride[0] = 1;
+
+  vstride0 = vector->dim[0].stride;
+  if (vstride0 == 0)
+    vstride0 = 1;
+  rstride0 = rstride[0];
+  fstride0 = fstride[0];
+  mstride0 = mstride[0];
+  rptr = ret->data;
+  fptr = field->data;
+  vptr = vector->data;
+
+  while (rptr)
+    {
+      if (*mptr)
+        {
+          /* From vector.  */
+	  *rptr = *vptr;
+          vptr += vstride0;
+        }
+      else
+        {
+          /* From field.  */
+	  *rptr = *fptr;
+        }
+      /* Advance to the next element.  */
+      rptr += rstride0;
+      fptr += fstride0;
+      mptr += mstride0;
+      count[0]++;
+      n = 0;
+      while (count[n] == extent[n])
+        {
+          /* When we get to the end of a dimension, reset it and increment
+             the next dimension.  */
+          count[n] = 0;
+          /* We could precalculate these products, but this is a less
+             frequently used path so probably not worth it.  */
+          rptr -= rstride[n] * extent[n];
+          fptr -= fstride[n] * extent[n];
+          mptr -= mstride[n] * extent[n];
+          n++;
+          if (n >= dim)
+            {
+              /* Break out of the loop.  */
+              rptr = NULL;
+              break;
+            }
+          else
+            {
+              count[n]++;
+              rptr += rstride[n];
+              fptr += fstride[n];
+              mptr += mstride[n];
+            }
+        }
+    }
+}
+
+#endif
+'
Index: Makefile.am
===================================================================
--- Makefile.am	(revision 133427)
+++ Makefile.am	(working copy)
@@ -491,6 +491,21 @@ $(srcdir)/generated/pack_c8.c \
 $(srcdir)/generated/pack_c10.c \
 $(srcdir)/generated/pack_c16.c
 
+i_unpack_c = \
+$(srcdir)/generated/unpack_i1.c \
+$(srcdir)/generated/unpack_i2.c \
+$(srcdir)/generated/unpack_i4.c \
+$(srcdir)/generated/unpack_i8.c \
+$(srcdir)/generated/unpack_i16.c \
+$(srcdir)/generated/unpack_r4.c \
+$(srcdir)/generated/unpack_r8.c \
+$(srcdir)/generated/unpack_r10.c \
+$(srcdir)/generated/unpack_r16.c \
+$(srcdir)/generated/unpack_c4.c \
+$(srcdir)/generated/unpack_c8.c \
+$(srcdir)/generated/unpack_c10.c \
+$(srcdir)/generated/unpack_c16.c
+
 m4_files= m4/iparm.m4 m4/ifunction.m4 m4/iforeach.m4 m4/all.m4 \
     m4/any.m4 m4/count.m4 m4/maxloc0.m4 m4/maxloc1.m4 m4/maxval.m4 \
     m4/minloc0.m4 m4/minloc1.m4 m4/minval.m4 m4/product.m4 m4/sum.m4 \
@@ -499,7 +514,8 @@ m4_files= m4/iparm.m4 m4/ifunction.m4 m4
     m4/specific.m4 m4/specific2.m4 m4/head.m4 m4/shape.m4 m4/reshape.m4 \
     m4/transpose.m4 m4/eoshift1.m4 m4/eoshift3.m4 m4/exponent.m4 \
     m4/fraction.m4 m4/nearest.m4 m4/set_exponent.m4 m4/pow.m4 \
-    m4/misc_specifics.m4 m4/rrspacing.m4 m4/spacing.m4 m4/pack.m4
+    m4/misc_specifics.m4 m4/rrspacing.m4 m4/spacing.m4 m4/pack.m4 \
+    m4/unpack.m4
 
 gfor_built_src= $(i_all_c) $(i_any_c) $(i_count_c) $(i_maxloc0_c) \
     $(i_maxloc1_c) $(i_maxval_c) $(i_minloc0_c) $(i_minloc1_c) $(i_minval_c) \
@@ -507,7 +523,7 @@ gfor_built_src= $(i_all_c) $(i_any_c) $(
     $(i_matmul_c) $(i_matmull_c) $(i_transpose_c) $(i_shape_c) $(i_eoshift1_c) \
     $(i_eoshift3_c) $(i_cshift1_c) $(i_reshape_c) $(in_pack_c) $(in_unpack_c) \
     $(i_exponent_c) $(i_fraction_c) $(i_nearest_c) $(i_set_exponent_c) \
-    $(i_pow_c) $(i_rrspacing_c) $(i_spacing_c) $(i_pack_c) \
+    $(i_pow_c) $(i_rrspacing_c) $(i_spacing_c) $(i_pack_c) $(i_unpack_c) \
     selected_int_kind.inc selected_real_kind.inc kinds.h \
     kinds.inc c99_protos.inc fpu-target.h
 
@@ -826,6 +842,9 @@ $(i_pow_c): m4/pow.m4 $(I_M4_DEPS)
 $(i_pack_c): m4/pack.m4 $(I_M4_DEPS)
 	$(M4) -Dfile=$@ -I$(srcdir)/m4 pack.m4 > $@
 
+$(i_unpack_c): m4/unpack.m4 $(I_M4_DEPS)
+	$(M4) -Dfile=$@ -I$(srcdir)/m4 unpack.m4 > $@
+
 $(gfor_built_specific_src): m4/specific.m4 m4/head.m4
 	$(M4) -Dfile=$@ -I$(srcdir)/m4 specific.m4 > $@
 
Index: intrinsics/unpack_generic.c
===================================================================
--- intrinsics/unpack_generic.c	(revision 133426)
+++ intrinsics/unpack_generic.c	(working copy)
@@ -196,8 +196,103 @@ void
 unpack1 (gfc_array_char *ret, const gfc_array_char *vector,
 	 const gfc_array_l1 *mask, const gfc_array_char *field)
 {
-  unpack_internal (ret, vector, mask, field,
-		   GFC_DESCRIPTOR_SIZE (vector),
+  int type;
+  index_type size;
+
+  type = GFC_DESCRIPTOR_TYPE (vector);
+  size = GFC_DESCRIPTOR_SIZE (vector);
+
+  switch(type)
+    {
+    case GFC_DTYPE_INTEGER:
+    case GFC_DTYPE_LOGICAL:
+      switch(size)
+	{
+	case sizeof (GFC_INTEGER_1):
+	  unpack1_i1 ((gfc_array_i1 *) ret, (gfc_array_i1 *) vector,
+		      mask, (gfc_array_i1 *) field);
+	  return;
+
+	case sizeof (GFC_INTEGER_2):
+	  unpack1_i2 ((gfc_array_i2 *) ret, (gfc_array_i2 *) vector,
+		      mask, (gfc_array_i2 *) field);
+	  return;
+
+	case sizeof (GFC_INTEGER_4):
+	  unpack1_i4 ((gfc_array_i4 *) ret, (gfc_array_i4 *) vector,
+		      mask, (gfc_array_i4 *) field);
+	  return;
+
+	case sizeof (GFC_INTEGER_8):
+	  unpack1_i8 ((gfc_array_i8 *) ret, (gfc_array_i8 *) vector,
+		      mask, (gfc_array_i8 *) field);
+	  return;
+
+#ifdef HAVE_GFC_INTEGER_16
+	case sizeof (GFC_INTEGER_16):
+	  unpack1_i16 ((gfc_array_i16 *) ret, (gfc_array_i16 *) vector,
+		       mask, (gfc_array_i16 *) field);
+	  return;
+#endif
+	}
+    case GFC_DTYPE_REAL:
+      switch (size)
+	{
+	case sizeof (GFC_REAL_4):
+	  unpack1_r4 ((gfc_array_r4 *) ret, (gfc_array_r4 *) vector,
+		      mask, (gfc_array_r4 *) field);
+	  return;
+
+	case sizeof (GFC_REAL_8):
+	  unpack1_r8 ((gfc_array_r8 *) ret, (gfc_array_r8 *) vector,
+		      mask, (gfc_array_r8 *) field);
+	  return;
+
+#ifdef HAVE_GFC_REAL_10
+	case sizeof (GFC_REAL_10):
+	  unpack1_r10 ((gfc_array_r10 *) ret, (gfc_array_r10 *) vector,
+		      mask, (gfc_array_r10 *) field);
+	  return;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+	case sizeof (GFC_REAL_16):
+	  unpack1_r16 ((gfc_array_r16 *) ret, (gfc_array_r16 *) vector,
+		      mask, (gfc_array_r16 *) field);
+	  return;
+#endif
+	}
+
+    case GFC_DTYPE_COMPLEX:
+      switch (size)
+	{
+	case sizeof (GFC_COMPLEX_4):
+	  unpack1_c4 ((gfc_array_c4 *) ret, (gfc_array_c4 *) vector,
+		      mask, (gfc_array_c4 *) field);
+	  return;
+
+	case sizeof (GFC_COMPLEX_8):
+	  unpack1_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) vector,
+		      mask, (gfc_array_c8 *) field);
+	  return;
+
+#ifdef HAVE_GFC_COMPLEX_10
+	case sizeof (GFC_COMPLEX_10):
+	  unpack1_c10 ((gfc_array_c10 *) ret, (gfc_array_c10 *) vector,
+		      mask, (gfc_array_c10 *) field);
+	  return;
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+	case sizeof (GFC_COMPLEX_16):
+	  unpack1_c16 ((gfc_array_c16 *) ret, (gfc_array_c16 *) vector,
+		      mask, (gfc_array_c16 *) field);
+	  return;
+#endif
+	}
+
+    }
+  unpack_internal (ret, vector, mask, field, size,
 		   GFC_DESCRIPTOR_SIZE (field));
 }
 
@@ -227,6 +322,102 @@ unpack0 (gfc_array_char *ret, const gfc_
 {
   gfc_array_char tmp;
 
+  int type;
+  index_type size;
+
+  type = GFC_DESCRIPTOR_TYPE (vector);
+  size = GFC_DESCRIPTOR_SIZE (vector);
+
+  switch(type)
+    {
+    case GFC_DTYPE_INTEGER:
+    case GFC_DTYPE_LOGICAL:
+      switch(size)
+	{
+	case sizeof (GFC_INTEGER_1):
+	  unpack0_i1 ((gfc_array_i1 *) ret, (GFC_INTEGER_1 *) vector,
+		      mask, (gfc_array_i1 *) field);
+	  return;
+
+	case sizeof (GFC_INTEGER_2):
+	  unpack0_i2 ((gfc_array_i2 *) ret, (GFC_INTEGER_2 *) vector,
+		      mask, (gfc_array_i2 *) field);
+	  return;
+
+	case sizeof (GFC_INTEGER_4):
+	  unpack0_i4 ((gfc_array_i4 *) ret, (GFC_INTEGER_4 *) vector,
+		      mask, (gfc_array_i4 *) field);
+	  return;
+
+	case sizeof (GFC_INTEGER_8):
+	  unpack0_i8 ((gfc_array_i8 *) ret, (GFC_INTEGER_8 *) vector,
+		      mask, (gfc_array_i8 *) field);
+	  return;
+
+#ifdef HAVE_GFC_INTEGER_16
+	case sizeof (GFC_INTEGER_16):
+	  unpack0_i16 ((gfc_array_i16 *) ret, (GFC_INTEGER_16 *) vector,
+		      mask, (gfc_array_i16 *) field);
+	  return;
+#endif
+	}
+
+    case GFC_DTYPE_REAL:
+      switch(size)
+	{
+	case sizeof (GFC_REAL_4):
+	  unpack0_r4 ((gfc_array_r4 *) ret, (GFC_REAL_4 *) vector,
+	  	      mask, (gfc_array_r4 *) field);
+	  return;
+
+	case sizeof (GFC_REAL_8):
+	  unpack0_r8 ((gfc_array_r8 *) ret, (GFC_REAL_8 *) vector,
+	  	      mask, (gfc_array_r8 *) field);
+	  return;
+
+#ifdef HAVE_GFC_REAL_10
+	case sizeof (GFC_REAL_10):
+	  unpack0_r10 ((gfc_array_r10 *) ret, (GFC_REAL_10 *) vector,
+	  	      mask, (gfc_array_r10 *) field);
+	  return;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+	case sizeof (GFC_REAL_16):
+	  unpack0_r16 ((gfc_array_r16 *) ret, (GFC_REAL_16 *) vector,
+	  	      mask, (gfc_array_r16 *) field);
+	  return;
+#endif
+	}
+
+    case GFC_DTYPE_COMPLEX:
+      switch(size)
+	{
+	case sizeof (GFC_COMPLEX_4):
+	  unpack0_c4 ((gfc_array_c4 *) ret, (GFC_COMPLEX_4 *) vector,
+	  	      mask, (gfc_array_c4 *) field);
+	  return;
+
+	case sizeof (GFC_COMPLEX_8):
+	  unpack0_c8 ((gfc_array_c8 *) ret, (GFC_COMPLEX_8 *) vector,
+	  	      mask, (gfc_array_c8 *) field);
+	  return;
+
+#ifdef HAVE_GFC_COMPLEX_10
+	case sizeof (GFC_COMPLEX_10):
+	  unpack0_c10 ((gfc_array_c10 *) ret, (GFC_COMPLEX_10 *) vector,
+	  	      mask, (gfc_array_c10 *) field);
+	  return;
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+	case sizeof (GFC_COMPLEX_16):
+	  unpack0_c16 ((gfc_array_c16 *) ret, (GFC_COMPLEX_16 *) vector,
+	  	      mask, (gfc_array_c16 *) field);
+	  return;
+#endif
+	}
+    }
   memset (&tmp, 0, sizeof (tmp));
   tmp.dtype = 0;
   tmp.data = field;

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]