This is the mail archive of the
fortran@gcc.gnu.org
mailing list for the GNU Fortran project.
patch, libfortran: Speedup for unpack intrinsic
- From: Thomas Koenig <tkoenig at netcologne dot de>
- To: fortran at gcc dot gnu dot org, gcc-patches at gcc dot gnu dot org
- Date: Sun, 23 Mar 2008 17:10:23 +0100
- Subject: patch, libfortran: Speedup for unpack intrinsic
Hello world,
here is a speedup patch for the unpack intrinsic, with
a speedup that's quite OK:
$ cat unpack-speed.f90
program main
integer, parameter:: n = 100
integer :: i
real, dimension(n*n) :: vect
real, dimension(n,n) :: a,b
logical(kind=1), dimension(n,n) :: mask
call random_number(a)
mask = a > 0.5
call random_number(b)
call random_number(vect)
do i=1,10000
a = unpack(vect,mask,0.)
end do
end program main
$ gfortran -static -O3 unpack-speed.f90
$ time ./a.out
real 0m1.064s
user 0m1.048s
sys 0m0.000s
$ gfortran-4.3 -static -O3 unpack-speed.f90
$ time ./a.out
real 0m2.772s
user 0m2.760s
sys 0m0.000s
Regression-tested on i686-pc-linux-gnu. I'll ask Dominique in a
separate E-Mail to cross-check this for me (although I did try
to be more careful this time :-)
OK once this has passed cross-check?
Thomas
2007-03-23 Thomas Koenig <tkoenig@gcc.gnu.org
PR libfortran/32972
* Makefile.am: Add new variable, i_unpack_c, containing
unpack_i1.c, unpack_i2.c, unpack_i4.c, unpack_i8.c,
unpack_i16.c, unpack_r4.c, unpack_r8.c, unpack_r10.c,
unpack_r16.c, unpack_c4.c, unpack_c8.c, unpack_c10.c
and unpack_c16.c
Add i_unpack_c to gfor_built_src.
Add rule to generate i_unpack_c from m4/unpack.m4.
* Makefile.in: Regenerated.
* libgfortran.h: Add prototypes for unpack0_i1, unpack0_i2,
unpack0_i4, unpack0_i8, unpack0_i16, unpack0_r4, unpack0_r8,
unpack0_r10, unpack0_r16, unpack0_c4, unpack0_c8, unpack0_c10,
unpack0_c16, unpack1_i1, unpack1_i2, unpack1_i4, unpack1_i8,
unpack1_i16, unpack1_r4, unpack1_r8, unpack1_r10, unpack1_r16,
unpack1_c4, unpack1_c8, unpack1_c10 and unpack1_c16.
* intrinsics/pack_generic.c (unpack1): Add calls to specific
unpack1 functions.
(unpack0): Add calls to specific unpack0 functions.
* m4/unpack.m4: New file.
* generated/unpack_i1.c: New file.
* generated/unpack_i2.c: New file.
* generated/unpack_i4.c: New file.
* generated/unpack_i8.c: New file.
* generated/unpack_i16.c: New file.
* generated/unpack_r4.c: New file.
* generated/unpack_r8.c: New file.
* generated/unpack_r10.c: New file.
* generated/unpack_r16.c: New file.
* generated/unpack_c4.c: New file.
* generated/unpack_c8.c: New file.
* generated/unpack_c10.c: New file.
* generated/unpack_c16.c: New file.
2007-03-23 Thomas Koenig <tkoenig@gcc.gnu.org
PR libfortran/32972
* gfortran.dg/intrinsic_unpack_1.f90: New test case.
* gfortran.dg/intrinsic_unpack_2.f90: New test case.
* gfortran.dg/intrinsic_unpack_3.f90: New test case.
! { dg-do run }
! Program to test the UNPACK intrinsic for the types usually present.
program intrinsic_unpack
implicit none
integer(kind=1), dimension(3, 3) :: a1, b1
integer(kind=2), dimension(3, 3) :: a2, b2
integer(kind=4), dimension(3, 3) :: a4, b4
integer(kind=8), dimension(3, 3) :: a8, b8
real(kind=4), dimension(3,3) :: ar4, br4
real(kind=8), dimension(3,3) :: ar8, br8
logical, dimension(3, 3) :: mask
character(len=100) line1, line2
integer i
mask = reshape ((/.false.,.true.,.false.,.true.,.false.,.false.,&
&.false.,.false.,.true./), (/3, 3/));
a1 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
b1 = unpack ((/2_1, 3_1, 4_1/), mask, a1)
if (any (b1 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
write (line1,'(10I4)') b1
write (line2,'(10I4)') unpack((/2_1, 3_1, 4_1/), mask, a1)
if (line1 .ne. line2) call abort
b1 = -1
b1 = unpack ((/2_1, 3_1, 4_1/), mask, 0_1)
if (any (b1 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
a2 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
b2 = unpack ((/2_2, 3_2, 4_2/), mask, a2)
if (any (b2 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
write (line1,'(10I4)') b2
write (line2,'(10I4)') unpack((/2_2, 3_2, 4_2/), mask, a2)
if (line1 .ne. line2) call abort
b2 = -1
b2 = unpack ((/2_2, 3_2, 4_2/), mask, 0_2)
if (any (b2 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
a4 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
b4 = unpack ((/2_4, 3_4, 4_4/), mask, a4)
if (any (b4 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
write (line1,'(10I4)') b4
write (line2,'(10I4)') unpack((/2_4, 3_4, 4_4/), mask, a4)
if (line1 .ne. line2) call abort
b4 = -1
b4 = unpack ((/2_4, 3_4, 4_4/), mask, 0_4)
if (any (b4 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
a8 = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
b8 = unpack ((/2_8, 3_8, 4_8/), mask, a8)
if (any (b8 .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
write (line1,'(10I4)') b8
write (line2,'(10I4)') unpack((/2_8, 3_8, 4_8/), mask, a8)
if (line1 .ne. line2) call abort
b8 = -1
b8 = unpack ((/2_8, 3_8, 4_8/), mask, 0_8)
if (any (b8 .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
ar4 = reshape ((/1._4, 0._4, 0._4, 0._4, 1._4, 0._4, 0._4, 0._4, 1._4/), &
(/3, 3/));
br4 = unpack ((/2._4, 3._4, 4._4/), mask, ar4)
if (any (br4 .ne. reshape ((/1._4, 2._4, 0._4, 3._4, 1._4, 0._4, &
0._4, 0._4, 4._4/), (/3, 3/)))) &
call abort
write (line1,'(9F9.5)') br4
write (line2,'(9F9.5)') unpack((/2._4, 3._4, 4._4/), mask, ar4)
if (line1 .ne. line2) call abort
br4 = -1._4
br4 = unpack ((/2._4, 3._4, 4._4/), mask, 0._4)
if (any (br4 .ne. reshape ((/0._4, 2._4, 0._4, 3._4, 0._4, 0._4, &
0._4, 0._4, 4._4/), (/3, 3/)))) &
call abort
ar8 = reshape ((/1._8, 0._8, 0._8, 0._8, 1._8, 0._8, 0._8, 0._8, 1._8/), &
(/3, 3/));
br8 = unpack ((/2._8, 3._8, 4._8/), mask, ar8)
if (any (br8 .ne. reshape ((/1._8, 2._8, 0._8, 3._8, 1._8, 0._8, &
0._8, 0._8, 4._8/), (/3, 3/)))) &
call abort
write (line1,'(9F9.5)') br8
write (line2,'(9F9.5)') unpack((/2._8, 3._8, 4._8/), mask, ar8)
if (line1 .ne. line2) call abort
br8 = -1._8
br8 = unpack ((/2._8, 3._8, 4._8/), mask, 0._8)
if (any (br8 .ne. reshape ((/0._8, 2._8, 0._8, 3._8, 0._8, 0._8, &
0._8, 0._8, 4._8/), (/3, 3/)))) &
call abort
end program
! { dg-do run }
! { dg-require-effective-target fortran_large_real }
! Program to test the UNPACK intrinsic for large real type
program intrinsic_unpack
implicit none
integer,parameter :: k = selected_real_kind (precision (0.0_8) + 1)
real(kind=k), dimension(3,3) :: ark, brk
logical, dimension(3, 3) :: mask
character(len=100) line1, line2
integer i
mask = reshape ((/.false.,.true.,.false.,.true.,.false.,.false.,&
&.false.,.false.,.true./), (/3, 3/));
ark = reshape ((/1._k, 0._k, 0._k, 0._k, 1._k, 0._k, 0._k, 0._k, 1._k/), &
(/3, 3/));
brk = unpack ((/2._k, 3._k, 4._k/), mask, ark)
if (any (brk .ne. reshape ((/1._k, 2._k, 0._k, 3._k, 1._k, 0._k, &
0._k, 0._k, 4._k/), (/3, 3/)))) &
call abort
write (line1,'(9F9.5)') brk
write (line2,'(9F9.5)') unpack((/2._k, 3._k, 4._k/), mask, ark)
if (line1 .ne. line2) call abort
brk = -1._k
brk = unpack ((/2._k, 3._k, 4._k/), mask, 0._k)
if (any (brk .ne. reshape ((/0._k, 2._k, 0._k, 3._k, 0._k, 0._k, &
0._k, 0._k, 4._k/), (/3, 3/)))) &
call abort
end program
! { dg-do run }
! { dg-require-effective-target fortran_large_int }
! Program to test the UNPACK intrinsic for a long integer type
program intrinsic_unpack
implicit none
integer,parameter :: k = selected_int_kind (range (0_8) + 1)
integer(kind=k), dimension(3, 3) :: ak, bk
logical, dimension(3, 3) :: mask
character(len=100) line1, line2
integer i
mask = reshape ((/.false.,.true.,.false.,.true.,.false.,.false.,&
&.false.,.false.,.true./), (/3, 3/));
ak = reshape ((/1, 0, 0, 0, 1, 0, 0, 0, 1/), (/3, 3/));
bk = unpack ((/2_k, 3_k, 4_k/), mask, ak)
if (any (bk .ne. reshape ((/1, 2, 0, 3, 1, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
write (line1,'(10I4)') bk
write (line2,'(10I4)') unpack((/2_k, 3_k, 4_k/), mask, ak)
if (line1 .ne. line2) call abort
bk = -1
bk = unpack ((/2_k, 3_k, 4_k/), mask, 0_k)
if (any (bk .ne. reshape ((/0, 2, 0, 3, 0, 0, 0, 0, 4/), (/3, 3/)))) &
call abort
end program
Index: libgfortran.h
===================================================================
--- libgfortran.h (revision 133427)
+++ libgfortran.h (working copy)
@@ -774,6 +774,142 @@ extern void pack_c16 (gfc_array_c16 *, c
internal_proto(pack_c16);
#endif
+/* Internal auxiliary functions for the unpack intrinsic. */
+
+extern void unpack0_i1 (gfc_array_i1 *, const gfc_array_i1 *,
+ const gfc_array_l1 *, const GFC_INTEGER_1 *);
+internal_proto(unpack0_i1);
+
+extern void unpack0_i2 (gfc_array_i2 *, const gfc_array_i2 *,
+ const gfc_array_l1 *, const GFC_INTEGER_2 *);
+internal_proto(unpack0_i2);
+
+extern void unpack0_i4 (gfc_array_i4 *, const gfc_array_i4 *,
+ const gfc_array_l1 *, const GFC_INTEGER_4 *);
+internal_proto(unpack0_i4);
+
+extern void unpack0_i8 (gfc_array_i8 *, const gfc_array_i8 *,
+ const gfc_array_l1 *, const GFC_INTEGER_8 *);
+internal_proto(unpack0_i8);
+
+#ifdef HAVE_GFC_INTEGER_16
+
+extern void unpack0_i16 (gfc_array_i16 *, const gfc_array_i16 *,
+ const gfc_array_l1 *, const GFC_INTEGER_16 *);
+internal_proto(unpack0_i16);
+
+#endif
+
+extern void unpack0_r4 (gfc_array_r4 *, const gfc_array_r4 *,
+ const gfc_array_l1 *, const GFC_REAL_4 *);
+internal_proto(unpack0_r4);
+
+extern void unpack0_r8 (gfc_array_r8 *, const gfc_array_r8 *,
+ const gfc_array_l1 *, const GFC_REAL_8 *);
+internal_proto(unpack0_r8);
+
+#ifdef HAVE_GFC_REAL_10
+
+extern void unpack0_r10 (gfc_array_r10 *, const gfc_array_r10 *,
+ const gfc_array_l1 *, const GFC_REAL_10 *);
+internal_proto(unpack0_r10);
+
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+
+extern void unpack0_r16 (gfc_array_r16 *, const gfc_array_r16 *,
+ const gfc_array_l1 *, const GFC_REAL_16 *);
+internal_proto(unpack0_r16);
+
+#endif
+
+extern void unpack0_c4 (gfc_array_c4 *, const gfc_array_c4 *,
+ const gfc_array_l1 *, const GFC_COMPLEX_4 *);
+internal_proto(unpack0_c4);
+
+extern void unpack0_c8 (gfc_array_c8 *, const gfc_array_c8 *,
+ const gfc_array_l1 *, const GFC_COMPLEX_8 *);
+internal_proto(unpack0_c8);
+
+#ifdef HAVE_GFC_COMPLEX_10
+
+extern void unpack0_c10 (gfc_array_c10 *, const gfc_array_c10 *,
+ const gfc_array_l1 *mask, const GFC_COMPLEX_10 *);
+internal_proto(unpack0_c10);
+
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+
+extern void unpack0_c16 (gfc_array_c16 *, const gfc_array_c16 *,
+ const gfc_array_l1 *, const GFC_COMPLEX_16 *;
+internal_proto(unpack0_c16);
+
+#endif
+
+extern void unpack1_i1 (gfc_array_i1 *, const gfc_array_i1 *,
+ const gfc_array_l1 *, const gfc_array_i1 *);
+internal_proto(unpack1_i1);
+
+extern void unpack1_i2 (gfc_array_i2 *, const gfc_array_i2 *,
+ const gfc_array_l1 *, const gfc_array_i2 *);
+internal_proto(unpack1_i2);
+
+extern void unpack1_i4 (gfc_array_i4 *, const gfc_array_i4 *,
+ const gfc_array_l1 *, const gfc_array_i4 *);
+internal_proto(unpack1_i4);
+
+extern void unpack1_i8 (gfc_array_i8 *, const gfc_array_i8 *,
+ const gfc_array_l1 *, const gfc_array_i8 *);
+internal_proto(unpack1_i8);
+
+#ifdef HAVE_GFC_INTEGER_16
+extern void unpack1_i16 (gfc_array_i16 *, const gfc_array_i16 *,
+ const gfc_array_l1 *, const gfc_array_i16 *);
+internal_proto(unpack1_i16);
+#endif
+
+extern void unpack1_r4 (gfc_array_r4 *, const gfc_array_r4 *,
+ const gfc_array_l1 *, const gfc_array_r4 *);
+internal_proto(unpack1_r4);
+
+extern void unpack1_r8 (gfc_array_r8 *, const gfc_array_r8 *,
+ const gfc_array_l1 *, const gfc_array_r8 *);
+internal_proto(unpack1_r8);
+
+#ifdef HAVE_REAL_10
+extern void unpack1_r10 (gfc_array_r10 *, const gfc_array_r10 *,
+ const gfc_array_l1 *, const gfc_array_r10 *);
+internal_proto(unpack1_r10);
+#endif
+
+#ifdef HAVE_REAL_16
+extern void unpack1_r16 (gfc_array_r16 *, const gfc_array_r16 *,
+ const gfc_array_l1 *, const gfc_array_r16 *);
+internal_proto(unpack1_r16);
+#endif
+
+extern void unpack1_c4 (gfc_array_c4 *, const gfc_array_c4 *,
+ const gfc_array_l1 *, const gfc_array_c4 *);
+internal_proto(unpack1_c4);
+
+extern void unpack1_c8 (gfc_array_c8 *, const gfc_array_c8 *,
+ const gfc_array_l1 *, const gfc_array_c8 *);
+internal_proto(unpack1_c8);
+
+#ifdef HAVE_COMPLEX_10
+extern void unpack1_c10 (gfc_array_c10 *, const gfc_array_c10 *,
+ const gfc_array_l1 *, const gfc_array_c10 *);
+internal_proto(unpack1_c10);
+#endif
+
+#ifdef HAVE_COMPLEX_16
+extern void unpack1_c16 (gfc_array_c16 *, const gfc_array_c16 *,
+ const gfc_array_l1 *, const gfc_array_c16 *);
+internal_proto(unpack1_c16);
+#endif
+
/* string_intrinsics.c */
extern int compare_string (GFC_INTEGER_4, const char *,
Index: m4/unpack.m4
===================================================================
--- m4/unpack.m4 (revision 0)
+++ m4/unpack.m4 (revision 0)
@@ -0,0 +1,339 @@
+`/* Specific implementation of the UNPACK intrinsic
+ Copyright 2008 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on
+ unpack_generic.c by Paul Brook <paul@nowt.org>.
+
+This file is part of the GNU Fortran 95 runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 2 of the License, or (at your option) any later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+Ligbfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public
+License along with libgfortran; see the file COPYING. If not,
+write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+#include "libgfortran.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>'
+
+include(iparm.m4)dnl
+
+`#if defined (HAVE_'rtype_name`)
+
+void
+unpack0_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
+ const gfc_array_l1 *mask, const 'rtype_name` *fptr)
+{
+ /* r.* indicates the return array. */
+ index_type rstride[GFC_MAX_DIMENSIONS];
+ index_type rstride0;
+ index_type rs;
+ 'rtype_name` *rptr;
+ /* v.* indicates the vector array. */
+ index_type vstride0;
+ 'rtype_name` *vptr;
+ /* Value for field, this is constant. */
+ const 'rtype_name` fval = *fptr;
+ /* m.* indicates the mask array. */
+ index_type mstride[GFC_MAX_DIMENSIONS];
+ index_type mstride0;
+ const GFC_LOGICAL_1 *mptr;
+
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ index_type n;
+ index_type dim;
+
+ int empty;
+ int mask_kind;
+
+ empty = 0;
+
+ mptr = mask->data;
+
+ /* Use the same loop for all logical types, by using GFC_LOGICAL_1
+ and using shifting to address size and endian issues. */
+
+ mask_kind = GFC_DESCRIPTOR_SIZE (mask);
+
+ if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
+#ifdef HAVE_GFC_LOGICAL_16
+ || mask_kind == 16
+#endif
+ )
+ {
+ /* Do not convert a NULL pointer as we use test for NULL below. */
+ if (mptr)
+ mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind);
+ }
+ else
+ runtime_error ("Funny sized logical array");
+
+ if (ret->data == NULL)
+ {
+ /* The front end has signalled that we need to populate the
+ return array descriptor. */
+ dim = GFC_DESCRIPTOR_RANK (mask);
+ rs = 1;
+ for (n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ ret->dim[n].stride = rs;
+ ret->dim[n].lbound = 0;
+ ret->dim[n].ubound = mask->dim[n].ubound - mask->dim[n].lbound;
+ extent[n] = ret->dim[n].ubound + 1;
+ empty = empty || extent[n] <= 0;
+ rstride[n] = ret->dim[n].stride;
+ mstride[n] = mask->dim[n].stride * mask_kind;
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+ ret->data = internal_malloc_size (rs * sizeof ('rtype_name`));
+ }
+ else
+ {
+ dim = GFC_DESCRIPTOR_RANK (ret);
+ for (n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ extent[n] = ret->dim[n].ubound + 1 - ret->dim[n].lbound;
+ empty = empty || extent[n] <= 0;
+ rstride[n] = ret->dim[n].stride;
+ mstride[n] = mask->dim[n].stride * mask_kind;
+ }
+ if (rstride[0] == 0)
+ rstride[0] = 1;
+ }
+
+ if (empty)
+ return;
+
+ if (mstride[0] == 0)
+ mstride[0] = 1;
+
+ vstride0 = vector->dim[0].stride;
+ if (vstride0 == 0)
+ vstride0 = 1;
+ rstride0 = rstride[0];
+ mstride0 = mstride[0];
+ rptr = ret->data;
+ vptr = vector->data;
+
+ while (rptr)
+ {
+ if (*mptr)
+ {
+ /* From vector. */
+ *rptr = *vptr;
+ vptr += vstride0;
+ }
+ else
+ {
+ /* From field. */
+ *rptr = fval;
+ }
+ /* Advance to the next element. */
+ rptr += rstride0;
+ mptr += mstride0;
+ count[0]++;
+ n = 0;
+ while (count[n] == extent[n])
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ /* We could precalculate these products, but this is a less
+ frequently used path so probably not worth it. */
+ rptr -= rstride[n] * extent[n];
+ mptr -= mstride[n] * extent[n];
+ n++;
+ if (n >= dim)
+ {
+ /* Break out of the loop. */
+ rptr = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ rptr += rstride[n];
+ mptr += mstride[n];
+ }
+ }
+ }
+}
+
+void
+unpack1_'rtype_code` ('rtype` *ret, const 'rtype` *vector,
+ const gfc_array_l1 *mask, const 'rtype` *field)
+{
+ /* r.* indicates the return array. */
+ index_type rstride[GFC_MAX_DIMENSIONS];
+ index_type rstride0;
+ index_type rs;
+ 'rtype_name` *rptr;
+ /* v.* indicates the vector array. */
+ index_type vstride0;
+ 'rtype_name` *vptr;
+ /* f.* indicates the field array. */
+ index_type fstride[GFC_MAX_DIMENSIONS];
+ index_type fstride0;
+ const 'rtype_name` *fptr;
+ /* m.* indicates the mask array. */
+ index_type mstride[GFC_MAX_DIMENSIONS];
+ index_type mstride0;
+ const GFC_LOGICAL_1 *mptr;
+
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ index_type n;
+ index_type dim;
+
+ int empty;
+ int mask_kind;
+
+ empty = 0;
+
+ mptr = mask->data;
+
+ /* Use the same loop for all logical types, by using GFC_LOGICAL_1
+ and using shifting to address size and endian issues. */
+
+ mask_kind = GFC_DESCRIPTOR_SIZE (mask);
+
+ if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8
+#ifdef HAVE_GFC_LOGICAL_16
+ || mask_kind == 16
+#endif
+ )
+ {
+ /* Do not convert a NULL pointer as we use test for NULL below. */
+ if (mptr)
+ mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind);
+ }
+ else
+ runtime_error ("Funny sized logical array");
+
+ if (ret->data == NULL)
+ {
+ /* The front end has signalled that we need to populate the
+ return array descriptor. */
+ dim = GFC_DESCRIPTOR_RANK (mask);
+ rs = 1;
+ for (n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ ret->dim[n].stride = rs;
+ ret->dim[n].lbound = 0;
+ ret->dim[n].ubound = mask->dim[n].ubound - mask->dim[n].lbound;
+ extent[n] = ret->dim[n].ubound + 1;
+ empty = empty || extent[n] <= 0;
+ rstride[n] = ret->dim[n].stride;
+ fstride[n] = field->dim[n].stride;
+ mstride[n] = mask->dim[n].stride * mask_kind;
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+ ret->data = internal_malloc_size (rs * sizeof ('rtype_name`));
+ }
+ else
+ {
+ dim = GFC_DESCRIPTOR_RANK (ret);
+ for (n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ extent[n] = ret->dim[n].ubound + 1 - ret->dim[n].lbound;
+ empty = empty || extent[n] <= 0;
+ rstride[n] = ret->dim[n].stride;
+ fstride[n] = field->dim[n].stride;
+ mstride[n] = mask->dim[n].stride * mask_kind;
+ }
+ if (rstride[0] == 0)
+ rstride[0] = 1;
+ }
+
+ if (empty)
+ return;
+
+ if (fstride[0] == 0)
+ fstride[0] = 1;
+ if (mstride[0] == 0)
+ mstride[0] = 1;
+
+ vstride0 = vector->dim[0].stride;
+ if (vstride0 == 0)
+ vstride0 = 1;
+ rstride0 = rstride[0];
+ fstride0 = fstride[0];
+ mstride0 = mstride[0];
+ rptr = ret->data;
+ fptr = field->data;
+ vptr = vector->data;
+
+ while (rptr)
+ {
+ if (*mptr)
+ {
+ /* From vector. */
+ *rptr = *vptr;
+ vptr += vstride0;
+ }
+ else
+ {
+ /* From field. */
+ *rptr = *fptr;
+ }
+ /* Advance to the next element. */
+ rptr += rstride0;
+ fptr += fstride0;
+ mptr += mstride0;
+ count[0]++;
+ n = 0;
+ while (count[n] == extent[n])
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ /* We could precalculate these products, but this is a less
+ frequently used path so probably not worth it. */
+ rptr -= rstride[n] * extent[n];
+ fptr -= fstride[n] * extent[n];
+ mptr -= mstride[n] * extent[n];
+ n++;
+ if (n >= dim)
+ {
+ /* Break out of the loop. */
+ rptr = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ rptr += rstride[n];
+ fptr += fstride[n];
+ mptr += mstride[n];
+ }
+ }
+ }
+}
+
+#endif
+'
Index: Makefile.am
===================================================================
--- Makefile.am (revision 133427)
+++ Makefile.am (working copy)
@@ -491,6 +491,21 @@ $(srcdir)/generated/pack_c8.c \
$(srcdir)/generated/pack_c10.c \
$(srcdir)/generated/pack_c16.c
+i_unpack_c = \
+$(srcdir)/generated/unpack_i1.c \
+$(srcdir)/generated/unpack_i2.c \
+$(srcdir)/generated/unpack_i4.c \
+$(srcdir)/generated/unpack_i8.c \
+$(srcdir)/generated/unpack_i16.c \
+$(srcdir)/generated/unpack_r4.c \
+$(srcdir)/generated/unpack_r8.c \
+$(srcdir)/generated/unpack_r10.c \
+$(srcdir)/generated/unpack_r16.c \
+$(srcdir)/generated/unpack_c4.c \
+$(srcdir)/generated/unpack_c8.c \
+$(srcdir)/generated/unpack_c10.c \
+$(srcdir)/generated/unpack_c16.c
+
m4_files= m4/iparm.m4 m4/ifunction.m4 m4/iforeach.m4 m4/all.m4 \
m4/any.m4 m4/count.m4 m4/maxloc0.m4 m4/maxloc1.m4 m4/maxval.m4 \
m4/minloc0.m4 m4/minloc1.m4 m4/minval.m4 m4/product.m4 m4/sum.m4 \
@@ -499,7 +514,8 @@ m4_files= m4/iparm.m4 m4/ifunction.m4 m4
m4/specific.m4 m4/specific2.m4 m4/head.m4 m4/shape.m4 m4/reshape.m4 \
m4/transpose.m4 m4/eoshift1.m4 m4/eoshift3.m4 m4/exponent.m4 \
m4/fraction.m4 m4/nearest.m4 m4/set_exponent.m4 m4/pow.m4 \
- m4/misc_specifics.m4 m4/rrspacing.m4 m4/spacing.m4 m4/pack.m4
+ m4/misc_specifics.m4 m4/rrspacing.m4 m4/spacing.m4 m4/pack.m4 \
+ m4/unpack.m4
gfor_built_src= $(i_all_c) $(i_any_c) $(i_count_c) $(i_maxloc0_c) \
$(i_maxloc1_c) $(i_maxval_c) $(i_minloc0_c) $(i_minloc1_c) $(i_minval_c) \
@@ -507,7 +523,7 @@ gfor_built_src= $(i_all_c) $(i_any_c) $(
$(i_matmul_c) $(i_matmull_c) $(i_transpose_c) $(i_shape_c) $(i_eoshift1_c) \
$(i_eoshift3_c) $(i_cshift1_c) $(i_reshape_c) $(in_pack_c) $(in_unpack_c) \
$(i_exponent_c) $(i_fraction_c) $(i_nearest_c) $(i_set_exponent_c) \
- $(i_pow_c) $(i_rrspacing_c) $(i_spacing_c) $(i_pack_c) \
+ $(i_pow_c) $(i_rrspacing_c) $(i_spacing_c) $(i_pack_c) $(i_unpack_c) \
selected_int_kind.inc selected_real_kind.inc kinds.h \
kinds.inc c99_protos.inc fpu-target.h
@@ -826,6 +842,9 @@ $(i_pow_c): m4/pow.m4 $(I_M4_DEPS)
$(i_pack_c): m4/pack.m4 $(I_M4_DEPS)
$(M4) -Dfile=$@ -I$(srcdir)/m4 pack.m4 > $@
+$(i_unpack_c): m4/unpack.m4 $(I_M4_DEPS)
+ $(M4) -Dfile=$@ -I$(srcdir)/m4 unpack.m4 > $@
+
$(gfor_built_specific_src): m4/specific.m4 m4/head.m4
$(M4) -Dfile=$@ -I$(srcdir)/m4 specific.m4 > $@
Index: intrinsics/unpack_generic.c
===================================================================
--- intrinsics/unpack_generic.c (revision 133426)
+++ intrinsics/unpack_generic.c (working copy)
@@ -196,8 +196,103 @@ void
unpack1 (gfc_array_char *ret, const gfc_array_char *vector,
const gfc_array_l1 *mask, const gfc_array_char *field)
{
- unpack_internal (ret, vector, mask, field,
- GFC_DESCRIPTOR_SIZE (vector),
+ int type;
+ index_type size;
+
+ type = GFC_DESCRIPTOR_TYPE (vector);
+ size = GFC_DESCRIPTOR_SIZE (vector);
+
+ switch(type)
+ {
+ case GFC_DTYPE_INTEGER:
+ case GFC_DTYPE_LOGICAL:
+ switch(size)
+ {
+ case sizeof (GFC_INTEGER_1):
+ unpack1_i1 ((gfc_array_i1 *) ret, (gfc_array_i1 *) vector,
+ mask, (gfc_array_i1 *) field);
+ return;
+
+ case sizeof (GFC_INTEGER_2):
+ unpack1_i2 ((gfc_array_i2 *) ret, (gfc_array_i2 *) vector,
+ mask, (gfc_array_i2 *) field);
+ return;
+
+ case sizeof (GFC_INTEGER_4):
+ unpack1_i4 ((gfc_array_i4 *) ret, (gfc_array_i4 *) vector,
+ mask, (gfc_array_i4 *) field);
+ return;
+
+ case sizeof (GFC_INTEGER_8):
+ unpack1_i8 ((gfc_array_i8 *) ret, (gfc_array_i8 *) vector,
+ mask, (gfc_array_i8 *) field);
+ return;
+
+#ifdef HAVE_GFC_INTEGER_16
+ case sizeof (GFC_INTEGER_16):
+ unpack1_i16 ((gfc_array_i16 *) ret, (gfc_array_i16 *) vector,
+ mask, (gfc_array_i16 *) field);
+ return;
+#endif
+ }
+ case GFC_DTYPE_REAL:
+ switch (size)
+ {
+ case sizeof (GFC_REAL_4):
+ unpack1_r4 ((gfc_array_r4 *) ret, (gfc_array_r4 *) vector,
+ mask, (gfc_array_r4 *) field);
+ return;
+
+ case sizeof (GFC_REAL_8):
+ unpack1_r8 ((gfc_array_r8 *) ret, (gfc_array_r8 *) vector,
+ mask, (gfc_array_r8 *) field);
+ return;
+
+#ifdef HAVE_GFC_REAL_10
+ case sizeof (GFC_REAL_10):
+ unpack1_r10 ((gfc_array_r10 *) ret, (gfc_array_r10 *) vector,
+ mask, (gfc_array_r10 *) field);
+ return;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+ case sizeof (GFC_REAL_16):
+ unpack1_r16 ((gfc_array_r16 *) ret, (gfc_array_r16 *) vector,
+ mask, (gfc_array_r16 *) field);
+ return;
+#endif
+ }
+
+ case GFC_DTYPE_COMPLEX:
+ switch (size)
+ {
+ case sizeof (GFC_COMPLEX_4):
+ unpack1_c4 ((gfc_array_c4 *) ret, (gfc_array_c4 *) vector,
+ mask, (gfc_array_c4 *) field);
+ return;
+
+ case sizeof (GFC_COMPLEX_8):
+ unpack1_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) vector,
+ mask, (gfc_array_c8 *) field);
+ return;
+
+#ifdef HAVE_GFC_COMPLEX_10
+ case sizeof (GFC_COMPLEX_10):
+ unpack1_c10 ((gfc_array_c10 *) ret, (gfc_array_c10 *) vector,
+ mask, (gfc_array_c10 *) field);
+ return;
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+ case sizeof (GFC_COMPLEX_16):
+ unpack1_c16 ((gfc_array_c16 *) ret, (gfc_array_c16 *) vector,
+ mask, (gfc_array_c16 *) field);
+ return;
+#endif
+ }
+
+ }
+ unpack_internal (ret, vector, mask, field, size,
GFC_DESCRIPTOR_SIZE (field));
}
@@ -227,6 +322,102 @@ unpack0 (gfc_array_char *ret, const gfc_
{
gfc_array_char tmp;
+ int type;
+ index_type size;
+
+ type = GFC_DESCRIPTOR_TYPE (vector);
+ size = GFC_DESCRIPTOR_SIZE (vector);
+
+ switch(type)
+ {
+ case GFC_DTYPE_INTEGER:
+ case GFC_DTYPE_LOGICAL:
+ switch(size)
+ {
+ case sizeof (GFC_INTEGER_1):
+ unpack0_i1 ((gfc_array_i1 *) ret, (GFC_INTEGER_1 *) vector,
+ mask, (gfc_array_i1 *) field);
+ return;
+
+ case sizeof (GFC_INTEGER_2):
+ unpack0_i2 ((gfc_array_i2 *) ret, (GFC_INTEGER_2 *) vector,
+ mask, (gfc_array_i2 *) field);
+ return;
+
+ case sizeof (GFC_INTEGER_4):
+ unpack0_i4 ((gfc_array_i4 *) ret, (GFC_INTEGER_4 *) vector,
+ mask, (gfc_array_i4 *) field);
+ return;
+
+ case sizeof (GFC_INTEGER_8):
+ unpack0_i8 ((gfc_array_i8 *) ret, (GFC_INTEGER_8 *) vector,
+ mask, (gfc_array_i8 *) field);
+ return;
+
+#ifdef HAVE_GFC_INTEGER_16
+ case sizeof (GFC_INTEGER_16):
+ unpack0_i16 ((gfc_array_i16 *) ret, (GFC_INTEGER_16 *) vector,
+ mask, (gfc_array_i16 *) field);
+ return;
+#endif
+ }
+
+ case GFC_DTYPE_REAL:
+ switch(size)
+ {
+ case sizeof (GFC_REAL_4):
+ unpack0_r4 ((gfc_array_r4 *) ret, (GFC_REAL_4 *) vector,
+ mask, (gfc_array_r4 *) field);
+ return;
+
+ case sizeof (GFC_REAL_8):
+ unpack0_r8 ((gfc_array_r8 *) ret, (GFC_REAL_8 *) vector,
+ mask, (gfc_array_r8 *) field);
+ return;
+
+#ifdef HAVE_GFC_REAL_10
+ case sizeof (GFC_REAL_10):
+ unpack0_r10 ((gfc_array_r10 *) ret, (GFC_REAL_10 *) vector,
+ mask, (gfc_array_r10 *) field);
+ return;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+ case sizeof (GFC_REAL_16):
+ unpack0_r16 ((gfc_array_r16 *) ret, (GFC_REAL_16 *) vector,
+ mask, (gfc_array_r16 *) field);
+ return;
+#endif
+ }
+
+ case GFC_DTYPE_COMPLEX:
+ switch(size)
+ {
+ case sizeof (GFC_COMPLEX_4):
+ unpack0_c4 ((gfc_array_c4 *) ret, (GFC_COMPLEX_4 *) vector,
+ mask, (gfc_array_c4 *) field);
+ return;
+
+ case sizeof (GFC_COMPLEX_8):
+ unpack0_c8 ((gfc_array_c8 *) ret, (GFC_COMPLEX_8 *) vector,
+ mask, (gfc_array_c8 *) field);
+ return;
+
+#ifdef HAVE_GFC_COMPLEX_10
+ case sizeof (GFC_COMPLEX_10):
+ unpack0_c10 ((gfc_array_c10 *) ret, (GFC_COMPLEX_10 *) vector,
+ mask, (gfc_array_c10 *) field);
+ return;
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+ case sizeof (GFC_COMPLEX_16):
+ unpack0_c16 ((gfc_array_c16 *) ret, (GFC_COMPLEX_16 *) vector,
+ mask, (gfc_array_c16 *) field);
+ return;
+#endif
+ }
+ }
memset (&tmp, 0, sizeof (tmp));
tmp.dtype = 0;
tmp.data = field;