This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Corrected patch for PR 36886
- From: Thomas Koenig <tkoenig at netcologne dot de>
- To: fortran at gcc dot gnu dot org
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Sat, 09 Aug 2008 18:20:43 +0200
- Subject: Corrected patch for PR 36886
Here's a corrected patch, I had forgotten changes to libgfortran.h.
Thomas
Index: libgfortran.h
===================================================================
--- libgfortran.h (revision 138904)
+++ libgfortran.h (working copy)
@@ -437,6 +437,12 @@ typedef GFC_ARRAY_DESCRIPTOR (GFC_MAX_DI
(__alignof__(GFC_INTEGER_16) - 1))
#endif
+#define GFC_UNALIGNED_C4(x) (((uintptr_t)(x)) & \
+ (__alignof__(GFC_COMPLEX_4) - 1))
+
+#define GFC_UNALIGNED_C8(x) (((uintptr_t)(x)) & \
+ (__alignof__(GFC_COMPLEX_8) - 1))
+
/* Runtime library include. */
#define stringize(x) expand_macro(x)
#define expand_macro(x) # x
@@ -1210,4 +1216,55 @@ typedef GFC_ARRAY_DESCRIPTOR (GFC_MAX_DI
extern index_type size0 (const array_t * array);
iexport_proto(size0);
+/* Internal auxiliary functions for cshift */
+
+void cshift0_i1 (gfc_array_i1 *, const gfc_array_i1 *, ssize_t, int);
+internal_proto(cshift0_i1);
+
+void cshift0_i2 (gfc_array_i2 *, const gfc_array_i2 *, ssize_t, int);
+internal_proto(cshift0_i2);
+
+void cshift0_i4 (gfc_array_i4 *, const gfc_array_i4 *, ssize_t, int);
+internal_proto(cshift0_i4);
+
+void cshift0_i8 (gfc_array_i8 *, const gfc_array_i8 *, ssize_t, int);
+internal_proto(cshift0_i8);
+
+#ifdef HAVE_GFC_INTEGER_16
+void cshift0_i16 (gfc_array_i16 *, const gfc_array_i16 *, ssize_t, int);
+internal_proto(cshift0_i16);
+#endif
+
+void cshift0_r4 (gfc_array_r4 *, const gfc_array_r4 *, ssize_t, int);
+internal_proto(cshift0_r4);
+
+void cshift0_r8 (gfc_array_r8 *, const gfc_array_r8 *, ssize_t, int);
+internal_proto(cshift0_r8);
+
+#ifdef HAVE_GFC_REAL_10
+void cshift0_r10 (gfc_array_r10 *, const gfc_array_r10 *, ssize_t, int);
+internal_proto(cshift0_r10);
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+void cshift0_r16 (gfc_array_r16 *, const gfc_array_r16 *, ssize_t, int);
+internal_proto(cshift0_r16);
+#endif
+
+void cshift0_c4 (gfc_array_c4 *, const gfc_array_c4 *, ssize_t, int);
+internal_proto(cshift0_c4);
+
+void cshift0_c8 (gfc_array_c8 *, const gfc_array_c8 *, ssize_t, int);
+internal_proto(cshift0_c8);
+
+#ifdef HAVE_GFC_COMPLEX_10
+void cshift0_c10 (gfc_array_c10 *, const gfc_array_c10 *, ssize_t, int);
+internal_proto(cshift0_c10);
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+void cshift0_c16 (gfc_array_c16 *, const gfc_array_c16 *, ssize_t, int);
+internal_proto(cshift0_c16);
+#endif
+
#endif /* LIBGFOR_H */
Index: Makefile.am
===================================================================
--- Makefile.am (revision 138904)
+++ Makefile.am (working copy)
@@ -379,6 +379,22 @@ $(srcdir)/generated/eoshift3_4.c \
$(srcdir)/generated/eoshift3_8.c \
$(srcdir)/generated/eoshift3_16.c
+i_cshift0_c= \
+$(srcdir)/generated/cshift0_i1.c \
+$(srcdir)/generated/cshift0_i2.c \
+$(srcdir)/generated/cshift0_i4.c \
+$(srcdir)/generated/cshift0_i8.c \
+$(srcdir)/generated/cshift0_i16.c \
+$(srcdir)/generated/cshift0_r4.c \
+$(srcdir)/generated/cshift0_r8.c \
+$(srcdir)/generated/cshift0_r10.c \
+$(srcdir)/generated/cshift0_r16.c \
+$(srcdir)/generated/cshift0_c4.c \
+$(srcdir)/generated/cshift0_c8.c \
+$(srcdir)/generated/cshift0_c10.c \
+$(srcdir)/generated/cshift0_c16.c
+
+
i_cshift1_c= \
$(srcdir)/generated/cshift1_4.c \
$(srcdir)/generated/cshift1_8.c \
@@ -545,7 +561,7 @@ gfor_built_src= $(i_all_c) $(i_any_c) $(
$(i_exponent_c) $(i_fraction_c) $(i_nearest_c) $(i_set_exponent_c) \
$(i_pow_c) $(i_rrspacing_c) $(i_spacing_c) $(i_pack_c) $(i_unpack_c) \
$(i_spread_c) selected_int_kind.inc selected_real_kind.inc kinds.h \
- kinds.inc c99_protos.inc fpu-target.h
+ $(i_cshift0_c) kinds.inc c99_protos.inc fpu-target.h
# Machine generated specifics
gfor_built_specific_src= \
@@ -829,6 +845,9 @@ $(i_eoshift1_c): m4/eoshift1.m4 $(I_M4_D
$(i_eoshift3_c): m4/eoshift3.m4 $(I_M4_DEPS)
$(M4) -Dfile=$@ -I$(srcdir)/m4 eoshift3.m4 > $@
+$(i_cshift0_c): m4/cshift0.m4 $(I_M4_DEPS)
+ $(M4) -Dfile=$@ -I$(srcdir)/m4 cshift0.m4 > $@
+
$(i_cshift1_c): m4/cshift1.m4 $(I_M4_DEPS)
$(M4) -Dfile=$@ -I$(srcdir)/m4 cshift1.m4 > $@
Index: intrinsics/cshift0.c
===================================================================
--- intrinsics/cshift0.c (revision 138904)
+++ intrinsics/cshift0.c (working copy)
@@ -33,48 +33,6 @@ Boston, MA 02110-1301, USA. */
#include <assert.h>
#include <string.h>
-
-/* "Templatized" helper function for the inner shift loop. */
-
-#define DEF_COPY_LOOP(NAME, TYPE) \
-static inline void \
-copy_loop_##NAME (void *xdest, const void *xsrc, \
- size_t roff, size_t soff, \
- index_type len, index_type shift) \
-{ \
- TYPE *dest = xdest; \
- const TYPE *src; \
- index_type i; \
- \
- roff /= sizeof (TYPE); \
- soff /= sizeof (TYPE); \
- \
- src = xsrc; \
- src += shift * soff; \
- for (i = 0; i < len - shift; ++i) \
- { \
- *dest = *src; \
- dest += roff; \
- src += soff; \
- } \
- \
- src = xsrc; \
- for (i = 0; i < shift; ++i) \
- { \
- *dest = *src; \
- dest += roff; \
- src += soff; \
- } \
-}
-
-DEF_COPY_LOOP(int, int)
-DEF_COPY_LOOP(long, long)
-DEF_COPY_LOOP(double, double)
-DEF_COPY_LOOP(ldouble, long double)
-DEF_COPY_LOOP(cfloat, _Complex float)
-DEF_COPY_LOOP(cdouble, _Complex double)
-
-
static void
cshift0 (gfc_array_char * ret, const gfc_array_char * array,
ssize_t shift, int which, index_type size)
@@ -96,9 +54,10 @@ cshift0 (gfc_array_char * ret, const gfc
index_type dim;
index_type len;
index_type n;
- int whichloop;
index_type arraysize;
+ index_type type_size;
+
if (which < 1 || which > GFC_DESCRIPTOR_RANK (array))
runtime_error ("Argument 'DIM' is out of range in call to 'CSHIFT'");
@@ -133,43 +92,188 @@ cshift0 (gfc_array_char * ret, const gfc
if (arraysize == 0)
return;
+ type_size = GFC_DTYPE_TYPE_SIZE (array);
- which = which - 1;
- sstride[0] = 0;
- rstride[0] = 0;
+ switch(type_size)
+ {
+ case GFC_DTYPE_LOGICAL_1:
+ case GFC_DTYPE_INTEGER_1:
+ case GFC_DTYPE_DERIVED_1:
+ cshift0_i1 ((gfc_array_i1 *)ret, (gfc_array_i1 *) array, shift, which);
+ return;
+
+ case GFC_DTYPE_LOGICAL_2:
+ case GFC_DTYPE_INTEGER_2:
+ cshift0_i2 ((gfc_array_i2 *)ret, (gfc_array_i2 *) array, shift, which);
+ return;
+
+ case GFC_DTYPE_LOGICAL_4:
+ case GFC_DTYPE_INTEGER_4:
+ cshift0_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array, shift, which);
+ return;
+
+ case GFC_DTYPE_LOGICAL_8:
+ case GFC_DTYPE_INTEGER_8:
+ cshift0_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array, shift, which);
+ return;
- extent[0] = 1;
- count[0] = 0;
- n = 0;
+#ifdef HAVE_GFC_INTEGER_16
+ case GFC_DTYPE_LOGICAL_16:
+ case GFC_DTYPE_INTEGER_16:
+ cshift0_i16 ((gfc_array_i8 *)ret, (gfc_array_i16 *) array, shift,
+ which);
+ return;
+#endif
- /* The values assigned here must match the cases in the inner loop. */
- whichloop = 0;
- switch (GFC_DESCRIPTOR_TYPE (array))
- {
- case GFC_DTYPE_LOGICAL:
- case GFC_DTYPE_INTEGER:
- case GFC_DTYPE_REAL:
- if (size == sizeof (int))
- whichloop = 1;
- else if (size == sizeof (long))
- whichloop = 2;
- else if (size == sizeof (double))
- whichloop = 3;
- else if (size == sizeof (long double))
- whichloop = 4;
+ case GFC_DTYPE_REAL_4:
+ cshift0_r4 ((gfc_array_r4 *)ret, (gfc_array_r4 *) array, shift, which);
+ return;
+
+ case GFC_DTYPE_REAL_8:
+ cshift0_r8 ((gfc_array_r8 *)ret, (gfc_array_r8 *) array, shift, which);
+ return;
+
+#ifdef HAVE_GFC_REAL_10
+ case GFC_DTYPE_REAL_10:
+ cshift0_r10 ((gfc_array_r10 *)ret, (gfc_array_r10 *) array, shift,
+ which);
+ return;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+ case GFC_DTYPE_REAL_16:
+ cshift0_r16 ((gfc_array_r16 *)ret, (gfc_array_r16 *) array, shift,
+ which);
+ return;
+#endif
+
+ case GFC_DTYPE_COMPLEX_4:
+ cshift0_c4 ((gfc_array_c4 *)ret, (gfc_array_c4 *) array, shift, which);
+ return;
+
+ case GFC_DTYPE_COMPLEX_8:
+ cshift0_c8 ((gfc_array_c8 *)ret, (gfc_array_c8 *) array, shift, which);
+ return;
+
+#ifdef HAVE_GFC_COMPLEX_10
+ case GFC_DTYPE_COMPLEX_10:
+ cshift0_c10 ((gfc_array_c10 *)ret, (gfc_array_c10 *) array, shift,
+ which);
+ return;
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+ case GFC_DTYPE_COMPLEX_16:
+ cshift0_c16 ((gfc_array_c16 *)ret, (gfc_array_c16 *) array, shift,
+ which);
+ return;
+#endif
+
+ default:
break;
+ }
- case GFC_DTYPE_COMPLEX:
- if (size == sizeof (_Complex float))
- whichloop = 5;
- else if (size == sizeof (_Complex double))
- whichloop = 6;
+ switch (size)
+ {
+ /* Let's check the actual alignment of the data pointers. If they
+ are suitably aligned, we can safely call the unpack functions. */
+
+ case sizeof (GFC_INTEGER_1):
+ cshift0_i1 ((gfc_array_i1 *) ret, (gfc_array_i1 *) array, shift,
+ which);
break;
+ case sizeof (GFC_INTEGER_2):
+ if (GFC_UNALIGNED_2(ret->data) || GFC_UNALIGNED_2(array->data))
+ break;
+ else
+ {
+ cshift0_i2 ((gfc_array_i2 *) ret, (gfc_array_i2 *) array, shift,
+ which);
+ return;
+ }
+
+ case sizeof (GFC_INTEGER_4):
+ if (GFC_UNALIGNED_4(ret->data) || GFC_UNALIGNED_4(array->data))
+ break;
+ else
+ {
+ cshift0_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array, shift,
+ which);
+ return;
+ }
+
+ case sizeof (GFC_INTEGER_8):
+ if (GFC_UNALIGNED_8(ret->data) || GFC_UNALIGNED_8(array->data))
+ {
+ /* Let's try to use the complex routines. First, a sanity
+ check that the sizes match; this should be optimized to
+ a no-op. */
+ if (sizeof(GFC_INTEGER_8) != sizeof(GFC_COMPLEX_4))
+ break;
+
+ if (GFC_UNALIGNED_C4(ret->data) || GFC_UNALIGNED_C4(array->data))
+ break;
+
+ cshift0_c4 ((gfc_array_c4 *) ret, (gfc_array_c4 *) array, shift,
+ which);
+ return;
+ }
+ else
+ {
+ cshift0_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array, shift,
+ which);
+ return;
+ }
+
+#ifdef HAVE_GFC_INTEGER_16
+ case sizeof (GFC_INTEGER_16):
+ if (GFC_UNALIGNED_16(ret->data) || GFC_UNALIGNED_16(array->data))
+ {
+ /* Let's try to use the complex routines. First, a sanity
+ check that the sizes match; this should be optimized to
+ a no-op. */
+ if (sizeof(GFC_INTGER_16) != sizeof(GFC_COMPLEX_8))
+ break;
+
+ if (GFC_UNALIGNED_C8(ret->data) || GFC_UNALIGNED_C8(array->data))
+ break;
+
+ cshift0_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) array, shift,
+ which);
+ return;
+ }
+ else
+ {
+ cshift0_i16 ((gfc_array_i16 *) ret, (gfc_array_i16 *) array,
+ shift, which);
+ return;
+ }
+#else
+ case sizeof (GFC_COMPLEX_8):
+
+ if (GFC_UNALIGNED_C8(ret->data) || GFC_UNALIGNED_C8(array->data))
+ break;
+ else
+ {
+ cshift0_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) array, shift,
+ which);
+ return;
+ }
+#endif
+
default:
break;
}
+
+ which = which - 1;
+ sstride[0] = 0;
+ rstride[0] = 0;
+
+ extent[0] = 1;
+ count[0] = 0;
+ n = 0;
/* Initialized for avoiding compiler warnings. */
roffset = size;
soffset = size;
@@ -227,56 +331,21 @@ cshift0 (gfc_array_char * ret, const gfc
else
{
/* Otherwise, we'll have to perform the copy one element at
- a time. We can speed this up a tad for common cases of
- fundamental types. */
- switch (whichloop)
- {
- case 0:
- {
- char *dest = rptr;
- const char *src = &sptr[shift * soffset];
-
- for (n = 0; n < len - shift; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
- for (src = sptr, n = 0; n < shift; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
- }
- break;
-
- case 1:
- copy_loop_int (rptr, sptr, roffset, soffset, len, shift);
- break;
-
- case 2:
- copy_loop_long (rptr, sptr, roffset, soffset, len, shift);
- break;
-
- case 3:
- copy_loop_double (rptr, sptr, roffset, soffset, len, shift);
- break;
-
- case 4:
- copy_loop_ldouble (rptr, sptr, roffset, soffset, len, shift);
- break;
-
- case 5:
- copy_loop_cfloat (rptr, sptr, roffset, soffset, len, shift);
- break;
-
- case 6:
- copy_loop_cdouble (rptr, sptr, roffset, soffset, len, shift);
- break;
+ a time. */
+ char *dest = rptr;
+ const char *src = &sptr[shift * soffset];
- default:
- abort ();
+ for (n = 0; n < len - shift; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ for (src = sptr, n = 0; n < shift; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
}
}