This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Corrected patch for PR 36886


Here's a corrected patch; I had forgotten the changes to libgfortran.h.

	Thomas

Index: libgfortran.h
===================================================================
--- libgfortran.h	(revision 138904)
+++ libgfortran.h	(working copy)
@@ -437,6 +437,12 @@ typedef GFC_ARRAY_DESCRIPTOR (GFC_MAX_DI
 			     (__alignof__(GFC_INTEGER_16) - 1))
 #endif
 
+#define GFC_UNALIGNED_C4(x) (((uintptr_t)(x)) & \
+			     (__alignof__(GFC_COMPLEX_4) - 1))
+
+#define GFC_UNALIGNED_C8(x) (((uintptr_t)(x)) & \
+			     (__alignof__(GFC_COMPLEX_8) - 1))
+
 /* Runtime library include.  */
 #define stringize(x) expand_macro(x)
 #define expand_macro(x) # x
@@ -1210,4 +1216,55 @@ typedef GFC_ARRAY_DESCRIPTOR (GFC_MAX_DI
 extern index_type size0 (const array_t * array); 
 iexport_proto(size0);
 
+/* Internal auxiliary functions for cshift.  */
+
+void cshift0_i1 (gfc_array_i1 *, const gfc_array_i1 *, ssize_t, int);
+internal_proto(cshift0_i1);
+
+void cshift0_i2 (gfc_array_i2 *, const gfc_array_i2 *, ssize_t, int);
+internal_proto(cshift0_i2);
+
+void cshift0_i4 (gfc_array_i4 *, const gfc_array_i4 *, ssize_t, int);
+internal_proto(cshift0_i4);
+
+void cshift0_i8 (gfc_array_i8 *, const gfc_array_i8 *, ssize_t, int);
+internal_proto(cshift0_i8);
+
+#ifdef HAVE_GFC_INTEGER_16
+void cshift0_i16 (gfc_array_i16 *, const gfc_array_i16 *, ssize_t, int);
+internal_proto(cshift0_i16);
+#endif
+
+void cshift0_r4 (gfc_array_r4 *, const gfc_array_r4 *, ssize_t, int);
+internal_proto(cshift0_r4);
+
+void cshift0_r8 (gfc_array_r8 *, const gfc_array_r8 *, ssize_t, int);
+internal_proto(cshift0_r8);
+
+#ifdef HAVE_GFC_REAL_10
+void cshift0_r10 (gfc_array_r10 *, const gfc_array_r10 *, ssize_t, int);
+internal_proto(cshift0_r10);
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+void cshift0_r16 (gfc_array_r16 *, const gfc_array_r16 *, ssize_t, int);
+internal_proto(cshift0_r16);
+#endif
+
+void cshift0_c4 (gfc_array_c4 *, const gfc_array_c4 *, ssize_t, int);
+internal_proto(cshift0_c4);
+
+void cshift0_c8 (gfc_array_c8 *, const gfc_array_c8 *, ssize_t, int);
+internal_proto(cshift0_c8);
+
+#ifdef HAVE_GFC_COMPLEX_10
+void cshift0_c10 (gfc_array_c10 *, const gfc_array_c10 *, ssize_t, int);
+internal_proto(cshift0_c10);
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+void cshift0_c16 (gfc_array_c16 *, const gfc_array_c16 *, ssize_t, int);
+internal_proto(cshift0_c16);
+#endif
+
 #endif  /* LIBGFOR_H  */
Index: Makefile.am
===================================================================
--- Makefile.am	(revision 138904)
+++ Makefile.am	(working copy)
@@ -379,6 +379,22 @@ $(srcdir)/generated/eoshift3_4.c \
 $(srcdir)/generated/eoshift3_8.c \
 $(srcdir)/generated/eoshift3_16.c
 
+i_cshift0_c= \
+$(srcdir)/generated/cshift0_i1.c \
+$(srcdir)/generated/cshift0_i2.c \
+$(srcdir)/generated/cshift0_i4.c \
+$(srcdir)/generated/cshift0_i8.c \
+$(srcdir)/generated/cshift0_i16.c \
+$(srcdir)/generated/cshift0_r4.c \
+$(srcdir)/generated/cshift0_r8.c \
+$(srcdir)/generated/cshift0_r10.c \
+$(srcdir)/generated/cshift0_r16.c \
+$(srcdir)/generated/cshift0_c4.c \
+$(srcdir)/generated/cshift0_c8.c \
+$(srcdir)/generated/cshift0_c10.c \
+$(srcdir)/generated/cshift0_c16.c
+ 
+
 i_cshift1_c= \
 $(srcdir)/generated/cshift1_4.c \
 $(srcdir)/generated/cshift1_8.c \
@@ -545,7 +561,7 @@ gfor_built_src= $(i_all_c) $(i_any_c) $(
     $(i_exponent_c) $(i_fraction_c) $(i_nearest_c) $(i_set_exponent_c) \
     $(i_pow_c) $(i_rrspacing_c) $(i_spacing_c) $(i_pack_c) $(i_unpack_c) \
     $(i_spread_c) selected_int_kind.inc selected_real_kind.inc kinds.h \
-    kinds.inc c99_protos.inc fpu-target.h
+    $(i_cshift0_c) kinds.inc c99_protos.inc fpu-target.h
 
 # Machine generated specifics
 gfor_built_specific_src= \
@@ -829,6 +845,9 @@ $(i_eoshift1_c): m4/eoshift1.m4 $(I_M4_D
 $(i_eoshift3_c): m4/eoshift3.m4 $(I_M4_DEPS)
 	$(M4) -Dfile=$@ -I$(srcdir)/m4 eoshift3.m4 > $@
 
+$(i_cshift0_c): m4/cshift0.m4 $(I_M4_DEPS)
+	$(M4) -Dfile=$@ -I$(srcdir)/m4 cshift0.m4 > $@
+
 $(i_cshift1_c): m4/cshift1.m4 $(I_M4_DEPS)
 	$(M4) -Dfile=$@ -I$(srcdir)/m4 cshift1.m4 > $@
 
Index: intrinsics/cshift0.c
===================================================================
--- intrinsics/cshift0.c	(revision 138904)
+++ intrinsics/cshift0.c	(working copy)
@@ -33,48 +33,6 @@ Boston, MA 02110-1301, USA.  */
 #include <assert.h>
 #include <string.h>
 
-
-/* "Templatized" helper function for the inner shift loop.  */
-
-#define DEF_COPY_LOOP(NAME, TYPE)					\
-static inline void							\
-copy_loop_##NAME (void *xdest, const void *xsrc,			\
-		  size_t roff, size_t soff,				\
-		  index_type len, index_type shift)			\
-{									\
-  TYPE *dest = xdest;							\
-  const TYPE *src;							\
-  index_type i;								\
-									\
-  roff /= sizeof (TYPE);						\
-  soff /= sizeof (TYPE);						\
-									\
-  src = xsrc;								\
-  src += shift * soff;							\
-  for (i = 0; i < len - shift; ++i)					\
-    {									\
-      *dest = *src;							\
-      dest += roff;							\
-      src += soff;							\
-    }									\
-									\
-  src = xsrc;								\
-  for (i = 0; i < shift; ++i)						\
-    {									\
-      *dest = *src;							\
-      dest += roff;							\
-      src += soff;							\
-    }									\
-}
-
-DEF_COPY_LOOP(int, int)
-DEF_COPY_LOOP(long, long)
-DEF_COPY_LOOP(double, double)
-DEF_COPY_LOOP(ldouble, long double)
-DEF_COPY_LOOP(cfloat, _Complex float)
-DEF_COPY_LOOP(cdouble, _Complex double)
-
-
 static void
 cshift0 (gfc_array_char * ret, const gfc_array_char * array,
 	 ssize_t shift, int which, index_type size)
@@ -96,9 +54,10 @@ cshift0 (gfc_array_char * ret, const gfc
   index_type dim;
   index_type len;
   index_type n;
-  int whichloop;
   index_type arraysize;
 
+  index_type type_size;
+
   if (which < 1 || which > GFC_DESCRIPTOR_RANK (array))
     runtime_error ("Argument 'DIM' is out of range in call to 'CSHIFT'");
 
@@ -133,43 +92,188 @@ cshift0 (gfc_array_char * ret, const gfc
   
   if (arraysize == 0)
     return;
+  type_size = GFC_DTYPE_TYPE_SIZE (array);
 
-  which = which - 1;
-  sstride[0] = 0;
-  rstride[0] = 0;
+  switch(type_size)
+    {
+    case GFC_DTYPE_LOGICAL_1:
+    case GFC_DTYPE_INTEGER_1:
+    case GFC_DTYPE_DERIVED_1:
+      cshift0_i1 ((gfc_array_i1 *)ret, (gfc_array_i1 *) array, shift, which);
+      return;
+
+    case GFC_DTYPE_LOGICAL_2:
+    case GFC_DTYPE_INTEGER_2:
+      cshift0_i2 ((gfc_array_i2 *)ret, (gfc_array_i2 *) array, shift, which);
+      return;
+
+    case GFC_DTYPE_LOGICAL_4:
+    case GFC_DTYPE_INTEGER_4:
+      cshift0_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array, shift, which);
+      return;
+
+    case GFC_DTYPE_LOGICAL_8:
+    case GFC_DTYPE_INTEGER_8:
+      cshift0_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array, shift, which);
+      return;
 
-  extent[0] = 1;
-  count[0] = 0;
-  n = 0;
+#ifdef HAVE_GFC_INTEGER_16
+    case GFC_DTYPE_LOGICAL_16:
+    case GFC_DTYPE_INTEGER_16:
+      cshift0_i16 ((gfc_array_i16 *)ret, (gfc_array_i16 *) array, shift,
+		   which);
+      return;
+#endif
 
-  /* The values assigned here must match the cases in the inner loop.  */
-  whichloop = 0;
-  switch (GFC_DESCRIPTOR_TYPE (array))
-    {
-    case GFC_DTYPE_LOGICAL:
-    case GFC_DTYPE_INTEGER:
-    case GFC_DTYPE_REAL:
-      if (size == sizeof (int))
-	whichloop = 1;
-      else if (size == sizeof (long))
-	whichloop = 2;
-      else if (size == sizeof (double))
-	whichloop = 3;
-      else if (size == sizeof (long double))
-	whichloop = 4;
+    case GFC_DTYPE_REAL_4:
+      cshift0_r4 ((gfc_array_r4 *)ret, (gfc_array_r4 *) array, shift, which);
+      return;
+
+    case GFC_DTYPE_REAL_8:
+      cshift0_r8 ((gfc_array_r8 *)ret, (gfc_array_r8 *) array, shift, which);
+      return;
+
+#ifdef HAVE_GFC_REAL_10
+    case GFC_DTYPE_REAL_10:
+      cshift0_r10 ((gfc_array_r10 *)ret, (gfc_array_r10 *) array, shift,
+		   which);
+      return;
+#endif
+
+#ifdef HAVE_GFC_REAL_16
+    case GFC_DTYPE_REAL_16:
+      cshift0_r16 ((gfc_array_r16 *)ret, (gfc_array_r16 *) array, shift,
+		   which);
+      return;
+#endif
+
+    case GFC_DTYPE_COMPLEX_4:
+      cshift0_c4 ((gfc_array_c4 *)ret, (gfc_array_c4 *) array, shift, which);
+      return;
+
+    case GFC_DTYPE_COMPLEX_8:
+      cshift0_c8 ((gfc_array_c8 *)ret, (gfc_array_c8 *) array, shift, which);
+      return;
+
+#ifdef HAVE_GFC_COMPLEX_10
+    case GFC_DTYPE_COMPLEX_10:
+      cshift0_c10 ((gfc_array_c10 *)ret, (gfc_array_c10 *) array, shift,
+		   which);
+      return;
+#endif
+
+#ifdef HAVE_GFC_COMPLEX_16
+    case GFC_DTYPE_COMPLEX_16:
+      cshift0_c16 ((gfc_array_c16 *)ret, (gfc_array_c16 *) array, shift,
+		   which);
+      return;
+#endif
+
+    default:
       break;
+    }
 
-    case GFC_DTYPE_COMPLEX:
-      if (size == sizeof (_Complex float))
-	whichloop = 5;
-      else if (size == sizeof (_Complex double))
-	whichloop = 6;
+  switch (size)
+    {
+      /* Let's check the actual alignment of the data pointers.  If they
+	 are suitably aligned, we can safely call the cshift0_* functions.  */
+
+    case sizeof (GFC_INTEGER_1):
+      cshift0_i1 ((gfc_array_i1 *) ret, (gfc_array_i1 *) array, shift,
+		  which);
-      break;
+      return;
 
+    case sizeof (GFC_INTEGER_2):
+      if (GFC_UNALIGNED_2(ret->data) || GFC_UNALIGNED_2(array->data))
+	break;
+      else
+	{
+	  cshift0_i2 ((gfc_array_i2 *) ret, (gfc_array_i2 *) array, shift,
+		      which);
+	  return;
+	}
+
+    case sizeof (GFC_INTEGER_4):
+      if (GFC_UNALIGNED_4(ret->data) || GFC_UNALIGNED_4(array->data))
+	break;
+      else
+	{
+	  cshift0_i4 ((gfc_array_i4 *)ret, (gfc_array_i4 *) array, shift,
+		      which);
+	  return;
+	}
+
+    case sizeof (GFC_INTEGER_8):
+      if (GFC_UNALIGNED_8(ret->data) || GFC_UNALIGNED_8(array->data))
+	{
+	  /* Let's try to use the complex routines.  First, a sanity
+	     check that the sizes match; this should be optimized to
+	     a no-op.  */
+	  if (sizeof(GFC_INTEGER_8) != sizeof(GFC_COMPLEX_4))
+	    break;
+
+	  if (GFC_UNALIGNED_C4(ret->data) || GFC_UNALIGNED_C4(array->data))
+	    break;
+
+	  cshift0_c4 ((gfc_array_c4 *) ret, (gfc_array_c4 *) array, shift,
+		      which);
+	      return;
+	}
+      else
+	{
+	  cshift0_i8 ((gfc_array_i8 *)ret, (gfc_array_i8 *) array, shift,
+		      which);
+	  return;
+	}
+
+#ifdef HAVE_GFC_INTEGER_16
+    case sizeof (GFC_INTEGER_16):
+      if (GFC_UNALIGNED_16(ret->data) || GFC_UNALIGNED_16(array->data))
+	{
+	  /* Not aligned for GFC_INTEGER_16.  Try the GFC_COMPLEX_8 routine
+	     instead, provided the sizes match (a compile-time constant
+	     test) and the data is suitably aligned for that type.  */
+	  if (sizeof(GFC_INTEGER_16) != sizeof(GFC_COMPLEX_8))
+	    break;
+
+	  if (GFC_UNALIGNED_C8(ret->data) || GFC_UNALIGNED_C8(array->data))
+	    break;
+
+	  cshift0_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) array, shift,
+		      which);
+	  return;
+	}
+      else
+	{
+	  cshift0_i16 ((gfc_array_i16 *) ret, (gfc_array_i16 *) array,
+		       shift, which);
+	  return;
+	}
+#else
+    case sizeof (GFC_COMPLEX_8):
+
+      if (GFC_UNALIGNED_C8(ret->data) || GFC_UNALIGNED_C8(array->data))
+	break;
+      else
+	{
+	  cshift0_c8 ((gfc_array_c8 *) ret, (gfc_array_c8 *) array, shift,
+		      which);
+	  return;
+	}
+#endif
+
     default:
       break;
     }
 
+
+  which = which - 1;
+  sstride[0] = 0;
+  rstride[0] = 0;
+
+  extent[0] = 1;
+  count[0] = 0;
+  n = 0;
   /* Initialized for avoiding compiler warnings.  */
   roffset = size;
   soffset = size;
@@ -227,56 +331,21 @@ cshift0 (gfc_array_char * ret, const gfc
       else
 	{
 	  /* Otherwise, we'll have to perform the copy one element at
-	     a time.  We can speed this up a tad for common cases of 
-	     fundamental types.  */
-	  switch (whichloop)
-	    {
-	    case 0:
-	      {
-		char *dest = rptr;
-		const char *src = &sptr[shift * soffset];
-
-		for (n = 0; n < len - shift; n++)
-		  {
-		    memcpy (dest, src, size);
-		    dest += roffset;
-		    src += soffset;
-		  }
-		for (src = sptr, n = 0; n < shift; n++)
-		  {
-		    memcpy (dest, src, size);
-		    dest += roffset;
-		    src += soffset;
-		  }
-	      }
-	      break;
-
-	    case 1:
-	      copy_loop_int (rptr, sptr, roffset, soffset, len, shift);
-	      break;
-
-	    case 2:
-	      copy_loop_long (rptr, sptr, roffset, soffset, len, shift);
-	      break;
-
-	    case 3:
-	      copy_loop_double (rptr, sptr, roffset, soffset, len, shift);
-	      break;
-
-	    case 4:
-	      copy_loop_ldouble (rptr, sptr, roffset, soffset, len, shift);
-	      break;
-
-	    case 5:
-	      copy_loop_cfloat (rptr, sptr, roffset, soffset, len, shift);
-	      break;
-	      
-	    case 6:
-	      copy_loop_cdouble (rptr, sptr, roffset, soffset, len, shift);
-	      break;
+	     a time.  */
+	  char *dest = rptr;
+	  const char *src = &sptr[shift * soffset];
 
-	    default:
-	      abort ();
+	  for (n = 0; n < len - shift; n++)
+	    {
+	      memcpy (dest, src, size);
+	      dest += roffset;
+	      src += soffset;
+	    }
+	  for (src = sptr, n = 0; n < shift; n++)
+	    {
+	      memcpy (dest, src, size);
+	      dest += roffset;
+	      src += soffset;
 	    }
 	}
 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]