This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Speedup cshift0 for the case in facerec
- From: Richard Henderson <rth at redhat dot com>
- To: Paul Brook <paul at codesourcery dot com>
- Cc: fortran at gcc dot gnu dot org, Andrew Pinski <apinski at apple dot com>, gcc Patches <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 3 Sep 2004 12:05:53 -0700
- Subject: Re: [PATCH] Speedup cshift0 for the case in facerec
- References: <0B56F914-EE5A-11D8-9325-00039351ED8A@apple.com> <20040903063700.GD20559@redhat.com> <200409031427.07535.paul@codesourcery.com>
On Fri, Sep 03, 2004 at 02:27:07PM +0100, Paul Brook wrote:
> Yes, that looks better.
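I've checked in this version, which, unlike the previous one, has
actually been tested. Aside from fixing buglets, it contains
one additional improvement -- unswitching the copy loop with respect
to the wrap of sptr, i.e. splitting it into one loop that runs up to
the wrap point and a second loop that restarts from sptr.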
r~
* intrinsics/cshift0.c (DEF_COPY_LOOP, copy_loop_int, copy_loop_long,
copy_loop_double, copy_loop_ldouble): New.
(__cshift0): Make shift type ssize_t. Use % operator instead of
div. Use specialized versions of copy loop depending on the shape.
Index: intrinsics/cshift0.c
===================================================================
RCS file: /cvs/gcc/gcc/libgfortran/intrinsics/cshift0.c,v
retrieving revision 1.3
diff -c -p -d -r1.3 cshift0.c
*** intrinsics/cshift0.c 4 Aug 2004 14:30:46 -0000 1.3
--- intrinsics/cshift0.c 3 Sep 2004 19:00:52 -0000
*************** Boston, MA 02111-1307, USA. */
*** 25,49 ****
#include <string.h>
#include "libgfortran.h"
! /* TODO: make this work for large shifts when
! sizeof(int) < sizeof (index_type). */
static void
__cshift0 (gfc_array_char * ret, const gfc_array_char * array,
! int shift, int which)
{
/* r.* indicates the return array. */
index_type rstride[GFC_MAX_DIMENSIONS - 1];
index_type rstride0;
index_type roffset;
char *rptr;
! char *dest;
/* s.* indicates the source array. */
index_type sstride[GFC_MAX_DIMENSIONS - 1];
index_type sstride0;
index_type soffset;
const char *sptr;
- const char *src;
index_type count[GFC_MAX_DIMENSIONS - 1];
index_type extent[GFC_MAX_DIMENSIONS - 1];
--- 25,85 ----
#include <string.h>
#include "libgfortran.h"
!
! /* "Templatized" helper function for the inner shift loop. */
!
! #define DEF_COPY_LOOP(NAME, TYPE) \
! static inline void \
! copy_loop_##NAME (void *xdest, const void *xsrc, \
! size_t roff, size_t soff, \
! index_type len, index_type shift) \
! { \
! TYPE *dest = xdest; \
! const TYPE *src; \
! index_type i; \
! \
! roff /= sizeof (TYPE); \
! soff /= sizeof (TYPE); \
! \
! src = xsrc; \
! src += shift * soff; \
! for (i = 0; i < len - shift; ++i) \
! { \
! *dest = *src; \
! dest += roff; \
! src += soff; \
! } \
! \
! src = xsrc; \
! for (i = 0; i < shift; ++i) \
! { \
! *dest = *src; \
! dest += roff; \
! src += soff; \
! } \
! }
!
! DEF_COPY_LOOP(int, int)
! DEF_COPY_LOOP(long, long)
! DEF_COPY_LOOP(double, double)
! DEF_COPY_LOOP(ldouble, long double)
!
static void
__cshift0 (gfc_array_char * ret, const gfc_array_char * array,
! ssize_t shift, int which)
{
/* r.* indicates the return array. */
index_type rstride[GFC_MAX_DIMENSIONS - 1];
index_type rstride0;
index_type roffset;
char *rptr;
!
/* s.* indicates the source array. */
index_type sstride[GFC_MAX_DIMENSIONS - 1];
index_type sstride0;
index_type soffset;
const char *sptr;
index_type count[GFC_MAX_DIMENSIONS - 1];
index_type extent[GFC_MAX_DIMENSIONS - 1];
*************** __cshift0 (gfc_array_char * ret, const g
*** 64,70 ****
size = GFC_DESCRIPTOR_SIZE (array);
n = 0;
! /* Initialized for avoiding compiler warnings. */
roffset = size;
soffset = size;
len = 0;
--- 100,106 ----
size = GFC_DESCRIPTOR_SIZE (array);
n = 0;
! /* Initialized for avoiding compiler warnings. */
roffset = size;
soffset = size;
len = 0;
*************** __cshift0 (gfc_array_char * ret, const g
*** 120,143 ****
rptr = ret->data;
sptr = array->data;
! shift = (div (shift, len)).rem;
if (shift < 0)
shift += len;
while (rptr)
{
/* Do the shift for this dimension. */
! src = &sptr[shift * soffset];
! dest = rptr;
! for (n = 0; n < len; n++)
! {
! memcpy (dest, src, size);
! dest += roffset;
! if (n == len - shift - 1)
! src = sptr;
! else
! src += soffset;
! }
/* Advance to the next section. */
rptr += rstride0;
--- 156,210 ----
rptr = ret->data;
sptr = array->data;
! shift = shift % (ssize_t)len;
if (shift < 0)
shift += len;
while (rptr)
{
/* Do the shift for this dimension. */
!
! /* If elements are contiguous, perform the operation
! in two block moves. */
! if (soffset == size && roffset == size)
! {
! size_t len1 = shift * size;
! size_t len2 = (len - shift) * size;
! memcpy (rptr, sptr + len1, len2);
! memcpy (rptr + len2, sptr, len1);
! }
! else
! {
! /* Otherwise, we'll have to perform the copy one element at
! a time. We can speed this up a tad for common cases of
! fundamental types. */
! if (size == sizeof(int))
! copy_loop_int (rptr, sptr, roffset, soffset, len, shift);
! else if (size == sizeof(long))
! copy_loop_long (rptr, sptr, roffset, soffset, len, shift);
! else if (size == sizeof(double))
! copy_loop_double (rptr, sptr, roffset, soffset, len, shift);
! else if (size == sizeof(long double))
! copy_loop_ldouble (rptr, sptr, roffset, soffset, len, shift);
! else
! {
! char *dest = rptr;
! const char *src = &sptr[shift * soffset];
!
! for (n = 0; n < len - shift; n++)
! {
! memcpy (dest, src, size);
! dest += roffset;
! src += soffset;
! }
! for (src = sptr, n = 0; n < shift; n++)
! {
! memcpy (dest, src, size);
! dest += roffset;
! src += soffset;
! }
! }
! }
/* Advance to the next section. */
rptr += rstride0;
*************** __cshift0_8 (gfc_array_char * ret, const
*** 185,188 ****
{
__cshift0 (ret, array, *pshift, pdim ? *pdim : 1);
}
-
--- 252,254 ----