This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[patch, libgfortran] Use memcpy in a few more places for eoshift
- From: Thomas Koenig <tkoenig at netcologne dot de>
- To: "fortran at gcc dot gnu dot org" <fortran at gcc dot gnu dot org>, gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 4 Jul 2017 00:06:30 +0200
- Subject: [patch, libgfortran] Use memcpy in a few more places for eoshift
- Authentication-results: sourceware.org; auth=none
Hello world,
attached are a few more speedups for special eoshift cases. This
time, nothing fancy, just use memcpy for copying in the
contiguous case.
I am still looking at eoshift2 (scalar shift, array boundary)
to see if it would be possible to duplicate the speed gains for
eoshift0 (scalar shift, scalar boundary), but it won't hurt
to do this first. At least the shift along dimension 1
should be faster by about a factor of two.
I have also added a few test cases which test eoshift in all
the variants touched by this patch.
Regression-testing as I write this. I don't expect anything bad
(because I tested all test cases containing *eoshift*).
OK for trunk if this passes?
Regards
Thomas
2017-06-03 Thomas Koenig <tkoenig@gcc.gnu.org>
* intrinsics/eoshift2.c (eoshift2): Use memcpy
for innermost copy where possible.
* m4/eoshift1.m4 (eoshift1): Likewise.
* m4/eoshift3.m4 (eoshift3): Likewise.
* generated/eoshift1_16.c: Regenerated.
* generated/eoshift1_4.c: Regenerated.
* generated/eoshift1_8.c: Regenerated.
* generated/eoshift3_16.c: Regenerated.
* generated/eoshift3_4.c: Regenerated.
* generated/eoshift3_8.c: Regenerated.
2017-06-03 Thomas Koenig <tkoenig@gcc.gnu.org>
* gfortran.dg/eoshift_4.f90: New test.
* gfortran.dg/eoshift_5.f90: New test.
* gfortran.dg/eoshift_6.f90: New test.
Index: intrinsics/eoshift2.c
===================================================================
--- intrinsics/eoshift2.c (Revision 249936)
+++ intrinsics/eoshift2.c (Arbeitskopie)
@@ -181,12 +181,23 @@ eoshift2 (gfc_array_char *ret, const gfc_array_cha
src = sptr;
dest = &rptr[-shift * roffset];
}
- for (n = 0; n < len; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * len;
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
if (shift >= 0)
{
n = shift;
Index: m4/eoshift1.m4
===================================================================
--- m4/eoshift1.m4 (Revision 249936)
+++ m4/eoshift1.m4 (Arbeitskopie)
@@ -184,12 +184,23 @@ eoshift1 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
if (sh < 0)
dest = rptr;
n = delta;
Index: m4/eoshift3.m4
===================================================================
--- m4/eoshift3.m4 (Revision 249936)
+++ m4/eoshift3.m4 (Arbeitskopie)
@@ -199,12 +199,24 @@ eoshift3 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
+
if (sh < 0)
dest = rptr;
n = delta;
Index: generated/eoshift1_16.c
===================================================================
--- generated/eoshift1_16.c (Revision 249936)
+++ generated/eoshift1_16.c (Arbeitskopie)
@@ -183,12 +183,23 @@ eoshift1 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
if (sh < 0)
dest = rptr;
n = delta;
Index: generated/eoshift1_4.c
===================================================================
--- generated/eoshift1_4.c (Revision 249936)
+++ generated/eoshift1_4.c (Arbeitskopie)
@@ -183,12 +183,23 @@ eoshift1 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
if (sh < 0)
dest = rptr;
n = delta;
Index: generated/eoshift1_8.c
===================================================================
--- generated/eoshift1_8.c (Revision 249936)
+++ generated/eoshift1_8.c (Arbeitskopie)
@@ -183,12 +183,23 @@ eoshift1 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
if (sh < 0)
dest = rptr;
n = delta;
Index: generated/eoshift3_16.c
===================================================================
--- generated/eoshift3_16.c (Revision 249936)
+++ generated/eoshift3_16.c (Arbeitskopie)
@@ -198,12 +198,24 @@ eoshift3 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
+
if (sh < 0)
dest = rptr;
n = delta;
Index: generated/eoshift3_4.c
===================================================================
--- generated/eoshift3_4.c (Revision 249936)
+++ generated/eoshift3_4.c (Arbeitskopie)
@@ -198,12 +198,24 @@ eoshift3 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
+
if (sh < 0)
dest = rptr;
n = delta;
Index: generated/eoshift3_8.c
===================================================================
--- generated/eoshift3_8.c (Revision 249936)
+++ generated/eoshift3_8.c (Arbeitskopie)
@@ -198,12 +198,24 @@ eoshift3 (gfc_array_char * const restrict ret,
src = sptr;
dest = &rptr[delta * roffset];
}
- for (n = 0; n < len - delta; n++)
- {
- memcpy (dest, src, size);
- dest += roffset;
- src += soffset;
- }
+
+ /* If the elements are contiguous, perform a single block move. */
+ if (soffset == size && roffset == size)
+ {
+ size_t chunk = size * (len - delta);
+ memcpy (dest, src, chunk);
+ dest += chunk;
+ }
+ else
+ {
+ for (n = 0; n < len - delta; n++)
+ {
+ memcpy (dest, src, size);
+ dest += roffset;
+ src += soffset;
+ }
+ }
+
if (sh < 0)
dest = rptr;
n = delta;
! { dg-do run }
! Check that eoshift works for three-dimensional arrays.
module x
implicit none
contains
subroutine eoshift_2 (array, shift, boundary, dim, res)
real, dimension(:,:,:), intent(in) :: array
real, dimension(:,:,:), intent(out) :: res
integer, value :: shift
real, optional, dimension(:,:), intent(in) :: boundary
integer, optional, intent(in) :: dim
integer :: s1, s2, s3
integer :: n1, n2, n3
real :: b
integer :: d
if (present(dim)) then
d = dim
else
d = 1
end if
n1 = size(array,1)
n2 = size(array,2)
n3 = size(array,3)
select case(dim)
case(1)
if (shift > 0) then
shift = min(shift, n1)
do s3=1,n3
do s2=1,n2
b = boundary(s2,s3)
do s1= 1, n1 - shift
res(s1,s2,s3) = array(s1+shift,s2,s3)
end do
do s1 = n1 - shift + 1,n1
res(s1,s2,s3) = b
end do
end do
end do
else
shift = max(shift, -n1)
do s3=1,n3
do s2=1,n2
b = boundary(s2,s3)
do s1=1,-shift
res(s1,s2,s3) = b
end do
do s1= 1-shift,n1
res(s1,s2,s3) = array(s1+shift,s2,s3)
end do
end do
end do
end if
case(2)
if (shift > 0) then
shift = min(shift, n2)
do s3=1,n3
do s2=1, n2 - shift
do s1=1,n1
res(s1,s2,s3) = array(s1,s2+shift,s3)
end do
end do
do s2=n2 - shift + 1, n2
do s1=1,n1
b = boundary(s1,s3)
res(s1,s2,s3) = b
end do
end do
end do
else
shift = max(shift, -n2)
do s3=1,n3
do s2=1,-shift
do s1=1,n1
b = boundary(s1,s3)
res(s1,s2,s3) = b
end do
end do
do s2=1-shift,n2
do s1=1,n1
res(s1,s2,s3) = array(s1,s2+shift,s3)
end do
end do
end do
end if
case(3)
if (shift > 0) then
shift = min(shift, n3)
do s3=1,n3 - shift
do s2=1, n2
do s1=1,n1
res(s1,s2,s3) = array(s1,s2,s3+shift)
end do
end do
end do
do s3=n3 - shift + 1, n3
do s2=1, n2
do s1=1,n1
b = boundary(s1,s2)
res(s1,s2,s3) = b
end do
end do
end do
else
shift = max(shift, -n3)
do s3=1,-shift
do s2=1,n2
do s1=1,n1
b = boundary(s1,s2)
res(s1,s2,s3) = b
end do
end do
end do
do s3=1-shift,n3
do s2=1,n2
do s1=1,n1
res(s1,s2,s3) = array(s1,s2,s3+shift)
end do
end do
end do
end if
case default
stop "Illegal dim"
end select
end subroutine eoshift_2
end module x
program main
use x
implicit none
integer, parameter :: n1=20,n2=30,n3=40
real, dimension(n1,n2,n3) :: a,b,c
real, dimension(2*n1,n2,n3) :: a2,c2
integer :: dim, shift, shift_lim
real, dimension(n2,n3), target :: b1
real, dimension(n1,n3), target :: b2
real, dimension(n1,n2), target :: b3
real, dimension(:,:), pointer :: bp
call random_number(a)
call random_number (b1)
call random_number (b2)
call random_number (b3)
do dim=1,3
if (dim == 1) then
shift_lim = n1 + 1
bp => b1
else if (dim == 2) then
shift_lim = n2 + 1
bp => b2
else
shift_lim = n3 + 1
bp => b3
end if
do shift=-shift_lim, shift_lim
b = eoshift(a,shift,dim=dim, boundary=bp)
call eoshift_2 (a, shift=shift, dim=dim, boundary=bp, res=c)
if (any (b /= c)) then
print *,"dim = ", dim, "shift = ", shift
print *,b
print *,c
call abort
end if
a2 = 42.
a2(1:2*n1:2,:,:) = a
b = eoshift(a2(1:2*n1:2,:,:), shift, dim=dim, boundary=bp)
if (any (b /= c)) then
call abort
end if
c2 = 43.
c2(1:2*n1:2,:,:) = eoshift(a,shift,dim=dim, boundary=bp)
if (any(c2(1:2*n1:2,:,:) /= c)) then
call abort
end if
if (any(c2(2:2*n1:2,:,:) /= 43)) then
call abort
end if
end do
end do
end program main
! { dg-do run }
! Check that eoshift works for three-dimensional arrays.
module x
implicit none
contains
subroutine eoshift_1 (array, shift, boundary, dim, res)
real, dimension(:,:,:), intent(in) :: array
real, dimension(:,:,:), intent(out) :: res
integer, dimension(:,:), intent(in) :: shift
real, optional, intent(in) :: boundary
integer, optional, intent(in) :: dim
integer :: s1, s2, s3
integer :: n1, n2, n3
integer :: sh
real :: b
integer :: d
if (present(boundary)) then
b = boundary
else
b = 0.0
end if
if (present(dim)) then
d = dim
else
d = 1
end if
n1 = size(array,1)
n2 = size(array,2)
n3 = size(array,3)
select case(dim)
case(1)
do s3=1,n3
do s2=1,n2
sh = shift(s2,s3)
if (sh > 0) then
sh = min(sh, n1)
do s1= 1, n1 - sh
res(s1,s2,s3) = array(s1+sh,s2,s3)
end do
do s1 = n1 - sh + 1,n1
res(s1,s2,s3) = b
end do
else
sh = max(sh, -n1)
do s1=1,-sh
res(s1,s2,s3) = b
end do
do s1= 1-sh,n1
res(s1,s2,s3) = array(s1+sh,s2,s3)
end do
end if
end do
end do
case(2)
do s3=1,n3
do s1=1,n1
sh = shift(s1,s3)
if (sh > 0) then
sh = min (sh, n2)
do s2=1, n2 - sh
res(s1,s2,s3) = array(s1,s2+sh,s3)
end do
do s2=n2 - sh + 1, n2
res(s1,s2,s3) = b
end do
else
sh = max(sh, -n2)
do s2=1,-sh
res(s1,s2,s3) = b
end do
do s2=1-sh,n2
res(s1,s2,s3) = array(s1,s2+sh,s3)
end do
end if
end do
end do
case(3)
do s2=1, n2
do s1=1,n1
sh = shift(s1, s2)
if (sh > 0) then
sh = min(sh, n3)
do s3=1,n3 - sh
res(s1,s2,s3) = array(s1,s2,s3+sh)
end do
do s3=n3 - sh + 1, n3
res(s1,s2,s3) = b
end do
else
sh = max(sh, -n3)
do s3=1,-sh
res(s1,s2,s3) = b
end do
do s3=1-sh,n3
res(s1,s2,s3) = array(s1,s2,s3+sh)
end do
end if
end do
end do
case default
stop "Illegal dim"
end select
end subroutine eoshift_1
subroutine fill_shift(x, n)
integer, intent(out), dimension(:,:) :: x
integer, intent(in) :: n
integer :: n1, n2, s1, s2
integer :: v
v = -n - 1
n1 = size(x,1)
n2 = size(x,2)
do s2=1,n2
do s1=1,n1
x(s1,s2) = v
v = v + 1
if (v > n + 1) v = -n - 1
end do
end do
end subroutine fill_shift
end module x
program main
use x
implicit none
integer, parameter :: n1=20,n2=30,n3=40
real, dimension(n1,n2,n3) :: a,b,c
real, dimension(2*n1,n2,n3) :: a2, c2
integer :: dim
integer, dimension(n2,n3), target :: sh1
integer, dimension(n1,n3), target :: sh2
integer, dimension(n1,n2), target :: sh3
real, dimension(n2,n3), target :: b1
real, dimension(n1,n3), target :: b2
real, dimension(n1,n2), target :: b3
integer, dimension(:,:), pointer :: sp
real, dimension(:,:), pointer :: bp
call random_number(a)
call fill_shift(sh1, n1)
call fill_shift(sh2, n2)
call fill_shift(sh3, n3)
do dim=1,3
if (dim == 1) then
sp => sh1
else if (dim == 2) then
sp => sh2
else
sp => sh3
end if
b = eoshift(a,shift=sp,dim=dim,boundary=-0.5)
call eoshift_1 (a, shift=sp, dim=dim, boundary=-0.5,res=c)
if (any (b /= c)) then
print *,"dim = ", dim
print *,"sp = ", sp
print '(99F8.4)',b
print '(99F8.4)',c
call abort
end if
a2 = 42.
a2(1:2*n1:2,:,:) = a
b = eoshift(a2(1:2*n1:2,:,:), shift=sp, dim=dim, boundary=-0.5)
if (any(b /= c)) then
call abort
end if
c2 = 43.
c2(1:2*n1:2,:,:) = eoshift(a, shift=sp, dim=dim, boundary=-0.5)
if (any(c2(1:2*n1:2,:,:) /= c)) then
call abort
end if
if (any(c2(2:2*n1:2,:,:) /= 43.)) then
call abort
end if
end do
end program main
! { dg-do run }
! Check that eoshift works for three-dimensional arrays.
module x
implicit none
contains
subroutine eoshift_3 (array, shift, boundary, dim, res)
real, dimension(:,:,:), intent(in) :: array
real, dimension(:,:,:), intent(out) :: res
integer, dimension(:,:), intent(in) :: shift
real, optional, dimension(:,:), intent(in) :: boundary
integer, optional, intent(in) :: dim
integer :: s1, s2, s3
integer :: n1, n2, n3
integer :: sh
real :: b
integer :: d
if (present(dim)) then
d = dim
else
d = 1
end if
n1 = size(array,1)
n2 = size(array,2)
n3 = size(array,3)
select case(dim)
case(1)
do s3=1,n3
do s2=1,n2
sh = shift(s2,s3)
b = boundary(s2,s3)
if (sh > 0) then
sh = min(sh, n1)
do s1= 1, n1 - sh
res(s1,s2,s3) = array(s1+sh,s2,s3)
end do
do s1 = n1 - sh + 1,n1
res(s1,s2,s3) = b
end do
else
sh = max(sh, -n1)
do s1=1,-sh
res(s1,s2,s3) = b
end do
do s1= 1-sh,n1
res(s1,s2,s3) = array(s1+sh,s2,s3)
end do
end if
end do
end do
case(2)
do s3=1,n3
do s1=1,n1
sh = shift(s1,s3)
b = boundary(s1,s3)
if (sh > 0) then
sh = min (sh, n2)
do s2=1, n2 - sh
res(s1,s2,s3) = array(s1,s2+sh,s3)
end do
do s2=n2 - sh + 1, n2
res(s1,s2,s3) = b
end do
else
sh = max(sh, -n2)
do s2=1,-sh
res(s1,s2,s3) = b
end do
do s2=1-sh,n2
res(s1,s2,s3) = array(s1,s2+sh,s3)
end do
end if
end do
end do
case(3)
do s2=1, n2
do s1=1,n1
sh = shift(s1, s2)
b = boundary(s1, s2)
if (sh > 0) then
sh = min(sh, n3)
do s3=1,n3 - sh
res(s1,s2,s3) = array(s1,s2,s3+sh)
end do
do s3=n3 - sh + 1, n3
res(s1,s2,s3) = b
end do
else
sh = max(sh, -n3)
do s3=1,-sh
res(s1,s2,s3) = b
end do
do s3=1-sh,n3
res(s1,s2,s3) = array(s1,s2,s3+sh)
end do
end if
end do
end do
case default
stop "Illegal dim"
end select
end subroutine eoshift_3
subroutine fill_shift(x, n)
integer, intent(out), dimension(:,:) :: x
integer, intent(in) :: n
integer :: n1, n2, s1, s2
integer :: v
v = -n - 1
n1 = size(x,1)
n2 = size(x,2)
do s2=1,n2
do s1=1,n1
x(s1,s2) = v
v = v + 1
if (v > n + 1) v = -n - 1
end do
end do
end subroutine fill_shift
end module x
program main
use x
implicit none
integer, parameter :: n1=10,n2=30,n3=40
real, dimension(n1,n2,n3) :: a,b,c
real, dimension(2*n1,n2,n3) :: a2, c2
integer :: dim
integer, dimension(n2,n3), target :: sh1
integer, dimension(n1,n3), target :: sh2
integer, dimension(n1,n2), target :: sh3
real, dimension(n2,n3), target :: b1
real, dimension(n1,n3), target :: b2
real, dimension(n1,n2), target :: b3
integer, dimension(:,:), pointer :: sp
real, dimension(:,:), pointer :: bp
call random_number(a)
call random_number(b1)
call random_number(b2)
call random_number(b3)
call fill_shift(sh1, n1)
call fill_shift(sh2, n2)
call fill_shift(sh3, n3)
do dim=1,3
if (dim == 1) then
sp => sh1
bp => b1
else if (dim == 2) then
sp => sh2
bp => b2
else
sp => sh3
bp => b3
end if
b = eoshift(a,shift=sp,dim=dim,boundary=bp)
call eoshift_3 (a, shift=sp, dim=dim, boundary=bp,res=c)
if (any (b /= c)) then
call abort
end if
a2 = 42.
a2(1:2*n1:2,:,:) = a
b = eoshift(a2(1:2*n1:2,:,:), shift=sp, dim=dim, boundary=bp)
if (any(b /= c)) then
call abort
end if
c2 = 43.
c2(1:2*n1:2,:,:) = eoshift(a, shift=sp, dim=dim, boundary=bp)
if (any(c2(1:2*n1:2,:,:) /= c)) then
call abort
end if
if (any(c2(2:2*n1:2,:,:) /= 43.)) then
call abort
end if
end do
end program main