[gcc r11-2541] [nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle
Tom de Vries
vries@gcc.gnu.org
Tue Aug 4 09:59:22 GMT 2020
https://gcc.gnu.org/g:344f09a756ebd50510cc1eb3db111fd61c527702
commit r11-2541-g344f09a756ebd50510cc1eb3db111fd61c527702
Author: Tom de Vries <tdevries@suse.de>
Date: Tue Aug 4 09:53:08 2020 +0200
[nvptx] Handle V2DI/V2SI mode in nvptx_gen_shuffle
With the pr96628-part1.f90 source and -ftree-slp-vectorize, we run into an
ICE due to the fact that V2DI mode is not handled in nvptx_gen_shuffle.
Fix this by adding handling of V2DI as well as V2SI mode in
nvptx_gen_shuffle.
Build and reg-tested on x86_64 with nvptx accelerator.
gcc/ChangeLog:
PR target/96428
* config/nvptx/nvptx.c (nvptx_gen_shuffle): Handle V2SI/V2DI.
libgomp/ChangeLog:
PR target/96428
* testsuite/libgomp.oacc-fortran/pr96628-part1.f90: New test.
* testsuite/libgomp.oacc-fortran/pr96628-part2.f90: New test.
Diff:
---
gcc/config/nvptx/nvptx.c | 38 ++++++++++++++++++++++
.../libgomp.oacc-fortran/pr96628-part1.f90 | 20 ++++++++++++
.../libgomp.oacc-fortran/pr96628-part2.f90 | 37 +++++++++++++++++++++
3 files changed, 95 insertions(+)
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index d8a8fb2d55b..cf53a921e5b 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1796,6 +1796,44 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
end_sequence ();
}
break;
+ case E_V2SImode:
+ {
+ rtx src0 = gen_rtx_SUBREG (SImode, src, 0);
+ rtx src1 = gen_rtx_SUBREG (SImode, src, 4);
+ rtx dst0 = gen_rtx_SUBREG (SImode, dst, 0);
+ rtx dst1 = gen_rtx_SUBREG (SImode, dst, 4);
+ rtx tmp0 = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (SImode);
+ start_sequence ();
+ emit_insn (gen_movsi (tmp0, src0));
+ emit_insn (gen_movsi (tmp1, src1));
+ emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
+ emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
+ emit_insn (gen_movsi (dst0, tmp0));
+ emit_insn (gen_movsi (dst1, tmp1));
+ res = get_insns ();
+ end_sequence ();
+ }
+ break;
+ case E_V2DImode:
+ {
+ rtx src0 = gen_rtx_SUBREG (DImode, src, 0);
+ rtx src1 = gen_rtx_SUBREG (DImode, src, 8);
+ rtx dst0 = gen_rtx_SUBREG (DImode, dst, 0);
+ rtx dst1 = gen_rtx_SUBREG (DImode, dst, 8);
+ rtx tmp0 = gen_reg_rtx (DImode);
+ rtx tmp1 = gen_reg_rtx (DImode);
+ start_sequence ();
+ emit_insn (gen_movdi (tmp0, src0));
+ emit_insn (gen_movdi (tmp1, src1));
+ emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
+ emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
+ emit_insn (gen_movdi (dst0, tmp0));
+ emit_insn (gen_movdi (dst1, tmp1));
+ res = get_insns ();
+ end_sequence ();
+ }
+ break;
case E_BImode:
{
rtx tmp = gen_reg_rtx (SImode);
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90
new file mode 100644
index 00000000000..71219f9c467
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part1.f90
@@ -0,0 +1,20 @@
+! { dg-do run }
+! { dg-additional-sources pr96628-part2.f90 }
+! { dg-additional-options "-ftree-slp-vectorize" }
+!
+! This file is compiled first
+module m2
+ real*8 :: mysum
+ !$acc declare device_resident(mysum)
+contains
+ SUBROUTINE one(t)
+ !$acc routine
+ REAL*8, INTENT(IN) :: t(:)
+ mysum = sum(t)
+ END SUBROUTINE one
+ SUBROUTINE two(t)
+ !$acc routine seq
+ REAL*8, INTENT(INOUT) :: t(:)
+ t = (100.0_8*t)/mysum
+ END SUBROUTINE two
+end module m2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90
new file mode 100644
index 00000000000..784dc27e19e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr96628-part2.f90
@@ -0,0 +1,37 @@
+! { dg-do compile { target skip-all-targets } }
+!
+! Main file is pr96628-part1.f90
+
+MODULE m
+ IMPLICIT NONE
+ REAL*8, ALLOCATABLE :: t(:)
+CONTAINS
+ SUBROUTINE run()
+ use m2
+ IMPLICIT NONE
+
+ INTEGER :: i,j ! loop indices
+ !$acc data present(t)
+ !$acc parallel
+ !$acc loop gang
+ DO j = 1,2
+ !$acc loop vector
+ DO i = 1,2
+ CALL one(t(:))
+ CALL two(t(:))
+ END DO
+ END DO
+ !$acc end parallel
+ !$acc end data
+ END SUBROUTINE run
+END MODULE m
+
+use m
+implicit none
+integer :: i
+t = [(3.0_8*i, i = 1, 100)]
+!$acc data copy(t)
+call run
+!$acc end data
+if (any (abs(t - [((300.0_8*i)/15150.0_8, i = 1, 100)]) > 10.0_8*epsilon(t))) stop 1
+end
More information about the Gcc-cvs
mailing list