This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[committed][nvptx] Commit passing pr85381-*.c test-cases


Hi,

Add pr85381*.c test-cases that are already passing without the fix for PR85381.

Built and reg-tested on x86_64 with nvptx accelerator.

Committed to trunk.

Thanks,
- Tom

[nvptx] Commit passing pr85381-*.c test-cases

2018-12-19  Tom de Vries  <tdevries@suse.de>

	* testsuite/libgomp.oacc-c-c++-common/pr85381-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr85381-3.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/pr85381-4.c: New test.

---
 .../libgomp.oacc-c-c++-common/pr85381-2.c          | 36 ++++++++++++++++++++++
 .../libgomp.oacc-c-c++-common/pr85381-3.c          | 35 +++++++++++++++++++++
 .../libgomp.oacc-c-c++-common/pr85381-4.c          | 27 ++++++++++++++++
 3 files changed, 98 insertions(+)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
new file mode 100644
index 00000000000..6570c64afff
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-2.c
@@ -0,0 +1,36 @@
+/* { dg-additional-options "-save-temps" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+   { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+int
+main (void)
+{
+  int v1;
+
+  #pragma acc parallel
+  #pragma acc loop worker
+  for (v1 = 0; v1 < 20; v1 += 2)
+    ;
+
+  return 0;
+}
+
+/* Todo: Both bar.syncs can be removed.
+   Atm we generate this dead code in between forked and joining:
+
+                     mov.u32 %r28, %ntid.y;
+                     mov.u32 %r29, %tid.y;
+                     add.u32 %r30, %r29, %r29;
+                     setp.gt.s32     %r31, %r30, 19;
+             @%r31   bra     $L2;
+                     add.u32 %r25, %r28, %r28;
+                     mov.u32 %r24, %r30;
+     $L3:
+                     add.u32 %r24, %r24, %r25;
+                     setp.le.s32     %r33, %r24, 19;
+             @%r33   bra     $L3;
+     $L2:
+
+   so the loop is not recognized as an empty loop (which we detect by seeing if
+   joining immediately follows forked).  */
+/* { dg-final { scan-assembler-times "bar.sync" 2 } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c
new file mode 100644
index 00000000000..c5d1c5add68
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-3.c
@@ -0,0 +1,35 @@
+/* { dg-additional-options "-save-temps -w" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+   { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+int a;
+#pragma acc declare create(a)
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+foo_v (void)
+{
+  a = 1;
+}
+
+#pragma acc routine worker
+void __attribute__((noinline, noclone))
+foo_w (void)
+{
+  a = 2;
+}
+
+int
+main (void)
+{
+
+  #pragma acc parallel
+  foo_v ();
+
+  #pragma acc parallel
+  foo_w ();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "bar.sync" } } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
new file mode 100644
index 00000000000..d955d79718d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85381-4.c
@@ -0,0 +1,27 @@
+/* { dg-additional-options "-save-temps -w" } */
+/* { dg-do run { target openacc_nvidia_accel_selected } }
+   { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */
+
+#define n 1024
+
+int
+main (void)
+{
+  #pragma acc parallel
+  {
+    #pragma acc loop worker
+    for (int i = 0; i < n; i++)
+      ;
+
+    #pragma acc loop worker
+    for (int i = 0; i < n; i++)
+      ;
+  }
+
+  return 0;
+}
+
+/* Atm, %ntid.y is broadcast from one loop to the next, so there are 2 bar.syncs
+   for that (the other two are there for the same reason as in pr85381-2.c).
+   Todo: Recompute %ntid.y instead of broadcasting it. */
+/* { dg-final { scan-assembler-times "bar.sync" 4 } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]