This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[gomp4.1] Initial #pragma omp ordered simd support
- From: Jakub Jelinek <jakub at redhat dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Thu, 10 Sep 2015 16:16:24 +0200
- Subject: [gomp4.1] Initial #pragma omp ordered simd support
- Authentication-results: sourceware.org; auth=none
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
This patch just makes sure (for now) that we don't vectorize loops with
#pragma omp ordered simd in them (directly or inlined into them).
Later on we can teach the vectorizer to handle some of the cases (but
presumably the markers would need to become stronger barriers, so that
earlier optimizations don't shuffle any memory accesses across them; for now
they are ok as is).
2015-09-10 Jakub Jelinek <jakub@redhat.com>
* internal-fn.def (GOMP_SIMD_ORDERED_START, GOMP_SIMD_ORDERED_END):
New internal functions.
* omp-low.c (lower_omp_ordered): For ordered simd construct expand
GOMP_SIMD_ORDERED_{START,END} internal calls around the body
instead of GOMP_ordered_{start,end}, and set cfun->has_simduid_loops.
* passes.def (pass_simduid_cleanup): Schedule another copy of the
pass after all optimizations.
* tree-inline.c (remap_gimple_stmt): Set has_simduid_loops if
remapping GOMP_SIMD_ORDERED_{START,END} internal call.
* tree-vectorizer.c (adjust_simduid_builtins): Remove
GOMP_SIMD_ORDERED_{START,END} calls.
(vectorize_loops, pass_simduid_cleanup::execute): Adjust comments.
* internal-fn.c (expand_GOMP_SIMD_ORDERED_START,
expand_GOMP_SIMD_ORDERED_END): New functions.
* c-c++-common/gomp/ordered-1.c: New test.
* c-c++-common/gomp/ordered-2.c: New test.
--- gcc/internal-fn.def.jj 2015-04-29 10:59:41.000000000 +0200
+++ gcc/internal-fn.def 2015-09-10 13:49:08.053011729 +0200
@@ -44,6 +44,8 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST
DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMD_ORDERED_START, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMD_ORDERED_END, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (MASK_LOAD, ECF_PURE | ECF_LEAF, NULL)
DEF_INTERNAL_FN (MASK_STORE, ECF_LEAF, NULL)
--- gcc/omp-low.c.jj 2015-09-07 14:42:28.000000000 +0200
+++ gcc/omp-low.c 2015-09-10 14:46:48.653692822 +0200
@@ -12327,10 +12327,13 @@ lower_omp_ordered (gimple_stmt_iterator
{
tree block;
gimple stmt = gsi_stmt (*gsi_p);
+ gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt);
gcall *x;
gbind *bind;
+ bool simd
+ = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_SIMD);
- lower_omp_ordered_clauses (as_a <gomp_ordered *> (stmt), ctx);
+ lower_omp_ordered_clauses (ord_stmt, ctx);
push_gimplify_context ();
@@ -12339,8 +12342,14 @@ lower_omp_ordered (gimple_stmt_iterator
gsi_replace (gsi_p, bind, true);
gimple_bind_add_stmt (bind, stmt);
- x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
- 0);
+ if (simd)
+ {
+ x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 0);
+ cfun->has_simduid_loops = true;
+ }
+ else
+ x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
+ 0);
gimple_bind_add_stmt (bind, x);
lower_omp (gimple_omp_body_ptr (stmt), ctx);
@@ -12348,7 +12357,11 @@ lower_omp_ordered (gimple_stmt_iterator
gimple_bind_add_seq (bind, gimple_omp_body (stmt));
gimple_omp_set_body (stmt, NULL);
- x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END), 0);
+ if (simd)
+ x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 0);
+ else
+ x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END),
+ 0);
gimple_bind_add_stmt (bind, x);
gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
--- gcc/passes.def.jj 2015-09-03 16:38:07.000000000 +0200
+++ gcc/passes.def 2015-09-10 13:50:21.220920366 +0200
@@ -340,6 +340,7 @@ along with GCC; see the file COPYING3.
NEXT_PASS (pass_tm_memopt);
NEXT_PASS (pass_tm_edges);
POP_INSERT_PASSES ()
+ NEXT_PASS (pass_simduid_cleanup);
NEXT_PASS (pass_vtable_verify);
NEXT_PASS (pass_lower_vaarg);
NEXT_PASS (pass_lower_vector);
--- gcc/tree-inline.c.jj 2015-09-03 16:40:30.000000000 +0200
+++ gcc/tree-inline.c 2015-09-10 14:21:08.638424534 +0200
@@ -1619,6 +1619,11 @@ remap_gimple_stmt (gimple stmt, copy_bod
gimple_call_set_tail (call_stmt, false);
if (gimple_call_from_thunk_p (call_stmt))
gimple_call_set_from_thunk (call_stmt, false);
+ if (gimple_call_internal_p (call_stmt)
+ && IN_RANGE (gimple_call_internal_fn (call_stmt),
+ IFN_GOMP_SIMD_ORDERED_START,
+ IFN_GOMP_SIMD_ORDERED_END))
+ DECL_STRUCT_FUNCTION (id->dst_fn)->has_simduid_loops = true;
}
/* Remap the region numbers for __builtin_eh_{pointer,filter},
--- gcc/tree-vectorizer.c.jj 2015-09-03 16:39:22.000000000 +0200
+++ gcc/tree-vectorizer.c 2015-09-10 14:31:40.860067887 +0200
@@ -149,8 +149,9 @@ simd_array_to_simduid::equal (const simd
return p1->decl == p2->decl;
}
-/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
- into their corresponding constants. */
+/* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF and IFN_GOMP_SIMD_LAST_LANE
+ into their corresponding constants and remove
+ IFN_GOMP_SIMD_ORDERED_{START,END}. */
static void
adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
@@ -161,7 +162,7 @@ adjust_simduid_builtins (hash_table<simd
{
gimple_stmt_iterator i;
- for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+ for (i = gsi_start_bb (bb); !gsi_end_p (i); )
{
unsigned int vf = 1;
enum internal_fn ifn;
@@ -169,7 +170,10 @@ adjust_simduid_builtins (hash_table<simd
tree t;
if (!is_gimple_call (stmt)
|| !gimple_call_internal_p (stmt))
- continue;
+ {
+ gsi_next (&i);
+ continue;
+ }
ifn = gimple_call_internal_fn (stmt);
switch (ifn)
{
@@ -177,7 +181,13 @@ adjust_simduid_builtins (hash_table<simd
case IFN_GOMP_SIMD_VF:
case IFN_GOMP_SIMD_LAST_LANE:
break;
+ case IFN_GOMP_SIMD_ORDERED_START:
+ case IFN_GOMP_SIMD_ORDERED_END:
+ gsi_remove (&i, true);
+ unlink_stmt_vdef (stmt);
+ continue;
default:
+ gsi_next (&i);
continue;
}
tree arg = gimple_call_arg (stmt, 0);
@@ -206,6 +216,7 @@ adjust_simduid_builtins (hash_table<simd
gcc_unreachable ();
}
update_call_from_tree (&i, t);
+ gsi_next (&i);
}
}
}
@@ -568,7 +579,7 @@ vectorize_loops (void)
free_stmt_vec_info_vec ();
- /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE} builtins. */
+ /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
if (cfun->has_simduid_loops)
adjust_simduid_builtins (simduid_to_vf_htab);
@@ -630,7 +641,7 @@ pass_simduid_cleanup::execute (function
note_simd_array_uses (&simd_array_to_simduid_htab);
- /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE} builtins. */
+ /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */
adjust_simduid_builtins (NULL);
/* Shrink any "omp array simd" temporary arrays to the
--- gcc/internal-fn.c.jj 2015-09-03 16:36:22.000000000 +0200
+++ gcc/internal-fn.c 2015-09-10 14:45:02.454253630 +0200
@@ -175,6 +175,22 @@ expand_GOMP_SIMD_LAST_LANE (gcall *)
gcc_unreachable ();
}
+/* This should get removed in adjust_simduid_builtins. */
+
+static void
+expand_GOMP_SIMD_ORDERED_START (gcall *)
+{
+ gcc_unreachable ();
+}
+
+/* This should get removed in adjust_simduid_builtins. */
+
+static void
+expand_GOMP_SIMD_ORDERED_END (gcall *)
+{
+ gcc_unreachable ();
+}
+
/* This should get expanded in the sanopt pass. */
static void
--- gcc/testsuite/c-c++-common/gomp/ordered-1.c.jj 2015-09-10 15:14:57.959090756 +0200
+++ gcc/testsuite/c-c++-common/gomp/ordered-1.c 2015-09-10 15:20:26.115259937 +0200
@@ -0,0 +1,91 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fopenmp-simd" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 1024
+extern
+#ifdef __cplusplus
+"C"
+#endif
+void abort (void);
+
+int last;
+
+void
+bar (unsigned char *a, int i, int safelen)
+{
+ int j, k;
+ if (i != last++)
+ abort ();
+ for (j = i - safelen - 32; j < i; j++)
+ if (j >= 0 && a[j] != 2)
+ break;
+ if (j <= i - safelen || a[j] != 1)
+ abort ();
+ for (k = j; k < i + safelen + 32; k++)
+ if (k >= N || a[k] != 1)
+ break;
+ if (k <= i || k > j + safelen)
+ abort ();
+ if (k < N && a[k] != 0)
+ abort ();
+ for (; k < i + safelen + 32; k++)
+ if (k < N && a[k] != 0)
+ abort ();
+}
+
+static inline void
+foo (unsigned char *a, int i)
+{
+ #pragma omp ordered simd
+ bar (a, i, 64);
+}
+
+int
+main ()
+{
+ unsigned char a[N], b[N];
+ int i;
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ a[i] = 0;
+ #pragma omp simd safelen (64)
+ for (i = 0; i < N; i++)
+ {
+ a[i]++;
+ foo (a, i);
+ a[i]++;
+ }
+ #pragma omp simd
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 0;
+ b[i] = 0;
+ }
+ last = 0;
+ #pragma omp simd safelen (32)
+ for (i = 0; i < N; i++)
+ {
+ a[i]++;
+ #pragma omp ordered simd
+ bar (a, i, 32);
+ a[i]++;
+ }
+ for (i = 0; i < N; i++)
+ if (a[i] != 2)
+ abort ();
+ #pragma omp simd safelen (32)
+ for (i = 1; i < N; i++)
+ {
+ #pragma omp ordered simd
+ b[i] = b[i - 1] + 1;
+ a[i]++;
+ #pragma omp ordered simd
+ a[i] += a[i - 1];
+ }
+ for (i = 0; i < N; i++)
+ if (a[i] != (unsigned char) (2 + 3 * i) || b[i] != (unsigned char) i)
+ abort ();
+ return 0;
+}
--- gcc/testsuite/c-c++-common/gomp/ordered-2.c.jj 2015-09-10 15:16:02.527140243 +0200
+++ gcc/testsuite/c-c++-common/gomp/ordered-2.c 2015-09-10 15:15:58.345201806 +0200
@@ -0,0 +1,4 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -fopenmp-simd" } */
+
+#include "ordered-1.c"
Jakub