This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

RFC patch for #pragma ivdep


Attached is an early version (C only) for #pragma ivdep, which aids vectorization by setting (for the following for-loop) loop->safelen to INT_MAX. [In the final version, I will also add parsing support for C++ and use it for Fortran's "do concurrent".]

As suggested by Richard and Jakub (thanks!), it is implemented as follows:
* An ANNOTATE_EXPR with ANNOTATE_EXPR_ID == annot_expr_ivdep_kind is attached to the condition of the loop
* In gimplify.c, it is converted into an internal function (ANNOTATE)
* When the "struct loops" is created, the internal function is removed and loop->safelen is set to INT_MAX

RFC:
* The replacement of the internal function is done in cfgloop.c's flow_loops_find. The code path I am interested in is: pass_build_cfg -> execute_build_cfg -> loop_optimizer_init (AVOID_CFG_MODIFICATIONS) -> flow_loops_find. But flow_loops_find is also called elsewhere. Thus, is this the best spot? Is the slowdown of walking the gimple statements acceptable? Additionally, there are some assumptions, which may or may not be valid:
- There is only one latch edge (if there are more, loop->latch is set to NULL and my code is not reached; expand_ANNOTATE might then get called and one gets an ICE [gcc_unreachable()]).
- The IFN_ANNOTATE is just before the GIMPLE_COND
- The loop condition is in loop->latch->next_bb.
* Parsing: Currently, #pragma ivdep and #pragma omp for require that a for loop follows. Other compilers permit #pragma ivdep followed by #pragma omp for - and vice versa [which gets complicated when OpenMPv4's safelen is also used]. Is restricting to either ivdep xor another pragma fine?


Example:

void foo(int n, int *a, int *b, int *c) {
  int i;
#pragma ivdep
  for (i = 0; i < n; ++i) {
    a[i] = b[i] + c[i];
  }
}

Without the pragma, "gcc -O3 -fopt-info-vec-optimized -c foo.c" gives:
foo.c:4:3: note: loop vectorized
foo.c:4:3: note: loop versioned for vectorization because of possible aliasing
foo.c:4:3: note: loop peeled for vectorization to enhance alignment

With the pragma, as expected, no loop versioning is done (i.e. there is no "loop versioned for vectorization because of possible aliasing").

(I successfully did an ada,c,c++,fortran,go,java,lto,objc,obj-c++ bootstrap on x86-64-gnu-linux; regtesting is on-going.)

Tobias
2013-08-10  Tobias Burnus  <burnus@net-b.de>

	* c-pragma.c (init_pragma): Add #pragma ivdep handling.
	* c-pragma.h (pragma_kind): Add PRAGMA_IVDEP.

	* c-parser.c (c_parser_pragma, c_parser_for_statement):
	Handle PRAGMA_IVDEP.
	(c_parser_statement_after_labels): Update call.

	* cfgloop.c (flow_loops_find): Search for IFN_ANNOTATE
	and set safelen.
	* gimplify.c (gimple_boolify, gimplify_expr): Handle ANNOTATE_EXPR.
	* internal-fn.c (expand_ANNOTATE): New function.
	* internal-fn.def (ANNOTATE): Define as new internal function.
	* tree-core.h (annot_expr_kind): New enum.
	(tree_base): Update a comment.
	* tree-pretty-print.c (dump_generic_node): Handle ANNOTATE_EXPR.
	* tree.def (ANNOTATE_EXPR): New DEFTREECODE.
	* tree.h (ANNOTATE_EXPR_ID, SET_ANNOTATE_EXPR_ID): New macros.

diff --git a/gcc/c-family/c-pragma.c b/gcc/c-family/c-pragma.c
index 309859f..06dbf17 100644
--- a/gcc/c-family/c-pragma.c
+++ b/gcc/c-family/c-pragma.c
@@ -1353,6 +1353,8 @@ init_pragma (void)
     cpp_register_deferred_pragma (parse_in, "GCC", "pch_preprocess",
 				  PRAGMA_GCC_PCH_PREPROCESS, false, false);
 
+  cpp_register_deferred_pragma (parse_in, 0, "ivdep", PRAGMA_IVDEP, false,
+				false);
 #ifdef HANDLE_PRAGMA_PACK_WITH_EXPANSION
   c_register_pragma_with_expansion (0, "pack", handle_pragma_pack);
 #else
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index 41215db..c826fbd 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -46,6 +46,7 @@ typedef enum pragma_kind {
   PRAGMA_OMP_THREADPRIVATE,
 
   PRAGMA_GCC_PCH_PREPROCESS,
+  PRAGMA_IVDEP,
 
   PRAGMA_FIRST_EXTERNAL
 } pragma_kind;
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index b612e29..6bf9fbf 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -1150,7 +1150,7 @@ static void c_parser_if_statement (c_parser *);
 static void c_parser_switch_statement (c_parser *);
 static void c_parser_while_statement (c_parser *);
 static void c_parser_do_statement (c_parser *);
-static void c_parser_for_statement (c_parser *);
+static void c_parser_for_statement (c_parser *, bool);
 static tree c_parser_asm_statement (c_parser *);
 static tree c_parser_asm_operands (c_parser *);
 static tree c_parser_asm_goto_operands (c_parser *);
@@ -4495,7 +4495,7 @@ c_parser_statement_after_labels (c_parser *parser)
 	  c_parser_do_statement (parser);
 	  break;
 	case RID_FOR:
-	  c_parser_for_statement (parser);
+	  c_parser_for_statement (parser, false);
 	  break;
 	case RID_GOTO:
 	  c_parser_consume_token (parser);
@@ -4948,7 +4948,7 @@ c_parser_do_statement (c_parser *parser)
 */
 
 static void
-c_parser_for_statement (c_parser *parser)
+c_parser_for_statement (c_parser *parser, bool ivdep)
 {
   tree block, cond, incr, save_break, save_cont, body;
   /* The following are only used when parsing an ObjC foreach statement.  */
@@ -5054,8 +5054,17 @@ c_parser_for_statement (c_parser *parser)
 	{
 	  if (c_parser_next_token_is (parser, CPP_SEMICOLON))
 	    {
-	      c_parser_consume_token (parser);
-	      cond = NULL_TREE;
+	      if (ivdep)
+		{
+		  c_parser_error (parser, "missing loop condition loop with "
+				  "IVDEP pragma");
+		  cond = error_mark_node;
+		}
+	      else
+		{
+		  c_parser_consume_token (parser);
+		  cond = NULL_TREE;
+		}
 	    }
 	  else
 	    {
@@ -5069,6 +5078,12 @@ c_parser_for_statement (c_parser *parser)
 	      c_parser_skip_until_found (parser, CPP_SEMICOLON,
 					 "expected %<;%>");
 	    }
+	  if (ivdep)
+	    {
+	      cond = build1 (ANNOTATE_EXPR, TREE_TYPE (cond), cond);
+	      SET_ANNOTATE_EXPR_ID (cond, annot_expr_ivdep_kind);
+	    }
+
 	}
       /* Parse the increment expression (the third expression in a
 	 for-statement).  In the case of a foreach-statement, this is
@@ -8947,6 +8962,17 @@ c_parser_pragma (c_parser *parser, enum pragma_context context)
       c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
       return false;
 
+    case PRAGMA_IVDEP:
+      c_parser_consume_pragma (parser);
+      c_parser_skip_to_pragma_eol (parser);
+      if (!c_parser_next_token_is_keyword (parser, RID_FOR))
+	{
+	  c_parser_error (parser, "for statement expected");
+	  return false;
+	}
+      c_parser_for_statement (parser, true);
+      return false;
+
     case PRAGMA_GCC_PCH_PREPROCESS:
       c_parser_error (parser, "%<#pragma GCC pch_preprocess%> must be first");
       c_parser_skip_until_found (parser, CPP_PRAGMA_EOL, NULL);
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index f39b194..a5eaf91 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -507,6 +507,39 @@ flow_loops_find (struct loops *loops)
 	      loop->latch = latch;
 	    }
 	}
+      /* Search for ANNOTATE call with annot_expr_ivdep_kind; if found, remove
+	 it and set loop->safelen to INT_MAX.  */
+      if (loop->latch && loop->latch->next_bb != EXIT_BLOCK_PTR
+          && bb_seq_addr (loop->latch->next_bb))
+	{
+           gimple_stmt_iterator gsi;
+           for (gsi = gsi_start_bb (loop->latch->next_bb);
+		gsi.bb && gsi.seq && !gsi_end_p (gsi);
+		gsi_next (&gsi))
+              {
+                gimple stmt = gsi_stmt (gsi);
+		if (gimple_code (stmt) == GIMPLE_COND)
+		  {
+		    gsi_prev_nondebug (&gsi);
+		    if (gsi_end_p (gsi))
+		      break;
+		    stmt = gsi_stmt (gsi);
+		    if (gimple_code (stmt) != GIMPLE_CALL)
+		      break;
+		    if (!gimple_call_internal_p (stmt)
+			 || gimple_call_internal_fn (stmt) != IFN_ANNOTATE)
+		      break;
+		    if ((annot_expr_kind) tree_low_cst (gimple_call_arg (stmt,
+									 1), 0)
+			!= annot_expr_ivdep_kind)
+		      break;
+		    stmt = gimple_build_assign (gimple_call_lhs (stmt),
+						gimple_call_arg (stmt, 0));
+		    gsi_replace (&gsi, stmt, true);
+		    loop->safelen = INT_MAX;
+		  }
+	      }
+	}
     }
 
   larray.release ();
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 86bda77..7d4e1b5 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -3053,6 +3053,16 @@ gimple_boolify (tree expr)
 	TREE_TYPE (expr) = boolean_type_node;
       return expr;
 
+    case ANNOTATE_EXPR:
+      if (ANNOTATE_EXPR_ID (expr) == annot_expr_ivdep_kind)
+	{
+	  TREE_OPERAND (expr, 0) = gimple_boolify (TREE_OPERAND (expr, 0));
+	  if (TREE_CODE (type) != BOOLEAN_TYPE)
+	    TREE_TYPE (expr) = boolean_type_node;
+	  return expr;
+	}
+      /* FALLTHRU */
+
     default:
       if (COMPARISON_CLASS_P (expr))
 	{
@@ -7378,6 +7388,22 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  ret = gimplify_addr_expr (expr_p, pre_p, post_p);
 	  break;
 
+	case ANNOTATE_EXPR:
+	  {
+	    tree cond = TREE_OPERAND (*expr_p, 0);
+	    tree id = build_int_cst (integer_type_node,
+				     ANNOTATE_EXPR_ID (*expr_p));
+	    tree tmp = create_tmp_var_raw (TREE_TYPE(cond), NULL);
+	    gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
+	    gimple call = gimple_build_call_internal (IFN_ANNOTATE, 2,
+						      cond, id);
+            gimple_call_set_lhs (call, tmp);
+	    gimplify_seq_add_stmt (pre_p, call);
+            *expr_p = tmp;
+	    ret = GS_ALL_DONE;
+	    break;
+	  }
+
 	case VA_ARG_EXPR:
 	  ret = gimplify_va_arg_expr (expr_p, pre_p, post_p);
 	  break;
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 983efeb..a22f222 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -109,6 +109,12 @@ expand_STORE_LANES (gimple stmt)
   expand_insn (get_multi_vector_move (type, vec_store_lanes_optab), 2, ops);
 }
 
+static void
+expand_ANNOTATE (gimple stmt ATTRIBUTE_UNUSED)
+{
+  gcc_unreachable ();
+}
+
 /* This should get expanded in adjust_simduid_builtins.  */
 
 static void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 5427664..0f5cc3c 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -43,3 +43,4 @@ DEF_INTERNAL_FN (STORE_LANES, ECF_CONST | ECF_LEAF)
 DEF_INTERNAL_FN (GOMP_SIMD_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW)
 DEF_INTERNAL_FN (GOMP_SIMD_VF, ECF_CONST | ECF_LEAF | ECF_NOTHROW)
 DEF_INTERNAL_FN (GOMP_SIMD_LAST_LANE, ECF_CONST | ECF_LEAF | ECF_NOTHROW)
+DEF_INTERNAL_FN (ANNOTATE,  ECF_CONST | ECF_LEAF | ECF_NOTHROW)
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 0b3314b..2d07a45 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -593,6 +593,10 @@ enum tree_node_kind {
   all_kinds
 };
 
+enum annot_expr_kind {
+  annot_expr_ivdep_kind
+};
+
 
 /*---------------------------------------------------------------------------
                                 Type definitions
@@ -692,7 +696,8 @@ struct GTY(()) tree_base {
        make better use of the 4-byte sized word.  */
     /* VEC length.  This field is only used with TREE_VEC.  */
     int length;
-    /* SSA version number.  This field is only used with SSA_NAME.  */
+    /* SSA version number or the ID of an ANNOTATE_EXPR.  This field is only
+       used with SSA_NAME and ANNOTATE_EXPR.  */
     unsigned int version;
   } GTY((skip(""))) u;
 };
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index c357b06..071fc43 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -1924,6 +1924,18 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
       pp_string (buffer, " predictor.");
       break;
 
+    case ANNOTATE_EXPR:
+      pp_string (buffer, "ANNOTATE_EXPR <");
+      switch (ANNOTATE_EXPR_ID (node))
+	{
+	case annot_expr_ivdep_kind:
+	  pp_string (buffer, "ivdep, ");
+	  break;
+	}
+      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+      pp_greater (buffer);
+      break;
+
     case RETURN_EXPR:
       pp_string (buffer, "return");
       op0 = TREE_OPERAND (node, 0);
diff --git a/gcc/tree.def b/gcc/tree.def
index f825aad..040f46d 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1231,6 +1231,12 @@ DEFTREECODE (OPTIMIZATION_NODE, "optimization_node", tcc_exceptional, 0)
 /* TARGET_OPTION_NODE.  Node to store the target specific options.  */
 DEFTREECODE (TARGET_OPTION_NODE, "target_option_node", tcc_exceptional, 0)
 
+/* ANNOTATE_EXPR.
+   Operand 0 is the expression to be annotated.
+   The annotation id is not an operand; it is kept in base.u.version.  */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 1)
+
+
 /*
 Local variables:
 mode:c
diff --git a/gcc/tree.h b/gcc/tree.h
index a71cd96..0ea5318 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -591,6 +591,11 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 #define PREDICT_EXPR_PREDICTOR(NODE) \
   ((enum br_predictor)tree_low_cst (TREE_OPERAND (PREDICT_EXPR_CHECK (NODE), 0), 0))
 
+#define ANNOTATE_EXPR_ID(NODE) \
+  ((enum annot_expr_kind) ANNOTATE_EXPR_CHECK(NODE)->base.u.version)
+#define SET_ANNOTATE_EXPR_ID(NODE, ID) \
+  (ANNOTATE_EXPR_CHECK(NODE)->base.u.version = ID)
+
 /* In a VAR_DECL, nonzero means allocate static storage.
    In a FUNCTION_DECL, nonzero if function has been defined.
    In a CONSTRUCTOR, nonzero means allocate static storage.  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]