This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: OpenACC middle end changes


Hi!

On Thu, 13 Nov 2014 19:09:49 +0100, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, Nov 13, 2014 at 05:59:11PM +0100, Thomas Schwinge wrote:
> > --- gcc/builtins.c
> > +++ gcc/builtins.c

> > +/* Expand OpenACC acc_on_device.
> > +
> > +   This has to happen late (that is, not in early folding; expand_builtin_*,
> > +   rather than fold_builtin_*), as we have to act differently for host and
> > +   acceleration device (ACCEL_COMPILER conditional).  */
> > +
> > +static rtx
> > +expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
> > +{
> > +  if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
> > +    return NULL_RTX;
> > +
> > +  tree arg, v1, v2, ret;
> > +  location_t loc;
> > +
> > +  arg = CALL_EXPR_ARG (exp, 0);
> > +  arg = builtin_save_expr (arg);
> > +  loc = EXPR_LOCATION (exp);
> > +
> > +  /* Build: (arg == v1 || arg == v2) ? 1 : 0.  */
> > +
> > +#ifdef ACCEL_COMPILER
> > +  v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_not_host */ 3);
> > +  v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
> > +#else
> > +  v1 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_none */ 0);
> > +  v2 = build_int_cst (TREE_TYPE (arg), /* TODO: acc_device_host */ 2);
> > +#endif
> > +
> > +  v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
> > +  v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
> > +
> > +  /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
> > +     expand_expr_real*.  */
> > +  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
> > +  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
> > +			 ret, integer_one_node, integer_zero_node);
> > +
> > +  return expand_normal (ret);
> 
> If you can't fold it late (which is indeed a problem for -O0),
> then I'd suggest to implement this more RTL-ish.
> So, avoid the builtin_save_expr, instead
>   rtx op = expand_normal (arg);
> Don't build v1/v2 as trees (and, please fix the TODOs), but rtxes,

(acc_device_* TODOs already resolved earlier on.)

> just
>   rtx v1 = GEN_INT (...);
>   rtx v2 = GEN_INT (...);
>   machine_mode mode = TYPE_MODE (TREE_TYPE (arg));
>   rtx ret = gen_reg_rtx (TYPE_MODE (integer_type_node));
>   emit_move_insn (ret, const0_rtx);
>   rtx_code_label *done_label = gen_label_rtx ();
>   emit_cmp_and_jump_insns (op, v1, NE, NULL_RTX, mode,
> 			   false, done_label, PROB_EVEN);
>   emit_cmp_and_jump_insns (op, v2, NE, NULL_RTX, mode,
> 			   false, done_label, PROB_EVEN);
>   emit_move_insn (ret, const1_rtx);
>   emit_label (done_label);
>   return ret;
> or similar.

Thanks for the review/suggestion/code!

> Note, it would still be worthwhile to fold the builtin, at least
> when optimizing, after IPA.  Dunno if we have some property you can check,
> and Richard B. could suggest where it would be most appropriate (if GIMPLE
> guarded match.pd entry, or what), gimple_fold, etc.

I'll make a note to have a look at that later on.

> I bet I should handle omp_is_initial_device (); similarly.

Yeah.

Committed to gomp-4_0-branch in r218858:

commit da5ad5aec1c0f9b230ecb2dc00620a5598de5066
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Thu Dec 18 10:42:30 2014 +0000

    OpenACC acc_on_device: Make builtin expansion more RTXy.
    
    	gcc/
    	* builtins.c (expand_builtin_acc_on_device): Make more RTXy.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@218858 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog.gomp |  5 +++++
 gcc/builtins.c     | 44 +++++++++++++++++++++-----------------------
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index b370616..a3650c5 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2014-12-18  Thomas Schwinge  <thomas@codesourcery.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* builtins.c (expand_builtin_acc_on_device): Make more RTXy.
+
 2014-12-17  Thomas Schwinge  <thomas@codesourcery.com>
 	    Bernd Schmidt  <bernds@codesourcery.com>
 
diff --git gcc/builtins.c gcc/builtins.c
index fcf3f53..e946521 100644
--- gcc/builtins.c
+++ gcc/builtins.c
@@ -5889,38 +5889,36 @@ expand_stack_save (void)
    acceleration device (ACCEL_COMPILER conditional).  */
 
 static rtx
-expand_builtin_acc_on_device (tree exp, rtx target ATTRIBUTE_UNUSED)
+expand_builtin_acc_on_device (tree exp, rtx target)
 {
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
-  tree arg, v1, v2, ret;
-  location_t loc;
-
-  arg = CALL_EXPR_ARG (exp, 0);
-  arg = builtin_save_expr (arg);
-  loc = EXPR_LOCATION (exp);
-
-  /* Build: (arg == v1 || arg == v2) ? 1 : 0.  */
+  tree arg = CALL_EXPR_ARG (exp, 0);
 
+  /* Return (arg == v1 || arg == v2) ? 1 : 0.  */
+  machine_mode v_mode = TYPE_MODE (TREE_TYPE (arg));
+  rtx v = expand_normal (arg), v1, v2;
 #ifdef ACCEL_COMPILER
-  v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NOT_HOST);
-  v2 = build_int_cst (TREE_TYPE (arg), ACCEL_COMPILER_acc_device);
+  v1 = GEN_INT (GOMP_DEVICE_NOT_HOST);
+  v2 = GEN_INT (ACCEL_COMPILER_acc_device);
 #else
-  v1 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_NONE);
-  v2 = build_int_cst (TREE_TYPE (arg), GOMP_DEVICE_HOST);
+  v1 = GEN_INT (GOMP_DEVICE_NONE);
+  v2 = GEN_INT (GOMP_DEVICE_HOST);
 #endif
+  machine_mode target_mode = TYPE_MODE (integer_type_node);
+  if (!REG_P (target) || GET_MODE (target) != target_mode)
+    target = gen_reg_rtx (target_mode);
+  emit_move_insn (target, const0_rtx);
+  rtx_code_label *done_label = gen_label_rtx ();
+  emit_cmp_and_jump_insns (v, v1, NE, NULL_RTX, v_mode,
+			   false, done_label, PROB_EVEN);
+  emit_cmp_and_jump_insns (v, v2, NE, NULL_RTX, v_mode,
+			   false, done_label, PROB_EVEN);
+  emit_move_insn (target, const1_rtx);
+  emit_label (done_label);
 
-  v1 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v1);
-  v2 = fold_build2_loc (loc, EQ_EXPR, integer_type_node, arg, v2);
-
-  /* Can't use TRUTH_ORIF_EXPR, as that is not supported by
-     expand_expr_real*.  */
-  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node, v1, v1, v2);
-  ret = fold_build3_loc (loc, COND_EXPR, integer_type_node,
-			 ret, integer_one_node, integer_zero_node);
-
-  return expand_normal (ret);
+  return target;
 }
 
 


GrÃÃe,
 Thomas

Attachment: pgpJ4NL2c2ZT1.pgp
Description: PGP signature


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]