This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Operator Strength Reduction on trees


Hello,

> > > So perhaps
> > > the main advantage is that this algorithm is implemented now.
> > Ugh... I can implement the described scev-based algorithm in about half
> > an hour...
> > I think it is pretty clear from the metacode I provided; I can send
> > you a full implementation if you have a problem with that.
> 
> I don't have a problem, but I would like to see the full
> implementation. In part because of what I wrote above, I don't think
> it will be so trivial. If I am wrong I would like to learn, and the
> best way is probably to see your code.

here it is.  Disclaimers:

-- the only testcase I ran through it is the s_should_lftr_loop function
   you provided.  If it compiles anything else correctly, I would be
   extremely surprised.  It is even possible that it makes some mistake
   in the s_should_lftr_loop compilation, I did not look at every
   operation in detail.
-- there are several errors in the code I know about (it does not preserve
   the semantics of overflows, and does not verify that the
   transformation in stupid_lftr_exit_by_biv does not cause overflow,
   thus making it invalid).
-- the code is written quite inefficiently (find_iv should use a hash
   table to find an existing biv with a similar evolution, rather than
   traversing all the bivs in the loop).
-- there are several features that the pass would need to learn about
   (like address arithmetic) to make it really useful (nevertheless,
   most of them are also missing in the OSR implementation).

Zdenek

Index: tree-pass.h
===================================================================
*** tree-pass.h	(revision 120836)
--- tree-pass.h	(working copy)
*************** extern struct tree_opt_pass pass_if_conv
*** 254,259 ****
--- 254,261 ----
  extern struct tree_opt_pass pass_vectorize;
  extern struct tree_opt_pass pass_complete_unroll;
  extern struct tree_opt_pass pass_loop_prefetch;
+ extern struct tree_opt_pass pass_stupid_sr;
+ extern struct tree_opt_pass pass_stupid_lftr;
  extern struct tree_opt_pass pass_iv_optimize;
  extern struct tree_opt_pass pass_tree_loop_done;
  extern struct tree_opt_pass pass_ch;
Index: tree-scalar-evolution.c
===================================================================
*** tree-scalar-evolution.c	(revision 120836)
--- tree-scalar-evolution.c	(working copy)
*************** scev_const_prop (void)
*** 3017,3019 ****
--- 3017,3422 ----
      }
    return 0;
  }
+ 
+ /* Dumb strength reduction pass.  Strength reduces everything, regardless of
+    whether it is useful or not.  Does not understand addressing modes, nor
+    any other common features that would be necessary to make it usable.  We
+    do not bother to care about overflows, either.  */
+ 
+ static bool
+ gate_stupid_st (void)
+ {
+   return true;
+ }
+ 
+ static bool
+ similar_iv_p (struct loop *loop, affine_iv *iv, tree var, tree *delta)
+ {
+   tree type = TREE_TYPE (iv->base);
+   affine_iv viv;
+   tree base, step;
+       
+   if (!simple_iv (loop, SSA_NAME_DEF_STMT (var), var, &viv, true))
+     return false;
+ 
+   base = fold_convert (type, viv.base);
+   step = fold_convert (type, viv.step);
+ 
+   if (!operand_equal_p (step, iv->step, 0))
+     return false;
+ 
+   *delta = fold_build2 (MINUS_EXPR, type, iv->base, base);
+   if (TREE_CODE (*delta) != INTEGER_CST
+       && !operand_equal_p (*delta, step, 0))
+     return false;
+ 
+   return true;
+ }
+ 
+ static bool
+ find_iv (struct loop *loop, affine_iv *iv, tree *var, tree *delta)
+ {
+   tree phi, r, rtype, type = TREE_TYPE (iv->base);
+ 
+   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
+     {
+       r = PHI_RESULT (phi);
+       rtype = TREE_TYPE (r);
+ 
+       if (!is_gimple_reg (r)
+ 	  || !(INTEGRAL_TYPE_P (rtype)
+ 	       || POINTER_TYPE_P (rtype))
+ 	  || TYPE_PRECISION (rtype) != TYPE_PRECISION (type))
+ 	continue;
+ 
+       if (similar_iv_p (loop, iv, r, delta))
+ 	{
+ 	  *var = r;
+ 	  return true;
+ 	}
+     }
+ 
+   return false;
+ }
+ 
+ /* Bitmap of basic induction variables used to express uses except
+    for the ones in cond_exprs (i.e., those that cannot be eliminated
+    in lftr).  */
+ bitmap bivs_used_in_noncond;
+ 
+ static void
+ execute_sr_for_op (struct loop *loop, tree op, affine_iv *iv, bool noncond)
+ {
+   tree var, delta, rhs, stmts;
+   block_stmt_iterator bsi;
+   bool after;
+   tree type = TREE_TYPE (op);
+   tree def = SSA_NAME_DEF_STMT (op);
+ 
+   if (TREE_CODE (def) == PHI_NODE
+       && bb_for_stmt (def) == loop->header)
+     {
+       bitmap_set_bit (bivs_used_in_noncond, SSA_NAME_VERSION (op));
+       return;
+     }
+ 
+   if (!find_iv (loop, iv, &var, &delta))
+     {
+       standard_iv_increment_position (loop, &bsi, &after);
+       create_iv (iv->base, iv->step, NULL_TREE, loop,
+ 		 &bsi, after, &var, NULL);
+       delta = build_int_cst (type, 0);
+     }
+   if (noncond)
+     bitmap_set_bit (bivs_used_in_noncond, SSA_NAME_VERSION (var));
+ 
+   rhs = fold_build2 (PLUS_EXPR, type, fold_convert (type, var),
+ 		     unshare_expr (delta));
+   rhs = force_gimple_operand (rhs, &stmts, false, NULL_TREE);
+   if (TREE_CODE (def) == PHI_NODE)
+     {
+       bsi = bsi_after_labels (bb_for_stmt (def));
+       if (stmts)
+ 	{
+ 	  bsi = bsi_for_stmt (def);
+ 	  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
+ 	}
+       remove_phi_node (def, NULL_TREE, false);
+       def = build2_gimple (GIMPLE_MODIFY_STMT, op, rhs);
+       SSA_NAME_DEF_STMT (op) = def;
+       bsi_insert_before (&bsi, def, BSI_SAME_STMT);
+     }
+   else
+     {
+       gcc_assert (TREE_CODE (def) == GIMPLE_MODIFY_STMT);
+       if (stmts)
+ 	{
+ 	  bsi = bsi_for_stmt (def);
+ 	  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
+ 	}
+       GIMPLE_STMT_OPERAND (def, 1) = rhs;
+       update_stmt (def);
+     }
+ }
+ 
+ /* Bitmap of SSA names that were already strength reduced.  */
+ bitmap sreduced;
+ 
+ static void
+ stupid_sr_op (tree op, bool noncond)
+ {
+   tree type = TREE_TYPE (op);
+   tree def;
+   basic_block bb;
+   affine_iv iv;
+   struct loop *loop;
+ 
+   if (TREE_CODE (op) != SSA_NAME
+       || !(INTEGRAL_TYPE_P (type)
+ 	   || POINTER_TYPE_P (type))
+       || bitmap_bit_p (sreduced, SSA_NAME_VERSION (op)))
+     return;
+ 
+   bitmap_set_bit (sreduced, SSA_NAME_VERSION (op));
+   def = SSA_NAME_DEF_STMT (op);
+   bb = bb_for_stmt (def);
+   if (!bb || bb->loop_father == current_loops->tree_root)
+     return;
+ 
+   /* Find the loop in which OP is a non-invariant induction variable.  */
+   for (loop = bb->loop_father;
+        loop != current_loops->tree_root;
+        loop = loop->outer)
+     {
+       if (!simple_iv (loop, def, op, &iv, true))
+ 	return;
+ 
+       if (!integer_zerop (iv.step))
+ 	{
+ 	  execute_sr_for_op (loop, op, &iv, noncond);
+ 	  return;
+ 	}
+     }
+ }
+ 
+ static void
+ stupid_sr_stmt (basic_block bb, tree stmt)
+ {
+   tree lhs, op;
+   affine_iv iv;
+   ssa_op_iter oi;
+ 
+   /* We do not care about statements that just define induction variables;
+      we are interested only in the statements that use them.  */
+   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
+     {
+       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
+       if (TREE_CODE (lhs) == SSA_NAME
+ 	  && simple_iv (bb->loop_father, stmt, lhs, &iv, true))
+ 	return;
+     }
+ 
+   FOR_EACH_SSA_TREE_OPERAND (op, stmt, oi, SSA_OP_USE)
+     {
+       stupid_sr_op (op, TREE_CODE (stmt) != COND_EXPR);
+     }
+ }
+ 
+ static void
+ stupid_sr_bb (basic_block bb)
+ {
+   block_stmt_iterator bsi;
+   tree phi;
+   edge e;
+   edge_iterator ei;
+ 
+   if (bb->loop_father == current_loops->tree_root)
+     return;
+ 
+   /* Ignore the uses in loopback phi nodes (these define bivs).  */
+   if (bb->loop_father->latch != bb)
+     {
+       FOR_EACH_EDGE (e, ei, bb->succs)
+ 	{
+ 	  for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
+ 	    {
+ 	      if (!is_gimple_reg (PHI_RESULT (phi)))
+ 		continue;
+ 
+ 	      stupid_sr_op (PHI_ARG_DEF_FROM_EDGE (phi, e), true);
+ 	    }
+ 	}
+     }
+ 
+   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+     stupid_sr_stmt (bb, bsi_stmt (bsi));
+ }
+ 
+ static void
+ run_sr_in_dom_postorder (basic_block bb)
+ {
+   basic_block son;
+ 
+   for (son = first_dom_son (CDI_DOMINATORS, bb);
+        son;
+        son = next_dom_son (CDI_DOMINATORS, son))
+     run_sr_in_dom_postorder (son);
+ 
+   if (bb != ENTRY_BLOCK_PTR)
+     stupid_sr_bb (bb);
+ }
+ 
+ static unsigned
+ stupid_sr (void)
+ {
+   if (!current_loops)
+     return 0;
+ 
+   sreduced = BITMAP_ALLOC (NULL);
+   bivs_used_in_noncond = BITMAP_ALLOC (NULL);
+   run_sr_in_dom_postorder (ENTRY_BLOCK_PTR);
+   BITMAP_FREE (sreduced);
+ 
+   return 0;
+ }
+ 
+ struct tree_opt_pass pass_stupid_sr =
+ {
+   "sreduce",				/* name */
+   gate_stupid_st,			/* gate */
+   stupid_sr,				/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   TV_TREE_LOOP_IVOPTS,	  		/* tv_id */
+   PROP_cfg | PROP_ssa,			/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   TODO_dump_func | TODO_cleanup_cfg,
+ 					/* todo_flags_finish */
+   0					/* letter */
+ };
+ 
+ /* Stupid lftr.  Some of the transformations we do are invalid, since we do not
+    check for overflows.  */
+ 
+ static bool
+ gate_stupid_lftr (void)
+ {
+   return true;
+ }
+ 
+ static bool
+ stupid_lftr_exit_by_biv (struct loop *loop,
+ 			 basic_block bb,
+ 			 tree cvar, tree bnd, enum tree_code op,
+ 			 affine_iv *cvar_iv, tree biv)
+ {
+   tree type = TREE_TYPE (cvar), btype = TREE_TYPE (biv);
+   affine_iv biv_iv;
+   double_int mul;
+   block_stmt_iterator bsi = bsi_last (bb);
+   tree exit_stmt = bsi_stmt (bsi), ibiv;
+ 
+   if (TYPE_PRECISION (type) != TYPE_PRECISION (btype))
+     return false;
+ 
+   ibiv = PHI_ARG_DEF_FROM_EDGE (SSA_NAME_DEF_STMT (biv),
+ 				loop_latch_edge (loop));
+   if (TREE_CODE (ibiv) == SSA_NAME)
+     {
+       tree idef = SSA_NAME_DEF_STMT (ibiv);
+       if (dominated_by_p (CDI_DOMINATORS, bb, bb_for_stmt (idef)))
+ 	biv = ibiv;
+     }
+ 
+   if (!simple_iv (loop, SSA_NAME_DEF_STMT (biv), biv, &biv_iv, true))
+     return false;
+ 
+   if (!constant_multiple_of (biv_iv.step, cvar_iv->step, &mul))
+     return false;
+ 
+   if (double_int_negative_p (mul))
+     op = swap_tree_comparison (op);
+ 
+   bnd = fold_build2 (MINUS_EXPR, type, bnd, cvar_iv->base);
+   bnd = fold_build2 (PLUS_EXPR, btype, biv_iv.base,
+ 		     fold_build2 (MULT_EXPR, btype,
+ 				  double_int_to_tree (btype, mul),
+ 				  fold_convert (btype, bnd)));
+   bnd = unshare_expr (bnd);
+   bnd = force_gimple_operand_bsi (&bsi, bnd, true, NULL_TREE);
+ 
+   COND_EXPR_COND (exit_stmt) = build2 (op, boolean_type_node, biv, bnd);
+   update_stmt (exit_stmt);
+ 
+   return true;
+ }
+ 
+ static void
+ stupid_lftr_exit (struct loop *loop, edge exit)
+ {
+   tree exit_stmt = last_stmt (exit->src), cond, cvar, bnd, otmp;
+   affine_iv cvar_iv, bnd_iv, tmp;
+   tree phi, biv;
+   enum tree_code op;
+ 
+   if (TREE_CODE (exit_stmt) != COND_EXPR)
+     return;
+   cond = COND_EXPR_COND (exit_stmt);
+ 
+   if (!COMPARISON_CLASS_P (cond))
+     return;
+ 
+   cvar = TREE_OPERAND (cond, 0);
+   bnd = TREE_OPERAND (cond, 1);
+   op = TREE_CODE (cond);
+   if (!simple_iv (loop, exit_stmt, cvar, &cvar_iv, true)
+       || !simple_iv (loop, exit_stmt, bnd, &bnd_iv, true))
+     return;
+ 
+   if (integer_zerop (cvar_iv.step))
+     {
+       otmp = cvar; cvar = bnd; bnd = otmp;
+       tmp = cvar_iv; cvar_iv = bnd_iv; bnd_iv = tmp;
+       op = swap_tree_comparison (op);
+     }
+   if (integer_zerop (cvar_iv.step)
+       || !integer_zerop (bnd_iv.step)
+       || bitmap_bit_p (bivs_used_in_noncond,
+ 		       SSA_NAME_VERSION (cvar)))
+     return;
+ 
+   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
+     {
+       biv = PHI_RESULT (phi);
+       if (bitmap_bit_p (bivs_used_in_noncond,
+ 			SSA_NAME_VERSION (biv))
+ 	  && stupid_lftr_exit_by_biv (loop, exit->src, cvar, bnd, op,
+ 				      &cvar_iv, biv))
+ 	return;
+     }
+ }
+ 
+ static unsigned
+ stupid_lftr (void)
+ {
+   loop_iterator li;
+   struct loop *loop;
+   VEC (edge, heap) *exits;
+   unsigned i;
+   edge exit;
+ 
+   if (!current_loops)
+     return 0;
+ 
+   FOR_EACH_LOOP (li, loop, 0)
+     {
+       exits = get_loop_exit_edges (loop);
+       for (i = 0; VEC_iterate (edge, exits, i, exit); i++)
+ 	stupid_lftr_exit (loop, exit);
+       VEC_free (edge, heap, exits);
+     }
+   BITMAP_FREE (bivs_used_in_noncond);
+ 
+   return 0;
+ }
+ 
+ struct tree_opt_pass pass_stupid_lftr =
+ {
+   "slftr",				/* name */
+   gate_stupid_lftr,			/* gate */
+   stupid_lftr,				/* execute */
+   NULL,					/* sub */
+   NULL,					/* next */
+   0,					/* static_pass_number */
+   TV_TREE_LOOP_IVOPTS,	  		/* tv_id */
+   PROP_cfg | PROP_ssa,			/* properties_required */
+   0,					/* properties_provided */
+   0,					/* properties_destroyed */
+   0,					/* todo_flags_start */
+   TODO_dump_func | TODO_cleanup_cfg,
+ 					/* todo_flags_finish */
+   0					/* letter */
+ };
Index: tree-ssa-loop-ivopts.c
===================================================================
*** tree-ssa-loop-ivopts.c	(revision 120836)
--- tree-ssa-loop-ivopts.c	(working copy)
*************** tree_int_cst_sign_bit (tree t)
*** 2534,2540 ****
     The returned value is always sign-extended, regardless of the
     signedness of TOP and BOT.  */
  
! static bool
  constant_multiple_of (tree top, tree bot, double_int *mul)
  {
    tree mby;
--- 2534,2540 ----
     The returned value is always sign-extended, regardless of the
     signedness of TOP and BOT.  */
  
! bool
  constant_multiple_of (tree top, tree bot, double_int *mul)
  {
    tree mby;
Index: tree-flow.h
===================================================================
*** tree-flow.h	(revision 120836)
--- tree-flow.h	(working copy)
*************** void tree_transform_and_unroll_loop (str
*** 914,919 ****
--- 914,920 ----
  				     edge, struct tree_niter_desc *,
  				     transform_callback, void *);
  bool contains_abnormal_ssa_name_p (tree);
+ bool constant_multiple_of (tree, tree, double_int *);
  
  /* In tree-ssa-threadedge.c */
  extern bool potentially_threadable_block (basic_block);
Index: passes.c
===================================================================
*** passes.c	(revision 120836)
--- passes.c	(working copy)
*************** init_optimization_passes (void)
*** 635,640 ****
--- 635,644 ----
       pass_may_alias.  */
    NEXT_PASS (pass_complete_unroll);
    NEXT_PASS (pass_loop_prefetch);
+   NEXT_PASS (pass_stupid_sr);
+   NEXT_PASS (pass_dce_loop);
+   NEXT_PASS (pass_stupid_lftr);
+   NEXT_PASS (pass_dce_loop);
    NEXT_PASS (pass_iv_optimize);
    NEXT_PASS (pass_tree_loop_done);
    *p = NULL;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]