This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [4.5] Find more autoinc addressing for induction variables


I wrote:

I'll see whether I can achieve a similar effect by modifying tree-ssa-loop-ivopts instead. Maybe I can add new candidates that get incremented in suitable basic blocks other than just the last one.

So here's a draft of what that would look like. For every use/cand pair for which autoinc is possible (which now requires that the use's block is not in a nested loop, and dominates the latch), we create a new candidate which is incremented at the point of the use, and compute its cost without including the cost of the step. This also gets rid of the special handling in iv_ca_recount_cost.


That could be extended later to have candidates that are incremented several times, e.g. to create two 16-bit postinc addressing modes for a candidate that's incremented by 4.

Does this look like an approach that everyone can live with?


Bernd -- This footer brought to you by insane German lawmakers. Analog Devices GmbH Wilhelm-Wagenfeld-Str. 6 80807 Muenchen Sitz der Gesellschaft Muenchen, Registergericht Muenchen HRB 40368 Geschaeftsfuehrer Thomas Wessel, William A. Martin, Margaret Seif
Index: tree-ssa-loop-ivopts.c
===================================================================
--- tree-ssa-loop-ivopts.c	(revision 147092)
+++ tree-ssa-loop-ivopts.c	(working copy)
@@ -155,6 +155,8 @@ struct cost_pair
   tree value;		/* For final value elimination, the expression for
 			   the final value of the iv.  For iv elimination,
 			   the new bound to compare with.  */
+  bool can_autoinc;	/* True if we think we can use autoincrement for
+			   this candidate/use pair.  */
 };
 
 /* Use.  */
@@ -181,6 +183,7 @@ enum iv_position
 {
   IP_NORMAL,		/* At the end, just before the exit condition.  */
   IP_END,		/* At the end of the latch block.  */
+  IP_AT_USE,		/* After a specific use.  */
   IP_ORIGINAL		/* The original biv.  */
 };
 
@@ -515,6 +518,10 @@ dump_cand (FILE *file, struct iv_cand *c
       fprintf (file, "  incremented before exit test\n");
       break;
 
+    case IP_AT_USE:
+      fprintf (file, "  incremented at use %d\n", -12345);
+      break;
+
     case IP_END:
       fprintf (file, "  incremented at end\n");
       break;
@@ -566,7 +573,7 @@ stmt_after_ip_normal_pos (struct loop *l
    variable CAND is incremented.  */
 
 static bool
-stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
+stmt_after_inc_pos (struct iv_cand *cand, gimple stmt)
 {
   basic_block cand_bb = gimple_bb (cand->incremented_at);
   basic_block stmt_bb = gimple_bb (stmt);
@@ -604,7 +611,8 @@ stmt_after_increment (struct loop *loop,
       return stmt_after_ip_normal_pos (loop, stmt);
 
     case IP_ORIGINAL:
-      return stmt_after_ip_original_pos (cand, stmt);
+    case IP_AT_USE:
+      return stmt_after_inc_pos (cand, stmt);
 
     default:
       gcc_unreachable ();
@@ -2360,24 +2368,6 @@ record_important_candidates (struct ivop
     }
 }
 
-/* Finds the candidates for the induction variables.  */
-
-static void
-find_iv_candidates (struct ivopts_data *data)
-{
-  /* Add commonly used ivs.  */
-  add_standard_iv_candidates (data);
-
-  /* Add old induction variables.  */
-  add_old_ivs_candidates (data);
-
-  /* Add induction variables derived from uses.  */
-  add_derived_ivs_candidates (data);
-
-  /* Record the important candidates.  */
-  record_important_candidates (data);
-}
-
 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
    If consider_all_candidates is true, we use a two-dimensional array, otherwise
    we allocate a simple list to every use.  */
@@ -2474,7 +2464,8 @@ infinite_cost_p (comp_cost cost)
 static void
 set_use_iv_cost (struct ivopts_data *data,
 		 struct iv_use *use, struct iv_cand *cand,
-		 comp_cost cost, bitmap depends_on, tree value)
+		 comp_cost cost, bitmap depends_on, tree value,
+		 bool can_autoinc)
 {
   unsigned i, s;
 
@@ -2490,6 +2481,7 @@ set_use_iv_cost (struct ivopts_data *dat
       use->cost_map[cand->id].cost = cost;
       use->cost_map[cand->id].depends_on = depends_on;
       use->cost_map[cand->id].value = value;
+      use->cost_map[cand->id].can_autoinc = can_autoinc;
       return;
     }
 
@@ -2509,6 +2501,7 @@ found:
   use->cost_map[i].cost = cost;
   use->cost_map[i].depends_on = depends_on;
   use->cost_map[i].value = value;
+  use->cost_map[i].can_autoinc = can_autoinc;
 }
 
 /* Gets cost of (USE, CANDIDATE) pair.  */
@@ -2993,21 +2986,30 @@ multiplier_allowed_in_address_p (HOST_WI
    variable is omitted.  Compute the cost for a memory reference that accesses
    a memory location of mode MEM_MODE.
 
+   MAY_AUTOINC is set to true if the autoincrement (increasing index by
+   size of MEM_MODE / RATIO) is available.  To make this determination, we
+   look at the size of the increment to be made, which is given in CSTEP.
+   CSTEP may be zero if the step is unknown.
+   STMT_AFTER_INC is true iff the statement we're looking at is after the
+   increment of the original biv.
+
    TODO -- there must be some better way.  This all is quite crude.  */
 
 static comp_cost
 get_address_cost (bool symbol_present, bool var_present,
 		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
-		  enum machine_mode mem_mode,
-		  bool speed)
+		  HOST_WIDE_INT cstep, enum machine_mode mem_mode, bool speed,
+		  bool stmt_after_inc, bool *may_autoinc)
 {
   static bool initialized[MAX_MACHINE_MODE];
   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
+  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
+  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
   unsigned cost, acost, complexity;
-  bool offset_p, ratio_p;
-  HOST_WIDE_INT s_offset;
+  bool offset_p, ratio_p, autoinc;
+  HOST_WIDE_INT s_offset, autoinc_offset, msize;
   unsigned HOST_WIDE_INT mask;
   unsigned bits;
 
@@ -3066,6 +3068,26 @@ get_address_cost (bool symbol_present, b
       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
 
+      if (HAVE_PRE_DECREMENT)
+	{
+	  addr = gen_rtx_PRE_DEC (Pmode, reg0);
+	  has_predec[mem_mode] = memory_address_p (mem_mode, addr);
+	}
+      if (HAVE_POST_DECREMENT)
+	{
+	  addr = gen_rtx_POST_DEC (Pmode, reg0);
+	  has_postdec[mem_mode] = memory_address_p (mem_mode, addr);
+	}
+      if (HAVE_PRE_INCREMENT)
+	{
+	  addr = gen_rtx_PRE_INC (Pmode, reg0);
+	  has_preinc[mem_mode] = memory_address_p (mem_mode, addr);
+	}
+      if (HAVE_POST_INCREMENT)
+	{
+	  addr = gen_rtx_POST_INC (Pmode, reg0);
+	  has_postinc[mem_mode] = memory_address_p (mem_mode, addr);
+	}
       for (i = 0; i < 16; i++)
 	{
 	  sym_p = i & 1;
@@ -3104,7 +3126,7 @@ get_address_cost (bool symbol_present, b
     
 	  if (base)
 	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
-  
+
 	  start_sequence ();
 	  /* To avoid splitting addressing modes, pretend that no cse will
 	     follow.  */
@@ -3149,7 +3171,7 @@ get_address_cost (bool symbol_present, b
 	  if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
 	    costs[mem_mode][1][var_p][off_p][rat_p] = acost;
 	}
-  
+
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file, "Address costs:\n");
@@ -3174,6 +3196,9 @@ get_address_cost (bool symbol_present, b
 	      acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
 	      fprintf (dump_file, "index costs %d\n", acost);
 	    }
+	  if (has_predec[mem_mode] || has_postdec[mem_mode]
+	      || has_preinc[mem_mode] || has_postinc[mem_mode])
+	    fprintf (dump_file, "  May include autoinc/dec\n");
 	  fprintf (dump_file, "\n");
 	}
     }
@@ -3185,6 +3210,23 @@ get_address_cost (bool symbol_present, b
     offset |= ~mask;
   s_offset = offset;
 
+  autoinc = false;
+  msize = GET_MODE_SIZE (mem_mode);
+  autoinc_offset = offset;
+  if (stmt_after_inc)
+    autoinc_offset += ratio * cstep;
+  if (symbol_present || var_present || ratio != 1)
+    autoinc = false;
+  else if ((has_postinc[mem_mode] && autoinc_offset == 0
+	       && msize == cstep)
+	   || (has_postdec[mem_mode] && autoinc_offset == 0
+	       && msize == -cstep)
+	   || (has_preinc[mem_mode] && autoinc_offset == msize
+	       && msize == cstep)
+	   || (has_predec[mem_mode] && autoinc_offset == -msize
+	       && msize == -cstep))
+    autoinc = true;
+
   cost = 0;
   offset_p = (s_offset != 0
 	      && min_offset[mem_mode] <= s_offset
@@ -3198,6 +3240,8 @@ get_address_cost (bool symbol_present, b
   if (s_offset && !offset_p && !symbol_present)
     cost += add_cost (Pmode, speed);
 
+  if (may_autoinc)
+    *may_autoinc = autoinc;
   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
   return new_cost (cost + acost, complexity);
@@ -3502,14 +3546,15 @@ difference_cost (struct ivopts_data *dat
 static comp_cost
 get_computation_cost_at (struct ivopts_data *data,
 			 struct iv_use *use, struct iv_cand *cand,
-			 bool address_p, bitmap *depends_on, gimple at)
+			 bool address_p, bitmap *depends_on, gimple at,
+			 bool *can_autoinc)
 {
   tree ubase = use->iv->base, ustep = use->iv->step;
   tree cbase, cstep;
   tree utype = TREE_TYPE (ubase), ctype;
   unsigned HOST_WIDE_INT cstepi, offset = 0;
   HOST_WIDE_INT ratio, aratio;
-  bool var_present, symbol_present;
+  bool var_present, symbol_present, stmt_is_after_inc;
   comp_cost cost;
   unsigned n_sums;
   double_int rat;
@@ -3604,16 +3649,20 @@ get_computation_cost_at (struct ivopts_d
 
   /* If we are after the increment, the value of the candidate is higher by
      one iteration.  */
-  if (stmt_after_increment (data->current_loop, cand, at))
+  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
+  if (stmt_is_after_inc)
     offset -= ratio * cstepi;
 
   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
      (symbol/var/const parts may be omitted).  If we are looking for an address,
      find the cost of addressing this.  */
   if (address_p)
-    return add_costs (cost, get_address_cost (symbol_present, var_present,
-				offset, ratio,
-				TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
+    return add_costs (cost,
+		      get_address_cost (symbol_present, var_present,
+					offset, ratio, cstepi,
+					TYPE_MODE (TREE_TYPE (*use->op_p)),
+					speed, stmt_is_after_inc,
+					can_autoinc));
 
   /* Otherwise estimate the costs for computing the expression.  */
   aratio = ratio > 0 ? ratio : -ratio;
@@ -3666,10 +3715,11 @@ fallback:
 static comp_cost
 get_computation_cost (struct ivopts_data *data,
 		      struct iv_use *use, struct iv_cand *cand,
-		      bool address_p, bitmap *depends_on)
+		      bool address_p, bitmap *depends_on, bool *can_autoinc)
 {
   return get_computation_cost_at (data,
-				  use, cand, address_p, depends_on, use->stmt);
+				  use, cand, address_p, depends_on, use->stmt,
+				  can_autoinc);
 }
 
 /* Determines cost of basing replacement of USE on CAND in a generic
@@ -3689,12 +3739,12 @@ determine_use_iv_cost_generic (struct iv
   if (cand->pos == IP_ORIGINAL
       && cand->incremented_at == use->stmt)
     {
-      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
+      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, false);
       return true;
     }
 
-  cost = get_computation_cost (data, use, cand, false, &depends_on);
-  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
+  cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
+  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, false);
 
   return !infinite_cost_p (cost);
 }
@@ -3706,9 +3756,11 @@ determine_use_iv_cost_address (struct iv
 			       struct iv_use *use, struct iv_cand *cand)
 {
   bitmap depends_on;
-  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
+  bool can_autoinc;
+  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
+					 &can_autoinc);
 
-  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
+  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, can_autoinc);
 
   return !infinite_cost_p (cost);
 }
@@ -3866,7 +3918,7 @@ determine_use_iv_cost_condition (struct 
   /* Only consider real candidates.  */
   if (!cand->iv)
     {
-      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
+      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, false);
       return false;
     }
 
@@ -3887,7 +3939,7 @@ determine_use_iv_cost_condition (struct 
   gcc_assert (ok);
 
   express_cost = get_computation_cost (data, use, cand, false,
-				       &depends_on_express);
+				       &depends_on_express, NULL);
   fd_ivopts_data = data;
   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
 
@@ -3906,7 +3958,7 @@ determine_use_iv_cost_condition (struct 
       bound = NULL_TREE;
     }
 
-  set_use_iv_cost (data, use, cand, cost, depends_on, bound);
+  set_use_iv_cost (data, use, cand, cost, depends_on, bound, false);
 
   if (depends_on_elim)
     BITMAP_FREE (depends_on_elim);
@@ -3939,6 +3991,90 @@ determine_use_iv_cost (struct ivopts_dat
     }
 }
 
+static void
+maybe_allocate_autoinc_cand (struct ivopts_data *data, struct iv_use *use,
+			     struct iv_cand *cand)
+{
+  bitmap depends_on;
+  bool can_autoinc;
+  comp_cost cost;
+  
+  if (cand->pos != IP_NORMAL)
+    return;
+
+  cost = get_computation_cost (data, use, cand, true, &depends_on,
+			       &can_autoinc);
+
+  BITMAP_FREE (depends_on);
+
+  if (infinite_cost_p (cost) || !can_autoinc)
+    return;
+
+  add_candidate_1 (data, cand->iv->base, cand->iv->step, false, IP_AT_USE,
+		   use, use->stmt);
+}
+
+/* Examine all the use/candidate pairs to see if we find autoincrement
+   opportunities, and create new IP_AT_USE candidates as appropriate.  */
+static void
+add_autoinc_candidates (struct ivopts_data *data)
+{
+  unsigned i, j;
+  struct iv_cand *cand;
+  struct loop *loop = data->current_loop;
+
+  for (i = 0; i < n_iv_uses (data); i++)
+    {
+      struct iv_use *use = iv_use (data, i);
+      basic_block use_bb = gimple_bb (use->stmt);
+
+      if (use->type != USE_ADDRESS
+	  || !dominated_by_p (CDI_DOMINATORS, loop->latch, use_bb)
+	  || use_bb->loop_father != loop)
+	continue;
+
+      if (data->consider_all_candidates)
+	{
+	  for (j = 0; j < n_iv_cands (data); j++)
+	    {
+	      cand = iv_cand (data, j);
+	      maybe_allocate_autoinc_cand (data, use, cand);
+	    }
+	}
+      else
+	{
+	  bitmap_iterator bi;
+
+	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
+	    {
+	      cand = iv_cand (data, j);
+	      maybe_allocate_autoinc_cand (data, use, cand);
+	    }
+	}
+    }
+
+}
+
+/* Finds the candidates for the induction variables.  */
+
+static void
+find_iv_candidates (struct ivopts_data *data)
+{
+  /* Add commonly used ivs.  */
+  add_standard_iv_candidates (data);
+
+  /* Add old induction variables.  */
+  add_old_ivs_candidates (data);
+
+  /* Add induction variables derived from uses.  */
+  add_derived_ivs_candidates (data);
+
+  add_autoinc_candidates (data);
+
+  /* Record the important candidates.  */
+  record_important_candidates (data);
+}
+
 /* Determines costs of basing the use of the iv on an iv candidate.  */
 
 static void
@@ -4036,7 +4172,11 @@ determine_iv_cost (struct ivopts_data *d
 
   base = cand->iv->base;
   cost_base = force_var_cost (data, base, NULL);
-  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
+
+  if (cand->pos == IP_AT_USE)
+    cost_step = 0;
+  else
+    cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
 
   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
 
@@ -4204,6 +4344,7 @@ static void
 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
 {
   comp_cost cost = ivs->cand_use_cost;
+
   cost.cost += ivs->cand_cost;
   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
 
@@ -4305,6 +4446,7 @@ iv_ca_set_cp (struct ivopts_data *data, 
       ivs->bad_uses--;
       ivs->cand_for_use[uid] = cp;
       ivs->n_cand_uses[cid]++;
+      
       if (ivs->n_cand_uses[cid] == 1)
 	{
 	  bitmap_set_bit (ivs->cands, cid);
@@ -4316,7 +4458,7 @@ iv_ca_set_cp (struct ivopts_data *data, 
 
 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
 	}
-
+  
       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
       iv_ca_set_add_invariants (ivs, cp->depends_on);
       iv_ca_recount_cost (data, ivs);
@@ -5000,6 +5142,11 @@ create_new_iv (struct ivopts_data *data,
       after = true;
       break;
 
+    case IP_AT_USE:
+      incr_pos = gsi_for_stmt (cand->incremented_at);
+      after = true;
+      break;
+
     case IP_ORIGINAL:
       /* Mark that the iv is preserved.  */
       name_info (data, cand->var_before)->preserve_biv = true;
@@ -5498,8 +5645,8 @@ tree_ssa_iv_optimize_loop (struct ivopts
   find_iv_candidates (data);
 
   /* Calculates the costs (item 3, part 1).  */
-  determine_use_iv_costs (data);
   determine_iv_costs (data);
+  determine_use_iv_costs (data);
   determine_set_costs (data);
 
   /* Find the optimal set of induction variables (item 3, part 2).  */

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]