This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Update profile in loop versioning and unrolling


Hello,

at the moment, the probabilities and freqencies are set to zero in one
copy of the versioned loop, and to original values in the other copy.
This is somewhat wrong, since even if the copied loop is the one that is
unlikely to be entered, some optimizers (e.g. bb reordering) take
frequencies into account and will produce weird results on the parts of
the code where frequencies are all zero.

In some of the applications the copy even is the more important of the
loops (e.g. in the unrolling performed by prefetching, the copied loop
is the one that gets unrolled), which makes us incorrectly optimize the
less likely path.

In loop unswitching, we know what the probabilities that either the
original loop or its copy are entered, from the probabilities of the
unswitched branch.

This patch fixes these problems, by enabling us to specify how the
frequencies should be scaled in the versioned loop and its copy.
This revealed some other problems and possible improvements in setting
the frequencies during loop unrolling (mostly dealing with the
probabilities and frequencies of the edges that will be removed
after unrolling, and the blocks dominated by them), which the parts
of the patch in tree_unroll_loop and duplicate_loop_to_header_edge fix.

I have added some tests for the testsuite; unfortunately, I do not know
about a reliable way how to test whether probability/frequency of some
object is set "sanely", since the structure of cfg and the exact values of
frequencies may change easily by introducing changes to other
optimizers.

Bootstrapped & regtested on i686 and x86_64.

Zdenek

	* loop-unswitch.c (unswitch_loop): Pass probabilities to loopify.
	* tree-ssa-loop-niter.c (nonzero_p): Export.
	* tree-ssa-loop-unswitch.c (tree_unswitch_loop): Pass probabilities
	to loop_version.
	* cfgloopmanip.c (scale_loop_frequencies): Export.
	(loopify): Scale the frequencies by prescribed coefficients.
	(set_zero_probability): New function.
	(duplicate_loop_to_header_edge): Improve updating of frequencies.
	(lv_adjust_loop_entry_edge, loop_version): Set probabilities
	and frequencies according to arguments.
	* tree-ssa-loop-manip.c (tree_unroll_loop): Set probabilities
	correctly.
	* tree.h (nonzero_p): Declare.
	* cfg.c (scale_bbs_frequencies_int): Allow scaling the frequencies up.
	* modulo-sched.c (sms_schedule): Set probabilities for entering
	versioned loop correctly.
	* tree-vect-transform.c (vect_transform_loop): Ditto.
	* cfgloop.h (loopify, loop_version): Declaration changed.
	(scale_loop_frequencies): Declared.

	* gcc.dg/tree-ssa/update-unroll-1.c: New test.
	* gcc.dg/tree-ssa/update-unswitch-1.c: New test.

Index: loop-unswitch.c
===================================================================
*** loop-unswitch.c	(revision 120080)
--- loop-unswitch.c	(working copy)
*************** unswitch_loop (struct loop *loop, basic_
*** 451,457 ****
    /* Loopify from the copy of LOOP body, constructing the new loop.  */
    nloop = loopify (latch_edge,
  		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
! 		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true);
  
    /* Remove branches that are now unreachable in new loops.  */
    remove_path (true_edge);
--- 451,458 ----
    /* Loopify from the copy of LOOP body, constructing the new loop.  */
    nloop = loopify (latch_edge,
  		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
! 		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
! 		   prob, REG_BR_PROB_BASE - prob);
  
    /* Remove branches that are now unreachable in new loops.  */
    remove_path (true_edge);
Index: tree-ssa-loop-niter.c
===================================================================
*** tree-ssa-loop-niter.c	(revision 120080)
--- tree-ssa-loop-niter.c	(working copy)
*************** zero_p (tree arg)
*** 70,76 ****
  /* Returns true if ARG a nonzero constant.  Unlike integer_nonzerop, it does
     not care about overflow flags.  */
  
! static bool
  nonzero_p (tree arg)
  {
    if (!arg)
--- 70,76 ----
  /* Returns true if ARG a nonzero constant.  Unlike integer_nonzerop, it does
     not care about overflow flags.  */
  
! bool
  nonzero_p (tree arg)
  {
    if (!arg)
Index: tree-ssa-loop-unswitch.c
===================================================================
*** tree-ssa-loop-unswitch.c	(revision 120080)
--- tree-ssa-loop-unswitch.c	(working copy)
*************** static struct loop *
*** 269,281 ****
  tree_unswitch_loop (struct loop *loop,
  		    basic_block unswitch_on, tree cond)
  {
!   basic_block condition_bb;
  
    /* Some sanity checking.  */
    gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
    gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
    gcc_assert (loop->inner == NULL);
  
    return loop_version (loop, unshare_expr (cond), 
! 		       &condition_bb, false);
  }
--- 269,285 ----
  tree_unswitch_loop (struct loop *loop,
  		    basic_block unswitch_on, tree cond)
  {
!   unsigned prob_true;
!   edge edge_true, edge_false;
  
    /* Some sanity checking.  */
    gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
    gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
    gcc_assert (loop->inner == NULL);
  
+   extract_true_false_edges_from_block (unswitch_on, &edge_true, &edge_false);
+   prob_true = edge_true->probability;
    return loop_version (loop, unshare_expr (cond), 
! 		       NULL, prob_true, prob_true,
! 		       REG_BR_PROB_BASE - prob_true, false);
  }
Index: cfgloopmanip.c
===================================================================
*** cfgloopmanip.c	(revision 120080)
--- cfgloopmanip.c	(working copy)
*************** static void fix_loop_placements (struct 
*** 44,50 ****
  static bool fix_bb_placement (basic_block);
  static void fix_bb_placements (basic_block, bool *);
  static void place_new_loop (struct loop *);
- static void scale_loop_frequencies (struct loop *, int, int);
  static basic_block create_preheader (struct loop *, int);
  static void unloop (struct loop *, bool *);
  
--- 44,49 ----
*************** add_loop (struct loop *loop, struct loop
*** 396,402 ****
  }
  
  /* Multiply all frequencies in LOOP by NUM/DEN.  */
! static void
  scale_loop_frequencies (struct loop *loop, int num, int den)
  {
    basic_block *bbs;
--- 395,401 ----
  }
  
  /* Multiply all frequencies in LOOP by NUM/DEN.  */
! void
  scale_loop_frequencies (struct loop *loop, int num, int den)
  {
    basic_block *bbs;
*************** scale_loop_frequencies (struct loop *loo
*** 413,424 ****
     LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
     FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
     TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
!    Returns newly created loop.  */
  
  struct loop *
  loopify (edge latch_edge, edge header_edge,
  	 basic_block switch_bb, edge true_edge, edge false_edge,
! 	 bool redirect_all_edges)
  {
    basic_block succ_bb = latch_edge->dest;
    basic_block pred_bb = header_edge->src;
--- 412,424 ----
     LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
     FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
     TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
!    Returns the newly created loop.  Frequencies and counts in the new loop
!    are scaled by FALSE_SCALE and in the old one by TRUE_SCALE.  */
  
  struct loop *
  loopify (edge latch_edge, edge header_edge,
  	 basic_block switch_bb, edge true_edge, edge false_edge,
! 	 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
  {
    basic_block succ_bb = latch_edge->dest;
    basic_block pred_bb = header_edge->src;
*************** loopify (edge latch_edge, edge header_ed
*** 427,433 ****
    sbitmap seen;
    struct loop *loop = XCNEW (struct loop);
    struct loop *outer = succ_bb->loop_father->outer;
!   int freq, prob, tot_prob;
    gcov_type cnt;
    edge e;
    edge_iterator ei;
--- 427,433 ----
    sbitmap seen;
    struct loop *loop = XCNEW (struct loop);
    struct loop *outer = succ_bb->loop_father->outer;
!   int freq;
    gcov_type cnt;
    edge e;
    edge_iterator ei;
*************** loopify (edge latch_edge, edge header_ed
*** 437,446 ****
  
    freq = EDGE_FREQUENCY (header_edge);
    cnt = header_edge->count;
-   prob = EDGE_SUCC (switch_bb, 0)->probability;
-   tot_prob = prob + EDGE_SUCC (switch_bb, 1)->probability;
-   if (tot_prob == 0)
-     tot_prob = 1;
  
    /* Redirect edges.  */
    loop_redirect_edge (latch_edge, loop->header);
--- 437,442 ----
*************** loopify (edge latch_edge, edge header_ed
*** 469,480 ****
    add_bb_to_loop (switch_bb, outer);
  
    /* Fix frequencies.  */
!   switch_bb->frequency = freq;
!   switch_bb->count = cnt;
!   FOR_EACH_EDGE (e, ei, switch_bb->succs)
!     e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
!   scale_loop_frequencies (loop, prob, tot_prob);
!   scale_loop_frequencies (succ_bb->loop_father, tot_prob - prob, tot_prob);
  
    /* Update dominators of blocks outside of LOOP.  */
    dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
--- 465,481 ----
    add_bb_to_loop (switch_bb, outer);
  
    /* Fix frequencies.  */
!   if (redirect_all_edges)
!     {
!       switch_bb->frequency = freq;
!       switch_bb->count = cnt;
!       FOR_EACH_EDGE (e, ei, switch_bb->succs)
! 	{
! 	  e->count = (switch_bb->count * e->probability) / REG_BR_PROB_BASE;
! 	}
!     }
!   scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
!   scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
  
    /* Update dominators of blocks outside of LOOP.  */
    dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
*************** update_single_exit_for_duplicated_loops 
*** 804,809 ****
--- 805,845 ----
      update_single_exit_for_duplicated_loop (orig_loops[i]);
  }
  
+ /* Sets probability and count of edge E to zero.  The probability and count
+    is redistributed evenly to the remaining edges coming from E->src.  */
+ 
+ static void
+ set_zero_probability (edge e)
+ {
+   basic_block bb = e->src;
+   edge_iterator ei;
+   edge ae, last = NULL;
+   unsigned n = EDGE_COUNT (bb->succs);
+   gcov_type cnt = e->count, cnt1;
+   unsigned prob = e->probability, prob1;
+ 
+   gcc_assert (n > 1);
+   cnt1 = cnt / (n - 1);
+   prob1 = prob / (n - 1);
+ 
+   FOR_EACH_EDGE (ae, ei, bb->succs)
+     {
+       if (ae == e)
+ 	continue;
+ 
+       ae->probability += prob1;
+       ae->count += cnt1;
+       last = ae;
+     }
+ 
+   /* Move the rest to one of the edges.  */
+   last->probability += prob % (n - 1);
+   last->count += cnt % (n - 1);
+ 
+   e->probability = 0;
+   e->count = 0;
+ }
+ 
  /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
     loop structure and dominators.  E's destination must be LOOP header for
     this to work, i.e. it must be entry or latch edge of this loop; these are
*************** duplicate_loop_to_header_edge (struct lo
*** 834,843 ****
--- 870,882 ----
    unsigned i, j, n;
    int is_latch = (latch == e->src);
    int scale_act = 0, *scale_step = NULL, scale_main = 0;
+   int scale_after_exit = 0;
    int p, freq_in, freq_le, freq_out_orig;
    int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
    int add_irreducible_flag;
    basic_block place_after;
+   bitmap bbs_to_scale = NULL;
+   bitmap_iterator bi;
  
    gcc_assert (e->dest == loop->header);
    gcc_assert (ndupl > 0);
*************** duplicate_loop_to_header_edge (struct lo
*** 887,896 ****
        prob_pass_wont_exit =
  	      RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
  
        scale_step = XNEWVEC (int, ndupl);
  
! 	for (i = 1; i <= ndupl; i++)
! 	  scale_step[i - 1] = TEST_BIT (wont_exit, i)
  				? prob_pass_wont_exit
  				: prob_pass_thru;
  
--- 926,953 ----
        prob_pass_wont_exit =
  	      RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
  
+       if (orig
+ 	  /* Do not risk overflows.  The probability of the exit is usually
+ 	     small, anyway.  */
+ 	  && REG_BR_PROB_BASE - orig->probability >= REG_BR_PROB_BASE / 5)
+ 	{
+ 	  /* The blocks that are dominated by a removed exit edge ORIG have
+ 	     frequencies scaled by this.  */
+ 	  scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
+ 				   REG_BR_PROB_BASE - orig->probability);
+ 	  bbs_to_scale = BITMAP_ALLOC (NULL);
+ 	  for (i = 0; i < n; i++)
+ 	    {
+ 	      if (bbs[i] != orig->src
+ 		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
+ 		bitmap_set_bit (bbs_to_scale, i);
+ 	    }
+ 	}
+ 
        scale_step = XNEWVEC (int, ndupl);
  
!       for (i = 1; i <= ndupl; i++)
! 	scale_step[i - 1] = TEST_BIT (wont_exit, i)
  				? prob_pass_wont_exit
  				: prob_pass_thru;
  
*************** duplicate_loop_to_header_edge (struct lo
*** 1043,1048 ****
--- 1100,1116 ----
  	{
  	  if (to_remove)
  	    VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
+ 	  set_zero_probability (new_spec_edges[SE_ORIG]);
+ 
+ 	  /* Scale the frequencies of the blocks dominated by the exit.  */
+ 	  if (bbs_to_scale)
+ 	    {
+ 	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+ 		{
+ 		  scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
+ 					     REG_BR_PROB_BASE);
+ 		}
+ 	    }
  	}
  
        /* Record the first copy in the control flow order if it is not
*************** duplicate_loop_to_header_edge (struct lo
*** 1068,1073 ****
--- 1136,1152 ----
      {
        if (to_remove)
  	VEC_safe_push (edge, heap, *to_remove, orig);
+       set_zero_probability (orig);
+ 
+       /* Scale the frequencies of the blocks dominated by the exit.  */
+       if (bbs_to_scale)
+ 	{
+ 	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
+ 	    {
+ 	      scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
+ 					 REG_BR_PROB_BASE);
+ 	    }
+ 	}
      }
  
    /* Update the original loop.  */
*************** duplicate_loop_to_header_edge (struct lo
*** 1103,1108 ****
--- 1182,1188 ----
    free (first_active);
  
    free (bbs);
+   BITMAP_FREE (bbs_to_scale);
  
    return true;
  }
*************** force_single_succ_latches (void)
*** 1239,1251 ****
      --- edge e ---> [cond expr] ---> [first_head]
  			|
  			+---------> [second_head]
! */
  
  static basic_block
! lv_adjust_loop_entry_edge (basic_block first_head,
! 			   basic_block second_head,
! 			   edge e,
! 			   void *cond_expr)
  {
    basic_block new_head = NULL;
    edge e1;
--- 1319,1330 ----
      --- edge e ---> [cond expr] ---> [first_head]
  			|
  			+---------> [second_head]
! 
!   THEN_PROB is the probability of then branch of the condition.  */
  
  static basic_block
! lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
! 			   edge e, void *cond_expr, unsigned then_prob)
  {
    basic_block new_head = NULL;
    edge e1;
*************** lv_adjust_loop_entry_edge (basic_block f
*** 1256,1268 ****
       insert conditional expr.  */
    new_head = split_edge (e);
  
- 
    lv_add_condition_to_bb (first_head, second_head, new_head,
  			  cond_expr);
  
    /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
    e1 = make_edge (new_head, first_head,
  		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
    set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
    set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
  
--- 1335,1352 ----
       insert conditional expr.  */
    new_head = split_edge (e);
  
    lv_add_condition_to_bb (first_head, second_head, new_head,
  			  cond_expr);
  
    /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
+   e = single_succ_edge (new_head);
    e1 = make_edge (new_head, first_head,
  		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
+   e1->probability = then_prob;
+   e->probability = REG_BR_PROB_BASE - then_prob;
+   e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
+   e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
+ 
    set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
    set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
  
*************** lv_adjust_loop_entry_edge (basic_block f
*** 1281,1292 ****
--- 1365,1382 ----
     may be a run time test for things that were not resolved by static
     analysis (overlapping ranges (anti-aliasing), alignment, etc.).
  
+    THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
+    is the ratio by that the frequencies in the original loop should
+    be scaled.  ELSE_SCALE is the ratio by that the frequencies in the
+    new loop should be scaled.
+    
     If PLACE_AFTER is true, we place the new loop after LOOP in the
     instruction stream, otherwise it is placed before LOOP.  */
  
  struct loop *
  loop_version (struct loop *loop,
  	      void *cond_expr, basic_block *condition_bb,
+ 	      unsigned then_prob, unsigned then_scale, unsigned else_scale,
  	      bool place_after)
  {
    basic_block first_head, second_head;
*************** loop_version (struct loop *loop,
*** 1318,1324 ****
  
    /* Split loop entry edge and insert new block with cond expr.  */
    cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
! 					entry, cond_expr);
    if (condition_bb)
      *condition_bb = cond_bb;
  
--- 1408,1414 ----
  
    /* Split loop entry edge and insert new block with cond expr.  */
    cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
! 					entry, cond_expr, then_prob);
    if (condition_bb)
      *condition_bb = cond_bb;
  
*************** loop_version (struct loop *loop,
*** 1334,1340 ****
    nloop = loopify (latch_edge,
  		   single_pred_edge (get_bb_copy (loop->header)),
  		   cond_bb, true_edge, false_edge,
! 		   false /* Do not redirect all edges.  */);
  
    exit = single_exit (loop);
    if (exit)
--- 1424,1431 ----
    nloop = loopify (latch_edge,
  		   single_pred_edge (get_bb_copy (loop->header)),
  		   cond_bb, true_edge, false_edge,
! 		   false /* Do not redirect all edges.  */,
! 		   then_scale, else_scale);
  
    exit = single_exit (loop);
    if (exit)
Index: tree-ssa-loop-manip.c
===================================================================
*** tree-ssa-loop-manip.c	(revision 120080)
--- tree-ssa-loop-manip.c	(working copy)
*************** determine_exit_conditions (struct loop *
*** 805,810 ****
--- 805,813 ----
         post;
       } */
  
+ /* Probability in % that the unrolled loop is entered.  Just a guess.  */
+ #define PROB_UNROLLED_LOOP_ENTERED 90
+ 
  void
  tree_unroll_loop (struct loop *loop, unsigned factor,
  		  edge exit, struct tree_niter_desc *desc)
*************** tree_unroll_loop (struct loop *loop, uns
*** 816,826 ****
    struct loop *new_loop;
    basic_block rest, exit_bb;
    edge old_entry, new_entry, old_latch, precond_edge, new_exit;
!   edge nonexit, new_nonexit;
    block_stmt_iterator bsi;
    use_operand_p op;
    bool ok;
!   unsigned est_niter;
    unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
    sbitmap wont_exit;
  
--- 819,829 ----
    struct loop *new_loop;
    basic_block rest, exit_bb;
    edge old_entry, new_entry, old_latch, precond_edge, new_exit;
!   edge new_nonexit;
    block_stmt_iterator bsi;
    use_operand_p op;
    bool ok;
!   unsigned est_niter, prob_entry, scale_unrolled, scale_rest, freq_e, freq_h;
    unsigned irr = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
    sbitmap wont_exit;
  
*************** tree_unroll_loop (struct loop *loop, uns
*** 829,835 ****
  			     &enter_main_cond, &exit_base, &exit_step,
  			     &exit_cmp, &exit_bound);
  
!   new_loop = loop_version (loop, enter_main_cond, NULL, true);
    gcc_assert (new_loop != NULL);
    update_ssa (TODO_update_ssa);
  
--- 832,860 ----
  			     &enter_main_cond, &exit_base, &exit_step,
  			     &exit_cmp, &exit_bound);
  
!   /* Let us assume that the unrolled loop is quite likely to be entered.  */
!   if (nonzero_p (enter_main_cond))
!     prob_entry = REG_BR_PROB_BASE;
!   else
!     prob_entry = PROB_UNROLLED_LOOP_ENTERED * REG_BR_PROB_BASE / 100;
! 
!   /* The values for scales should keep profile consistent, and somewhat close
!      to correct.
! 
!      TODO: The current value of SCALE_REST makes it appear that the loop that
!      is created by splitting the remaining iterations of the unrolled loop is
!      executed the same number of times as the original loop, and with the same
!      frequencies, which is obviously wrong.  This does not appear to cause
!      problems, so we do not bother with fixing it for now.  To make the profile
!      correct, we would need to change the probability of the exit edge of the
!      loop, and recompute the distribution of frequencies in its body because
!      of this change (scale the frequencies of blocks before and after the exit
!      by appropriate factors).  */
!   scale_unrolled = prob_entry;
!   scale_rest = REG_BR_PROB_BASE;
! 
!   new_loop = loop_version (loop, enter_main_cond, NULL,
! 			   prob_entry, scale_unrolled, scale_rest, true);
    gcc_assert (new_loop != NULL);
    update_ssa (TODO_update_ssa);
  
*************** tree_unroll_loop (struct loop *loop, uns
*** 837,850 ****
    dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
  	       ? boolean_false_node
  	       : boolean_true_node);
-   if (exit == EDGE_SUCC (exit->src, 0))
-     nonexit = EDGE_SUCC (exit->src, 1);
-   else
-     nonexit = EDGE_SUCC (exit->src, 0);
-   nonexit->probability = REG_BR_PROB_BASE;
-   exit->probability = 0;
-   nonexit->count += exit->count;
-   exit->count = 0;
    exit_if = last_stmt (exit->src);
    COND_EXPR_COND (exit_if) = dont_exit;
    update_stmt (exit_if);
--- 862,867 ----
*************** tree_unroll_loop (struct loop *loop, uns
*** 858,863 ****
--- 875,899 ----
    gcc_assert (ok);
    update_ssa (TODO_update_ssa);
  
+   /* Determine the probability of the exit edge.  */
+   est_niter = est_niter / factor;
+   if (est_niter < 5)
+     {
+       /* We had most likely estimated the number of iterations
+ 	 wrongly before, giving us too low number now.  Use
+ 	 something a bit more reasonable.  */
+       est_niter = 5;
+     }
+ 
+   /* Ensure that the frequencies in the loop match the new estimated
+      number of iterations.  */
+   freq_h = loop->header->frequency;
+   freq_e = EDGE_FREQUENCY (loop_preheader_edge (loop));
+   /* Avoid overflows.  */
+   if (freq_e * est_niter < 65536
+       && freq_e * est_niter <= 100 * freq_h)
+     scale_loop_frequencies (loop, freq_e * est_niter, freq_h);
+ 
    /* Prepare the cfg and update the phi nodes.  */
    rest = loop_preheader_edge (new_loop)->src;
    precond_edge = single_pred_edge (rest);
*************** tree_unroll_loop (struct loop *loop, uns
*** 866,877 ****
  
    new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
    new_exit->count = loop_preheader_edge (loop)->count;
-   est_niter = est_niter / factor + 1;
    new_exit->probability = REG_BR_PROB_BASE / est_niter;
  
    new_nonexit = single_pred_edge (loop->latch);
    new_nonexit->flags = EDGE_TRUE_VALUE;
    new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
  
    old_entry = loop_preheader_edge (loop);
    new_entry = loop_preheader_edge (new_loop);
--- 902,917 ----
  
    new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE | irr);
    new_exit->count = loop_preheader_edge (loop)->count;
    new_exit->probability = REG_BR_PROB_BASE / est_niter;
  
+   rest->count += new_exit->count;
+   rest->frequency += EDGE_FREQUENCY (new_exit);
+ 
    new_nonexit = single_pred_edge (loop->latch);
    new_nonexit->flags = EDGE_TRUE_VALUE;
    new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
+   scale_bbs_frequencies_int (&loop->latch, 1, new_nonexit->probability,
+ 			     REG_BR_PROB_BASE);
  
    old_entry = loop_preheader_edge (loop);
    new_entry = loop_preheader_edge (new_loop);
Index: tree.h
===================================================================
*** tree.h	(revision 120080)
--- tree.h	(working copy)
*************** extern int integer_pow2p (tree);
*** 4076,4081 ****
--- 4076,4082 ----
  extern int integer_nonzerop (tree);
  
  extern bool zero_p (tree);
+ extern bool nonzero_p (tree);
  extern bool cst_and_fits_in_hwi (tree);
  extern tree num_ending_zeros (tree);
  
Index: cfg.c
===================================================================
*** cfg.c	(revision 120080)
--- cfg.c	(working copy)
*************** scale_bbs_frequencies_int (basic_block *
*** 949,956 ****
    edge e;
    if (num < 0)
      num = 0;
-   if (num > den)
-     return;
    /* Assume that the users are producing the fraction from frequencies
       that never grow far enough to risk arithmetic overflow.  */
    gcc_assert (num < 65536);
--- 949,954 ----
Index: testsuite/gcc.dg/tree-ssa/update-unroll-1.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/update-unroll-1.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/update-unroll-1.c	(revision 0)
***************
*** 0 ****
--- 1,22 ----
+ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+ /* { dg-require-effective-target ilp32 } */
+ /* { dg-options "-O1 -fprefetch-loop-arrays -march=athlon -fdump-tree-aprefetch-blocks" } */
+ 
+ int a[10000];
+ 
+ int foo(unsigned n)
+ {
+   unsigned i, s = 0;
+ 
+   for (i = 0; i < n; i++)
+     s += a[i];
+ 
+   return s;
+ }
+ 
+ /* We used to make the probability that the body of the loop (unrolled
+    to enable prefetching) is entered 0, which is not correct.  */
+ 
+ /* { dg-final { scan-tree-dump-not "Invalid sum" "aprefetch"} } */
+ /* { dg-final { scan-tree-dump-not "SUCC: 7 .100.0%" "aprefetch"} } */
+ /* { dg-final { cleanup-tree-dump "aprefetch" } } */
Index: testsuite/gcc.dg/tree-ssa/update-unswitch-1.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/update-unswitch-1.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/update-unswitch-1.c	(revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O1 -funswitch-loops -fdump-tree-unswitch-blocks" } */
+ 
+ int bla(int p)
+ {
+   unsigned i, s = 1;
+ 
+   for (i = 4; i < 100; i++)
+     {
+       if (p)
+ 	s += i/2;
+       else
+ 	s *= i/2;
+     }
+ 
+   return s;
+ }
+ 
+ /* We used to make the probability that the first of the loops created
+    by unswitching is entered 100%, which is not correct.  */
+ 
+ /* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch"} } */
+ /* { dg-final { scan-tree-dump-not "SUCC: 3 .100.0%" "unswitch"} } */
+ /* { dg-final { cleanup-tree-dump "unswitch" } } */
Index: modulo-sched.c
===================================================================
*** modulo-sched.c	(revision 120080)
--- modulo-sched.c	(working copy)
*************** canon_loop (struct loop *loop)
*** 868,873 ****
--- 868,877 ----
      }
  }
  
+ /* Probability in % that the sms-ed loop rolls enough so that optimized
+    version may be entered.  Just a guess.  */
+ #define PROB_SMS_ENOUGH_ITERATIONS 80
+ 
  /* Main entry point, perform SMS scheduling on the loops of the function
     that consist of single basic blocks.  */
  static void
*************** sms_schedule (void)
*** 882,888 ****
    partial_schedule_ptr ps;
    struct df *df;
    basic_block bb = NULL;
!   struct loop *loop, *nloop;
    basic_block condition_bb = NULL;
    edge latch_edge;
    gcov_type trip_count = 0;
--- 886,892 ----
    partial_schedule_ptr ps;
    struct df *df;
    basic_block bb = NULL;
!   struct loop *loop;
    basic_block condition_bb = NULL;
    edge latch_edge;
    gcov_type trip_count = 0;
*************** sms_schedule (void)
*** 1181,1188 ****
  		{
  		  rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
  						 GEN_INT(stage_count));
  
! 		  nloop = loop_version (loop, comp_rtx, &condition_bb, true);
  		}
  
  	      /* Set new iteration count of loop kernel.  */
--- 1185,1196 ----
  		{
  		  rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
  						 GEN_INT(stage_count));
+ 		  unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
+ 				   * REG_BR_PROB_BASE) / 100;
  
! 		  loop_version (loop, comp_rtx, &condition_bb,
! 				prob, prob, REG_BR_PROB_BASE - prob,
! 				true);
  		}
  
  	      /* Set new iteration count of loop kernel.  */
Index: tree-vect-transform.c
===================================================================
*** tree-vect-transform.c	(revision 120080)
--- tree-vect-transform.c	(working copy)
*************** vect_transform_loop (loop_vec_info loop_
*** 4699,4709 ****
        basic_block new_exit_bb;
        edge new_exit_e, e;
        tree orig_phi, new_phi, arg;
  
        cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                       &cond_expr_stmt_list);
        initialize_original_copy_tables ();
!       nloop = loop_version (loop, cond_expr, &condition_bb, true);
        free_original_copy_tables();
  
        /** Loop versioning violates an assumption we try to maintain during 
--- 4699,4711 ----
        basic_block new_exit_bb;
        edge new_exit_e, e;
        tree orig_phi, new_phi, arg;
+       unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  
        cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                       &cond_expr_stmt_list);
        initialize_original_copy_tables ();
!       nloop = loop_version (loop, cond_expr, &condition_bb,
! 			    prob, prob, REG_BR_PROB_BASE - prob, true);
        free_original_copy_tables();
  
        /** Loop versioning violates an assumption we try to maintain during 
Index: cfgloop.h
===================================================================
*** cfgloop.h	(revision 120080)
--- cfgloop.h	(working copy)
*************** extern bool duplicate_loop_to_header_edg
*** 254,263 ****
  					   unsigned, sbitmap, edge,
   					   VEC (edge, heap) **, int);
  extern struct loop *loopify (edge, edge,
! 			     basic_block, edge, edge, bool);
  struct loop * loop_version (struct loop *, void *,
! 			    basic_block *, bool);
  extern bool remove_path (edge);
  
  /* Induction variable analysis.  */
  
--- 254,265 ----
  					   unsigned, sbitmap, edge,
   					   VEC (edge, heap) **, int);
  extern struct loop *loopify (edge, edge,
! 			     basic_block, edge, edge, bool,
! 			     unsigned, unsigned);
  struct loop * loop_version (struct loop *, void *,
! 			    basic_block *, unsigned, unsigned, unsigned, bool);
  extern bool remove_path (edge);
+ void scale_loop_frequencies (struct loop *, int, int);
  
  /* Induction variable analysis.  */
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]