This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [patch] Move loop header copying after profiling


> Hello,
> 
> this patch moves loop header copying pass after profile pass.  This
> gives a better profile when we guess the branch probabilities.
> Currently we guess 50/50 probabilities for the branch of the copied condition
> that enters the loop, which is wrong, since the loop usually is entered.

The probability is actually roughly 60%, as we got from the old
loop header heuristic.  Perhaps we can still use this number somewhere?

/* Copied condition for the first iteration of loop is probably true.  */
DEF_PREDICTOR (PRED_LOOP_HEADER, "loop header", HITRATE (64), 0)

Honza
> When the loop header copying pass is performed after profiling, we get the
> probability of entering equal to 1 - 1 / estimated number of iterations.
> 
> This patch also moves the profile updating code to
> tree_duplicate_sese_region and fixes it (the profile updating code
> currently does not work when the profile is guessed, because it uses
> counts that are all zero in that case; we must work with
> frequencies instead). Pat, as far as I can tell, this fixes the
> testcase you sent me.  Can you check?
> 
> Finally, the patch removes the call to rewrite_into_loop_closed_ssa
> which is useless at the moment, since update_ssa no longer needs it.
> This change is compile time neutral.
> 
> Bootstrapped & regtested on i686 and ia64.
> 
> Zdenek
> 
> 	* tree-cfg.c (tree_duplicate_sese_region): Update profile.
> 	* tree-optimize.c (init_tree_optimization_passes): Swap
> 	pass_ch and pass_profile.
> 	* tree-ssa-loop-ch.c (copy_loop_headers): Do not update profile
> 	here.  Remove rewrite_into_loop_closed_ssa call.
> 
> Index: tree-cfg.c
> ===================================================================
> RCS file: /cvs/gcc/gcc/gcc/tree-cfg.c,v
> retrieving revision 2.184
> diff -c -3 -p -r2.184 tree-cfg.c
> *** tree-cfg.c	3 May 2005 21:47:29 -0000	2.184
> --- tree-cfg.c	10 May 2005 01:02:25 -0000
> *************** tree_duplicate_sese_region (edge entry, 
> *** 4847,4852 ****
> --- 4847,4853 ----
>     edge exit_copy;
>     basic_block *doms;
>     edge redirected;
> +   int total_freq, entry_freq;
>   
>     if (!can_copy_bbs_p (region, n_region))
>       return false;
> *************** tree_duplicate_sese_region (edge entry, 
> *** 4893,4904 ****
>   
>     gcc_assert (!need_ssa_update_p ());
>   
> !   /* Record blocks outside the region that are duplicated by something
>        inside.  */
>     doms = xmalloc (sizeof (basic_block) * n_basic_blocks);
>     n_doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region, doms);
>   
>     copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop);
>   
>     if (copying_header)
>       {
> --- 4894,4917 ----
>   
>     gcc_assert (!need_ssa_update_p ());
>   
> !   /* Record blocks outside the region that are dominated by something
>        inside.  */
>     doms = xmalloc (sizeof (basic_block) * n_basic_blocks);
>     n_doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region, doms);
>   
> +   total_freq = entry->dest->frequency;
> +   entry_freq = EDGE_FREQUENCY (entry);
> +   /* Fix up corner cases, to avoid division by zero or creation of negative
> +      frequencies.  */
> +   if (total_freq == 0)
> +     total_freq = 1;
> +   else if (entry_freq > total_freq)
> +     entry_freq = total_freq;
> + 
>     copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop);
> +   scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
> + 			     total_freq);
> +   scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
>   
>     if (copying_header)
>       {
> Index: tree-optimize.c
> ===================================================================
> RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
> retrieving revision 2.88
> diff -c -3 -p -r2.88 tree-optimize.c
> *** tree-optimize.c	27 Apr 2005 09:11:00 -0000	2.88
> --- tree-optimize.c	10 May 2005 01:02:25 -0000
> *************** init_tree_optimization_passes (void)
> *** 374,381 ****
>     NEXT_PASS (pass_phiopt);
>     NEXT_PASS (pass_may_alias);
>     NEXT_PASS (pass_tail_recursion);
> -   NEXT_PASS (pass_ch);
>     NEXT_PASS (pass_profile);
>     NEXT_PASS (pass_stdarg);
>     NEXT_PASS (pass_sra);
>     /* FIXME: SRA may generate arbitrary gimple code, exposing new
> --- 374,381 ----
>     NEXT_PASS (pass_phiopt);
>     NEXT_PASS (pass_may_alias);
>     NEXT_PASS (pass_tail_recursion);
>     NEXT_PASS (pass_profile);
> +   NEXT_PASS (pass_ch);
>     NEXT_PASS (pass_stdarg);
>     NEXT_PASS (pass_sra);
>     /* FIXME: SRA may generate arbitrary gimple code, exposing new
> Index: tree-ssa-loop-ch.c
> ===================================================================
> RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-ch.c,v
> retrieving revision 2.16
> diff -c -3 -p -r2.16 tree-ssa-loop-ch.c
> *** tree-ssa-loop-ch.c	23 Apr 2005 00:59:29 -0000	2.16
> --- tree-ssa-loop-ch.c	10 May 2005 01:02:25 -0000
> *************** copy_loop_headers (void)
> *** 131,142 ****
>     basic_block *bbs, *copied_bbs;
>     unsigned n_bbs;
>     unsigned bbs_size;
> -   gcov_type entry_count, body_count, total_count;
>   
>     loops = loop_optimizer_init (dump_file);
>     if (!loops)
>       return;
> -   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
>     
>     /* We do not try to keep the information about irreducible regions
>        up-to-date.  */
> --- 131,140 ----
> *************** copy_loop_headers (void)
> *** 202,209 ****
>   	exit = single_succ_edge (loop_split_edge_with (exit, NULL));
>   
>         entry = loop_preheader_edge (loop);
> -       entry_count = entry->src->count;
> -       body_count = exit->dest->count;
>   
>         if (!tree_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs))
>   	{
> --- 200,205 ----
> *************** copy_loop_headers (void)
> *** 211,233 ****
>   	  continue;
>   	}
>   
> -       /* Fix profiling info.  Scaling is done in gcov_type arithmetic to
> - 	 avoid losing information; this is slow, but is done at most
> - 	 once per loop.  We special case 0 to avoid division by 0;
> -          probably other special cases exist.  */
> -       total_count = body_count + entry_count;
> -       if (total_count == 0LL)
> - 	{
> - 	  scale_bbs_frequencies_int (bbs, n_bbs, 0, 1);
> - 	  scale_bbs_frequencies_int (copied_bbs, n_bbs, 0, 1);
> - 	}
> -       else
> - 	{
> - 	  scale_bbs_frequencies_gcov_type (bbs, n_bbs, body_count, total_count);
> - 	  scale_bbs_frequencies_gcov_type (copied_bbs, n_bbs, entry_count, 
> - 				           total_count);
> - 	}
> - 
>         /* Ensure that the latch and the preheader is simple (we know that they
>   	 are not now, since there was the loop exit condition.  */
>         loop_split_edge_with (loop_preheader_edge (loop), NULL);
> --- 207,212 ----


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]