This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

loop heuristics tweaks


Hi,
I've found it somewhat weird that on the following loop:
int m=1;
int a[100];
main()
{
        int i;
        for (i=0; i<m; i++)
                a[i]=0;
}
our reaction with profile feedback (so we know the loop iterates once on
average) is to bypass loop peeling and do loop unrolling + loop peeling, both 4
times.

I've removed the checks that bypass peeling on simple loops with low iteration
counts and modified the runtime iterations unroller to limit unrolling to the
expected number of iterations of the given loop.

Hope that this is what we should do.

Zdenek: I've noticed that you've integrated loop peeling and unrolling into
the simple loop unrollers.  This looks like a neat idea.  What I am worried
about is whether it is required for correctness.  If not, I would suggest
peeling exactly one time for loops with large iteration counts.  One peeling
should be enough to help other optimizers and not waste code size.


Tue Apr  9 13:27:18 CEST 2002  Jan Hubicka  <jh@suse.cz>

	* toplev.c (rest_of_compilation): Better debug output; always run webizer2
	* unroll-new.c (unroll_loop_runtime_iterations): Limit unrolling to expected number
	of iterations.
	(peel_loop): Avoid check to bypass peeling on simple loops.

Index: toplev.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/toplev.c,v
retrieving revision 1.537.2.63
diff -c -3 -p -r1.537.2.63 toplev.c
*** toplev.c	9 Apr 2002 09:57:05 -0000	1.537.2.63
--- toplev.c	9 Apr 2002 11:27:00 -0000
*************** enum dump_file_index
*** 221,226 ****
--- 221,227 ----
    DFI_loop,
    DFI_cfg,
    DFI_bp,
+   DFI_loop2,
    DFI_tracer,
    DFI_gcse2,
    DFI_cse2,
*************** static struct dump_file_info dump_file[D
*** 273,278 ****
--- 274,280 ----
    { "loop",	'L', 1, 0, 0 },
    { "cfg",	'f', 1, 0, 0 },
    { "bp",	'b', 1, 0, 0 },
+   { "loop2",	'L', 1, 0, 0 },
    { "tracer",	'T', 1, 0, 0 },
    { "gcse2",	'G', 1, 0, 0 },
    { "cse2",	't', 1, 0, 0 },
*************** rest_of_compilation (decl)
*** 3004,3010 ****
      {
        struct loops *loops;
        timevar_push (TV_LOOP);
!       open_dump_file (DFI_loop, decl);
  
        loops = loop_optimizer_init (rtl_dump_file);
  
--- 3006,3014 ----
      {
        struct loops *loops;
        timevar_push (TV_LOOP);
!       open_dump_file (DFI_loop2, decl);
!       if (rtl_dump_file)
! 	dump_flow_info (rtl_dump_file);
  
        loops = loop_optimizer_init (rtl_dump_file);
  
*************** rest_of_compilation (decl)
*** 3023,3056 ****
  	  loop_optimizer_finalize (loops, rtl_dump_file);
  	}
  
-       close_dump_file (DFI_loop, print_rtl, get_insns ());
-       timevar_pop (TV_LOOP);
        
        cleanup_cfg (CLEANUP_EXPENSIVE);
        ggc_collect ();
      }
  
    if (flag_tracer)
      {
-       timevar_push (TV_TRACER);
        if (rtl_dump_file)
  	dump_flow_info (rtl_dump_file);
-       open_dump_file (DFI_tracer, decl);
        cleanup_cfg (CLEANUP_EXPENSIVE);
        tracer ();
!       if (flag_web)
! 	{
! 	  web_main ();
! 	  delete_trivially_dead_insns (get_insns (), max_reg_num ());
! 	}
!       cleanup_cfg (CLEANUP_EXPENSIVE
! 		   | (flag_thread_jumps ? CLEANUP_THREADING : 0));
!       close_dump_file (DFI_tracer, print_rtl_with_bb, get_insns ());
!       timevar_pop (TV_TRACER);
  #ifdef ENABLE_CHECKING
        verify_flow_info ();
  #endif
      }
  
    /* Lower into lowlevel RTL now.  */
    if (flag_midlevel_rtl)
--- 3027,3066 ----
  	  loop_optimizer_finalize (loops, rtl_dump_file);
  	}
  
        
        cleanup_cfg (CLEANUP_EXPENSIVE);
+       if (rtl_dump_file)
+ 	dump_flow_info (rtl_dump_file);
+       close_dump_file (DFI_loop2, print_rtl_with_bb, get_insns ());
+       timevar_pop (TV_LOOP);
        ggc_collect ();
      }
  
+   timevar_push (TV_TRACER);
+   open_dump_file (DFI_tracer, decl);
    if (flag_tracer)
      {
        if (rtl_dump_file)
  	dump_flow_info (rtl_dump_file);
        cleanup_cfg (CLEANUP_EXPENSIVE);
        tracer ();
!       cleanup_cfg (CLEANUP_EXPENSIVE);
  #ifdef ENABLE_CHECKING
        verify_flow_info ();
  #endif
      }
+   if (flag_web)
+     {
+       timevar_push (TV_WEB);
+       web_main ();
+       delete_trivially_dead_insns (get_insns (), max_reg_num ());
+       cleanup_cfg (CLEANUP_EXPENSIVE
+ 		   | (flag_thread_jumps ? CLEANUP_THREADING : 0));
+ 
+       timevar_pop (TV_WEB);
+     }
+   close_dump_file (DFI_tracer, print_rtl_with_bb, get_insns ());
+   timevar_pop (TV_TRACER);
  
    /* Lower into lowlevel RTL now.  */
    if (flag_midlevel_rtl)
*************** parse_options_and_default_flags (argc, a
*** 4868,4874 ****
        flag_strict_aliasing = 1;
        flag_delete_null_pointer_checks = 1;
        flag_reorder_blocks = 1;
!       flag_tracer = 1;
        flag_new_unroll_loops = 2;
        flag_unswitch_loops = 2;
        flag_peel_loops = 2;
--- 4878,4884 ----
        flag_strict_aliasing = 1;
        flag_delete_null_pointer_checks = 1;
        flag_reorder_blocks = 1;
!       flag_tracer = 2;
        flag_new_unroll_loops = 2;
        flag_unswitch_loops = 2;
        flag_peel_loops = 2;
*************** parse_options_and_default_flags (argc, a
*** 4879,4886 ****
--- 4889,4898 ----
      {
        flag_inline_functions = 1;
        flag_new_unroll_loops = 1;
+       flag_new_unroll_all_loops = 1;
        flag_unswitch_loops = 1;
        flag_peel_loops = 1;
+       flag_tracer = 1;
      }
  
    if (optimize < 2 || optimize_size)
*************** parse_options_and_default_flags (argc, a
*** 4980,4989 ****
--- 4992,5005 ----
      }
    if (!flag_branch_probabilities && flag_new_unroll_loops == 2)
      flag_new_unroll_loops = 0;
+   if (!flag_branch_probabilities && flag_new_unroll_all_loops == 2)
+     flag_new_unroll_all_loops = 0;
    if (!flag_branch_probabilities && flag_unswitch_loops == 2)
      flag_unswitch_loops = 0;
    if (!flag_branch_probabilities && flag_peel_loops == 2)
      flag_peel_loops = 0;
+   if (!flag_branch_probabilities && flag_tracer == 2)
+     flag_tracer = 0;
  
    /* Set flag_no_inline before the post_options () hook.  The C front
       ends use it to determine tree inlining defaults.  FIXME: such
Index: params.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/params.def,v
retrieving revision 1.11.2.5
diff -c -3 -p -r1.11.2.5 params.def
*** params.def	28 Mar 2002 13:03:59 -0000	1.11.2.5
--- params.def	9 Apr 2002 11:27:00 -0000
*************** DEFPARAM(PARAM_MAX_PEELED_INSNS,
*** 108,114 ****
  DEFPARAM(PARAM_MAX_PEEL_TIMES,
  	"max-peel-times",
  	"The maximum number of peelings of a single loop",
! 	3)
  /* The maximum number of insns of an unswitched loop.  */
  DEFPARAM(PARAM_MAX_UNSWITCH_INSNS,
  	"max-unswitch-insns",
--- 108,114 ----
  DEFPARAM(PARAM_MAX_PEEL_TIMES,
  	"max-peel-times",
  	"The maximum number of peelings of a single loop",
! 	15)
  /* The maximum number of insns of an unswitched loop.  */
  DEFPARAM(PARAM_MAX_UNSWITCH_INSNS,
  	"max-unswitch-insns",
Index: unroll-new.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/unroll-new.c,v
retrieving revision 1.1.2.10
diff -c -3 -p -r1.1.2.10 unroll-new.c
*** unroll-new.c	7 Apr 2002 15:58:04 -0000	1.1.2.10
--- unroll-new.c	9 Apr 2002 11:27:00 -0000
*************** unroll_loop_runtime_iterations (loops, l
*** 619,624 ****
--- 619,636 ----
    edge e;
    sbitmap wont_exit;
    int may_exit_copy, n_peel;
+   int expected_niter;
+ 
+   expected_niter = expected_loop_iterations (loop);
+   if (expected_niter < max_unroll && flag_branch_probabilities)
+     max_unroll = expected_niter;
+ 
+   if (max_unroll <= 1)
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, ";; Not unrolling loop, expected number of iteration is low\n");
+       return 0;
+     }
  
    /* Force max_unroll + 1 to be power of 2.  */
    for (i = 1; 2 * i <= max_unroll + 1; i *= 2);
*************** peel_loop (loops, loop, will_unroll)
*** 854,868 ****
      {
        if (rtl_dump_file)
  	fprintf (rtl_dump_file, ";; Not peeling loop, is too big\n");
-       return 1;
-     }
- 
-   /* Do not peel simple loops if also unrolling will be done, not to
-      interfere with it.  */
-   if (will_unroll && simple_loop_p (loops, loop, &desc))
-     {
-       if (rtl_dump_file)
- 	fprintf (rtl_dump_file, ";; Not peeling loop, loop will be unrolled\n");
        return 1;
      }
  
--- 866,871 ----


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]