This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook


Segher Boessenkool <segher@kernel.crashing.org> writes:

> Hi!
>
> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
>> In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
>> target related hueristic adjustment in this hook. In this patch, small loops
>> is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
>> some improvement for spec2017.  This patch enhanced a little for [Patch V2] to
>> enable small loops unroll for O3 by default like O2.
>
>> 	* gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.
>
> That's the declaration of a variable.  A command line flag is something
> like -munroll-small-loops.  Do we want a command line option like that?
> It makes testing simpler.
Thanks for great sugguestion, will update patch to add a command line
option.

>
>> -      /* unroll very small loops 2 time if no -funroll-loops.  */
>> +      /* If funroll-loops is not enabled explicitly, then enable small loops
>> +	 unrolling for -O2, and do not turn fweb or frename-registers on.  */
>>        if (!global_options_set.x_flag_unroll_loops
>>  	  && !global_options_set.x_flag_unroll_all_loops)
>>  	{
>> -	  maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
>> -				 global_options.x_param_values,
>> -				 global_options_set.x_param_values);
>> -
>> -	  maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
>> -				 global_options.x_param_values,
>> -				 global_options_set.x_param_values);
>> +	  unroll_small_loops = optimize >= 2 ? 1 : 0;
>
> That includes -Os?
>
> I think you shouldn't always set it to some value, only enable it where
> you want to enable it.  If you make a command line option for it this is
> especially simple (the table in common/config/rs6000/rs6000-common.c).
Thanks again, update rs6000_option_optimization_table as :
    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_fweb, NULL, 0 },
    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_frename_registers, NULL, 0 },
    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },

While, I still keep the code to disable unroll_small_loops for explicit
-funroll-loops via checking global_options_set.x_flag_unroll_loops. 
>
>> +static unsigned
>> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
>> +{
>> +  if (unroll_small_loops)
>> +    {
>> +      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
>> +	 example we may want to unroll very small loops more times (4 perhaps).
>> +	 We also should use a PARAM for this.  */
>> +      if (loop->ninsns <= 10)
>> +	return MIN (2, nunroll);
>> +      else
>> +	return 0;
>> +    }
>
> (Add an empty line here?)
Thanks again, updated accordingly.
>
>> +  return nunroll;
>> +}
>
> Great :-)
>
>> @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct cl_target_option *ptr,
>>  {
>>    ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
>>    ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
>> +  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
>>  }
>
> Yeah we shouldn't need to add that, this should all be automatic.
Yes, through adding new option in .opt, this is handled automaticly.

Updated patch is at the end of this mail. Thanks for review.

Jiufu
>
>
> Segher

Updated patch:

Index: gcc/common/config/rs6000/rs6000-common.c
===================================================================
--- gcc/common/config/rs6000/rs6000-common.c	(revision 277765)
+++ gcc/common/config/rs6000/rs6000-common.c	(working copy)
@@ -35,7 +35,9 @@ static const struct default_options rs6000_option_
     { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
     /* Enable -fsched-pressure for first pass instruction scheduling.  */
     { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
-    { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
+    /* Enable  -funroll-loops with -munroll-small-loops.  */
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
     { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 277765)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribut
 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
 
+#undef TARGET_LOOP_UNROLL_ADJUST
+#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
+
 #undef TARGET_INIT_BUILTINS
 #define TARGET_INIT_BUILTINS rs6000_init_builtins
 #undef TARGET_BUILTIN_DECL
@@ -4540,24 +4543,21 @@ rs6000_option_override_internal (bool global_init_
 			     global_options.x_param_values,
 			     global_options_set.x_param_values);
 
-      /* unroll very small loops 2 time if no -funroll-loops.  */
-      if (!global_options_set.x_flag_unroll_loops
-	  && !global_options_set.x_flag_unroll_all_loops)
+      /* Explicit funroll-loops turns -munroll-small-loops off.
+	 Implicit funroll-loops does not turn fweb or frename-registers on.  */
+      if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
+	  || (global_options_set.x_flag_unroll_all_loops
+	      && flag_unroll_all_loops))
 	{
-	  maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
-				 global_options.x_param_values,
-				 global_options_set.x_param_values);
-
-	  maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
-				 global_options.x_param_values,
-				 global_options_set.x_param_values);
-
-	  /* If fweb or frename-registers are not specificed in command-line,
-	     do not turn them on implicitly.  */
+	  if (!global_options_set.x_unroll_small_loops)
+	    unroll_small_loops = 0;
+	}
+      else
+	{
 	  if (!global_options_set.x_flag_web)
-	    global_options.x_flag_web = 0;
+	    flag_web = 0;
 	  if (!global_options_set.x_flag_rename_registers)
-	    global_options.x_flag_rename_registers = 0;
+	    flag_rename_registers = 0;
 	}
 
       /* If using typedef char *va_list, signal that
@@ -5101,6 +5101,25 @@ rs6000_destroy_cost_data (void *data)
   free (data);
 }
 
+/*  Implement targetm.loop_unroll_adjust.  */
+
+static unsigned
+rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
+{
+   if (unroll_small_loops)
+    {
+      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
+	 example we may want to unroll very small loops more times (4 perhaps).
+	 We also should use a PARAM for this.  */
+      if (loop->ninsns <= 10)
+	return MIN (2, nunroll);
+      else
+	return 0;
+    }
+  
+  return nunroll;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
Index: gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc/config/rs6000/rs6000.opt	(revision 277765)
+++ gcc/config/rs6000/rs6000.opt	(working copy)
@@ -501,6 +501,10 @@ moptimize-swaps
 Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
 Analyze and remove doubleword swaps from VSX computations.
 
+munroll-small-loops
+Target Undocumented Var(unroll_small_loops) Init(0) Save
+Use conservative small loop unrolling.
+
 mpower9-misc
 Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags)
 Use certain scalar instructions added in ISA 3.0.
Index: gcc/testsuite/gcc.dg/pr59643.c
===================================================================
--- gcc/testsuite/gcc.dg/pr59643.c	(revision 277765)
+++ gcc/testsuite/gcc.dg/pr59643.c	(working copy)
@@ -1,9 +1,6 @@
 /* PR tree-optimization/59643 */
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdump-tree-pcom-details" } */
-/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
-/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
-   loop of this case.  */
 
 void
 foo (double *a, double *b, double *c, double d, double e, int n)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]