This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook


Jiufu Guo <guojiufu@linux.ibm.com> writes:

Got message about fail to send to gcc-patches. Send again.

> Segher Boessenkool <segher@kernel.crashing.org> writes:
>
>> Hi!
>>
>> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
>>> In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
>>> target related hueristic adjustment in this hook. In this patch, small loops
>>> is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
>>> some improvement for spec2017.  This patch enhanced a little for [Patch V2] to
>>> enable small loops unroll for O3 by default like O2.
>>
[...]
>
> Updated patch is at the end of this mail. Thanks for review.
>
> Jiufu
>>
>>
>> Segher

Thanks again for review.
Add ChangeLog:

gcc/
2019-11-04  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR tree-optimization/88760
	* gcc/common/config/rs6000/rs6000-common.c
	(rs6000_option_optimization_table): Enable OPT_funroll_loops and
	OPT_munroll_small_loops for OPT_LEVELS_2_PLUS_SPEED_ONLY.
	* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
	code which changes PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
	Handle option overrides for explicit and implicit funroll-loops.
	(TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
	(rs6000_loop_unroll_adjust): New hook for loop unroll adjust.
	Unrolling 2 times for small loops.
	* gcc/config/rs6000/rs6000.opt (munroll-small-loops): Add new flag.

gcc.testsuite/
2019-11-04  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR tree-optimization/88760
	* gcc.dg/pr59643.c: Update back to r277550.

>
> Updated patch:
>
> Index: gcc/common/config/rs6000/rs6000-common.c
> ===================================================================
> --- gcc/common/config/rs6000/rs6000-common.c	(revision 277765)
> +++ gcc/common/config/rs6000/rs6000-common.c	(working copy)
> @@ -35,7 +35,9 @@ static const struct default_options rs6000_option_
>      { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
>      /* Enable -fsched-pressure for first pass instruction scheduling.  */
>      { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
> -    { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 },
> +    /* Enable  -funroll-loops with -munroll-small-loops.  */
> +    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
> +    { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_small_loops, NULL, 1 },
>      { OPT_LEVELS_NONE, 0, NULL, 0 }
>    };
>  
> Index: gcc/config/rs6000/rs6000.c
> ===================================================================
> --- gcc/config/rs6000/rs6000.c	(revision 277765)
> +++ gcc/config/rs6000/rs6000.c	(working copy)
> @@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribut
>  #undef TARGET_VECTORIZE_DESTROY_COST_DATA
>  #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
>  
> +#undef TARGET_LOOP_UNROLL_ADJUST
> +#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
> +
>  #undef TARGET_INIT_BUILTINS
>  #define TARGET_INIT_BUILTINS rs6000_init_builtins
>  #undef TARGET_BUILTIN_DECL
> @@ -4540,24 +4543,21 @@ rs6000_option_override_internal (bool global_init_
>  			     global_options.x_param_values,
>  			     global_options_set.x_param_values);
>  
> -      /* unroll very small loops 2 time if no -funroll-loops.  */
> -      if (!global_options_set.x_flag_unroll_loops
> -	  && !global_options_set.x_flag_unroll_all_loops)
> +      /* Explicit funroll-loops turns -munroll-small-loops off.
> +	 Implicit funroll-loops does not turn fweb or frename-registers on.  */
> +      if ((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
> +	  || (global_options_set.x_flag_unroll_all_loops
> +	      && flag_unroll_all_loops))
>  	{
> -	  maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
> -				 global_options.x_param_values,
> -				 global_options_set.x_param_values);
> -
> -	  maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
> -				 global_options.x_param_values,
> -				 global_options_set.x_param_values);
> -
> -	  /* If fweb or frename-registers are not specificed in command-line,
> -	     do not turn them on implicitly.  */
> +	  if (!global_options_set.x_unroll_small_loops)
> +	    unroll_small_loops = 0;
> +	}
> +      else
> +	{
>  	  if (!global_options_set.x_flag_web)
> -	    global_options.x_flag_web = 0;
> +	    flag_web = 0;
>  	  if (!global_options_set.x_flag_rename_registers)
> -	    global_options.x_flag_rename_registers = 0;
> +	    flag_rename_registers = 0;
>  	}
>  
>        /* If using typedef char *va_list, signal that
> @@ -5101,6 +5101,25 @@ rs6000_destroy_cost_data (void *data)
>    free (data);
>  }
>  
> +/*  Implement targetm.loop_unroll_adjust.  */
> +
> +static unsigned
> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
> +{
> +   if (unroll_small_loops)
> +    {
> +      /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> +	 example we may want to unroll very small loops more times (4 perhaps).
> +	 We also should use a PARAM for this.  */
> +      if (loop->ninsns <= 10)
> +	return MIN (2, nunroll);
> +      else
> +	return 0;
> +    }
> +  
> +  return nunroll;
> +}
> +
>  /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
>     library with vectorized intrinsics.  */
>  
> Index: gcc/config/rs6000/rs6000.opt
> ===================================================================
> --- gcc/config/rs6000/rs6000.opt	(revision 277765)
> +++ gcc/config/rs6000/rs6000.opt	(working copy)
> @@ -501,6 +501,10 @@ moptimize-swaps
>  Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
>  Analyze and remove doubleword swaps from VSX computations.
>  
> +munroll-small-loops
> +Target Undocumented Var(unroll_small_loops) Init(0) Save
> +Use conservative small loop unrolling.
> +
>  mpower9-misc
>  Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags)
>  Use certain scalar instructions added in ISA 3.0.
> Index: gcc/testsuite/gcc.dg/pr59643.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/pr59643.c	(revision 277765)
> +++ gcc/testsuite/gcc.dg/pr59643.c	(working copy)
> @@ -1,9 +1,6 @@
>  /* PR tree-optimization/59643 */
>  /* { dg-do compile } */
>  /* { dg-options "-O3 -fdump-tree-pcom-details" } */
> -/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
> -/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the
> -   loop of this case.  */
>  
>  void
>  foo (double *a, double *b, double *c, double d, double e, int n)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]