This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Small optimization for emit_case_bit_tests (PR tree-optimization/63464)


On Fri, 10 Oct 2014, Jakub Jelinek wrote:

> Hi!
> 
> This patch adds a small optimization to emit_case_bit_tests,
> instead of emitting (for high, low, mask all constants)
> (x - low) <= (high - low) && ((1 << (x - low)) & mask)
> if high is smaller than BITS_PER_WORD and low > 0 we can emit
> x <= high && ((1 << x) & (mask << low))
> and avoid subtraction.  Do this only if mask << low isn't more costly than
> mask.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

But isn't this a general optimization?  Also testing for RTX costs
this early sounds bogus.

Thanks,
Richard.

> 2014-10-10  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/63464
> 	* tree-switch-conversion.c (struct case_bit_test): Remove
> 	hi and lo fields, add wide_int mask field.
> 	(emit_case_bit_tests): Add MAXVAL argument, rewrite uses of
> 	hi/lo fields into wide_int mask operations, optimize by pretending
> 	minval to be 0 if maxval is small enough.
> 	(process_switch): Adjust caller.
> 
> --- gcc/tree-switch-conversion.c.jj	2014-08-01 09:24:12.000000000 +0200
> +++ gcc/tree-switch-conversion.c	2014-10-07 12:36:29.438181857 +0200
> @@ -246,8 +246,7 @@ This transformation was contributed by R
>  
>  struct case_bit_test
>  {
> -  HOST_WIDE_INT hi;
> -  HOST_WIDE_INT lo;
> +  wide_int mask;
>    edge target_edge;
>    tree label;
>    int bits;
> @@ -294,13 +293,14 @@ case_bit_test_cmp (const void *p1, const
>      are not guaranteed to be of the same type as INDEX_EXPR
>      (the gimplifier doesn't change the type of case label values,
>      and MINVAL and RANGE are derived from those values).
> +    MAXVAL is MINVAL + RANGE.
>  
>      There *MUST* be MAX_CASE_BIT_TESTS or less unique case
>      node targets.  */
>  
>  static void
>  emit_case_bit_tests (gimple swtch, tree index_expr,
> -		     tree minval, tree range)
> +		     tree minval, tree range, tree maxval)
>  {
>    struct case_bit_test test[MAX_CASE_BIT_TESTS];
>    unsigned int i, j, k;
> @@ -324,6 +324,8 @@ emit_case_bit_tests (gimple swtch, tree
>    tree word_type_node = lang_hooks.types.type_for_mode (word_mode, 1);
>    tree word_mode_zero = fold_convert (word_type_node, integer_zero_node);
>    tree word_mode_one = fold_convert (word_type_node, integer_one_node);
> +  int prec = TYPE_PRECISION (word_type_node);
> +  wide_int wone = wi::one (prec);
>  
>    memset (&test, 0, sizeof (test));
>  
> @@ -348,8 +350,7 @@ emit_case_bit_tests (gimple swtch, tree
>        if (k == count)
>  	{
>  	  gcc_checking_assert (count < MAX_CASE_BIT_TESTS);
> -	  test[k].hi = 0;
> -	  test[k].lo = 0;
> +	  test[k].mask = wi::zero (prec);
>  	  test[k].target_edge = e;
>  	  test[k].label = label;
>  	  test[k].bits = 1;
> @@ -367,14 +368,39 @@ emit_case_bit_tests (gimple swtch, tree
>  					    CASE_HIGH (cs), minval));
>  
>        for (j = lo; j <= hi; j++)
> -        if (j >= HOST_BITS_PER_WIDE_INT)
> -	  test[k].hi |= (HOST_WIDE_INT) 1 << (j - HOST_BITS_PER_INT);
> -	else
> -	  test[k].lo |= (HOST_WIDE_INT) 1 << j;
> +	test[k].mask |= wi::lshift (wone, j);
>      }
>  
>    qsort (test, count, sizeof (*test), case_bit_test_cmp);
>  
> +  /* If all values are in the 0 .. BITS_PER_WORD-1 range, we can get rid of
> +     the minval subtractions, but it might make the mask constants more
> +     expensive.  So, compare the costs.  */
> +  if (compare_tree_int (minval, 0) > 0
> +      && compare_tree_int (maxval, GET_MODE_BITSIZE (word_mode)) < 0)
> +    {
> +      int cost_diff;
> +      HOST_WIDE_INT m = tree_to_uhwi (minval);
> +      rtx reg = gen_raw_REG (word_mode, 10000);
> +      bool speed_p = optimize_bb_for_speed_p (gimple_bb (swtch));
> +      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
> +					      GEN_INT (-m)), speed_p);
> +      for (i = 0; i < count; i++)
> +	{
> +	  rtx r = immed_wide_int_const (test[i].mask, word_mode);
> +	  cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r), speed_p);
> +	  r = immed_wide_int_const (wi::lshift (test[i].mask, m), word_mode);
> +	  cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r), speed_p);
> +	}
> +      if (cost_diff > 0)
> +	{
> +	  for (i = 0; i < count; i++)
> +	    test[i].mask = wi::lshift (test[i].mask, m);
> +	  minval = build_zero_cst (TREE_TYPE (minval));
> +	  range = maxval;
> +	}
> +    }
> +
>    /* We generate two jumps to the default case label.
>       Split the default edge, so that we don't have to do any PHI node
>       updating.  */
> @@ -446,13 +472,7 @@ emit_case_bit_tests (gimple swtch, tree
>          if (const & csui) goto target  */
>    for (k = 0; k < count; k++)
>      {
> -      HOST_WIDE_INT a[2];
> -
> -      a[0] = test[k].lo;
> -      a[1] = test[k].hi;
> -      tmp = wide_int_to_tree (word_type_node,
> -			      wide_int::from_array (a, 2,
> -						    TYPE_PRECISION (word_type_node)));
> +      tmp = wide_int_to_tree (word_type_node, test[k].mask);
>        tmp = fold_build2 (BIT_AND_EXPR, word_type_node, csui, tmp);
>        tmp = force_gimple_operand_gsi (&gsi, tmp,
>  				      /*simple=*/true, NULL_TREE,
> @@ -1369,8 +1389,8 @@ process_switch (gimple swtch)
>  	{
>  	  if (dump_file)
>  	    fputs ("  expanding as bit test is preferable\n", dump_file);
> -	  emit_case_bit_tests (swtch, info.index_expr,
> -			       info.range_min, info.range_size);
> +	  emit_case_bit_tests (swtch, info.index_expr, info.range_min,
> +			       info.range_size, info.range_max);
>  	  loops_state_set (LOOPS_NEED_FIXUP);
>  	  return NULL;
>  	}
> 
> 	Jakub
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]