This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] tree-switch-conversion fixes (PR tree-optimization/45830)


On Fri, 19 Nov 2010, Jakub Jelinek wrote:

> Hi!
> 
> This patch fixes a bunch of issues in switchconv pass:
> 1) if the switch would be expanded using bit tests, switch conversion
>    optimization actually produces worse code (especially for -Os, but
>    for other levels as well)
> 2) the pass can sometimes create completely unnecessarily huge
>    tables when the values would fit in much smaller integral data type
>    - for -Os I guess it is a win as soon as the table has two or more
>    entries, for -O2 I think the cut-off can be larger, because the sign or
>    zero extension can eat a cycle or two, on the other side
>    if it makes the .rodata table 2, 4 or 8 times smaller, it decreases
>    D-cache footprint
> 3) create_temp_arrays/free_temp_arrays wasn't using libiberty macros
>    and even made a mistake in the types (fortunately usually all pointers
>    have the same size), plus it is IMHO wasteful to do 4 allocations
>    when just 2 (or, if we ignored the types, just 1) is enough
> 
> On the included testcases on x86_64 the changes are:
>    text    data     bss     dec     hex filename
>     181       0       0     181      b5 before/gcc.target/i386/pr45830.o
>      83       0       0      83      53 after/gcc.target/i386/pr45830.o
>    3229     160       0    3389     d3d before/gcc.c-torture/execute/pr45830.o
>    1233     160       0    1393     571 after/gcc.c-torture/execute/pr45830.o
> 
> before/gcc.target/i386/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 000015 00  AX  0   0 16
>   [ 5] .rodata           PROGBITS        0000000000000000 000060 000070 00   A  0   0 32
> after/gcc.target/i386/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 000023 00  AX  0   0 16
> before/gcc.c-torture/execute/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 0001a5 00  AX  0   0 16
>   [ 5] .rodata           PROGBITS        0000000000000000 0002a0 000a98 00   A  0   0 32
> after/gcc.c-torture/execute/pr45830.o
>   [ 1] .text             PROGBITS        0000000000000000 000040 0001a5 00  AX  0   0 16
>   [ 5] .rodata           PROGBITS        0000000000000000 0002a0 0002cc 00   A  0   0 32
> As you can see, on the second testcase .text size is even identical and
> .rodata is 3.8 times smaller, on the first testcase .text size grew by 14 bytes,
> but 112 bytes disappeared from .rodata section (but the code no longer needs to wait
> for the memory read).
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

> 2010-11-19  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR tree-optimization/45830
> 	* stmt.c (expand_switch_using_bit_tests_p): New function.
> 	(expand_case): Use it.
> 	* tree.h (expand_switch_using_bit_tests_p): New prototype.
> 	* tree-switch-conversion.c (struct switch_conv_info): Add
> 	bit_test_uniq, bit_test_count and bit_test_bb fields.
> 	(check_range): Fix a comment.
> 	(check_process_case): Compute bit_test_uniq and bit_test_count.
> 	(create_temp_arrays): Use XCNEWVEC, merge 3 arrays into one
> 	allocation.
> 	(free_temp_arrays): Use XDELETEVEC, adjust for the 3 arrays merging.
> 	(constructor_contains_same_values_p): Use FOR_EACH_VEC_ELT.
> 	(array_value_type): New function.
> 	(build_one_array): Use it, if it returned different type,
> 	fold_convert all constructor fields and convert back to the
> 	wider type in the generated code.
> 	(process_switch): Initialize bit_test_uniq, bit_test_count and
> 	bit_test_bb fields.  Don't optimize if expand_switch_using_bit_tests_p
> 	returned true.
> 
> 	* gcc.target/i386/pr45830.c: New test.
> 	* gcc.c-torture/execute/pr45830.c: New test.
> 	
> --- gcc/stmt.c.jj	2010-11-15 18:53:41.000000000 +0100
> +++ gcc/stmt.c	2010-11-19 13:26:50.457354826 +0100
> @@ -2250,6 +2250,25 @@ emit_case_bit_tests (tree index_type, tr
>  #define HAVE_tablejump 0
>  #endif
>  
> +/* Return true if a switch should be expanded as a bit test.
> +   INDEX_EXPR is the index expression, RANGE is the difference between
> +   highest and lowest case, UNIQ is number of unique case node targets
> +   not counting the default case and COUNT is the number of comparisons
> +   needed, not counting the default case.  */
> +bool
> +expand_switch_using_bit_tests_p (tree index_expr, tree range,
> +				 unsigned int uniq, unsigned int count)
> +{
> +  return (CASE_USE_BIT_TESTS
> +	  && ! TREE_CONSTANT (index_expr)
> +	  && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
> +	  && compare_tree_int (range, 0) > 0
> +	  && lshift_cheap_p ()
> +	  && ((uniq == 1 && count >= 3)
> +	      || (uniq == 2 && count >= 5)
> +	      || (uniq == 3 && count >= 6)));
> +}
> +
>  /* Terminate a case (Pascal/Ada) or switch (C) statement
>     in which ORIG_INDEX is the expression to be tested.
>     If ORIG_TYPE is not NULL, it is the original ORIG_INDEX
> @@ -2384,14 +2403,7 @@ expand_case (gimple stmt)
>        /* Try implementing this switch statement by a short sequence of
>  	 bit-wise comparisons.  However, we let the binary-tree case
>  	 below handle constant index expressions.  */
> -      if (CASE_USE_BIT_TESTS
> -	  && ! TREE_CONSTANT (index_expr)
> -	  && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
> -	  && compare_tree_int (range, 0) > 0
> -	  && lshift_cheap_p ()
> -	  && ((uniq == 1 && count >= 3)
> -	      || (uniq == 2 && count >= 5)
> -	      || (uniq == 3 && count >= 6)))
> +      if (expand_switch_using_bit_tests_p (index_expr, range, uniq, count))
>  	{
>  	  /* Optimize the case where all the case values fit in a
>  	     word without having to subtract MINVAL.  In this case,
> --- gcc/tree.h.jj	2010-11-15 23:28:02.000000000 +0100
> +++ gcc/tree.h	2010-11-15 23:28:02.000000000 +0100
> @@ -5314,6 +5314,8 @@ extern bool parse_input_constraint (cons
>  				    const char * const *, bool *, bool *);
>  extern void expand_asm_stmt (gimple);
>  extern tree resolve_asm_operand_names (tree, tree, tree, tree);
> +extern bool expand_switch_using_bit_tests_p (tree, tree, unsigned int,
> +					     unsigned int);
>  extern void expand_case (gimple);
>  extern void expand_decl (tree);
>  #ifdef HARD_CONST
> --- gcc/tree-switch-conversion.c.jj	2010-09-06 11:46:47.000000000 +0200
> +++ gcc/tree-switch-conversion.c	2010-11-19 17:01:15.698511106 +0100
> @@ -156,6 +156,12 @@ struct switch_conv_info
>    /* String reason why the case wasn't a good candidate that is written to the
>       dump file, if there is one.  */
>    const char *reason;
> +
> +  /* Parameters for expand_switch_using_bit_tests.  Should be computed
> +     the same way as in expand_case.  */
> +  unsigned int bit_test_uniq;
> +  unsigned int bit_test_count;
> +  basic_block bit_test_bb[2];
>  };
>  
>  /* Global pass info.  */
> @@ -174,7 +180,7 @@ check_range (gimple swtch)
>    tree range_max;
>  
>    /* The gimplifier has already sorted the cases by CASE_LOW and ensured there
> -     is a default label which is the last in the vector.  */
> +     is a default label which is the first in the vector.  */
>  
>    min_case = gimple_switch_label (swtch, 1);
>    info.range_min = CASE_LOW (min_case);
> @@ -234,7 +240,26 @@ check_process_case (tree cs)
>        info.default_count = e->count;
>      }
>    else
> -    info.other_count += e->count;
> +    {
> +      int i;
> +      info.other_count += e->count;
> +      for (i = 0; i < 2; i++)
> +	if (info.bit_test_bb[i] == label_bb)
> +	  break;
> +	else if (info.bit_test_bb[i] == NULL)
> +	  {
> +	    info.bit_test_bb[i] = label_bb;
> +	    info.bit_test_uniq++;
> +	    break;
> +	  }
> +      if (i == 2)
> +	info.bit_test_uniq = 3;
> +      if (CASE_HIGH (cs) != NULL_TREE
> +	  && ! tree_int_cst_equal (CASE_LOW (cs), CASE_HIGH (cs)))
> +	info.bit_test_count += 2;
> +      else
> +	info.bit_test_count++;
> +    }
>  
>    if (!label_bb)
>      {
> @@ -336,13 +361,10 @@ create_temp_arrays (void)
>  {
>    int i;
>  
> -  info.default_values = (tree *) xcalloc (info.phi_count, sizeof (tree));
> -  info.constructors = (VEC (constructor_elt, gc) **) xcalloc (info.phi_count,
> -							      sizeof (tree));
> -  info.target_inbound_names = (tree *) xcalloc (info.phi_count, sizeof (tree));
> -  info.target_outbound_names = (tree *) xcalloc (info.phi_count,
> -						 sizeof (tree));
> -
> +  info.default_values = XCNEWVEC (tree, info.phi_count * 3);
> +  info.constructors = XCNEWVEC (VEC (constructor_elt, gc) *, info.phi_count);
> +  info.target_inbound_names = info.default_values + info.phi_count;
> +  info.target_outbound_names = info.target_inbound_names + info.phi_count;
>    for (i = 0; i < info.phi_count; i++)
>      info.constructors[i]
>        = VEC_alloc (constructor_elt, gc, tree_low_cst (info.range_size, 1) + 1);
> @@ -355,10 +377,8 @@ create_temp_arrays (void)
>  static void
>  free_temp_arrays (void)
>  {
> -  free (info.constructors);
> -  free (info.default_values);
> -  free (info.target_inbound_names);
> -  free (info.target_outbound_names);
> +  XDELETEVEC (info.constructors);
> +  XDELETEVEC (info.default_values);
>  }
>  
>  /* Populate the array of default values in the order of phi nodes.
> @@ -468,13 +488,12 @@ build_constructors (gimple swtch)
>  static tree
>  constructor_contains_same_values_p (VEC (constructor_elt, gc) *vec)
>  {
> -  int i, len = VEC_length (constructor_elt, vec);
> +  unsigned int i;
>    tree prev = NULL_TREE;
> +  constructor_elt *elt;
>  
> -  for (i = 0; i < len; i++)
> +  FOR_EACH_VEC_ELT (constructor_elt, vec, i, elt)
>      {
> -      constructor_elt *elt = VEC_index (constructor_elt, vec, i);
> -
>        if (!prev)
>  	prev = elt->value;
>        else if (!operand_equal_p (elt->value, prev, OEP_ONLY_CONST))
> @@ -483,6 +502,79 @@ constructor_contains_same_values_p (VEC 
>    return prev;
>  }
>  
> +/* Return type which should be used for array elements, either TYPE,
> +   or for integral type some smaller integral type that can still hold
> +   all the constants.  */
> +
> +static tree
> +array_value_type (gimple swtch, tree type, int num)
> +{
> +  unsigned int i, len = VEC_length (constructor_elt, info.constructors[num]);
> +  constructor_elt *elt;
> +  enum machine_mode mode;
> +  int sign = 0;
> +  tree smaller_type;
> +
> +  if (!INTEGRAL_TYPE_P (type))
> +    return type;
> +
> +  mode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (TYPE_MODE (type)));
> +  if (GET_MODE_SIZE (TYPE_MODE (type)) <= GET_MODE_SIZE (mode))
> +    return type;
> +
> +  if (len < (optimize_bb_for_size_p (gimple_bb (swtch)) ? 2 : 32))
> +    return type;
> +
> +  FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
> +    {
> +      double_int cst;
> +
> +      if (TREE_CODE (elt->value) != INTEGER_CST)
> +	return type;
> +
> +      cst = TREE_INT_CST (elt->value);
> +      while (1)
> +	{
> +	  unsigned int prec = GET_MODE_BITSIZE (mode);
> +	  if (prec > HOST_BITS_PER_WIDE_INT)
> +	    return type;
> +
> +	  if (sign >= 0
> +	      && double_int_equal_p (cst, double_int_zext (cst, prec)))
> +	    {
> +	      if (sign == 0
> +		  && double_int_equal_p (cst, double_int_sext (cst, prec)))
> +		break;
> +	      sign = 1;
> +	      break;
> +	    }
> +	  if (sign <= 0
> +	      && double_int_equal_p (cst, double_int_sext (cst, prec)))
> +	    {
> +	      sign = -1;
> +	      break;
> +	    }
> +
> +	  if (sign == 1)
> +	    sign = 0;
> +
> +	  mode = GET_MODE_WIDER_MODE (mode);
> +	  if (mode == VOIDmode
> +	      || GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TYPE_MODE (type)))
> +	    return type;
> +	}
> +    }
> +
> +  if (sign == 0)
> +    sign = TYPE_UNSIGNED (type) ? 1 : -1;
> +  smaller_type = lang_hooks.types.type_for_mode (mode, sign >= 0);
> +  if (GET_MODE_SIZE (TYPE_MODE (type))
> +      <= GET_MODE_SIZE (TYPE_MODE (smaller_type)))
> +    return type;
> +
> +  return smaller_type;
> +}
> +
>  /* Create an appropriate array type and declaration and assemble a static array
>     variable.  Also create a load statement that initializes the variable in
>     question with a value from the static array.  SWTCH is the switch statement
> @@ -512,10 +604,19 @@ build_one_array (gimple swtch, int num, 
>      load = gimple_build_assign (name, cst);
>    else
>      {
> -      tree array_type, ctor, decl, value_type, fetch;
> +      tree array_type, ctor, decl, value_type, fetch, default_type;
>  
> -      value_type = TREE_TYPE (info.default_values[num]);
> +      default_type = TREE_TYPE (info.default_values[num]);
> +      value_type = array_value_type (swtch, default_type, num);
>        array_type = build_array_type (value_type, arr_index_type);
> +      if (default_type != value_type)
> +	{
> +	  unsigned int i;
> +	  constructor_elt *elt;
> +
> +	  FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
> +	    elt->value = fold_convert (value_type, elt->value);
> +	}
>        ctor = build_constructor (array_type, info.constructors[num]);
>        TREE_CONSTANT (ctor) = true;
>        TREE_STATIC (ctor) = true;
> @@ -534,6 +635,12 @@ build_one_array (gimple swtch, int num, 
>  
>        fetch = build4 (ARRAY_REF, value_type, decl, tidx, NULL_TREE,
>  		      NULL_TREE);
> +      if (default_type != value_type)
> +	{
> +	  fetch = fold_convert (default_type, fetch);
> +	  fetch = force_gimple_operand_gsi (&gsi, fetch, true, NULL_TREE,
> +					    true, GSI_SAME_STMT);
> +	}
>        load = gimple_build_assign (name, fetch);
>      }
>  
> @@ -818,6 +925,10 @@ process_switch (gimple swtch)
>    info.default_prob = 0;
>    info.default_count = 0;
>    info.other_count = 0;
> +  info.bit_test_uniq = 0;
> +  info.bit_test_count = 0;
> +  info.bit_test_bb[0] = NULL;
> +  info.bit_test_bb[1] = NULL;
>  
>    /* An ERROR_MARK occurs for various reasons including invalid data type.
>       (comment from stmt.c) */
> @@ -841,6 +952,18 @@ process_switch (gimple swtch)
>  	return false;
>        }
>  
> +  if (info.bit_test_uniq <= 2)
> +    {
> +      rtl_profile_for_bb (gimple_bb (swtch));
> +      if (expand_switch_using_bit_tests_p (gimple_switch_index (swtch),
> +					   info.range_size, info.bit_test_uniq,
> +					   info.bit_test_count))
> +	{
> +	  info.reason = "  Expanding as bit test is preferable\n";
> +	  return false;
> +	}
> +    }
> +
>    if (!check_final_bb ())
>      return false;
>  
> --- gcc/testsuite/gcc.target/i386/pr45830.c.jj	2010-11-19 16:58:08.345372833 +0100
> +++ gcc/testsuite/gcc.target/i386/pr45830.c	2010-11-19 17:00:54.425260908 +0100
> @@ -0,0 +1,31 @@
> +/* PR target/45830 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-switchconv-all -mtune=generic" } */
> +
> +int
> +foo (int *a)
> +{
> +  switch (*a)
> +    {
> +    case 0:
> +    case 3:
> +    case 1:
> +    case 2:
> +    case 4:
> +    case 23:
> +    case 26:
> +    case 19:
> +    case 5:
> +    case 21:
> +    case 20:
> +    case 22:
> +    case 27:
> +      return 1;
> +    default:
> +      return 0;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump "Expanding as bit test is preferable" "switchconv" } } */
> +/* { dg-final { scan-assembler-not "CSWTCH" } } */
> +/* { dg-final { cleanup-tree-dump "switchconv" } } */
> --- gcc/testsuite/gcc.c-torture/execute/pr45830.c.jj	2010-11-19 16:56:39.214313262 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr45830.c	2010-11-19 16:55:39.000000000 +0100
> @@ -0,0 +1,97 @@
> +/* PR tree-optimization/45830 */
> +
> +extern void abort (void);
> +
> +long long va, vb, vc, vd, ve;
> +
> +__attribute__((noinline)) int
> +foo (int x)
> +{
> +  long long a, b, c, d, e;
> +  switch (x)
> +    {
> +    case 0:
> +    case 3:
> +    case 1:
> +    case 2:
> +    case 4:
> +      a = 1;
> +      b = 129;
> +      c = -12;
> +      d = -4;
> +      e = 128;
> +      break;
> +    case 23:
> +    case 26:
> +    case 19:
> +    case 65:
> +    case 5:
> +      a = 2;
> +      b = 138;
> +      c = 115;
> +      d = 128;
> +      e = -16;
> +      break;
> +    case 21:
> +    case 20:
> +    case 22:
> +    case 38:
> +    case 27:
> +    case 66:
> +    case 45:
> +    case 47:
> +      a = 3;
> +      b = 6;
> +      c = 127;
> +      d = 25;
> +      e = 257;
> +      break;
> +    default:
> +      a = 0;
> +      b = 18;
> +      c = 0;
> +      d = 64;
> +      e = 32768L;
> +      break;
> +    }
> +  va = a;
> +  vb = b;
> +  vc = c;
> +  vd = d;
> +  ve = e;
> +}
> +
> +int
> +bar (int x)
> +{
> +  if (x < 0)
> +    return 3;
> +  if (x < 5)
> +    return 0;
> +  if (x == 5 || x == 19 || x == 23 | x == 26 || x == 65)
> +    return 1;
> +  if ((x >= 20 && x <= 22) || x == 27 || x == 38
> +      || x == 45 || x == 47 || x == 66)
> +    return 2;
> +  return 3;
> +}
> +
> +long long expected[] =
> +{ 1, 129, -12, -4, 128, 2, 138, 115, 128, -16,
> +  3, 6, 127, 25, 257, 0, 18, 0, 64, 32768L };
> +
> +int
> +main (void)
> +{
> +  int i, v;
> +  for (i = -4; i < 70; i++)
> +    {
> +      foo (i);
> +      v = bar (i);
> +      if (va != expected[5 * v] || vb != expected[5 * v + 1]
> +	  || vc != expected[5 * v + 2] || vd != expected[5 * v + 3]
> +	  || ve != expected[5 * v + 4])
> +	abort ();
> +    }
> +  return 0;
> +}
> 
> 	Jakub
> 
> 

-- 
Richard Guenther <rguenther@suse.de>
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]