This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] tree-switch-conversion fixes (PR tree-optimization/45830)
- From: Richard Guenther <rguenther at suse dot de>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Martin Jambor <mjambor at suse dot cz>
- Date: Fri, 19 Nov 2010 23:43:58 +0100 (CET)
- Subject: Re: [PATCH] tree-switch-conversion fixes (PR tree-optimization/45830)
- References: <20101119193159.GJ29412@tyan-ft48-01.lab.bos.redhat.com>
On Fri, 19 Nov 2010, Jakub Jelinek wrote:
> Hi!
>
> This patch fixes a bunch of issues in switchconv pass:
> 1) if the switch would be expanded using bit tests, switch conversion
> optimization actually produces worse code (especially for -Os, but
> for other levels as well)
> 2) the pass can sometimes create completely unnecessarily huge
> tables when the values would fit in much smaller integral data type
> - for -Os I guess it is a win as soon as the table has two or more
> entries, for -O2 I think the cut-off can be larger, because the sign or
> zero extension can eat a cycle or two, on the other side
> if it makes the .rodata table 2, 4 or 8 times smaller, it decreases
> D-cache footprint
> 3) create_temp_arrays/free_temp_arrays wasn't using libiberty macros
> and even made a mistake in the types (fortunately usually all pointers
> have the same size), plus it is IMHO wasteful to do 4 allocations
> when just 2 (or, if we ignored the types, just 1) is enough
>
> On the included testcases on x86_64 the changes are:
> text data bss dec hex filename
> 181 0 0 181 b5 before/gcc.target/i386/pr45830.o
> 83 0 0 83 53 after/gcc.target/i386/pr45830.o
> 3229 160 0 3389 d3d before/gcc.c-torture/execute/pr45830.o
> 1233 160 0 1393 571 after/gcc.c-torture/execute/pr45830.o
>
> before/gcc.target/i386/pr45830.o
> [ 1] .text PROGBITS 0000000000000000 000040 000015 00 AX 0 0 16
> [ 5] .rodata PROGBITS 0000000000000000 000060 000070 00 A 0 0 32
> after/gcc.target/i386/pr45830.o
> [ 1] .text PROGBITS 0000000000000000 000040 000023 00 AX 0 0 16
> before/gcc.c-torture/execute/pr45830.o
> [ 1] .text PROGBITS 0000000000000000 000040 0001a5 00 AX 0 0 16
> [ 5] .rodata PROGBITS 0000000000000000 0002a0 000a98 00 A 0 0 32
> after/gcc.c-torture/execute/pr45830.o
> [ 1] .text PROGBITS 0000000000000000 000040 0001a5 00 AX 0 0 16
> [ 5] .rodata PROGBITS 0000000000000000 0002a0 0002cc 00 A 0 0 32
> As you can see, on the second testcase .text size is even identical and
> .rodata is 3.8 times smaller, on the first testcase .text size grew by 14 bytes,
> but 112 bytes disappeared from .rodata section (but the code no longer needs to wait
> for the memory read).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
Thanks,
Richard.
> 2010-11-19 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/45830
> * stmt.c (expand_switch_using_bit_tests_p): New function.
> (expand_case): Use it.
> * tree.h (expand_switch_using_bit_tests_p): New prototype.
> * tree-switch-conversion.c (struct switch_conv_info): Add
> bit_test_uniq, bit_test_count and bit_test_bb fields.
> (check_range): Fix a comment.
> (check_process_case): Compute bit_test_uniq and bit_test_count.
> (create_temp_arrays): Use XCNEWVEC, merge 3 arrays into one
> allocation.
> (free_temp_arrays): Use XDELETEVEC, adjust for the 3 arrays merging.
> (constructor_contains_same_values_p): Use FOR_EACH_VEC_ELT.
> (array_value_type): New function.
> (build_one_array): Use it, if it returned different type,
> fold_convert all constructor fields and convert back to the
> wider type in the generated code.
> (process_switch): Initialize bit_test_uniq, bit_test_count and
> bit_test_bb fields. Don't optimize if expand_switch_using_bit_tests_p
> returned true.
>
> * gcc.target/i386/pr45830.c: New test.
> * gcc.c-torture/execute/pr45830.c: New test.
>
> --- gcc/stmt.c.jj 2010-11-15 18:53:41.000000000 +0100
> +++ gcc/stmt.c 2010-11-19 13:26:50.457354826 +0100
> @@ -2250,6 +2250,25 @@ emit_case_bit_tests (tree index_type, tr
> #define HAVE_tablejump 0
> #endif
>
> +/* Return true if a switch should be expanded as a bit test.
> + INDEX_EXPR is the index expression, RANGE is the difference between
> + highest and lowest case, UNIQ is number of unique case node targets
> + not counting the default case and COUNT is the number of comparisons
> + needed, not counting the default case. */
> +bool
> +expand_switch_using_bit_tests_p (tree index_expr, tree range,
> + unsigned int uniq, unsigned int count)
> +{
> + return (CASE_USE_BIT_TESTS
> + && ! TREE_CONSTANT (index_expr)
> + && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
> + && compare_tree_int (range, 0) > 0
> + && lshift_cheap_p ()
> + && ((uniq == 1 && count >= 3)
> + || (uniq == 2 && count >= 5)
> + || (uniq == 3 && count >= 6)));
> +}
> +
> /* Terminate a case (Pascal/Ada) or switch (C) statement
> in which ORIG_INDEX is the expression to be tested.
> If ORIG_TYPE is not NULL, it is the original ORIG_INDEX
> @@ -2384,14 +2403,7 @@ expand_case (gimple stmt)
> /* Try implementing this switch statement by a short sequence of
> bit-wise comparisons. However, we let the binary-tree case
> below handle constant index expressions. */
> - if (CASE_USE_BIT_TESTS
> - && ! TREE_CONSTANT (index_expr)
> - && compare_tree_int (range, GET_MODE_BITSIZE (word_mode)) < 0
> - && compare_tree_int (range, 0) > 0
> - && lshift_cheap_p ()
> - && ((uniq == 1 && count >= 3)
> - || (uniq == 2 && count >= 5)
> - || (uniq == 3 && count >= 6)))
> + if (expand_switch_using_bit_tests_p (index_expr, range, uniq, count))
> {
> /* Optimize the case where all the case values fit in a
> word without having to subtract MINVAL. In this case,
> --- gcc/tree.h.jj 2010-11-15 23:28:02.000000000 +0100
> +++ gcc/tree.h 2010-11-15 23:28:02.000000000 +0100
> @@ -5314,6 +5314,8 @@ extern bool parse_input_constraint (cons
> const char * const *, bool *, bool *);
> extern void expand_asm_stmt (gimple);
> extern tree resolve_asm_operand_names (tree, tree, tree, tree);
> +extern bool expand_switch_using_bit_tests_p (tree, tree, unsigned int,
> + unsigned int);
> extern void expand_case (gimple);
> extern void expand_decl (tree);
> #ifdef HARD_CONST
> --- gcc/tree-switch-conversion.c.jj 2010-09-06 11:46:47.000000000 +0200
> +++ gcc/tree-switch-conversion.c 2010-11-19 17:01:15.698511106 +0100
> @@ -156,6 +156,12 @@ struct switch_conv_info
> /* String reason why the case wasn't a good candidate that is written to the
> dump file, if there is one. */
> const char *reason;
> +
> + /* Parameters for expand_switch_using_bit_tests. Should be computed
> + the same way as in expand_case. */
> + unsigned int bit_test_uniq;
> + unsigned int bit_test_count;
> + basic_block bit_test_bb[2];
> };
>
> /* Global pass info. */
> @@ -174,7 +180,7 @@ check_range (gimple swtch)
> tree range_max;
>
> /* The gimplifier has already sorted the cases by CASE_LOW and ensured there
> - is a default label which is the last in the vector. */
> + is a default label which is the first in the vector. */
>
> min_case = gimple_switch_label (swtch, 1);
> info.range_min = CASE_LOW (min_case);
> @@ -234,7 +240,26 @@ check_process_case (tree cs)
> info.default_count = e->count;
> }
> else
> - info.other_count += e->count;
> + {
> + int i;
> + info.other_count += e->count;
> + for (i = 0; i < 2; i++)
> + if (info.bit_test_bb[i] == label_bb)
> + break;
> + else if (info.bit_test_bb[i] == NULL)
> + {
> + info.bit_test_bb[i] = label_bb;
> + info.bit_test_uniq++;
> + break;
> + }
> + if (i == 2)
> + info.bit_test_uniq = 3;
> + if (CASE_HIGH (cs) != NULL_TREE
> + && ! tree_int_cst_equal (CASE_LOW (cs), CASE_HIGH (cs)))
> + info.bit_test_count += 2;
> + else
> + info.bit_test_count++;
> + }
>
> if (!label_bb)
> {
> @@ -336,13 +361,10 @@ create_temp_arrays (void)
> {
> int i;
>
> - info.default_values = (tree *) xcalloc (info.phi_count, sizeof (tree));
> - info.constructors = (VEC (constructor_elt, gc) **) xcalloc (info.phi_count,
> - sizeof (tree));
> - info.target_inbound_names = (tree *) xcalloc (info.phi_count, sizeof (tree));
> - info.target_outbound_names = (tree *) xcalloc (info.phi_count,
> - sizeof (tree));
> -
> + info.default_values = XCNEWVEC (tree, info.phi_count * 3);
> + info.constructors = XCNEWVEC (VEC (constructor_elt, gc) *, info.phi_count);
> + info.target_inbound_names = info.default_values + info.phi_count;
> + info.target_outbound_names = info.target_inbound_names + info.phi_count;
> for (i = 0; i < info.phi_count; i++)
> info.constructors[i]
> = VEC_alloc (constructor_elt, gc, tree_low_cst (info.range_size, 1) + 1);
> @@ -355,10 +377,8 @@ create_temp_arrays (void)
> static void
> free_temp_arrays (void)
> {
> - free (info.constructors);
> - free (info.default_values);
> - free (info.target_inbound_names);
> - free (info.target_outbound_names);
> + XDELETEVEC (info.constructors);
> + XDELETEVEC (info.default_values);
> }
>
> /* Populate the array of default values in the order of phi nodes.
> @@ -468,13 +488,12 @@ build_constructors (gimple swtch)
> static tree
> constructor_contains_same_values_p (VEC (constructor_elt, gc) *vec)
> {
> - int i, len = VEC_length (constructor_elt, vec);
> + unsigned int i;
> tree prev = NULL_TREE;
> + constructor_elt *elt;
>
> - for (i = 0; i < len; i++)
> + FOR_EACH_VEC_ELT (constructor_elt, vec, i, elt)
> {
> - constructor_elt *elt = VEC_index (constructor_elt, vec, i);
> -
> if (!prev)
> prev = elt->value;
> else if (!operand_equal_p (elt->value, prev, OEP_ONLY_CONST))
> @@ -483,6 +502,79 @@ constructor_contains_same_values_p (VEC
> return prev;
> }
>
> +/* Return type which should be used for array elements, either TYPE,
> + or for integral type some smaller integral type that can still hold
> + all the constants. */
> +
> +static tree
> +array_value_type (gimple swtch, tree type, int num)
> +{
> + unsigned int i, len = VEC_length (constructor_elt, info.constructors[num]);
> + constructor_elt *elt;
> + enum machine_mode mode;
> + int sign = 0;
> + tree smaller_type;
> +
> + if (!INTEGRAL_TYPE_P (type))
> + return type;
> +
> + mode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (TYPE_MODE (type)));
> + if (GET_MODE_SIZE (TYPE_MODE (type)) <= GET_MODE_SIZE (mode))
> + return type;
> +
> + if (len < (optimize_bb_for_size_p (gimple_bb (swtch)) ? 2 : 32))
> + return type;
> +
> + FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
> + {
> + double_int cst;
> +
> + if (TREE_CODE (elt->value) != INTEGER_CST)
> + return type;
> +
> + cst = TREE_INT_CST (elt->value);
> + while (1)
> + {
> + unsigned int prec = GET_MODE_BITSIZE (mode);
> + if (prec > HOST_BITS_PER_WIDE_INT)
> + return type;
> +
> + if (sign >= 0
> + && double_int_equal_p (cst, double_int_zext (cst, prec)))
> + {
> + if (sign == 0
> + && double_int_equal_p (cst, double_int_sext (cst, prec)))
> + break;
> + sign = 1;
> + break;
> + }
> + if (sign <= 0
> + && double_int_equal_p (cst, double_int_sext (cst, prec)))
> + {
> + sign = -1;
> + break;
> + }
> +
> + if (sign == 1)
> + sign = 0;
> +
> + mode = GET_MODE_WIDER_MODE (mode);
> + if (mode == VOIDmode
> + || GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TYPE_MODE (type)))
> + return type;
> + }
> + }
> +
> + if (sign == 0)
> + sign = TYPE_UNSIGNED (type) ? 1 : -1;
> + smaller_type = lang_hooks.types.type_for_mode (mode, sign >= 0);
> + if (GET_MODE_SIZE (TYPE_MODE (type))
> + <= GET_MODE_SIZE (TYPE_MODE (smaller_type)))
> + return type;
> +
> + return smaller_type;
> +}
> +
> /* Create an appropriate array type and declaration and assemble a static array
> variable. Also create a load statement that initializes the variable in
> question with a value from the static array. SWTCH is the switch statement
> @@ -512,10 +604,19 @@ build_one_array (gimple swtch, int num,
> load = gimple_build_assign (name, cst);
> else
> {
> - tree array_type, ctor, decl, value_type, fetch;
> + tree array_type, ctor, decl, value_type, fetch, default_type;
>
> - value_type = TREE_TYPE (info.default_values[num]);
> + default_type = TREE_TYPE (info.default_values[num]);
> + value_type = array_value_type (swtch, default_type, num);
> array_type = build_array_type (value_type, arr_index_type);
> + if (default_type != value_type)
> + {
> + unsigned int i;
> + constructor_elt *elt;
> +
> + FOR_EACH_VEC_ELT (constructor_elt, info.constructors[num], i, elt)
> + elt->value = fold_convert (value_type, elt->value);
> + }
> ctor = build_constructor (array_type, info.constructors[num]);
> TREE_CONSTANT (ctor) = true;
> TREE_STATIC (ctor) = true;
> @@ -534,6 +635,12 @@ build_one_array (gimple swtch, int num,
>
> fetch = build4 (ARRAY_REF, value_type, decl, tidx, NULL_TREE,
> NULL_TREE);
> + if (default_type != value_type)
> + {
> + fetch = fold_convert (default_type, fetch);
> + fetch = force_gimple_operand_gsi (&gsi, fetch, true, NULL_TREE,
> + true, GSI_SAME_STMT);
> + }
> load = gimple_build_assign (name, fetch);
> }
>
> @@ -818,6 +925,10 @@ process_switch (gimple swtch)
> info.default_prob = 0;
> info.default_count = 0;
> info.other_count = 0;
> + info.bit_test_uniq = 0;
> + info.bit_test_count = 0;
> + info.bit_test_bb[0] = NULL;
> + info.bit_test_bb[1] = NULL;
>
> /* An ERROR_MARK occurs for various reasons including invalid data type.
> (comment from stmt.c) */
> @@ -841,6 +952,18 @@ process_switch (gimple swtch)
> return false;
> }
>
> + if (info.bit_test_uniq <= 2)
> + {
> + rtl_profile_for_bb (gimple_bb (swtch));
> + if (expand_switch_using_bit_tests_p (gimple_switch_index (swtch),
> + info.range_size, info.bit_test_uniq,
> + info.bit_test_count))
> + {
> + info.reason = " Expanding as bit test is preferable\n";
> + return false;
> + }
> + }
> +
> if (!check_final_bb ())
> return false;
>
> --- gcc/testsuite/gcc.target/i386/pr45830.c.jj 2010-11-19 16:58:08.345372833 +0100
> +++ gcc/testsuite/gcc.target/i386/pr45830.c 2010-11-19 17:00:54.425260908 +0100
> @@ -0,0 +1,31 @@
> +/* PR target/45830 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-switchconv-all -mtune=generic" } */
> +
> +int
> +foo (int *a)
> +{
> + switch (*a)
> + {
> + case 0:
> + case 3:
> + case 1:
> + case 2:
> + case 4:
> + case 23:
> + case 26:
> + case 19:
> + case 5:
> + case 21:
> + case 20:
> + case 22:
> + case 27:
> + return 1;
> + default:
> + return 0;
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump "Expanding as bit test is preferable" "switchconv" } } */
> +/* { dg-final { scan-assembler-not "CSWTCH" } } */
> +/* { dg-final { cleanup-tree-dump "switchconv" } } */
> --- gcc/testsuite/gcc.c-torture/execute/pr45830.c.jj 2010-11-19 16:56:39.214313262 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr45830.c 2010-11-19 16:55:39.000000000 +0100
> @@ -0,0 +1,97 @@
> +/* PR tree-optimization/45830 */
> +
> +extern void abort (void);
> +
> +long long va, vb, vc, vd, ve;
> +
> +__attribute__((noinline)) int
> +foo (int x)
> +{
> + long long a, b, c, d, e;
> + switch (x)
> + {
> + case 0:
> + case 3:
> + case 1:
> + case 2:
> + case 4:
> + a = 1;
> + b = 129;
> + c = -12;
> + d = -4;
> + e = 128;
> + break;
> + case 23:
> + case 26:
> + case 19:
> + case 65:
> + case 5:
> + a = 2;
> + b = 138;
> + c = 115;
> + d = 128;
> + e = -16;
> + break;
> + case 21:
> + case 20:
> + case 22:
> + case 38:
> + case 27:
> + case 66:
> + case 45:
> + case 47:
> + a = 3;
> + b = 6;
> + c = 127;
> + d = 25;
> + e = 257;
> + break;
> + default:
> + a = 0;
> + b = 18;
> + c = 0;
> + d = 64;
> + e = 32768L;
> + break;
> + }
> + va = a;
> + vb = b;
> + vc = c;
> + vd = d;
> + ve = e;
> +}
> +
> +int
> +bar (int x)
> +{
> + if (x < 0)
> + return 3;
> + if (x < 5)
> + return 0;
> + if (x == 5 || x == 19 || x == 23 | x == 26 || x == 65)
> + return 1;
> + if ((x >= 20 && x <= 22) || x == 27 || x == 38
> + || x == 45 || x == 47 || x == 66)
> + return 2;
> + return 3;
> +}
> +
> +long long expected[] =
> +{ 1, 129, -12, -4, 128, 2, 138, 115, 128, -16,
> + 3, 6, 127, 25, 257, 0, 18, 0, 64, 32768L };
> +
> +int
> +main (void)
> +{
> + int i, v;
> + for (i = -4; i < 70; i++)
> + {
> + foo (i);
> + v = bar (i);
> + if (va != expected[5 * v] || vb != expected[5 * v + 1]
> + || vc != expected[5 * v + 2] || vd != expected[5 * v + 3]
> + || ve != expected[5 * v + 4])
> + abort ();
> + }
> + return 0;
> +}
>
> Jakub
>
>
--
Richard Guenther <rguenther@suse.de>
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 - GF: Markus Rex