This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Disable postreload GCSE on large code
On вторник, 3 септември 2019 г. 14:54:19 EEST Richard Biener wrote:
> 2019-09-02 Richard Biener <rguenther@suse.de>
>
> PR rtl-optimization/36262
> * postreload-gcse.c: Include intl.h and gcse.h.
> (insert_expr_in_table): Insert at the head of cur_expr->avail_occr
> to avoid linear list walk.
> (record_last_mem_set_info): Gate off if not computing
> transparentness. (get_bb_avail_insn): If transparentness isn't computed
> give up early.
> (gcse_after_reload_main): Skip compute_transp and extended PRE
> if gcse_or_cprop_is_too_expensive says so.
>
> Index: gcc/postreload-gcse.c
> ===================================================================
> --- gcc/postreload-gcse.c (revision 275294)
> +++ gcc/postreload-gcse.c (working copy)
> @@ -38,7 +38,9 @@ along with GCC; see the file COPYING3.
> #include "params.h"
> #include "tree-pass.h"
> #include "dbgcnt.h"
> +#include "intl.h"
> #include "gcse-common.h"
> +#include "gcse.h"
>
> /* The following code implements gcse after reload, the purpose of this
> pass is to cleanup redundant loads generated by reload and other
> @@ -364,7 +366,7 @@ insert_expr_in_table (rtx x, rtx_insn *i
> int do_not_record_p;
> hashval_t hash;
> struct expr *cur_expr, **slot;
> - struct occr *avail_occr, *last_occr = NULL;
> + struct occr *avail_occr;
>
> hash = hash_expr (x, &do_not_record_p);
>
> @@ -405,38 +407,22 @@ insert_expr_in_table (rtx x, rtx_insn *i
> cur_expr = *slot;
> }
>
> - /* Search for another occurrence in the same basic block. */
> + /* Search for another occurrence in the same basic block. We insert
> + insns blockwise from start to end, so keep appending to the
> + start of the list so we have to check only a single element. */
> avail_occr = cur_expr->avail_occr;
> - while (avail_occr
> - && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
> - {
> - /* If an occurrence isn't found, save a pointer to the end of
> - the list. */
> - last_occr = avail_occr;
> - avail_occr = avail_occr->next;
> - }
> -
> - if (avail_occr)
> - /* Found another instance of the expression in the same basic block.
> - Prefer this occurrence to the currently recorded one. We want
> - the last one in the block and the block is scanned from start
> - to end. */
> + if (avail_occr
> + && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
> avail_occr->insn = insn;
> else
> {
> /* First occurrence of this expression in this basic block. */
> avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
> sizeof (struct occr));
> -
> - /* First occurrence of this expression in any block? */
> - if (cur_expr->avail_occr == NULL)
> - cur_expr->avail_occr = avail_occr;
> - else
> - last_occr->next = avail_occr;
> -
> avail_occr->insn = insn;
> - avail_occr->next = NULL;
> + avail_occr->next = cur_expr->avail_occr;
> avail_occr->deleted_p = 0;
> + cur_expr->avail_occr = avail_occr;
> }
> }
>
> @@ -710,6 +696,9 @@ record_last_reg_set_info_regno (rtx_insn
> static void
> record_last_mem_set_info (rtx_insn *insn)
> {
> + if (!transp)
> + return;
> +
> struct modifies_mem *list_entry;
>
> list_entry = (struct modifies_mem *) obstack_alloc
> (&modifies_mem_obstack,
>
> @@ -995,7 +984,8 @@ get_bb_avail_insn (basic_block bb, struc
> /* If we could not find an occurrence in BB, see if BB
> has a single predecessor with an occurrence that is
> transparent through BB. */
> - if (single_pred_p (bb)
> + if (transp
> + && single_pred_p (bb)
> && bitmap_bit_p (transp[bb->index], bitmap_index)
> && (occr = get_bb_avail_insn (single_pred (bb), orig_occr,
> bitmap_index)))
> {
> @@ -1371,6 +1361,10 @@ delete_redundant_insns (void)
> static void
> gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
> {
> + /* Disable computing transparentness if it is too expensive. */
> + bool do_transp
> + = !gcse_or_cprop_is_too_expensive (_("using simple load CSE after register "
> + "allocation"));
>
> memset (&stats, 0, sizeof (stats));
>
> @@ -1392,15 +1386,21 @@ gcse_after_reload_main (rtx f ATTRIBUTE_
> increase the number of redundant loads found. So compute
> transparency information for each memory expression in the hash table. */
> df_analyze ();
> - /* This cannot be part of the normal allocation routine because
> - we have to know the number of elements in the hash table. */
> - transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> - expr_table->elements ());
> - bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
> - expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
> + if (do_transp)
> + {
> + /* This cannot be part of the normal allocation routine because
> + we have to know the number of elements in the hash table. */
> + transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> + expr_table->elements ());
> + bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
> + expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
> + }
> + else
> + transp = NULL;
> eliminate_partially_redundant_loads ();
> delete_redundant_insns ();
> - sbitmap_vector_free (transp);
> + if (do_transp)
> + sbitmap_vector_free (transp);
>
> if (dump_file)
> {
Hi,
Just to let you know that this patch introduced a regression for pru target:
PASS->FAIL: gcc.c-torture/execute/builtins/memcpy-chk.c execution, -O3 -g
PASS->FAIL: gcc.c-torture/execute/builtins/memmove-chk.c execution, -O3 -g
PASS->FAIL: gcc.c-torture/execute/builtins/mempcpy-2.c execution, -O3 -g
PASS->FAIL: gcc.c-torture/execute/builtins/strcpy.c execution, -O3 -g
PASS->FAIL: gcc.c-torture/execute/pr49390.c -O3 -g execution test
I suspect the patch has exposed a latent bug in the pru backend. I'm
investigating.
Thanks,
Dimitar