This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Re: [PATCH] Disable postreload GCSE on large code


On Thu, 5 Sep 2019, Dimitar Dimitrov wrote:

> On Tuesday, 3 September 2019, 14:54:19 EEST Richard Biener wrote:
> > 2019-09-02  Richard Biener  <rguenther@suse.de>
> > 
> >         PR rtl-optimization/36262
> >         * postreload-gcse.c: Include intl.h and gcse.h.
> >         (insert_expr_in_table): Insert at the head of cur_expr->avail_occr
> >         to avoid linear list walk.
> >         (record_last_mem_set_info): Gate off if not computing transparentness.
> >         (get_bb_avail_insn): If transparentness isn't computed give up early.
> >         (gcse_after_reload_main): Skip compute_transp and extended PRE
> >         if gcse_or_cprop_is_too_expensive says so.
> > 
> > Index: gcc/postreload-gcse.c
> > ===================================================================
> > --- gcc/postreload-gcse.c       (revision 275294)
> > +++ gcc/postreload-gcse.c       (working copy)
> > @@ -38,7 +38,9 @@ along with GCC; see the file COPYING3.
> >  #include "params.h"
> >  #include "tree-pass.h"
> >  #include "dbgcnt.h"
> > +#include "intl.h"
> >  #include "gcse-common.h"
> > +#include "gcse.h"
> > 
> >  /* The following code implements gcse after reload, the purpose of this
> >     pass is to cleanup redundant loads generated by reload and other
> > @@ -364,7 +366,7 @@ insert_expr_in_table (rtx x, rtx_insn *i
> >    int do_not_record_p;
> >    hashval_t hash;
> >    struct expr *cur_expr, **slot;
> > -  struct occr *avail_occr, *last_occr = NULL;
> > +  struct occr *avail_occr;
> > 
> >    hash = hash_expr (x, &do_not_record_p);
> > 
> > @@ -405,38 +407,22 @@ insert_expr_in_table (rtx x, rtx_insn *i
> >        cur_expr = *slot;
> >      }
> > 
> > -  /* Search for another occurrence in the same basic block.  */
> > +  /* Search for another occurrence in the same basic block.  We insert
> > +     insns blockwise from start to end, so keep appending to the
> > +     start of the list so we have to check only a single element.  */
> >    avail_occr = cur_expr->avail_occr;
> > -  while (avail_occr
> > -        && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
> > -    {
> > -      /* If an occurrence isn't found, save a pointer to the end of
> > -        the list.  */
> > -      last_occr = avail_occr;
> > -      avail_occr = avail_occr->next;
> > -    }
> > -
> > -  if (avail_occr)
> > -    /* Found another instance of the expression in the same basic block.
> > -       Prefer this occurrence to the currently recorded one.  We want
> > -       the last one in the block and the block is scanned from start
> > -       to end.  */
> > +  if (avail_occr
> > +      && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
> >      avail_occr->insn = insn;
> >    else
> >      {
> >        /* First occurrence of this expression in this basic block.  */
> >        avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
> >                                                   sizeof (struct occr));
> > -
> > -      /* First occurrence of this expression in any block?  */
> > -      if (cur_expr->avail_occr == NULL)
> > -        cur_expr->avail_occr = avail_occr;
> > -      else
> > -        last_occr->next = avail_occr;
> > -
> >        avail_occr->insn = insn;
> > -      avail_occr->next = NULL;
> > +      avail_occr->next = cur_expr->avail_occr;
> >        avail_occr->deleted_p = 0;
> > +      cur_expr->avail_occr = avail_occr;
> >      }
> >  }
> >  
> > @@ -710,6 +696,9 @@ record_last_reg_set_info_regno (rtx_insn
> >  static void
> >  record_last_mem_set_info (rtx_insn *insn)
> >  {
> > +  if (!transp)
> > +    return;
> > +
> >    struct modifies_mem *list_entry;
> > 
> >    list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack,
> > @@ -995,7 +984,8 @@ get_bb_avail_insn (basic_block bb, struc
> >    /* If we could not find an occurrence in BB, see if BB
> >       has a single predecessor with an occurrence that is
> >       transparent through BB.  */
> > -  if (single_pred_p (bb)
> > +  if (transp
> > +      && single_pred_p (bb)
> >        && bitmap_bit_p (transp[bb->index], bitmap_index)
> >        && (occr = get_bb_avail_insn (single_pred (bb), orig_occr,
> >                                      bitmap_index)))
> >      {
> > @@ -1371,6 +1361,10 @@ delete_redundant_insns (void)
> >  static void
> >  gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
> >  {
> > +  /* Disable computing transparentness if it is too expensive.  */
> > +  bool do_transp
> > +    = !gcse_or_cprop_is_too_expensive (_("using simple load CSE after register "
> > +                                         "allocation"));
> > 
> >    memset (&stats, 0, sizeof (stats));
> > 
> > @@ -1392,15 +1386,21 @@ gcse_after_reload_main (rtx f ATTRIBUTE_
> >          increase the number of redundant loads found.  So compute transparency
> >          information for each memory expression in the hash table.  */
> >        df_analyze ();
> > -      /* This cannot be part of the normal allocation routine because
> > -        we have to know the number of elements in the hash table.  */
> > -      transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> > -                                    expr_table->elements ());
> > -      bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
> > -      expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
> > +      if (do_transp)
> > +       {
> > +         /* This cannot be part of the normal allocation routine because
> > +            we have to know the number of elements in the hash table.  */
> > +         transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
> > +                                        expr_table->elements ());
> > +         bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
> > +         expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
> > +       }
> > +      else
> > +       transp = NULL;
> >        eliminate_partially_redundant_loads ();
> >        delete_redundant_insns ();
> > -      sbitmap_vector_free (transp);
> > +      if (do_transp)
> > +       sbitmap_vector_free (transp);
> > 
> >        if (dump_file)
> >         {
> 
> Hi,
> 
> Just to let you know that this patch introduced a regression for the pru target:
> 
> PASS->FAIL: gcc.c-torture/execute/builtins/memcpy-chk.c execution,  -O3 -g 
> PASS->FAIL: gcc.c-torture/execute/builtins/memmove-chk.c execution,  -O3 -g 
> PASS->FAIL: gcc.c-torture/execute/builtins/mempcpy-2.c execution,  -O3 -g 
> PASS->FAIL: gcc.c-torture/execute/builtins/strcpy.c execution,  -O3 -g 
> PASS->FAIL: gcc.c-torture/execute/pr49390.c   -O3 -g  execution test
> 
> I suspect the patch has exposed a latent bug in the pru backend. I'm 
> investigating.

Nope, it was my fault - patch in testing.

Richard.
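
For readers skimming the archive, the list handling change in
insert_expr_in_table comes down to exploiting the blockwise scan order:
insns are recorded block by block from start to end, so the head of the
occurrence list is the only element that can belong to the current block,
and the old walk-to-the-tail-and-append loop is unnecessary.  A minimal
standalone sketch, using simplified stand-in types rather than the GCC
structures (the real pass stores rtx_insn pointers and allocates from
occr_obstack):

#include <stdlib.h>

/* Simplified stand-in for GCC's struct occr bookkeeping.  */
struct occr
{
  int bb_index;        /* basic block the occurrence was seen in */
  int insn_uid;        /* last insn in that block computing the expression */
  struct occr *next;
};

/* Record an occurrence of an expression in block BB_INDEX at INSN_UID.
   Occurrences arrive blockwise from start to end, so the current block,
   if already seen, is always at the head of the list and one comparison
   replaces the old linear walk.  */
static void
record_occurrence (struct occr **head, int bb_index, int insn_uid)
{
  struct occr *o = *head;

  if (o && o->bb_index == bb_index)
    {
      /* Another occurrence in the same block: keep the later insn.  */
      o->insn_uid = insn_uid;
      return;
    }

  /* First occurrence in this block: push onto the head of the list.  */
  o = (struct occr *) malloc (sizeof *o);
  o->bb_index = bb_index;
  o->insn_uid = insn_uid;
  o->next = *head;
  *head = o;
}

int
main (void)
{
  struct occr *list = NULL;
  record_occurrence (&list, 1, 10);
  record_occurrence (&list, 1, 14);   /* same block: head updated in place */
  record_occurrence (&list, 2, 21);   /* new block: pushed onto the head */
  return 0;
}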

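The transparency gating follows the same shape: skip the expensive
analysis for very large functions, leave the data structure NULL, and
make every consumer tolerate its absence.  A rough sketch of that
pattern, with illustrative names and a made-up size heuristic rather
than the actual GCC interfaces:

#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

/* Stand-in for the per-block transparency data; the real pass uses an
   sbitmap vector indexed by basic block.  */
static unsigned long **transp;

/* Stand-in for gcse_or_cprop_is_too_expensive: refuse the analysis once
   blocks * expressions crosses an arbitrary threshold.  */
static bool
too_expensive (size_t n_blocks, size_t n_exprs)
{
  return n_blocks * n_exprs > 1000 * 1000;
}

static void
run_pass (size_t n_blocks, size_t n_exprs)
{
  bool do_transp = !too_expensive (n_blocks, n_exprs);

  if (do_transp)
    {
      transp = (unsigned long **) calloc (n_blocks, sizeof *transp);
      /* ... compute per-block transparency here ... */
    }
  else
    transp = NULL;

  /* Consumers test transp before relying on it, mirroring the early
     return added to record_last_mem_set_info and the extra check in
     get_bb_avail_insn: cross-block elimination is simply not attempted
     when the information is missing.  */

  if (do_transp)
    free (transp);
}

int
main (void)
{
  run_pass (64, 128);         /* small function: analysis runs */
  run_pass (100000, 50000);   /* huge function: analysis skipped */
  return 0;
}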