This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH][0/n] tree LIM TLC - series part for backporting, limit LIM


On Thu, 14 Mar 2013, Richard Biener wrote:

> 
> This extracts pieces from the already posted patch series that are
> most worthwhile and applicable for backporting to both 4.8 and 4.7.
> It also re-implements the limiting of the maximum number of memory
> references to consider for LIMs dependence analysis.  This limiting
> is now done per loop-nest and disables optimizing outer loops
> only.  The limiting requires backporting introduction of the
> shared unalalyzable mem-ref - it works by marking that as stored
> in loops we do not want to compute dependences for - which makes
> dependence computation for mems in those loops linear, as that
> mem-ref, which conveniently has ID 0, is tested first.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> The current limit of 1000 datarefs is quite low (well, for LIMs
> purposes, that is), and I only bothered to care about -O1 for
> backports (no caching of the affine combination).  With the
> limit in place and at -O1 LIM now takes
> 
>  tree loop invariant motion:   0.55 ( 1%) usr
> 
> for the testcase in PR39326.  Four patches in total, we might
> consider not backporting the limiting, without it this
> insane testcase has, at ~2GB memory usage (peak determined by IRA)
> 
>  tree loop invariant motion: 533.30 (77%) usr
> 
> but avoids running into the DSE / combine issue (and thus stays
> managable overall at -O1).  With limiting it requires -fno-dse
> to not blow up (>5GB of memory use).

Note that the limiting patch (below) causes code-generation differences
because it collects memory-references in a different order and
store-motion applies its transform in order of mem-ref IDs
(different order of loads / stores and different decl UIDs).  The
different ordering results in quite a big speedup because bitmaps
have a more regular form (maybe only for this testcase though).

Richard.

> 2013-03-14  Richard Biener  <rguenther@suse.de>
> 
> 	PR tree-optimization/39326
> 	* tree-ssa-loop-im.c: Include diagnostic-core.h.
> 	(mark_ref_stored): Optimize.
> 	(gather_mem_refs_stmt): Also set all_refs_stored_bit if stored.
> 	(create_vop_ref_mapping_loop, create_vop_ref_mapping): Remove
> 	and fold functionality into ...
> 	(gather_mem_refs_in_loops): ... this.  Iterate over loops,
> 	counting memory references and punting when more than
> 	--param loop-max-datarefs-for-datadeps.
> 	(analyze_memory_references): Adjust.
> 
> Index: trunk/gcc/tree-ssa-loop-im.c
> ===================================================================
> *** trunk.orig/gcc/tree-ssa-loop-im.c	2013-03-14 12:52:37.000000000 +0100
> --- trunk/gcc/tree-ssa-loop-im.c	2013-03-14 14:23:47.533164359 +0100
> *************** along with GCC; see the file COPYING3.
> *** 20,25 ****
> --- 20,26 ----
>   #include "config.h"
>   #include "system.h"
>   #include "coretypes.h"
> + #include "diagnostic-core.h"
>   #include "tm.h"
>   #include "tree.h"
>   #include "tm_p.h"
> *************** record_mem_ref_loc (mem_ref_p ref, struc
> *** 1551,1561 ****
>   static void
>   mark_ref_stored (mem_ref_p ref, struct loop *loop)
>   {
> !   for (;
> !        loop != current_loops->tree_root
> !        && !bitmap_bit_p (ref->stored, loop->num);
> !        loop = loop_outer (loop))
> !     bitmap_set_bit (ref->stored, loop->num);
>   }
>   
>   /* Gathers memory references in statement STMT in LOOP, storing the
> --- 1552,1560 ----
>   static void
>   mark_ref_stored (mem_ref_p ref, struct loop *loop)
>   {
> !   while (loop != current_loops->tree_root
> ! 	 && bitmap_set_bit (ref->stored, loop->num))
> !     loop = loop_outer (loop);
>   }
>   
>   /* Gathers memory references in statement STMT in LOOP, storing the
> *************** gather_mem_refs_stmt (struct loop *loop,
> *** 1618,1624 ****
>       }
>     bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id);
>     if (is_stored)
> !     mark_ref_stored (ref, loop);
>     return;
>   }
>   
> --- 1617,1627 ----
>       }
>     bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id);
>     if (is_stored)
> !     {
> !       bitmap_set_bit (memory_accesses.all_refs_stored_in_loop[loop->num],
> ! 		      ref->id);
> !       mark_ref_stored (ref, loop);
> !     }
>     return;
>   }
>   
> *************** gather_mem_refs_stmt (struct loop *loop,
> *** 1627,1704 ****
>   static void
>   gather_mem_refs_in_loops (void)
>   {
> -   gimple_stmt_iterator bsi;
> -   basic_block bb;
>     struct loop *loop;
>     loop_iterator li;
> -   bitmap lrefs, alrefs, alrefso;
> - 
> -   FOR_EACH_BB (bb)
> -     {
> -       loop = bb->loop_father;
> -       if (loop == current_loops->tree_root)
> - 	continue;
>   
> !       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
> ! 	gather_mem_refs_stmt (loop, gsi_stmt (bsi));
> !     }
>   
>     /* Propagate the information about accessed memory references up
>        the loop hierarchy.  */
>     FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
>       {
> !       lrefs = memory_accesses.refs_in_loop[loop->num];
> !       alrefs = memory_accesses.all_refs_in_loop[loop->num];
> !       bitmap_ior_into (alrefs, lrefs);
>   
> !       if (loop_outer (loop) == current_loops->tree_root)
>   	continue;
>   
> !       alrefso = memory_accesses.all_refs_in_loop[loop_outer (loop)->num];
> !       bitmap_ior_into (alrefso, alrefs);
>       }
> - }
> - 
> - /* Create a mapping from virtual operands to references that touch them
> -    in LOOP.  */
> - 
> - static void
> - create_vop_ref_mapping_loop (struct loop *loop)
> - {
> -   bitmap refs = memory_accesses.refs_in_loop[loop->num];
> -   struct loop *sloop;
> -   bitmap_iterator bi;
> -   unsigned i;
> -   mem_ref_p ref;
>   
> !   EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
> !     {
> !       ref = memory_accesses.refs_list[i];
> !       for (sloop = loop; sloop != current_loops->tree_root;
> ! 	   sloop = loop_outer (sloop))
> ! 	if (bitmap_bit_p (ref->stored, loop->num))
> ! 	  {
> ! 	    bitmap refs_stored
> ! 	      = memory_accesses.all_refs_stored_in_loop[sloop->num];
> ! 	    bitmap_set_bit (refs_stored, ref->id);
> ! 	  }
> !     }
>   }
>   
> - /* For each non-clobbered virtual operand and each loop, record the memory
> -    references in this loop that touch the operand.  */
> - 
> - static void
> - create_vop_ref_mapping (void)
> - {
> -   loop_iterator li;
> -   struct loop *loop;
> - 
> -   FOR_EACH_LOOP (li, loop, 0)
> -     {
> -       create_vop_ref_mapping_loop (loop);
> -     }
> - }
>   
>   /* Gathers information about memory accesses in the loops.  */
>   
> --- 1630,1707 ----
>   static void
>   gather_mem_refs_in_loops (void)
>   {
>     struct loop *loop;
>     loop_iterator li;
>   
> !   unsigned *count = XCNEWVEC (unsigned, number_of_loops ());
>   
>     /* Propagate the information about accessed memory references up
>        the loop hierarchy.  */
>     FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
>       {
> !       basic_block *bbs = get_loop_body (loop);
> !       struct loop *outer;
> !       unsigned i;
> ! 
> !       for (i = 0; i < loop->num_nodes; ++i)
> ! 	{
> ! 	  basic_block bb = bbs[i];
> ! 	  gimple_stmt_iterator gsi;
> ! 	  if (bb->loop_father != loop)
> ! 	    continue;
> ! 	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> ! 	    gather_mem_refs_stmt (loop, gsi_stmt (gsi));
> ! 	}
> !       free (bbs);
>   
> !       /* Finalize the overall numbers (including subloops) for this loop.  */
> !       count[loop->num]
> ! 	+= bitmap_count_bits (memory_accesses.refs_in_loop[loop->num]);
> ! 
> !       /* If there are too many memory references in this loop and its
> ! 	 sub-loops such that dependence testing would blow up compile-time
> ! 	 drop a unanalyzed store into the list of memory references to
> ! 	 get to the early-out.  */
> !       if ((count[loop->num]
> ! 	   > (unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
> ! 	  && bitmap_set_bit
> ! 	       (memory_accesses.all_refs_stored_in_loop[loop->num],
> ! 		UNANALYZABLE_MEM_ID))
> ! 	{
> ! 	  bitmap_set_bit (memory_accesses.refs_in_loop[loop->num],
> ! 			  UNANALYZABLE_MEM_ID);
> ! 	  mark_ref_stored (memory_accesses.refs_list[UNANALYZABLE_MEM_ID],
> ! 			   loop);
> ! 	  if (dump_file && (dump_flags & TDF_DETAILS))
> ! 	    fprintf (dump_file, "Too many memory references in loop %d and its "
> ! 		     "super-loops, giving up\n", loop->num);
> ! 	  warning_at (gimple_location (gsi_stmt (gsi_start_bb (loop->header))),
> ! 		      OPT_Wdisabled_optimization,
> ! 		      "-ftree-loop-im: number of memory references in loop "
> ! 		      "exceeds the --param loops-max-datarefs-for-datadeps "
> ! 		      "threshold");
> ! 	}
> ! 
> !       /* Finalize the overall touched references (including subloops).  */
> !       bitmap_ior_into (memory_accesses.all_refs_in_loop[loop->num],
> ! 		       memory_accesses.refs_in_loop[loop->num]);
> ! 
> !       /* Propagate the information about accessed memory references up
> ! 	 the loop hierarchy.  */
> !       outer = loop_outer (loop);
> !       if (outer == current_loops->tree_root)
>   	continue;
>   
> !       bitmap_ior_into (memory_accesses.all_refs_in_loop[outer->num],
> ! 		       memory_accesses.all_refs_in_loop[loop->num]);
> !       bitmap_ior_into (memory_accesses.all_refs_stored_in_loop[outer->num],
> ! 		       memory_accesses.all_refs_stored_in_loop[loop->num]);
> !       count[outer->num] += count[loop->num];
>       }
>   
> !   free (count);
>   }
>   
>   
>   /* Gathers information about memory accesses in the loops.  */
>   
> *************** analyze_memory_references (void)
> *** 1731,1737 ****
>     memory_accesses.ttae_cache = NULL;
>   
>     gather_mem_refs_in_loops ();
> -   create_vop_ref_mapping ();
>   }
>   
>   /* Returns true if MEM1 and MEM2 may alias.  TTAE_CACHE is used as a cache in
> --- 1734,1739 ----
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]