This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [GRAPHITE, PATCH] Loop unroll and jam optimization


> 
> > At this point pbb->transformed has two basic maps, one is the mapping for
> > unroll and jam,
> > and one for the full tile for the striped dimension. Introduce a check that
> > differentiate
> > between them as the image of one maps should be included in the other.
> 
> I didn't do a full review (and I won't have time for it either),
> but at a quick glance,
> you still seem to be assuming that if you take the union of two
> basic maps, that you can extract the original two basic maps from the union.
> In general, you can't.  At least you shouldn't assume that you can.
> Besides, your updated code is also pretty convoluted,
> with very little documentation.
> 

Many thanks. To considerably ease the computation of separation class, needed to transmit information
available at scheduling computation time to AST computation time. I didn't want to
introduce new fields in graphite global structures, and tried to trick pbb->transformed
to contain the unroll and jam mapping as well as a mapping use to compute the separation
class. It wasn't good idea (for the mappings used by unroll and jam my version 
of isl preserve the basic mappings afteer union), but of course my code should not be based on
assumptions not guaranteed by isl semantics.

So, in this updated patch I go the other way and keep the additional information required in the
pbb structure. This has the advantages that removes the most convoluted part of my code dealing 
with the two basic maps kept in pbb->transformed. As before in graphite-optimize_isl,c, 
beside the map for unroll and jam, another map needed for the separation class defined by
full tiles is computed. In graphite-isl-ast-to-gimple.c, the separation class option is set (using 
the auxiliary map computed previously) and is added to the other AST build options. 

Mircea

Attachment: Change.txt
Description: Text document

Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c	(revision 217013)
+++ gcc/toplev.c	(working copy)
@@ -1302,11 +1302,12 @@
       || flag_loop_block
       || flag_loop_interchange
       || flag_loop_strip_mine
-      || flag_loop_parallelize_all)
+      || flag_loop_parallelize_all
+      || flag_loop_unroll_jam)
     sorry ("Graphite loop optimizations cannot be used (ISL is not available)" 
 	   "(-fgraphite, -fgraphite-identity, -floop-block, "
 	   "-floop-interchange, -floop-strip-mine, -floop-parallelize-all, "
-	   "and -ftree-loop-linear)");
+	   "-floop-unroll-and-jam, and -ftree-loop-linear)");
 #endif
 
   /* One region RA really helps to decrease the code size.  */
Index: gcc/graphite-optimize-isl.c
===================================================================
--- gcc/graphite-optimize-isl.c	(revision 217013)
+++ gcc/graphite-optimize-isl.c	(working copy)
@@ -186,7 +186,7 @@
   PartialSchedule = isl_band_get_partial_schedule (Band);
   *Dimensions = isl_band_n_member (Band);
 
-  if (DisableTiling)
+  if (DisableTiling || flag_loop_unroll_jam)
     return PartialSchedule;
 
   /* It does not make any sense to tile a band with just one dimension.  */
@@ -241,7 +241,9 @@
    constant number of iterations, if the number of loop iterations at
    DimToVectorize can be devided by VectorWidth. The default VectorWidth is
    currently constant and not yet target specific. This function does not reason
-   about parallelism.  */
+   about parallelism.
+
+  */
 static isl_map *
 getPrevectorMap (isl_ctx *ctx, int DimToVectorize,
 		 int ScheduleDimensions,
@@ -305,8 +307,89 @@
   isl_constraint_set_constant_si (c, VectorWidth - 1);
   TilingMap = isl_map_add_constraint (TilingMap, c);
 
-  isl_map_dump (TilingMap);
+  return TilingMap;
+}
 
+/* Compute an auxiliary map to getPrevectorMap, for computing the separating 
+   class defined by full tiles.  Used in graphite_isl_ast_to_gimple.c to set the 
+   corresponding option for AST build.
+ */ 
+static isl_map *
+getPrevectorMap_full (isl_ctx *ctx, int DimToVectorize,
+		 int ScheduleDimensions,
+		 int VectorWidth)
+{
+  isl_space *Space;
+  isl_local_space *LocalSpace, *LocalSpaceRange;
+  isl_set *Modulo;
+  isl_map *TilingMap;
+  isl_constraint *c;
+  isl_aff *Aff;
+  int PointDimension; /* ip */
+  int TileDimension;  /* it */
+  isl_val *VectorWidthMP;
+  int i;
+
+  /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/
+
+  Space = isl_space_alloc (ctx, 0, ScheduleDimensions, ScheduleDimensions + 1);
+  TilingMap = isl_map_universe (isl_space_copy (Space));
+  LocalSpace = isl_local_space_from_space (Space);
+  PointDimension = ScheduleDimensions;
+  TileDimension = DimToVectorize;
+
+  /* Create an identity map for everything except DimToVectorize and the 
+     point loop. */
+  for (i = 0; i < ScheduleDimensions; i++)
+    {
+      if (i == DimToVectorize)
+        continue;
+
+      c = isl_equality_alloc (isl_local_space_copy (LocalSpace));
+
+      isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1);
+      isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1);
+
+      TilingMap = isl_map_add_constraint (TilingMap, c);
+    }
+
+  /* it % 'VectorWidth' = 0  */
+  LocalSpaceRange = isl_local_space_range (isl_local_space_copy (LocalSpace));
+  Aff = isl_aff_zero_on_domain (LocalSpaceRange);
+  Aff = isl_aff_set_constant_si (Aff, VectorWidth);
+  Aff = isl_aff_set_coefficient_si (Aff, isl_dim_in, TileDimension, 1);
+
+  VectorWidthMP = isl_val_int_from_si (ctx, VectorWidth);
+  Aff = isl_aff_mod_val (Aff, VectorWidthMP);
+  Modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (Aff));
+  TilingMap = isl_map_intersect_range (TilingMap, Modulo);
+
+  /* it + ('VectorWidth' - 1) = i0  */
+  c = isl_equality_alloc (isl_local_space_copy(LocalSpace));
+  isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension,-1);
+  isl_constraint_set_coefficient_si (c, isl_dim_in, TileDimension, 1);
+  isl_constraint_set_constant_si (c, -VectorWidth);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
+  /* ip >= 0 */
+  c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
+  isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
+  isl_constraint_set_constant_si (c, 0);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
+  /* it <= ip */
+  c = isl_inequality_alloc (isl_local_space_copy (LocalSpace));
+  isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, -1);
+  isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, 1);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
+  /* ip <= it + ('VectorWidth' - 1) */
+  c = isl_inequality_alloc (LocalSpace);
+  isl_constraint_set_coefficient_si (c, isl_dim_out, TileDimension, 1);
+  isl_constraint_set_coefficient_si (c, isl_dim_out, PointDimension, -1);
+  isl_constraint_set_constant_si (c, VectorWidth - 1);
+  TilingMap = isl_map_add_constraint (TilingMap, c);
+
   return TilingMap;
 }
 
@@ -316,9 +399,11 @@
     
    We walk recursively the forest of bands to combine the schedules of the
    individual bands to the overall schedule. In case tiling is requested,
-   the individual bands are tiled.  */
+   the individual bands are tiled.
+   For unroll and jam the map the schedule for full tiles of the unrolled
+   dimnesion is computed.  */
 static isl_union_map *
-getScheduleForBandList (isl_band_list *BandList)
+getScheduleForBandList (isl_band_list *BandList, isl_union_map **map_sepcl)
 {
   int NumBands, i;
   isl_union_map *Schedule;
@@ -335,55 +420,87 @@
       int ScheduleDimensions;
       isl_space *Space;
 
+      isl_union_map *PartialSchedule_f;
+
       Band = isl_band_list_get_band (BandList, i);
       PartialSchedule = getScheduleForBand (Band, &ScheduleDimensions);
       Space = isl_union_map_get_space (PartialSchedule);
 
+      PartialSchedule_f = NULL;
+
       if (isl_band_has_children (Band))
 	{
 	  isl_band_list *Children;
 	  isl_union_map *SuffixSchedule;
 
 	  Children = isl_band_get_children (Band);
-	  SuffixSchedule = getScheduleForBandList (Children);
+	  SuffixSchedule = getScheduleForBandList (Children, map_sepcl);
 	  PartialSchedule = isl_union_map_flat_range_product (PartialSchedule,
 							      SuffixSchedule);
 	  isl_band_list_free (Children);
 	}
-      else if (EnablePollyVector)
+      else if (EnablePollyVector || flag_loop_unroll_jam)
 	{
+	  int i;
+	  int depth;
+ 
+ 	  depth = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
+  
 	  for (i = ScheduleDimensions - 1 ;  i >= 0 ; i--)
 	    {
+	      if (flag_loop_unroll_jam && (i != (ScheduleDimensions - depth)))
+		continue;
+
 	      if (isl_band_member_is_zero_distance (Band, i))
 		{
 		  isl_map *TileMap;
 		  isl_union_map *TileUMap;
+		  int stride;
 
-		  TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, 4);
+                  stride = PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_SIZE);    
+
+		  TileMap = getPrevectorMap_full (ctx, i, ScheduleDimensions, 
+						  stride); 
+ 		  TileUMap = isl_union_map_from_map (TileMap);
+		  TileUMap = isl_union_map_align_params
+		    (TileUMap, isl_space_copy (Space));
+		  PartialSchedule_f = isl_union_map_apply_range
+		    (isl_union_map_copy (PartialSchedule), TileUMap);
+
+		  TileMap = getPrevectorMap (ctx, i, ScheduleDimensions, stride);
 		  TileUMap = isl_union_map_from_map (TileMap);
 		  TileUMap = isl_union_map_align_params
 		    (TileUMap, isl_space_copy (Space));
 		  PartialSchedule = isl_union_map_apply_range
 		    (PartialSchedule, TileUMap);
 		  break;
-		}
+		}	
 	    }
 	}
-
       Schedule = isl_union_map_union (Schedule, PartialSchedule);
 
       isl_band_free (Band);
       isl_space_free (Space);
+
+      if (!flag_loop_unroll_jam)
+	continue;
+
+      if (PartialSchedule_f)
+	*map_sepcl = isl_union_map_union (*map_sepcl, 
+					  PartialSchedule_f);
+      else
+        *map_sepcl = isl_union_map_union (*map_sepcl, 
+         				  isl_union_map_copy (PartialSchedule));
     }
 
   return Schedule;
 }
 
 static isl_union_map *
-getScheduleMap (isl_schedule *Schedule)
+getScheduleMap (isl_schedule *Schedule, isl_union_map **map_sepcl)
 {
   isl_band_list *BandList = isl_schedule_get_band_forest (Schedule);
-  isl_union_map *ScheduleMap = getScheduleForBandList (BandList);
+  isl_union_map *ScheduleMap = getScheduleForBandList (BandList, map_sepcl);
   isl_band_list_free (BandList);
   return ScheduleMap;
 }
@@ -398,7 +515,7 @@
 }
 
 static void
-apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map)
+apply_schedule_map_to_scop (scop_p scop, isl_union_map *schedule_map, bool sepcl)
 {
   int i;
   poly_bb_p pbb;
@@ -413,8 +530,15 @@
 	(isl_union_map_copy (schedule_map),
 	 isl_union_set_from_set (domain));
       isl_union_map_foreach_map (stmtBand, getSingleMap, &stmtSchedule);
-      isl_map_free (pbb->transformed);
-      pbb->transformed = stmtSchedule;
+
+      if (!sepcl)
+	{
+	  isl_map_free (pbb->transformed);
+	  pbb->transformed = stmtSchedule;
+	}
+      else
+	  pbb->map_sepclass = stmtSchedule;
+
       isl_union_map_free (stmtBand);
     }
 }
@@ -429,6 +553,7 @@
   isl_union_set *domain;
   isl_union_map *validity, *proximity, *dependences;
   isl_union_map *schedule_map;
+  isl_union_map *schedule_map_f;
 
   domain = scop_get_domains (scop);
   dependences = scop_get_dependences (scop);
@@ -450,9 +575,13 @@
   if (!schedule)
     return false;
 
-  schedule_map = getScheduleMap (schedule);
+  schedule_map_f = isl_union_map_empty (isl_space_params_alloc (scop->ctx, 0));
+  schedule_map = getScheduleMap (schedule, &schedule_map_f);
 
-  apply_schedule_map_to_scop (scop, schedule_map);
+  apply_schedule_map_to_scop (scop, schedule_map, false);
+  if (!isl_union_map_is_empty (schedule_map_f))
+    apply_schedule_map_to_scop (scop, schedule_map_f, true);
+  isl_union_map_free (schedule_map_f);
 
   isl_schedule_free (schedule);
   isl_union_map_free (schedule_map);
Index: gcc/graphite-isl-ast-to-gimple.c
===================================================================
--- gcc/graphite-isl-ast-to-gimple.c	(revision 217013)
+++ gcc/graphite-isl-ast-to-gimple.c	(working copy)
@@ -831,6 +831,92 @@
   return schedule;
 }
 
+/* Set the separation_class option for unroll and jam. */
+
+static __isl_give isl_union_map *
+generate_luj_sepclass_opt (scop_p scop, __isl_take isl_union_set *domain, 
+			int dim, int cl)
+{
+  isl_map  *map;
+  isl_space *space, *space_sep;
+  isl_ctx *ctx;
+  isl_union_map *mapu;
+  int nsched = get_max_schedule_dimensions (scop);
+ 
+  ctx = scop->ctx;
+  space_sep = isl_space_alloc (ctx, 0, 1, 1);
+  space_sep = isl_space_wrap (space_sep);
+  space_sep = isl_space_set_tuple_name (space_sep, isl_dim_set,
+				        "separation_class");
+  space = isl_set_get_space (scop->context);
+  space_sep = isl_space_align_params (space_sep, isl_space_copy(space));
+  space = isl_space_map_from_domain_and_range (space, space_sep);
+  space = isl_space_add_dims (space,isl_dim_in, nsched);
+  map = isl_map_universe (space);
+  isl_map_fix_si (map,isl_dim_out,0,dim);
+  isl_map_fix_si (map,isl_dim_out,1,cl);
+
+  mapu = isl_union_map_intersect_domain (isl_union_map_from_map (map), 
+					 domain);
+  return (mapu);
+}
+
+/* Compute the separation class for loop unroll and jam.  */
+
+static __isl_give isl_union_set *
+generate_luj_sepclass (scop_p scop)
+{
+  int i;
+  poly_bb_p pbb;
+  isl_union_set *domain_isl;
+
+  domain_isl = isl_union_set_empty (isl_set_get_space (scop->context));
+
+  FOR_EACH_VEC_ELT (SCOP_BBS (scop), i, pbb)
+    {
+      isl_set *bb_domain;
+      isl_set *bb_domain_s;
+
+      if (pbb->map_sepclass == NULL)
+	continue;
+
+      if (isl_set_is_empty (pbb->domain))
+	continue;
+
+      bb_domain = isl_set_copy (pbb->domain);
+      bb_domain_s = isl_set_apply (bb_domain, pbb->map_sepclass);
+      pbb->map_sepclass = NULL;
+
+      domain_isl =
+	isl_union_set_union (domain_isl, isl_union_set_from_set (bb_domain_s));
+    }
+
+  return domain_isl;
+}
+
+/* Set the AST built options for loop unroll and jam. */
+ 
+static __isl_give isl_union_map *
+generate_luj_options (scop_p scop)
+{
+  isl_union_set *domain_isl;
+  isl_union_map *options_isl_ss;
+  isl_union_map *options_isl =
+    isl_union_map_empty (isl_set_get_space (scop->context));
+  int dim = get_max_schedule_dimensions (scop) - 1;
+  int dim1 = dim - PARAM_VALUE (PARAM_LOOP_UNROLL_JAM_DEPTH);
+
+  if (!flag_loop_unroll_jam)
+    return options_isl;
+
+  domain_isl = generate_luj_sepclass (scop);
+
+  options_isl_ss = generate_luj_sepclass_opt (scop, domain_isl, dim1, 0);
+  options_isl = isl_union_map_union (options_isl, options_isl_ss);
+
+  return options_isl;
+}
+
 /* Generates a schedule, which specifies an order used to
    visit elements in a domain.  */
 
@@ -879,11 +965,13 @@
 }
 
 /* Set the separate option for all dimensions.
-   This helps to reduce control overhead.  */
+   This helps to reduce control overhead.
+   Set the options for unroll and jam.  */
 
 static __isl_give isl_ast_build *
 set_options (__isl_take isl_ast_build *control,
-	     __isl_keep isl_union_map *schedule)
+	     __isl_keep isl_union_map *schedule,
+	     __isl_take isl_union_map *opt_luj)
 {
   isl_ctx *ctx = isl_union_map_get_ctx (schedule);
   isl_space *range_space = isl_space_set_alloc (ctx, 0, 1);
@@ -894,6 +982,9 @@
   isl_union_set *domain = isl_union_map_range (isl_union_map_copy (schedule));
   domain = isl_union_set_universe (domain);
   isl_union_map *options = isl_union_map_from_domain_and_range (domain, range);
+
+  options = isl_union_map_union (options, opt_luj);
+
   return isl_ast_build_set_options (control, options);
 }
 
@@ -907,9 +998,14 @@
   isl_options_set_ast_build_atomic_upper_bound (scop->ctx, true);
 
   add_parameters_to_ivs_params (scop, ip);
+
+  isl_union_map *options_luj = generate_luj_options (scop);
+
   isl_union_map *schedule_isl = generate_isl_schedule (scop);
   isl_ast_build *context_isl = generate_isl_context (scop);
-  context_isl = set_options (context_isl, schedule_isl);
+
+  context_isl = set_options (context_isl, schedule_isl, options_luj);
+
   isl_union_map *dependences = NULL;
   if (flag_loop_parallelize_all)
   {
Index: gcc/graphite-poly.c
===================================================================
--- gcc/graphite-poly.c	(revision 217013)
+++ gcc/graphite-poly.c	(working copy)
@@ -276,7 +276,7 @@
 
   /* This pass needs to be run at the final stage, as it does not
      update the lst.  */
-  if (flag_loop_optimize_isl)
+  if (flag_loop_optimize_isl || flag_loop_unroll_jam)
     transform_done |= optimize_isl (scop);
 
   return transform_done;
@@ -327,6 +327,7 @@
   pbb->schedule = NULL;
   pbb->transformed = NULL;
   pbb->saved = NULL;
+  pbb->map_sepclass = NULL;
   PBB_SCOP (pbb) = scop;
   pbb_set_black_box (pbb, black_box);
   PBB_TRANSFORMED (pbb) = NULL;
Index: gcc/graphite-poly.h
===================================================================
--- gcc/graphite-poly.h	(revision 217013)
+++ gcc/graphite-poly.h	(working copy)
@@ -349,6 +349,9 @@
   poly_scattering_p _saved;
   isl_map *saved;
 
+  /* For tiling, the map for computing the separating class.  */
+  isl_map *map_sepclass;
+
   /* True when this PBB contains only a reduction statement.  */
   bool is_reduction;
 };
Index: gcc/graphite.c
===================================================================
--- gcc/graphite.c	(revision 217013)
+++ gcc/graphite.c	(working copy)
@@ -383,7 +383,8 @@
       || flag_loop_strip_mine
       || flag_graphite_identity
       || flag_loop_parallelize_all
-      || flag_loop_optimize_isl)
+      || flag_loop_optimize_isl
+      || flag_loop_unroll_jam)
     flag_graphite = 1;
 
   return flag_graphite != 0;
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 217013)
+++ gcc/common.opt	(working copy)
@@ -1328,6 +1328,10 @@
 Common Report Var(flag_loop_block) Optimization
 Enable Loop Blocking transformation
 
+floop-unroll-and-jam
+Common Report Var(flag_loop_unroll_jam) Optimization
+Enable Loop Unroll Jam transformation
+ 
 fgnu-tm
 Common Report Var(flag_tm)
 Enable support for GNU transactional memory
Index: gcc/params.def
===================================================================
--- gcc/params.def	(revision 217013)
+++ gcc/params.def	(working copy)
@@ -847,6 +847,21 @@
 	  "size of tiles for loop blocking",
 	  51, 0, 0)
 
+/* Size of unrolling factor for unroll-and-jam.  */
+ 
+DEFPARAM (PARAM_LOOP_UNROLL_JAM_SIZE,
+ 	  "loop-unroll-jam-size",
+ 	  "size of unrolling factor for unroll-and-jam",
+ 	  4, 0, 0)
+
+/* Size of the band formed by the strip mined dimension and the most inner one for unroll-and-jam.  */
+ 
+DEFPARAM (PARAM_LOOP_UNROLL_JAM_DEPTH,
+ 	  "loop-unroll-jam-depth",
+ 	  "depth of unrolled loop for unroll-and-jam",
+ 	  2, 0, 0)
+
+
 /* Maximal number of parameters that we allow in a SCoP.  */
 
 DEFPARAM (PARAM_GRAPHITE_MAX_NB_SCOP_PARAMS,

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]