This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch]: to improve precision of register allocation conflict builder.


This is the second of several patches for removing REG_NO_CONFLICT notes.

This patch changes the conflict builder so that it precisely models
ZERO_EXTRACTs, SIGN_EXTRACTs, and STRICT_LOW_PARTs.   The modeling is
local to a basic block.

The next patch will provide information to global on multiword hard regs
that can be allocated to exactly the same hard reg set even if parts of
the regs conflict. 

This patch bootstraps and partially regression tests on x86-{32,64},
ppc-{32,64} and ia-64.
(An earlier version had a small number of ia-64 regressions that have
been fixed but was otherwise clean.  The other regression tests have not
yet finished, but I expect them later tonight, certainly before this
patch is reviewed and committed.)

Ok to commit, assuming the regression tests come out clean?

Kenny

2008-03-10  Kenneth Zadeck <zadeck@naturalbridge.com>

    PR rtl-optimization/35404

    * dbgcnt.def (ra_conflict): Added.
    * ra-conflict.c (compute_pseudo_bytes): New function.
    (clear_reg_in_live, set_reg_in_live, global_conflicts): Calls
    compute_pseudo_bytes to get info on subregs, extracts, and
    strict_low_part.
    (set_reg_in_live): Change last parm from bool to df_ref that may
    be NULL.
    (clear_reg_in_live): Removed reg parameter.
    * Makefile.in (ra-conflict.o): Added dependency on $(DBGCNT_H).
    

Index: dbgcnt.def
===================================================================
--- dbgcnt.def	(revision 133087)
+++ dbgcnt.def	(working copy)
@@ -88,6 +88,7 @@ DEBUG_COUNTER (jump_bypass)
 DEBUG_COUNTER (local_alloc_for_sched)
 DEBUG_COUNTER (postreload_cse)
 DEBUG_COUNTER (pre_insn)
+DEBUG_COUNTER (ra_conflict)
 DEBUG_COUNTER (sched2_func)
 DEBUG_COUNTER (sched_block)
 DEBUG_COUNTER (sched_func)
Index: ra-conflict.c
===================================================================
--- ra-conflict.c	(revision 133087)
+++ ra-conflict.c	(working copy)
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  
 #include "ra.h"
 #include "sbitmap.h"
 #include "sparseset.h"
+#include "dbgcnt.h"
 
 /* Externs defined in regs.h.  */
 
@@ -440,6 +441,112 @@ ra_init_live_subregs (bool init_value, 
 }
 
 
+/* Compute the set of bytes of a pseudo affected by the store in REF.
+   If the store is to the whole register, just return TRUE; if it is
+   to part of the register, return FALSE and set START_BYTE and
+   LAST_BYTE to the half-open byte range [START_BYTE, LAST_BYTE).  */
+
+static bool 
+compute_pseudo_bytes (struct df_ref *ref, unsigned int *start_byte, 
+		      unsigned int *last_byte)
+{
+  rtx reg = DF_REF_REG (ref);
+  /* If the ra_conflict debug counter declines, report a whole-reg store.  */
+  if (!dbg_cnt (ra_conflict))
+    return true;
+
+  if (GET_CODE (reg) == SUBREG)
+    {
+      enum machine_mode m1 = GET_MODE (reg);
+      unsigned int start;
+      unsigned int last;
+      if (DF_REF_FLAGS_IS_SET (ref, DF_REF_SIGN_EXTRACT | DF_REF_ZERO_EXTRACT))
+	{
+	  /*  (set (zero_extract:M1 (reg:M2 X) WIDTH POS) ...)
+	  
+	    This is a bitfield insertion.  The assignment clobbers
+	    exactly the bits named by WIDTH and POS and does not
+	    affect the other bits in register X.  It is also
+	    technically possible that the bits asked for are longer
+	    than units per word.  */
+	  int width = DF_REF_WIDTH (ref);
+	  int offset = DF_REF_OFFSET (ref);
+	  if (width == -1 || offset == -1)
+	    return true;
+
+	  start = offset / BITS_PER_UNIT;
+	  last = (width + offset + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
+
+	  if (dump_file)
+	    fprintf (dump_file, "extract start=%u last=%u\n", start, last);
+	}
+      else if (DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART))
+	{
+	  /*  (set (strict_low_part (subreg:M1 (reg:M2 X) N)) ...)
+
+	    This is a bitfield insertion.  The assignment clobbers
+	    exactly the bits named by the subreg--the M1 bits at
+	    position N.  It is also technically possible that the bits
+	    asked for are longer than units per word.  */
+	  int offset = SUBREG_BYTE (reg);
+
+	  start = offset;
+	  last = offset + GET_MODE_SIZE (m1);
+	  if (dump_file)
+	    fprintf (dump_file, "strict low part start=%u last=%u\n", start, last);
+	}
+      else /* (set (subreg:M1 (reg:M2 X) N) ...) */
+	{
+	  rtx subreg = SUBREG_REG (reg);
+	  enum machine_mode m2 = GET_MODE (subreg);
+	  if (GET_MODE_SIZE (m2) > UNITS_PER_WORD)
+	    {
+	      /* The assignment clobbers UNITS_PER_WORD segments of X.
+		 Look at the bytes named by the subreg, and expand it
+		 to cover a UNITS_PER_WORD part of register X.  That
+		 part of register X is clobbered, the rest is not.
+		 
+                 E.g., (subreg:SI (reg:DI X) 0), where UNITS_PER_WORD
+                 is the size of SImode, clobbers the first SImode part
+                 of X, and does not affect the second SImode part.
+
+                 E.g., (subreg:QI (reg:DI X) 0), where UNITS_PER_WORD
+                 is the size of SImode, clobbers the first SImode part
+                 of X, and does not affect the second SImode part.
+                 Here the QImode byte is expanded to a UNITS_PER_WORD
+                 portion of the register for purposes of determining
+                 what is clobbered.
+
+                 M1 may be wider than a word, or N may span two
+                 UNITS_PER_WORD segments of the pseudo, so the end of
+                 the range is rounded up to the next word boundary.  */
+	      int offset = SUBREG_BYTE (reg);
+	      int size = GET_MODE_SIZE (m1);
+	      if (size < UNITS_PER_WORD)
+		size = UNITS_PER_WORD;
+
+	      start = offset & ~(UNITS_PER_WORD - 1);
+	      last = (offset + size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
+	      if (dump_file)
+		fprintf (dump_file, "subreg start=%u last=%u\n", start, last);
+	    }
+	  else
+	    {
+	      /* The whole register (mode M2) is no larger than UNITS_PER_WORD.
+		 
+	         The assignment clobbers the entire register X.  */
+	      return true;
+	    }
+	}
+      
+      *start_byte = start;
+      *last_byte = last;
+      return false;
+    }
+  else /* Whole reg store.  */
+    return true;
+}
+
 /* Set REG to be not live in the sets ALLOCNOS_LIVE, LIVE_SUBREGS,
    HARD_REGS_LIVE.  DEF is the definition of the register.  */
 
@@ -448,47 +555,38 @@ clear_reg_in_live (sparseset allocnos_li
 		   sbitmap *live_subregs, 
 		   int *live_subregs_used,
 		   HARD_REG_SET *hard_regs_live, 
-		   rtx reg, struct df_ref *def)
+		   struct df_ref *def)
 {
+  rtx reg = DF_REF_REG (def);
   unsigned int regno = (GET_CODE (reg) == SUBREG) 
     ? REGNO (SUBREG_REG (reg)): REGNO (reg);
   int allocnum = reg_allocno[regno];
 
   if (allocnum >= 0)
     {
-      if (GET_CODE (reg) == SUBREG
-	  && !DF_REF_FLAGS_IS_SET (def, DF_REF_ZERO_EXTRACT))
+      unsigned int start;
+      unsigned int last;
+	  
+      if (!compute_pseudo_bytes (def, &start, &last))
 	{
-	  unsigned int start = SUBREG_BYTE (reg);
-	  unsigned int last = start + GET_MODE_SIZE (GET_MODE (reg));
-
 	  ra_init_live_subregs (sparseset_bit_p (allocnos_live, allocnum), 
 				live_subregs, live_subregs_used, allocnum, reg);
 
-	  if (!DF_REF_FLAGS_IS_SET (def, DF_REF_STRICT_LOW_PART))
-	    {
-	      /* Expand the range to cover entire words.
-		 Bytes added here are "don't care".  */
-	      start = start / UNITS_PER_WORD * UNITS_PER_WORD;
-	      last = ((last + UNITS_PER_WORD - 1)
-		      / UNITS_PER_WORD * UNITS_PER_WORD);
-	    }
-
 	  /* Ignore the paradoxical bits.  */
 	  if ((int)last > live_subregs_used[allocnum])
 	    last = live_subregs_used[allocnum];
-
+	  
 	  while (start < last)
 	    {
 	      RESET_BIT (live_subregs[allocnum], start);
 	      start++;
 	    }
-
+	  
 	  if (sbitmap_empty_p (live_subregs[allocnum]))
 	    {
 	      live_subregs_used[allocnum] = 0;
 	      sparseset_clear_bit (allocnos_live, allocnum);
-	    }
+		}
 	  else
 	    /* Set the allocnos live here because that bit has to be
 	       true to get us to look at the live_subregs fields.  */
@@ -540,8 +638,7 @@ set_reg_in_live (sparseset allocnos_live
 		 sbitmap *live_subregs, 
 		 int *live_subregs_used,
 		 HARD_REG_SET *hard_regs_live, 
-		 rtx reg,
-		 bool extract)
+		 rtx reg, struct df_ref *def)
 {
   unsigned int regno = (GET_CODE (reg) == SUBREG) 
     ? REGNO (SUBREG_REG (reg)): REGNO (reg);
@@ -549,18 +646,21 @@ set_reg_in_live (sparseset allocnos_live
 
   if (allocnum >= 0)
     {
-      if ((GET_CODE (reg) == SUBREG) && !extract)
-	{
-	  unsigned int start = SUBREG_BYTE (reg);
-	  unsigned int last = start + GET_MODE_SIZE (GET_MODE (reg));
+      unsigned int start;
+      unsigned int last;
 
+      /* Have to check that a def was passed, otherwise, this is
+	 called from the bottom of the block to get things started for
+	 that block and there are no df_refs there.  */
+      if (def && !compute_pseudo_bytes (def, &start, &last))
+	{
 	  ra_init_live_subregs (sparseset_bit_p (allocnos_live, allocnum), 
 				live_subregs, live_subregs_used, allocnum, reg);
 	  
 	  /* Ignore the paradoxical bits.  */
 	  if ((int)last > live_subregs_used[allocnum])
 	    last = live_subregs_used[allocnum];
-
+	  
 	  while (start < last)
 	    {
 	      SET_BIT (live_subregs[allocnum], start);
@@ -570,18 +670,18 @@ set_reg_in_live (sparseset allocnos_live
       else
 	/* Resetting the live_subregs_used is effectively saying do not use the 
 	   subregs because we are writing the whole pseudo.  */
-	  live_subregs_used[allocnum] = 0;
-     
+	live_subregs_used[allocnum] = 0;
+      
       sparseset_set_bit (allocnos_live, allocnum);
     }
-      
+
   if (regno >= FIRST_PSEUDO_REGISTER)
     return;
 
   /* Handle hardware regs (and pseudos allocated to hard regs).  */
   if (! fixed_regs[regno])
     {
-      if ((GET_CODE (reg) == SUBREG) && !extract)
+      if (def && (GET_CODE (reg) == SUBREG))
 	{
 	  unsigned int start = regno;
 	  unsigned int last;
@@ -748,10 +848,10 @@ global_conflicts (void)
   VEC (df_ref_t, heap) *clobbers = NULL;
   VEC (df_ref_t, heap) *dying_regs = NULL;
 
-  /* live_subregs is a vector used to keep accurate information about
-     which hardregs are live in multiword pseudos.  live_subregs and
+  /* Live_subregs is a vector used to keep accurate information about
+     which hardregs are live in multiword pseudos.  Live_subregs and
      live_subregs_used are indexed by reg_allocno.  The live_subreg
-     entry for a particular pseudo is a bitmap with one bit per byte
+     entry for a particular pseudo is an sbitmap with one bit per byte
      of the register.  It is only used if the corresponding element is
      non zero in live_subregs_used.  The value in live_subregs_used is
      number of bytes that the pseudo can occupy.  */
@@ -800,7 +900,7 @@ global_conflicts (void)
 	      rtx reg = regno_reg_rtx[i];
 
 	      set_reg_in_live (allocnos_live, live_subregs, live_subregs_used, 
-			       &hard_regs_live, reg, false);
+			       &hard_regs_live, reg, NULL);
 	      if (renumber >= 0 && renumber < FIRST_PSEUDO_REGISTER)
 		set_renumbers_live (&renumbers_live, live_subregs, live_subregs_used, 
 				    allocnum, renumber);
@@ -863,8 +963,7 @@ global_conflicts (void)
 		{
 		  rtx reg = DF_REF_REG (def);
 		  set_reg_in_live (allocnos_live, live_subregs, live_subregs_used, 
-				   &hard_regs_live, reg, 
-				   DF_REF_FLAGS_IS_SET (def, DF_REF_ZERO_EXTRACT));
+				   &hard_regs_live, reg, def);
 		  if (dump_file)
 		    dump_ref (dump_file, "  adding def", "\n",
 			      reg, DF_REF_REGNO (def), live_subregs, live_subregs_used);
@@ -898,7 +997,7 @@ global_conflicts (void)
 		  rtx reg = DF_REF_REG (def);
 
 		  clear_reg_in_live (allocnos_live, live_subregs, live_subregs_used,
-				     &hard_regs_live, reg, def);
+				     &hard_regs_live, def);
 		  if (dump_file)
 		    dump_ref (dump_file, "  clearing def", "\n", 
 			      reg, DF_REF_REGNO (def), live_subregs, live_subregs_used);
@@ -956,12 +1055,11 @@ global_conflicts (void)
 
 	      if (allocnum >= 0)
 		{
-		  if (GET_CODE (reg) == SUBREG
-		      && !DF_REF_FLAGS_IS_SET (use, DF_REF_ZERO_EXTRACT)) 
+		  unsigned int start;
+		  unsigned int last;
+		  
+		  if (!compute_pseudo_bytes (use, &start, &last))
 		    {
-		      unsigned int start = SUBREG_BYTE (reg);
-		      unsigned int last = start + GET_MODE_SIZE (GET_MODE (reg));
-
 		      ra_init_live_subregs (sparseset_bit_p (allocnos_live, allocnum), 
 					    live_subregs, live_subregs_used, allocnum, reg);
 		      
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 133087)
+++ Makefile.in	(working copy)
@@ -2724,7 +2724,7 @@ global.o : global.c $(CONFIG_H) $(SYSTEM
 ra-conflict.o : ra-conflict.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
    $(FLAGS_H) reload.h $(FUNCTION_H) $(RECOG_H) $(REGS_H) hard-reg-set.h \
    insn-config.h output.h toplev.h $(TM_P_H) $(MACHMODE_H) tree-pass.h \
-   $(TIMEVAR_H) vecprim.h $(DF_H) $(RA_H) sbitmap.h 
+   $(TIMEVAR_H) vecprim.h $(DF_H) $(RA_H) sbitmap.h $(DBGCNT_H)
 varray.o : varray.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(GGC_H) \
    $(HASHTAB_H) $(BCONFIG_H) $(VARRAY_H) toplev.h
 vec.o : vec.c $(CONFIG_H) $(SYSTEM_H) coretypes.h vec.h $(GGC_H) \

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]