This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

squelch dead store elimination


Our current dead store elimination code builds a list of
all non-aliasing sets, and compares each new set in turn
to see if it is killed by a later store.

Consider a function containing one block with 4K provably
non-aliasing stores.  Such is the subject of optimization/286.

One might guess that we could build up quite a long list,
and that it might take a bit o time to iterate through it
N times.  One might not immediately guess that we'd also
eat up fantastic amounts of memory doing so, but it is true.

The following patch caps the length of mem_set_list to an
arbitrary 100 elements.  Any additional non-aliasing memory
sets that we encounter are dropped on the floor.

With the test case from the PR, this improves thinks like so:

BEFORE

 i686

 MEM = 527M
 flow analysis         : 122.94 (49%) usr   6.19 (73%) sys 144.54 (42%) wall
 flow 2                :  71.03 (29%) usr   0.30 ( 4%) sys  71.44 (21%) wall
 TOTAL                 : 248.79             8.46           343.71

 alpha

 MEM = 19M
 flow analysis         :  58.29 (37%) usr   0.00 ( 3%) sys  58.31 (37%) wall
 flow 2                :  57.28 (37%) usr   0.00 ( 4%) sys  57.30 (37%) wall
 TOTAL                 : 156.17             0.21           156.40

AFTER

 i686
 
 MEM = 32M
 flow analysis         :   5.48 ( 9%) usr   0.21 (57%) sys   5.69 (10%) wall
 flow 2                :   3.29 ( 6%) usr   0.00 ( 0%) sys   3.29 ( 6%) wall
 TOTAL                 :  58.54             0.37            58.91

 alpha

 MEM = 19M
 flow analysis         :   2.46 ( 5%) usr   0.00 ( 0%) sys   2.46 ( 5%) wall
 flow 2                :   2.44 ( 5%) usr   0.00 ( 0%) sys   2.44 ( 5%) wall
 TOTAL                 :  45.01             0.18            45.21

I'm not sure why Alpha wasn't using the same amounts of memory
as i686.  I'm guessing it is related to aliasing information,
in that Alpha wasn't managing to collect it somehow.



r~


        * flow.c (struct propagate_block_info): Add mem_set_list_len.
        (MAX_MEM_SET_LIST_LEN): New.
        (propagate_one_insn): Update mem_set_list_len.
        (invalidate_mems_from_autoinc): Likewise.
        (invalidate_mems_from_set): Likewise.
        (mark_used_regs): Likewise.
        (init_propagate_block_info): Likewise.  Stop collecting memories
        when we reach MAX_MEM_SET_LIST_LEN.
        (mark_set_1): Likewise.

Index: flow.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/flow.c,v
retrieving revision 1.368
diff -c -p -d -r1.368 flow.c
*** flow.c	2001/01/11 17:02:44	1.368
--- flow.c	2001/01/16 13:33:49
*************** struct propagate_block_info
*** 316,321 ****
--- 316,324 ----
    regset reg_cond_reg;
  #endif
  
+   /* The length of mem_set_list.  */
+   int mem_set_list_len;
+ 
    /* Non-zero if the value of CC0 is live.  */
    int cc0_live;
  
*************** struct propagate_block_info
*** 323,328 ****
--- 326,335 ----
    int flags;
  };
  
+ /* Maximum length of pbi->mem_set_list before we start dropping
+    new elements on the floor.  */
+ #define MAX_MEM_SET_LIST_LEN	100
+ 
  /* Store the data structures necessary for depth-first search.  */
  struct depth_first_search_dsS {
    /* stack for backtracking during the algorithm */
*************** propagate_one_insn (pbi, insn)
*** 3877,3883 ****
  
  	  /* Non-constant calls clobber memory.  */
  	  if (! CONST_CALL_P (insn))
! 	    free_EXPR_LIST_list (&pbi->mem_set_list);
  
  	  /* There may be extra registers to be clobbered.  */
  	  for (note = CALL_INSN_FUNCTION_USAGE (insn);
--- 3884,3893 ----
  
  	  /* Non-constant calls clobber memory.  */
  	  if (! CONST_CALL_P (insn))
! 	    {
! 	      free_EXPR_LIST_list (&pbi->mem_set_list);
! 	      pbi->mem_set_list_len = 0;
! 	    }
  
  	  /* There may be extra registers to be clobbered.  */
  	  for (note = CALL_INSN_FUNCTION_USAGE (insn);
*************** init_propagate_block_info (bb, live, loc
*** 3967,3972 ****
--- 3977,3983 ----
    pbi->bb = bb;
    pbi->reg_live = live;
    pbi->mem_set_list = NULL_RTX;
+   pbi->mem_set_list_len = 0;
    pbi->local_set = local_set;
    pbi->cond_local_set = cond_local_set;
    pbi->cc0_live = 0;
*************** init_propagate_block_info (bb, live, loc
*** 4111,4116 ****
--- 4122,4129 ----
  		  mem = shallow_copy_rtx (mem);
  #endif
  		pbi->mem_set_list = alloc_EXPR_LIST (0, mem, pbi->mem_set_list);
+ 		if (++pbi->mem_set_list_len >= MAX_MEM_SET_LIST_LEN)
+ 		  break;
  	      }
  	  }
      }
*************** invalidate_mems_from_autoinc (pbi, insn)
*** 4512,4517 ****
--- 4525,4531 ----
  		  else
  		    pbi->mem_set_list = next;
  		  free_EXPR_LIST_node (temp);
+ 		  pbi->mem_set_list_len--;
  		}
  	      else
  		prev = temp;
*************** invalidate_mems_from_set (pbi, exp)
*** 4547,4552 ****
--- 4561,4567 ----
  	  else
  	    pbi->mem_set_list = next;
  	  free_EXPR_LIST_node (temp);
+ 	  pbi->mem_set_list_len--;
  	}
        else
  	prev = temp;
*************** mark_set_1 (pbi, code, reg, cond, insn, 
*** 4743,4749 ****
        if (insn && GET_CODE (reg) == MEM)
  	invalidate_mems_from_autoinc (pbi, insn);
  
!       if (GET_CODE (reg) == MEM && ! side_effects_p (reg)
  	  /* ??? With more effort we could track conditional memory life.  */
  	  && ! cond
  	  /* We do not know the size of a BLKmode store, so we do not track
--- 4758,4765 ----
        if (insn && GET_CODE (reg) == MEM)
  	invalidate_mems_from_autoinc (pbi, insn);
  
!       if (pbi->mem_set_list_len < MAX_MEM_SET_LIST_LEN
! 	  && GET_CODE (reg) == MEM && ! side_effects_p (reg)
  	  /* ??? With more effort we could track conditional memory life.  */
  	  && ! cond
  	  /* We do not know the size of a BLKmode store, so we do not track
*************** mark_set_1 (pbi, code, reg, cond, insn, 
*** 4761,4766 ****
--- 4777,4783 ----
  	    reg = shallow_copy_rtx (reg);
  #endif
  	  pbi->mem_set_list = alloc_EXPR_LIST (0, reg, pbi->mem_set_list);
+ 	  pbi->mem_set_list_len++;
  	}
      }
  
*************** mark_used_regs (pbi, x, cond, insn)
*** 5859,5864 ****
--- 5876,5882 ----
  		      else
  			pbi->mem_set_list = next;
  		      free_EXPR_LIST_node (temp);
+ 		      pbi->mem_set_list_len--;
  		    }
  		  else
  		    prev = temp;
*************** mark_used_regs (pbi, x, cond, insn)
*** 5996,6002 ****
  	   So for now, just clear the memory set list and mark any regs
  	   we can find in ASM_OPERANDS as used.  */
  	if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
! 	  free_EXPR_LIST_list (&pbi->mem_set_list);
  
  	/* For all ASM_OPERANDS, we must traverse the vector of input operands.
  	   We can not just fall through here since then we would be confused
--- 6014,6023 ----
  	   So for now, just clear the memory set list and mark any regs
  	   we can find in ASM_OPERANDS as used.  */
  	if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
! 	  {
! 	    free_EXPR_LIST_list (&pbi->mem_set_list);
! 	    pbi->mem_set_list_len = 0;
! 	  }
  
  	/* For all ASM_OPERANDS, we must traverse the vector of input operands.
  	   We can not just fall through here since then we would be confused

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]