This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

crossjumping over CFG



Hi,
still more fat than I've originally hoped for :(.
Implements cross-jumping on the flowgraph.  Compared to the existing
crossjumping it identifies more opportunities and runs faster.  Also it avoids
combination of conditional statements when these have different branch
probability predictions.

Unlike the old crossjumping code, it does not take care about
profitability of crossjumping (whether number of matched instruction
exceeds number of extra jumps). This is because code layout will be changed
anyway and possible damage should be undone by BB reordering pass by
duplicating small basic blocks.

The resulting code is slightly longer than one with old crossjumping if
the condition disallowing combination of different probabilities is in
effect, otherwise it is noticeably shorter.

Unlike old crossjumper, I can now run it anytime, even before register
allocation.

The jump_optimize_minimal is now fully redundant with cleanup_cfg (0),
but still needs to be run to initialize JUMP_LABEL.  I will track this
later.

Fri Jul  6 15:37:04 CEST 2001  Jan Hubicka  <jh@suse.cz>
	* basic-block.h (CLEANUP_EXPENSIVE, CLEANUP_CROSSJUMP,
	CLEANUP_POST_REGSTACK): New constants.
	* except.c (finish_eh_generation): Update call of cleanup_cfg,
	* jump.c (rtx_renumbered_equal_p): Handle 't' fields.
	* output.h (cleanup_cfg): Update prototype.
	* reg-stack.c (reg_to_stack): Use cleanup_cfg instead of jump_optimize
	* sibcall.c (optimize_sibling_and_tail_recursive_call): Update 
	cleanup_cfg call; kill misleading comment.
	* toplev.c (rest_of_compilation): Update all cleanup_cfg calls.
	* flow.c (merge_blocks, try_optimize_cfg, cleanup_cfg): Accept mode
	parameter; control optimizations performed using it.
	(flow_find_cross_jump, outgoing_edges_match, try_crossjump_to_edge,
	try_crossjump_bb): New functions.


*** /windows/C/honza/gcc/gcc/basic-block.h	Sat Jun 23 02:07:38 2001
--- basic-block.h	Wed Jul  4 23:26:41 2001
*************** enum update_life_extent
*** 513,518 ****
--- 521,531 ----
  #define PROP_AUTOINC		32	/* Create autoinc mem references.  */
  #define PROP_FINAL		63	/* All of the above.  */
  
+ #define CLEANUP_EXPENSIVE	1	/* Do relatively expensive optimizations
+ 					   except for edge forwarding */
+ #define CLEANUP_CROSSJUMP	2	/* Do crossjumping.  */
+ #define CLEANUP_POST_REGSTACK	4	/* We run after reg-stack and need
+ 					   to take care of REG_DEAD notes.  */
  /* Flags for loop discovery.  */
  
  #define LOOP_TREE		1 	/* Build loop hierarchy tree.  */
*** /windows/C/honza/gcc/gcc/except.c	Tue Jun 19 02:08:16 2001
--- except.c	Thu Jul  5 19:29:22 2001
*************** finish_eh_generation ()
*** 2354,2360 ****
  
    jump_optimize_minimal (get_insns ());
    find_basic_blocks (get_insns (), max_reg_num (), 0);
!   cleanup_cfg ();
  
    /* These registers are used by the landing pads.  Make sure they
       have been generated.  */
--- 2354,2360 ----
  
    jump_optimize_minimal (get_insns ());
    find_basic_blocks (get_insns (), max_reg_num (), 0);
!   cleanup_cfg (0);
  
    /* These registers are used by the landing pads.  Make sure they
       have been generated.  */
*************** finish_eh_generation ()
*** 2377,2383 ****
    find_exception_handler_labels ();
    jump_optimize_minimal (get_insns ());
    find_basic_blocks (get_insns (), max_reg_num (), 0);
!   cleanup_cfg ();
  }
  
  /* This section handles removing dead code for flow.  */
--- 2377,2383 ----
    find_exception_handler_labels ();
    jump_optimize_minimal (get_insns ());
    find_basic_blocks (get_insns (), max_reg_num (), 0);
!   cleanup_cfg (0);
  }
  
  /* This section handles removing dead code for flow.  */
*** /windows/C/honza/gcc/gcc/jump.c	Mon Jun 11 18:05:30 2001
--- jump.c	Wed Jul  4 23:25:05 2001
*************** rtx_renumbered_equal_p (x, y)
*** 3666,3671 ****
--- 3667,3677 ----
  
  	case 'i':
  	  if (XINT (x, i) != XINT (y, i))
+ 	    return 0;
+ 	  break;
+ 
+ 	case 't':
+ 	  if (XTREE (x, i) != XTREE (y, i))
  	    return 0;
  	  break;
  
*** /windows/C/honza/gcc/gcc/output.h	Wed Apr 18 06:41:44 2001
--- output.h	Wed Jul  4 23:28:16 2001
*************** extern void allocate_for_life_analysis	P
*** 132,138 ****
  extern int regno_uninitialized		PARAMS ((int));
  extern int regno_clobbered_at_setjmp	PARAMS ((int));
  extern void find_basic_blocks		PARAMS ((rtx, int, FILE *));
! extern void cleanup_cfg			PARAMS ((void));
  extern void check_function_return_warnings PARAMS ((void));
  #endif
  
--- 132,138 ----
  extern int regno_uninitialized		PARAMS ((int));
  extern int regno_clobbered_at_setjmp	PARAMS ((int));
  extern void find_basic_blocks		PARAMS ((rtx, int, FILE *));
! extern void cleanup_cfg			PARAMS ((int));
  extern void check_function_return_warnings PARAMS ((void));
  #endif
  
*** /windows/C/honza/gcc/gcc/reg-stack.c	Tue May  1 17:40:34 2001
--- reg-stack.c	Thu Jul  5 00:54:48 2001
*************** reg_to_stack (first, file)
*** 476,485 ****
  		    "stack_regs_mentioned cache");
  
    if (convert_regs (file) && optimize)
!     {
!       jump_optimize (first, JUMP_CROSS_JUMP_DEATH_MATTERS,
! 		     !JUMP_NOOP_MOVES, !JUMP_AFTER_REGSCAN);
!     }
  
    /* Clean up.  */
    VARRAY_FREE (stack_regs_mentioned_data);
--- 476,482 ----
  		    "stack_regs_mentioned cache");
  
    if (convert_regs (file) && optimize)
!     cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_CROSSJUMP | CLEANUP_POST_REGSTACK);
  
    /* Clean up.  */
    VARRAY_FREE (stack_regs_mentioned_data);
*** /windows/C/honza/gcc/gcc/sibcall.c	Mon Jun 11 02:06:52 2001
--- sibcall.c	Thu Jul  5 01:00:50 2001
*************** optimize_sibling_and_tail_recursive_call
*** 565,579 ****
       ahead and find all the EH labels.  */
    find_exception_handler_labels ();
  
!   /* Run a jump optimization pass to clean up the CFG.  We primarily want
!      this to thread jumps so that it is obvious which blocks jump to the
!      epilouge.  */
!   jump_optimize_minimal (insns);
! 
    /* We need cfg information to determine which blocks are succeeded
       only by the epilogue.  */
    find_basic_blocks (insns, max_reg_num (), 0);
!   cleanup_cfg ();
  
    /* If there are no basic blocks, then there is nothing to do.  */
    if (n_basic_blocks == 0)
--- 565,575 ----
       ahead and find all the EH labels.  */
    find_exception_handler_labels ();
  
!   jump_optimize_minimal (insns);
    /* We need cfg information to determine which blocks are succeeded
       only by the epilogue.  */
    find_basic_blocks (insns, max_reg_num (), 0);
!   cleanup_cfg (0);
  
    /* If there are no basic blocks, then there is nothing to do.  */
    if (n_basic_blocks == 0)
*** /windows/C/honza/gcc/gcc/toplev.c	Wed Jun 27 02:06:16 2001
--- toplev.c	Thu Jul  5 00:50:09 2001
*************** rest_of_compilation (decl)
*** 2952,2958 ****
    if (optimize > 0)
      {
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!       cleanup_cfg ();
  
        /* ??? Run if-conversion before delete_null_pointer_checks,
           since the later does not preserve the CFG.  This should
--- 2952,2958 ----
    if (optimize > 0)
      {
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!       cleanup_cfg (CLEANUP_EXPENSIVE);
  
        /* ??? Run if-conversion before delete_null_pointer_checks,
           since the later does not preserve the CFG.  This should
*************** rest_of_compilation (decl)
*** 3022,3028 ****
  	  timevar_push (TV_JUMP);
  	  find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
  
! 	  cleanup_cfg ();
  
  	  delete_null_pointer_checks (insns);
  	  timevar_pop (TV_JUMP);
--- 3022,3028 ----
  	  timevar_push (TV_JUMP);
  	  find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
  
! 	  cleanup_cfg (CLEANUP_EXPENSIVE);
  
  	  delete_null_pointer_checks (insns);
  	  timevar_pop (TV_JUMP);
*************** rest_of_compilation (decl)
*** 3053,3059 ****
        open_dump_file (DFI_ssa, decl);
  
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!       cleanup_cfg ();
        convert_to_ssa ();
  
        close_dump_file (DFI_ssa, print_rtl_with_bb, insns);
--- 3053,3059 ----
        open_dump_file (DFI_ssa, decl);
  
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!       cleanup_cfg (CLEANUP_EXPENSIVE);
        convert_to_ssa ();
  
        close_dump_file (DFI_ssa, print_rtl_with_bb, insns);
*************** rest_of_compilation (decl)
*** 3108,3114 ****
        open_dump_file (DFI_gcse, decl);
  
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!       cleanup_cfg ();
        tem = gcse_main (insns, rtl_dump_file);
  
        save_csb = flag_cse_skip_blocks;
--- 3108,3114 ----
        open_dump_file (DFI_gcse, decl);
  
        find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!       cleanup_cfg (CLEANUP_EXPENSIVE);
        tem = gcse_main (insns, rtl_dump_file);
  
        save_csb = flag_cse_skip_blocks;
*************** rest_of_compilation (decl)
*** 3212,3218 ****
  	  timevar_push (TV_IFCVT);
  
  	  find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
! 	  cleanup_cfg ();
  	  if_convert (0);
  
  	  timevar_pop(TV_IFCVT);
--- 3212,3218 ----
  	  timevar_push (TV_IFCVT);
  
  	  find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
! 	  cleanup_cfg (CLEANUP_EXPENSIVE);
  	  if_convert (0);
  
  	  timevar_pop(TV_IFCVT);
*************** rest_of_compilation (decl)
*** 3258,3264 ****
    open_dump_file (DFI_cfg, decl);
  
    find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!   cleanup_cfg ();
    check_function_return_warnings ();
  
    /* It may make more sense to mark constant functions after dead code is
--- 3258,3264 ----
    open_dump_file (DFI_cfg, decl);
  
    find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
!   cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
    check_function_return_warnings ();
  
    /* It may make more sense to mark constant functions after dead code is
*************** rest_of_compilation (decl)
*** 3341,3347 ****
  
  	  timevar_push (TV_FLOW);
  	  find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
! 	  cleanup_cfg ();
  
  	  /* Blimey.  We've got to have the CFG up to date for the call to
  	     if_convert below.  However, the random deletion of blocks
--- 3341,3347 ----
  
  	  timevar_push (TV_FLOW);
  	  find_basic_blocks (insns, max_reg_num (), rtl_dump_file);
! 	  cleanup_cfg (CLEANUP_EXPENSIVE);
  
  	  /* Blimey.  We've got to have the CFG up to date for the call to
  	     if_convert below.  However, the random deletion of blocks
*************** rest_of_compilation (decl)
*** 3552,3558 ****
  
    if (optimize)
      {
!       cleanup_cfg ();
        life_analysis (insns, rtl_dump_file, PROP_FINAL);
  
        /* This is kind of a heuristic.  We need to run combine_stack_adjustments
--- 3552,3558 ----
  
    if (optimize)
      {
!       cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_CROSSJUMP);
        life_analysis (insns, rtl_dump_file, PROP_FINAL);
  
        /* This is kind of a heuristic.  We need to run combine_stack_adjustments
*** flow.c.new	Mon Jul  2 13:20:22 2001
--- flow.c	Fri Jul  6 18:00:51 2001
*************** typedef struct depth_first_search_dsS *d
*** 361,366 ****
--- 361,371 ----
    print_rtl_and_abort_fcn (__FILE__, __LINE__, __FUNCTION__)
  
  /* Forward declarations */
+ static bool try_crossjump_to_edge 	PARAMS ((int, edge, edge));
+ static bool try_crossjump_bb		PARAMS ((int, basic_block));
+ static bool outgoing_edges_match	PARAMS ((basic_block, basic_block));
+ static int flow_find_cross_jump		PARAMS ((int, basic_block, basic_block,
+ 						 rtx *, rtx *));
  static int count_basic_blocks		PARAMS ((rtx));
  static void find_basic_blocks_1		PARAMS ((rtx));
  static rtx find_label_refs		PARAMS ((rtx, rtx));
*************** static int merge_blocks_move_predecessor
*** 380,387 ****
  							  basic_block));
  static int merge_blocks_move_successor_nojumps PARAMS ((basic_block,
  							basic_block));
! static int merge_blocks			PARAMS ((edge,basic_block,basic_block));
! static bool try_optimize_cfg		PARAMS ((void));
  static bool forwarder_block_p		PARAMS ((basic_block));
  static bool can_fallthru		PARAMS ((basic_block, basic_block));
  static bool try_redirect_by_replacing_jump PARAMS ((edge, basic_block));
--- 385,393 ----
  							  basic_block));
  static int merge_blocks_move_successor_nojumps PARAMS ((basic_block,
  							basic_block));
! static int merge_blocks			PARAMS ((edge,basic_block,basic_block,
! 						 int));
! static bool try_optimize_cfg		PARAMS ((int));
  static bool forwarder_block_p		PARAMS ((basic_block));
  static bool can_fallthru		PARAMS ((basic_block, basic_block));
  static bool try_redirect_by_replacing_jump PARAMS ((edge, basic_block));
*************** find_basic_blocks_1 (f)
*** 1012,1021 ****
  /* Tidy the CFG by deleting unreachable code and whatnot.  */
  
  void
! cleanup_cfg ()
  {
    delete_unreachable_blocks ();
!   if (try_optimize_cfg ())
      delete_unreachable_blocks ();
    mark_critical_edges ();
  
--- 997,1007 ----
  /* Tidy the CFG by deleting unreachable code and whatnot.  */
  
  void
! cleanup_cfg (mode)
!      int mode;
  {
    delete_unreachable_blocks ();
!   if (try_optimize_cfg (mode))
      delete_unreachable_blocks ();
    mark_critical_edges ();
  
*************** merge_blocks (e, b, c)
*** 2810,2818 ****
  
        return 1;
      }
!   else
      {
        edge tmp_edge, c_fallthru_edge;
        int c_has_outgoing_fallthru;
        int b_has_incoming_fallthru;
  
--- 2921,2931 ----
  
        return 1;
      }
!   /* Otherwise we will need to move code around.  Do that only if expensive
!      transformations are allowed.  */
!   else if (mode & CLEANUP_EXPENSIVE)
      {
!       edge tmp_edge, c_fallthru_edge;
        int c_has_outgoing_fallthru;
        int b_has_incoming_fallthru;
  
*************** try_forward_edges (b)
*** 2967,2983 ****
    return changed;
  }
  
  /* Do simple CFG optimizations - basic block merging, simplifying of jump
     instructions etc.
  
     Return nonzero in case some optimizations matched.  */
  
  static bool
! try_optimize_cfg ()
  {
    int i;
    bool changed_overall = 0;
    bool changed;
  
    /* Attempt to merge blocks as made possible by edge removal.  If a block
       has only one successor, and the successor has only one predecessor,
--- 3107,3628 ----
    return changed;
  }
  
+ /* Compare the instructions before end of B1 and B2
+    to find an opportunity for cross jumping.
+    (This means detecting identical sequences of insns)
+    Find the longest possible equivalent sequences
+    and store the first insns of those sequences into *F1 and *F2
+    and return length of that sequence.
+ 
+    To simplify callers of this function, in the case
+    all instructions were matched, always store bb->head.  */
+ 
+ static int
+ flow_find_cross_jump (mode, bb1, bb2, f1, f2)
+      int mode;
+      basic_block bb1, bb2;
+      rtx *f1, *f2;
+ {
+   rtx i1 = onlyjump_p (bb1->end) ? PREV_INSN (bb1->end): bb1->end;
+   rtx i2 = onlyjump_p (bb2->end) ? PREV_INSN (bb2->end): bb2->end;
+   rtx p1, p2;
+   int lose = 0;
+   int ninsns = 0;
+   rtx last1 = bb1->end, last2 = bb2->end;
+   rtx afterlast1 = bb1->end, afterlast2 = bb2->end;
+ 
+   /* In case basic block ends by nontrivial jump instruction, count it as
+      an instruction.  Do not count an unconditional jump, as it will be
+      removed by basic_block reordering pass in case it is on the common
+      path.  */
+   if (bb1->succ->succ_next && bb1->end != i1)
+     ninsns++;
+ 
+   for (;i1 != bb1->head; i1 = PREV_INSN (i1))
+     {
+       /* Ignore notes.  */
+       if (GET_CODE (i1) == NOTE)
+ 	continue;
+       while ((GET_CODE (i2) == NOTE && i2 != bb2->head))
+ 	i2 = PREV_INSN (i2);
+ 
+       if (GET_CODE (i1) != GET_CODE (i2))
+ 	break;
+ 
+       p1 = PATTERN (i1);
+       p2 = PATTERN (i2);
+ 
+       /* If this is a CALL_INSN, compare register usage information.
+ 	 If we don't check this on stack register machines, the two
+ 	 CALL_INSNs might be merged leaving reg-stack.c with mismatching
+ 	 numbers of stack registers in the same basic block.
+ 	 If we don't check this on machines with delay slots, a delay slot may
+ 	 be filled that clobbers a parameter expected by the subroutine.
+ 
+ 	 ??? We take the simple route for now and assume that if they're
+ 	 equal, they were constructed identically.  */
+ 
+       if (GET_CODE (i1) == CALL_INSN
+ 	  && ! rtx_equal_p (CALL_INSN_FUNCTION_USAGE (i1),
+ 			    CALL_INSN_FUNCTION_USAGE (i2)))
+ 	lose = 1;
+ 
+ #ifdef STACK_REGS
+       /* If cross_jump_death_matters is not 0, the insn's mode
+ 	 indicates whether or not the insn contains any stack-like
+ 	 regs.  */
+ 
+       if (!lose && (mode & CLEANUP_POST_REGSTACK ) && stack_regs_mentioned (i1))
+ 	{
+ 	  /* If register stack conversion has already been done, then
+ 	     death notes must also be compared before it is certain that
+ 	     the two instruction streams match.  */
+ 
+ 	  rtx note;
+ 	  HARD_REG_SET i1_regset, i2_regset;
+ 
+ 	  CLEAR_HARD_REG_SET (i1_regset);
+ 	  CLEAR_HARD_REG_SET (i2_regset);
+ 
+ 	  for (note = REG_NOTES (i1); note; note = XEXP (note, 1))
+ 	    if (REG_NOTE_KIND (note) == REG_DEAD
+ 		&& STACK_REG_P (XEXP (note, 0)))
+ 	      SET_HARD_REG_BIT (i1_regset, REGNO (XEXP (note, 0)));
+ 
+ 	  for (note = REG_NOTES (i2); note; note = XEXP (note, 1))
+ 	    if (REG_NOTE_KIND (note) == REG_DEAD
+ 		&& STACK_REG_P (XEXP (note, 0)))
+ 	      SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0)));
+ 
+ 	  GO_IF_HARD_REG_EQUAL (i1_regset, i2_regset, done);
+ 
+ 	  lose = 1;
+ 
+ 	done:
+ 	  ;
+ 	}
+ #endif
+ 
+       /* Don't allow old-style asm or volatile extended asms to be accepted
+ 	 for cross jumping purposes.  It is conceptually correct to allow
+ 	 them, since cross-jumping preserves the dynamic instruction order
+ 	 even though it is changing the static instruction order.  However,
+ 	 if an asm is being used to emit an assembler pseudo-op, such as
+ 	 the MIPS `.set reorder' pseudo-op, then the static instruction order
+ 	 matters and it must be preserved.
+ 
+         ??? Does this still hold?  We reorder insns in many other
+ 	parts of compiler w/o extra care.  */
+       if (GET_CODE (p1) == ASM_INPUT || GET_CODE (p2) == ASM_INPUT
+ 	  || (GET_CODE (p1) == ASM_OPERANDS && MEM_VOLATILE_P (p1))
+ 	  || (GET_CODE (p2) == ASM_OPERANDS && MEM_VOLATILE_P (p2)))
+ 	lose = 1;
+ 
+       if (lose || GET_CODE (p1) != GET_CODE (p2)
+ 	  || ! rtx_renumbered_equal_p (p1, p2))
+ 	{
+ 	  /* The following code helps take care of G++ cleanups.  */
+ 	  rtx equiv1;
+ 	  rtx equiv2;
+ 
+ 	  if (!lose && GET_CODE (p1) == GET_CODE (p2)
+ 	      && ((equiv1 = find_reg_note (i1, REG_EQUAL, NULL_RTX)) != 0
+ 		  || (equiv1 = find_reg_note (i1, REG_EQUIV, NULL_RTX)) != 0)
+ 	      && ((equiv2 = find_reg_note (i2, REG_EQUAL, NULL_RTX)) != 0
+ 		  || (equiv2 = find_reg_note (i2, REG_EQUIV, NULL_RTX)) != 0)
+ 	      /* If the equivalences are not to a constant, they may
+ 		 reference pseudos that no longer exist, so we can't
+ 		 use them.  */
+ 	      && CONSTANT_P (XEXP (equiv1, 0))
+ 	      && rtx_equal_p (XEXP (equiv1, 0), XEXP (equiv2, 0)))
+ 	    {
+ 	      rtx s1 = single_set (i1);
+ 	      rtx s2 = single_set (i2);
+ 	      if (s1 != 0 && s2 != 0
+ 		  && rtx_renumbered_equal_p (SET_DEST (s1), SET_DEST (s2)))
+ 		{
+ 		  validate_change (i1, &SET_SRC (s1), XEXP (equiv1, 0), 1);
+ 		  validate_change (i2, &SET_SRC (s2), XEXP (equiv2, 0), 1);
+ 		  if (! rtx_renumbered_equal_p (p1, p2))
+ 		    cancel_changes (0);
+ 		  else if (apply_change_group ())
+ 		    goto win;
+ 		}
+ 	    }
+ 
+ 	  /* Insns fail to match; cross jumping is limited to the following
+ 	     insns.  */
+ 
+ #ifdef HAVE_cc0
+ 	  /* Don't allow the insn after a compare to be shared by
+ 	     cross-jumping unless the compare is also shared.
+ 	     Here, if either of these non-matching insns is a compare,
+ 	     exclude the following insn from possible cross-jumping.  */
+ 	  if (sets_cc0_p (p1) || sets_cc0_p (p2))
+ 	    last1 = afterlast1, last2 = afterlast2, ninsns--;
+ #endif
+ 	  break;
+ 	}
+ 
+     win:
+       if (GET_CODE (p1) != USE && GET_CODE (p1) != CLOBBER)
+ 	{
+ 	  /* Ok, this insn is potentially includable in a cross-jump here.  */
+ 	  afterlast1 = last1, afterlast2 = last2;
+ 	  last1 = i1, last2 = i2;
+           ninsns++;
+ 	}
+ 
+       if (i2 == bb2->end)
+ 	break;
+       i2 = PREV_INSN (i2);
+     }
+ 
+   /* Skip the notes to reach potential head of basic block.  */
+   while (last1 != bb1->head && GET_CODE (PREV_INSN (last1)) == NOTE)
+     last1 = PREV_INSN (last1);
+   if (last1 != bb1->head && GET_CODE (PREV_INSN (last1)) == CODE_LABEL)
+     last1 = PREV_INSN (last1);
+   while (last2 != bb2->head && GET_CODE (PREV_INSN (last2)) == NOTE)
+     last2 = PREV_INSN (last2);
+   if (last2 != bb2->head && GET_CODE (PREV_INSN (last2)) == CODE_LABEL)
+     last2 = PREV_INSN (last2);
+ 
+   *f1 = last1;
+   *f2 = last2;
+   return ninsns;
+ }
+ 
+ /* Return true iff outgoing edges of BB1 and BB2 match, together with
+    the branch instruction.  This means that if we commonize the control
+    flow before end of the basic block, the semantic remains unchanged.  
+ 
+    Assume that at least one outgoing edge is forwarded to the same
+    location.  */
+ static bool
+ outgoing_edges_match (bb1, bb2)
+      basic_block bb1;
+      basic_block bb2;
+ {
+   /* bb1 has one successor, so we are seeing an unconditional jump.  */
+   if (bb1->succ && !bb1->succ->succ_next)
+     return (bb2->succ && !bb2->succ->succ_next);
+ 
+   /* Match conditional jumps - this may get tricky when fallthru and branch
+      edges are crossed.  */
+   if (bb1->succ && bb1->succ->succ_next && !bb1->succ->succ_next->succ_next
+       && any_condjump_p (bb1->end))
+     {
+       edge b1, f1, b2, f2;
+       bool reverse, match;
+       rtx set1, set2, cond1, cond2;
+       enum rtx_code code1, code2;
+ 
+       if (!bb2->succ || !bb2->succ->succ_next
+ 	  || bb1->succ->succ_next->succ_next || !any_condjump_p (bb2->end))
+ 	return false;
+       b1 = BRANCH_EDGE (bb1);
+       b2 = BRANCH_EDGE (bb2);
+       f1 = FALLTHRU_EDGE (bb1);
+       f2 = FALLTHRU_EDGE (bb2);
+ 
+       /* Get around possible forwarders on fallthru edges.  Other cases
+          should be optimized out already.  */
+       if (forwarder_block_p (f1->dest))
+ 	f1 = f1->dest->succ;
+       if (forwarder_block_p (f2->dest))
+ 	f2 = f2->dest->succ;
+ 
+       /* To simplify use of this function, return false if there are
+ 	 unneeded forwarder blocks.  These will get eliminated later
+ 	 during cleanup_cfg.  */
+       if (forwarder_block_p (f1->dest)
+ 	  || forwarder_block_p (f2->dest)
+ 	  || forwarder_block_p (b1->dest)
+ 	  || forwarder_block_p (b2->dest))
+ 	return false;
+ 
+       if (f1->dest == f2->dest && b1->dest == b2->dest)
+ 	reverse = false;
+       else if (f1->dest == b2->dest && b1->dest == f2->dest)
+ 	reverse = true;
+       else
+ 	return false;
+ 
+       set1 = pc_set (bb1->end);
+       set2 = pc_set (bb2->end);
+       if ((XEXP (SET_SRC (set1), 1) == pc_rtx)
+ 	  != (XEXP (SET_SRC (set2), 1) == pc_rtx))
+ 	reverse = !reverse;
+ 
+       cond1 = XEXP (SET_SRC (set1), 0);
+       cond2 = XEXP (SET_SRC (set2), 0);
+       code1 = GET_CODE (cond1);
+       if (reverse)
+ 	code2 = reversed_comparison_code (cond2, bb2->end);
+       else
+ 	code2 = GET_CODE (cond2);
+ 
+       /* See if we have a (cross) match in the codes and operands.  */
+       match = ((code1 == code2
+ 		&& rtx_renumbered_equal_p (XEXP (cond1, 0), XEXP (cond2, 0))
+ 		&& rtx_renumbered_equal_p (XEXP (cond1, 1), XEXP (cond2, 1)))
+ 	       || (code1 == swap_condition (code2)
+ 		   && rtx_renumbered_equal_p (XEXP (cond1, 1),
+ 					      XEXP (cond2, 0))
+ 		   && rtx_renumbered_equal_p (XEXP (cond1, 0),
+ 					      XEXP (cond2, 1))));
+       /* In case of returning true, we will commonize the flow.
+ 	 This also means that both branches will share a single branch
+ 	 prediction algorithm.  To accept the match, require the resulting
+ 	 branch to still be well predictable.  */
+       if (match && !optimize_size)
+ 	{
+ 	  rtx note1, note2;
+ 	  int prob1, prob2;
+ 	  note1 = find_reg_note (bb1->end, REG_BR_PROB, 0);
+ 	  note2 = find_reg_note (bb2->end, REG_BR_PROB, 0);
+ 	  if (!note1 || !note2)
+ 	    return false;
+ 	  prob1 = INTVAL (XEXP (note1, 0));
+ 	  prob2 = INTVAL (XEXP (note2, 0));
+ 	  if (reverse)
+ 	    prob2 = REG_BR_PROB_BASE - prob2;
+ 
+ 	  /* ??? Later we should use basic block frequency to allow merging
+ 	     in the infrequent blocks, but at the moment it is not
+ 	     available when cleanup_cfg is run.  */
+ 	  if (abs (prob1 - prob2) > REG_BR_PROB_BASE / 90)
+ 	    return false;
+ 	}
+       if (rtl_dump_file && match)
+ 	fprintf (rtl_dump_file, "Conditionals in bb %i and %i match.\n",
+ 		 bb1->index, bb2->index);
+       return match;
+     }
+   /* ??? We can handle computed jumps too.  This may be important for
+      inlined functions containing switch statements.  Also jumps w/o
+      fallthru edges can be handled by simply matching whole insn.  */
+   return false;
+ }
+ 
+ /* Assume that e1 and e2 are the edges from the same basic block.
+    Attempt to find common code on both paths and forward control flow
+    from the first path to second if such exist.  */
+ static bool
+ try_crossjump_to_edge (mode, e1, e2)
+      int mode;
+      edge e1, e2;
+ {
+   int nmatch;
+   basic_block redirect_to;
+   rtx newpos1, newpos2;
+   rtx first, last;
+   edge s;
+   rtx note;
+   rtx label;
+   rtx barrier;
+ 
+   /* Skip forwarder blocks.  This is needed to avoid forced forwarders
+      after conditional jumps from making us miss the optimization.
+ 
+      We don't need to worry about multiple entry or chained forwarders, as they
+      will be optimized out.  */
+   if (e1->src->pred && !e1->src->pred->pred_next
+       && forwarder_block_p (e1->src))
+     e1 = e1->src->pred;
+   if (e2->src->pred && !e2->src->pred->pred_next
+       && forwarder_block_p (e2->src))
+     e2 = e2->src->pred;
+ 
+   if (e1->src == ENTRY_BLOCK_PTR || e2->src == ENTRY_BLOCK_PTR)
+     return false;
+   if (e1->src == e2->src)
+     return false;
+ 
+   /* Seeing more than 1 forwarder blocks would confuse us later...  */
+   if (forwarder_block_p (e1->dest)
+       && forwarder_block_p (e1->dest->succ->dest))
+     return false;
+   if (forwarder_block_p (e2->dest)
+       && forwarder_block_p (e2->dest->succ->dest))
+     return false;
+   /* ... similarly as seeing dead code...  */
+   if (!e1->src->pred || !e2->src->pred)
+     return false;
+   /* ...similarly non-jump edges.  */
+   if (e1->flags & EDGE_COMPLEX)
+     return false;
+ 
+   if (!outgoing_edges_match (e1->src, e2->src))
+     return false;
+   nmatch = flow_find_cross_jump (mode, e1->src, e2->src, &newpos1, &newpos2);
+   if (!nmatch)
+     return false;
+ 
+   /* Avoid splitting if possible.  */
+   if (newpos2 == e2->src->head)
+     redirect_to = e2->src;
+   else
+     {
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "Splitting bb %i before %i insns\n",
+ 		 e2->src->index, nmatch);
+       redirect_to = split_block (e2->src, PREV_INSN (newpos2))->dest;
+     }
+ 
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file,
+ 	     "Cross jumping from bb %i to bb %i. %i insns commonized\n",
+ 	     e1->src->index, e2->src->index, nmatch);
+ 
+   redirect_to->count += e1->src->count;
+   redirect_to->frequency += e1->src->frequency;
+ 
+   /* Recompute the frequencies and counts of outgoing edges.  */
+   for (s = redirect_to->succ; s; s = s->succ_next)
+     {
+       edge s2;
+       basic_block d = (forwarder_block_p (s->dest) ? s->dest->succ->dest
+ 		       : s->dest);
+       for (s2 = e1->src->succ;; s2 = s2->succ_next)
+ 	{
+ 	  basic_block d2 =
+ 	    (forwarder_block_p (s2->dest) ? s2->dest->succ->dest : s2->dest);
+ 	  if (d == d2)
+ 	    break;
+ 	}
+       s->count += s2->count;
+ 
+       /* Take care to update possible forwarder blocks.  We took care
+          that there is no more than one in chain, so we can't run
+          into infinite loop.  */
+       if (forwarder_block_p (s->dest))
+ 	{
+ 	  s->dest->succ->count += s2->count;
+ 	  s->dest->count += s2->count;
+ 	  s->dest->frequency += ((s->probability * s->src->frequency)
+ 				 / REG_BR_PROB_BASE);
+ 	}
+       if (forwarder_block_p (s2->dest))
+ 	{
+ 	  s2->dest->succ->count -= s2->count;
+ 	  s2->dest->count -= s2->count;
+ 	  s2->dest->frequency -= ((s->probability * s->src->frequency)
+ 				  / REG_BR_PROB_BASE);
+ 	}
+       if (!redirect_to->frequency && !e1->src->frequency)
+ 	s->probability = (s->probability + s2->probability) / 2;
+       else
+ 	s->probability =
+ 	  ((s->probability * redirect_to->frequency +
+ 	    s2->probability * e1->src->frequency)
+ 	   / (redirect_to->frequency + e1->src->frequency));
+     }
+ 
+   /* FIXME: enable once probabilities are fetched properly at
+      CFG build.  */
+ #if 0
+   note = find_reg_note (redirect_to->end, REG_BR_PROB, 0);
+   if (note)
+     XEXP (note, 0) = GEN_INT (BRANCH_EDGE (redirect_to)->probability);
+ #endif
+ 
+   /* Skip possible basic block header.  */
+   first = newpos1;
+   if (GET_CODE (first) == CODE_LABEL)
+     first = NEXT_INSN (first);
+   if (GET_CODE (first) == NOTE)
+     first = NEXT_INSN (first);
+ 
+   last = e1->src->end;
+ 
+   /* Now emit the jump insn.   */
+   label = block_label (redirect_to);
+   e1->src->end = emit_jump_insn_after (gen_jump (label), e1->src->end);
+   JUMP_LABEL (e1->src->end) = label;
+   LABEL_NUSES (label)++;
+   if (basic_block_for_insn)
+     set_block_for_insn (e1->src->end, e1->src);
+ 
+   flow_delete_insn_chain (first, last);
+ 
+   barrier = next_nonnote_insn (e1->src->end);
+   if (!barrier || GET_CODE (barrier) != BARRIER)
+     emit_barrier_after (e1->src->end);
+ 
+   /* Update CFG.  */
+   while (e1->src->succ->succ_next)
+     remove_edge (e1->src->succ);
+   e1->src->succ->flags = 0;
+   redirect_edge_succ (e1->src->succ, redirect_to);
+   return true;
+ }
+ 
+ /* Attempt to implement cross jumping.  This means moving one or more branches
+    to BB earlier to BB's predecessors, commonizing some code.  */
+ static bool
+ try_crossjump_bb (mode, bb)
+      int mode;
+      basic_block bb;
+ {
+   edge e, e2, nexte2, nexte, fallthru;
+   bool changed = false;
+ 
+   /* In case basic block has single predecessor, do nothing.  */
+   if (!bb->pred || !bb->pred->pred_next)
+     return false;
+ 
+   /* It is always cheapest to jump into fallthru edge.  */
+   for (fallthru = bb->pred; fallthru; fallthru = fallthru->pred_next)
+     if (fallthru->flags & EDGE_FALLTHRU)
+       break;
+ 
+   for (e = bb->pred; e; e = nexte)
+     {
+       nexte = e->pred_next;
+       /* First of all prioritize the fallthru edge, as the cheapest.  */
+       if (e != fallthru && fallthru
+ 	  && try_crossjump_to_edge (mode, e, fallthru))
+ 	changed = true, nexte = bb->pred;
+       else
+ 	/* Try to match in other incoming edges.
+ 
+ 	   Loop only over the earlier edges, as the later ones
+ 	   will be examined in the opposite direction.  */
+ 	for (e2 = bb->pred; e2 != e; e2 = nexte2)
+ 	  {
+ 	    nexte2 = e2->pred_next;
+ 	    if (e2 != fallthru && try_crossjump_to_edge (mode, e, e2))
+ 	      {
+ 		changed = true;
+ 		nexte = bb->pred;
+ 
+ 		/* We may've removed the fallthru edge.  */
+ 		for (fallthru = bb->pred; fallthru;
+ 		     fallthru = fallthru->pred_next)
+ 		  if (fallthru->flags & EDGE_FALLTHRU)
+ 		    break;
+ 		break;
+ 	      }
+ 	  }
+     }
+   return changed;
+ }
+ 
  /* Do simple CFG optimizations - basic block merging, simplifying of jump
     instructions etc.
  
     Return nonzero in case some optimizations matched.  */
  
  static bool
! try_optimize_cfg (mode)
!      int mode;
  {
    int i;
    bool changed_overall = 0;
    bool changed;
+   int iterations = 0;
  
    /* Attempt to merge blocks as made possible by edge removal.  If a block
       has only one successor, and the successor has only one predecessor,
*************** try_optimize_cfg ()
*** 2986,2991 ****
--- 3631,3640 ----
    do
      {
        changed = 0;
+       iterations++;
+       if (rtl_dump_file)
+ 	fprintf (rtl_dump_file, "\n\ntry_optimize_cfg iteration %i\n\n",
+ 		 iterations);
        for (i = 0; i < n_basic_blocks;)
  	{
  	  basic_block c, b = BASIC_BLOCK (i);
*************** try_optimize_cfg ()
*** 3005,3010 ****
--- 3654,3660 ----
  	  /* The fallthru forwarder block can be deleted.  */
  	  if (b->pred->pred_next == NULL
  	      && forwarder_block_p (b)
+ 	      && n_basic_blocks > 1
  	      && (b->pred->flags & EDGE_FALLTHRU)
  	      && (b->succ->flags & EDGE_FALLTHRU))
  	    {
*************** try_optimize_cfg ()
*** 3024,3035 ****
  		 && (s->flags & EDGE_EH) == 0
  		 && (c = s->dest) != EXIT_BLOCK_PTR
  		 && c->pred->pred_next == NULL
! 		 /* If the jump insn has side effects, we can't kill the edge.  */
  		 && (GET_CODE (b->end) != JUMP_INSN
! 		     || onlyjump_p (b->end)) && merge_blocks (s, b, c))
  	    changed_here = 1;
  
! 	  if (try_simplify_condjump (b))
  	    changed_here = 1;
  
  	  /* In the case basic blocks has single outgoing edge, but over by the
--- 3674,3686 ----
  		 && (s->flags & EDGE_EH) == 0
  		 && (c = s->dest) != EXIT_BLOCK_PTR
  		 && c->pred->pred_next == NULL
! 		 /* If the jump insn has side effects,
! 		    we can't kill the edge.  */
  		 && (GET_CODE (b->end) != JUMP_INSN
! 		     || onlyjump_p (b->end)) && merge_blocks (s, b, c, mode))
  	    changed_here = 1;
  
! 	  if ((mode & CLEANUP_EXPENSIVE) && try_simplify_condjump (b))
  	    changed_here = 1;
  
  	  /* In the case basic blocks has single outgoing edge, but over by the
*************** try_optimize_cfg ()
*** 3050,3055 ****
--- 3701,3709 ----
  	  if (try_forward_edges (b))
  	    changed_here = 1;
  
+ 	  if ((mode & CLEANUP_CROSSJUMP) && try_crossjump_bb (mode, b))
+ 	    changed_here = 1;
+ 
  	  /* Don't get confused by the index shift caused by deleting
  	     blocks.  */
  	  if (!changed_here)
*************** try_optimize_cfg ()
*** 3057,3070 ****
  	  else
  	    changed = 1;
  	}
        changed_overall |= changed;
-       changed = 0;
      }
    while (changed);
- #ifdef ENABLE_CHECKING
-   if (changed)
-     verify_flow_info ();
- #endif
    return changed_overall;
  }
  
--- 3711,3725 ----
  	  else
  	    changed = 1;
  	}
+       if ((mode & CLEANUP_CROSSJUMP) && try_crossjump_bb (mode, EXIT_BLOCK_PTR))
+ 	changed = 1;
+ #ifdef ENABLE_CHECKING
+       if (changed)
+ 	verify_flow_info ();
+ #endif
        changed_overall |= changed;
      }
    while (changed);
    return changed_overall;
  }
  


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]