This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Do not mark pseudo-copies decomposable during first lower-subreg pass


Hello,

A while ago Andrew Stubbs noticed that the lower-subreg pass seems a little
too aggressive in splitting up DImode moves into SImode moves.  See the
discussion starting at:
http://gcc.gnu.org/ml/gcc/2012-04/msg00676.html

The conclusion of this thread was that the mere presence of a DImode move
between pseudo registers should *not* be sufficient on its own (i.e. if
the pseudos in question are nowhere accessed via subreg) to trigger a
split of the pseudo during the *first* pass of lower-subreg.  However,
Andrew never got around to submitting a patch implementing this
suggestion.

The patch below does so.  As Andrew already noted, this gives a significant
improvement on ARM with NEON, and a slight overall improvement in code size
on ARM without NEON.  In addition, I've done code-size testing on PowerPC
and System z and found a slight but consistent improvement there as well.

Note that the patch disables the lower-subreg-1.c test case on ARM targets,
since the test case assumes that a 64-bit logical OR ought to be split; this
is currently not the case on ARM if NEON is available, since we provide an
iordi3 pattern in that case.  (Without the patch, some spurious moves
trigger the split anyway, hiding the issue.)

Tested on arm-linux-gnueabi, powerpc(64)-linux and s390(x)-linux.
OK for mainline?

Bye,
Ulrich


ChangeLog:

	* lower-subreg.c (enum classify_move_insn): Rename
	SIMPLE_PSEUDO_REG_MOVE to DECOMPOSABLE_SIMPLE_MOVE.
	(find_decomposable_subregs): Update.
	(decompose_multiword_subregs): Add DECOMPOSE_COPIES parameter.
	Only mark pseudo-to-pseudo copies as DECOMPOSABLE_SIMPLE_MOVE
	if that parameter is true.
	(rest_of_handle_lower_subreg): Call decompose_multiword_subregs
	with DECOMPOSE_COPIES false.
	(rest_of_handle_lower_subreg2): Call decompose_multiword_subregs
	with DECOMPOSE_COPIES true.

testsuite/ChangeLog:

	* gcc.dg/lower-subreg-1.c: Disable on arm-*-* targets.


Index: gcc/testsuite/gcc.dg/lower-subreg-1.c
===================================================================
*** gcc/testsuite/gcc.dg/lower-subreg-1.c	(revision 191254)
--- gcc/testsuite/gcc.dg/lower-subreg-1.c	(working copy)
***************
*** 1,4 ****
! /* { dg-do compile { target { ! { mips64 || { ia64-*-* spu-*-* tilegx-*-* } } } } } */
  /* { dg-options "-O -fdump-rtl-subreg1" } */
  /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */
  /* { dg-require-effective-target ilp32 } */
--- 1,4 ----
! /* { dg-do compile { target { ! { mips64 || { arm-*-* ia64-*-* spu-*-* tilegx-*-* } } } } } */
  /* { dg-options "-O -fdump-rtl-subreg1" } */
  /* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && x32 } { "*" } { "" } } */
  /* { dg-require-effective-target ilp32 } */
Index: gcc/lower-subreg.c
===================================================================
*** gcc/lower-subreg.c	(revision 191254)
--- gcc/lower-subreg.c	(working copy)
*************** enum classify_move_insn
*** 440,448 ****
  {
    /* Not a simple move from one location to another.  */
    NOT_SIMPLE_MOVE,
!   /* A simple move from one pseudo-register to another.  */
!   SIMPLE_PSEUDO_REG_MOVE,
!   /* A simple move involving a non-pseudo-register.  */
    SIMPLE_MOVE
  };
  
--- 440,448 ----
  {
    /* Not a simple move from one location to another.  */
    NOT_SIMPLE_MOVE,
!   /* A simple move we want to decompose.  */
!   DECOMPOSABLE_SIMPLE_MOVE,
!   /* Any other simple move.  */
    SIMPLE_MOVE
  };
  
*************** find_decomposable_subregs (rtx *px, void
*** 518,524 ****
  
  	 If this is not a simple copy from one location to another,
  	 then we can not decompose this register.  If this is a simple
! 	 copy from one pseudo-register to another, and the mode is right
  	 then we mark the register as decomposable.
  	 Otherwise we don't say anything about this register --
  	 it could be decomposed, but whether that would be
--- 518,524 ----
  
  	 If this is not a simple copy from one location to another,
  	 then we can not decompose this register.  If this is a simple
! 	 copy we want to decompose, and the mode is right,
  	 then we mark the register as decomposable.
  	 Otherwise we don't say anything about this register --
  	 it could be decomposed, but whether that would be
*************** find_decomposable_subregs (rtx *px, void
*** 537,543 ****
  	    case NOT_SIMPLE_MOVE:
  	      bitmap_set_bit (non_decomposable_context, regno);
  	      break;
! 	    case SIMPLE_PSEUDO_REG_MOVE:
  	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
  		bitmap_set_bit (decomposable_context, regno);
  	      break;
--- 537,543 ----
  	    case NOT_SIMPLE_MOVE:
  	      bitmap_set_bit (non_decomposable_context, regno);
  	      break;
! 	    case DECOMPOSABLE_SIMPLE_MOVE:
  	      if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
  		bitmap_set_bit (decomposable_context, regno);
  	      break;
*************** find_decomposable_subregs (rtx *px, void
*** 553,559 ****
        enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
  
        /* Any registers used in a MEM do not participate in a
! 	 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE.  Do our own recursion
  	 here, and return -1 to block the parent's recursion.  */
        for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
        return -1;
--- 553,559 ----
        enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
  
        /* Any registers used in a MEM do not participate in a
! 	 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
  	 here, and return -1 to block the parent's recursion.  */
        for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
        return -1;
*************** dump_choices (bool speed_p, const char *
*** 1336,1346 ****
  }
  
  /* Look for registers which are always accessed via word-sized SUBREGs
!    or via copies.  Decompose these registers into several word-sized
!    pseudo-registers.  */
  
  static void
! decompose_multiword_subregs (void)
  {
    unsigned int max;
    basic_block bb;
--- 1336,1346 ----
  }
  
  /* Look for registers which are always accessed via word-sized SUBREGs
!    or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
!    registers into several word-sized pseudo-registers.  */
  
  static void
! decompose_multiword_subregs (bool decompose_copies)
  {
    unsigned int max;
    basic_block bb;
*************** decompose_multiword_subregs (void)
*** 1438,1445 ****
  	    cmi = NOT_SIMPLE_MOVE;
  	  else
  	    {
  	      if (find_pseudo_copy (set))
! 		cmi = SIMPLE_PSEUDO_REG_MOVE;
  	      else
  		cmi = SIMPLE_MOVE;
  	    }
--- 1438,1452 ----
  	    cmi = NOT_SIMPLE_MOVE;
  	  else
  	    {
+ 	      /* We mark pseudo-to-pseudo copies as decomposable during the
+ 		 second pass only.  The first pass is so early that there is
+ 		 good chance such moves will be optimized away completely by
+ 		 subsequent optimizations anyway.
+ 
+ 		 However, we call find_pseudo_copy even during the first pass
+ 		 so as to properly set up the reg_copy_graph.  */
  	      if (find_pseudo_copy (set))
! 		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
  	      else
  		cmi = SIMPLE_MOVE;
  	    }
*************** gate_handle_lower_subreg (void)
*** 1640,1646 ****
  static unsigned int
  rest_of_handle_lower_subreg (void)
  {
!   decompose_multiword_subregs ();
    return 0;
  }
  
--- 1647,1653 ----
  static unsigned int
  rest_of_handle_lower_subreg (void)
  {
!   decompose_multiword_subregs (false);
    return 0;
  }
  
*************** rest_of_handle_lower_subreg (void)
*** 1649,1655 ****
  static unsigned int
  rest_of_handle_lower_subreg2 (void)
  {
!   decompose_multiword_subregs ();
    return 0;
  }
  
--- 1656,1662 ----
  static unsigned int
  rest_of_handle_lower_subreg2 (void)
  {
!   decompose_multiword_subregs (true);
    return 0;
  }
  
-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  Ulrich.Weigand@de.ibm.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]