This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Check rtx_costs in combine.c's try_combine (take 2)


Here's take two of my patch that uses the target's rtx_cost function
to determine whether a try_combine attempt should be rejected as being
more expensive than the original insn sequence.

This addresses both (all three?) of RTH's suggestions.  Firstly, I've
removed the TARGET_COMBINE_COSTS macro, so that we'll now always use
combine_insn_cost to check whether to combine or not to combine.  As
this is now unconditional, the patch below no longer needs its own
scan through the insn stream to determine the original instruction
costs, but we can piggy-back off of combine's existing "pre-scan".
This should improve cache locality traversing the RTL once instead
of twice [I've not measured it, but this should improve compile-time
further].  Finally, I've factored all of the "is more expensive"
logic out into its own combine_validate_cost function.  These changes
together make for a very "clean" patch.


The following patch has been tested on i686-pc-linux-gnu with a complete
"make bootstrap", all default languages, and regression tested with a
top-level "make -k check" with no new failures.  I've also confirmed
by hand that we still generate improved code on the Pentium4 for the
example given in my original posting.

Ok for mainline?



2004-06-29  Roger Sayle  <roger@eyesopen.com>

	* combine.c: Include "output.h" to define dump_file.
	(uid_insn_cost, last_insn_cost): New global variables.
	(combine_insn_cost): New function to estimate cost of an insn.
	(combine_validate_cost): New function to determine whether a
	try_combine replacement sequence is cheaper than the original.
	(combine_instructions): Allocate and populate uid_insn_cost
	array at the start of the combine pass, and deallocate it after.
	(try_combine): Check combine_validate_cost to determine whether
	a "recombination" should be rejected as being more expensive.
	* Makefile.in (combine.o): Add dependency upon output.h.


Index: combine.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/combine.c,v
retrieving revision 1.435
diff -c -3 -p -r1.435 combine.c
*** combine.c	24 Jun 2004 19:15:43 -0000	1.435
--- combine.c	29 Jun 2004 03:34:00 -0000
*************** Software Foundation, 59 Temple Place - S
*** 91,96 ****
--- 91,98 ----
  #include "toplev.h"
  #include "target.h"
  #include "rtlhooks-def.h"
+ /* Include output.h for dump_file.  */
+ #include "output.h"

  /* Number of attempts to combine instructions in this function.  */

*************** static basic_block this_basic_block;
*** 282,287 ****
--- 284,298 ----
     those blocks as starting points.  */
  static sbitmap refresh_blocks;

+ /* The following array records the combine_insn_cost for every insn
+    in the instruction stream.  */
+
+ static int *uid_insn_cost;
+
+ /* Length of the currently allocated uid_insn_cost array.  */
+
+ static int last_insn_cost;
+
  /* Incremented for each label.  */

  static int label_tick;
*************** do_SUBST_INT (int *into, int newval)
*** 504,509 ****
--- 515,649 ----

  #define SUBST_INT(INTO, NEWVAL)  do_SUBST_INT(&(INTO), (NEWVAL))

+ /* Calculate the rtx_cost of a single instruction.  A return value of zero
+    indicates an instruction without a known cost.  */
+
+ static int
+ combine_insn_cost (rtx pat)
+ {
+   int i, cost;
+   rtx set;
+
+   /* Extract the single set rtx from the instruction pattern.
+      We can't use single_set since we only have the pattern.  */
+   if (GET_CODE (pat) == SET)
+     set = pat;
+   else if (GET_CODE (pat) == PARALLEL)
+     {
+       set = NULL_RTX;
+       for (i = 0; i < XVECLEN (pat, 0); i++)
+ 	{
+ 	  rtx x = XVECEXP (pat, 0, i);
+ 	  if (GET_CODE (x) == SET)
+ 	    {
+ 	      if (set)
+ 		return 0;
+ 	      set = x;
+ 	    }
+ 	}
+       if (!set)
+ 	return 0;
+     }
+   else
+     return 0;
+
+   cost = rtx_cost (SET_SRC (set), SET);
+   return cost > 0 ? cost : COSTS_N_INSNS (1);
+ }
+
+ /* Subroutine of try_combine.  Determine whether the combine replacement
+    patterns NEWPAT and NEWI2PAT are cheaper according to combine_insn_cost
+    that the original instruction sequence I1, I2 and I3.  Note that I1
+    and/or NEWI2PAT may be NULL_RTX.  This function returns false, if the
+    costs of all instructions can be estimated, and the replacements are
+    more expensive than the original sequence.  */
+
+ static bool
+ combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat)
+ {
+   int i1_cost, i2_cost, i3_cost;
+   int new_i2_cost, new_i3_cost;
+   int old_cost, new_cost;
+
+   /* Lookup the original combine_insn_costs.  */
+   i2_cost = INSN_UID (i2) <= last_insn_cost
+ 	    ? uid_insn_cost[INSN_UID (i2)] : 0;
+   i3_cost = INSN_UID (i3) <= last_insn_cost
+ 	    ? uid_insn_cost[INSN_UID (i3)] : 0;
+
+   if (i1)
+     {
+       i1_cost = INSN_UID (i1) <= last_insn_cost
+ 		? uid_insn_cost[INSN_UID (i1)] : 0;
+       old_cost = (i1_cost > 0 && i2_cost > 0 && i3_cost > 0)
+ 		 ? i1_cost + i2_cost + i3_cost : 0;
+     }
+   else
+     {
+       old_cost = (i2_cost > 0 && i3_cost > 0) ? i2_cost + i3_cost : 0;
+       i1_cost = 0;
+     }
+
+   /* Calculate the replacement combine_insn_costs.  */
+   new_i3_cost = combine_insn_cost (newpat);
+   if (newi2pat)
+     {
+       new_i2_cost = combine_insn_cost (newi2pat);
+       new_cost = (new_i2_cost > 0 && new_i3_cost > 0)
+ 		 ? new_i2_cost + new_i3_cost : 0;
+     }
+   else
+     {
+       new_cost = new_i3_cost;
+       new_i2_cost = 0;
+     }
+
+   /* Disallow this recombination if both new_cost and old_cost are
+      greater than zero, and new_cost is greater than old cost.  */
+   if (!undobuf.other_insn
+       && old_cost > 0
+       && new_cost > old_cost)
+     {
+       if (dump_file)
+ 	{
+ 	  if (i1)
+ 	    {
+ 	      fprintf (dump_file,
+ 		       "rejecting combination of insns %d, %d and %d\n",
+ 		       INSN_UID (i1), INSN_UID (i2), INSN_UID (i3));
+ 	      fprintf (dump_file, "original costs %d + %d + %d = %d\n",
+ 		       i1_cost, i2_cost, i3_cost, old_cost);
+ 	    }
+ 	  else
+ 	    {
+ 	      fprintf (dump_file,
+ 		       "rejecting combination of insns %d and %d\n",
+ 		       INSN_UID (i2), INSN_UID (i3));
+ 	      fprintf (dump_file, "original costs %d + %d = %d\n",
+ 		       i2_cost, i3_cost, old_cost);
+ 	    }
+
+ 	  if (newi2pat)
+ 	    {
+ 	      fprintf (dump_file, "replacement costs %d + %d = %d\n",
+ 		       new_i2_cost, new_i3_cost, new_cost);
+ 	    }
+ 	  else
+ 	    fprintf (dump_file, "replacement cost %d\n", new_cost);
+ 	}
+
+       return false;
+     }
+
+   /* Update the uid_insn_cost array with the replacement costs.  */
+   uid_insn_cost[INSN_UID (i2)] = new_i2_cost;
+   uid_insn_cost[INSN_UID (i3)] = new_i3_cost;
+   if (i1)
+     uid_insn_cost[INSN_UID (i1)] = 0;
+
+   return true;
+ }
+
  /* Main entry point for combiner.  F is the first insn of the function.
     NREGS is the first unused pseudo-reg number.

*************** combine_instructions (rtx f, unsigned in
*** 568,573 ****
--- 708,717 ----
    refresh_blocks = sbitmap_alloc (last_basic_block);
    sbitmap_zero (refresh_blocks);

+   /* Allocate array of current combine_insn_costs.  */
+   uid_insn_cost = xcalloc (max_uid_cuid + 1, sizeof (int));
+   last_insn_cost = max_uid_cuid;
+
    for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
      {
        uid_cuid[INSN_UID (insn)] = ++i;
*************** combine_instructions (rtx f, unsigned in
*** 586,591 ****
--- 730,741 ----
  	      set_nonzero_bits_and_sign_copies (XEXP (links, 0), NULL_RTX,
  						NULL);
  #endif
+
+ 	  /* Record the current combine_insn_cost of this instruction.  */
+ 	  uid_insn_cost[INSN_UID (insn)] = combine_insn_cost (PATTERN (insn));
+ 	  if (dump_file)
+ 	    fprintf(dump_file, "insn_cost %d: %d\n",
+ 		    INSN_UID (insn), uid_insn_cost[INSN_UID (insn)]);
  	}

        if (GET_CODE (insn) == CODE_LABEL)
*************** combine_instructions (rtx f, unsigned in
*** 762,767 ****
--- 912,918 ----

    /* Clean up.  */
    sbitmap_free (refresh_blocks);
+   free (uid_insn_cost);
    free (reg_stat);
    free (uid_cuid);

*************** try_combine (rtx i3, rtx i2, rtx i1, int
*** 2490,2495 ****
--- 2641,2654 ----
    }
  #endif

+   /* Only allow this combination if combine_insn_costs reports that the
+      replacement instructions are cheaper than the originals.  */
+   if (!combine_validate_cost (i1, i2, i3, newpat, newi2pat))
+     {
+       undo_all ();
+       return 0;
+     }
+
    /* We now know that we can do this combination.  Merge the insns and
       update the status of registers and LOG_LINKS.  */

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1306
diff -c -3 -p -r1.1306 Makefile.in
*** Makefile.in	23 Jun 2004 20:12:41 -0000	1.1306
--- Makefile.in	29 Jun 2004 03:34:01 -0000
*************** loop-unroll.o: loop-unroll.c $(CONFIG_H)
*** 1973,1981 ****
  dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
     hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
  et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
! combine.o : combine.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(FLAGS_H) \
!    function.h insn-config.h $(INSN_ATTR_H) $(REGS_H) $(EXPR_H) rtlhooks-def.h \
!    $(BASIC_BLOCK_H) $(RECOG_H) real.h hard-reg-set.h toplev.h $(TM_P_H) $(TREE_H) $(TARGET_H)
  regclass.o : regclass.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
     hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(REGS_H) insn-config.h $(RECOG_H) reload.h \
     real.h toplev.h function.h output.h $(GGC_H) $(TM_P_H) $(EXPR_H) $(TIMEVAR_H)
--- 1973,1982 ----
  dominance.o : dominance.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
     hard-reg-set.h $(BASIC_BLOCK_H) et-forest.h
  et-forest.o : et-forest.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) et-forest.h alloc-pool.h
! combine.o : combine.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
!    $(FLAGS_H) function.h insn-config.h $(INSN_ATTR_H) $(REGS_H) $(EXPR_H) \
!    rtlhooks-def.h $(BASIC_BLOCK_H) $(RECOG_H) real.h hard-reg-set.h \
!    toplev.h $(TM_P_H) $(TREE_H) $(TARGET_H) output.h
  regclass.o : regclass.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \
     hard-reg-set.h $(FLAGS_H) $(BASIC_BLOCK_H) $(REGS_H) insn-config.h $(RECOG_H) reload.h \
     real.h toplev.h function.h output.h $(GGC_H) $(TM_P_H) $(EXPR_H) $(TIMEVAR_H)


Roger
--


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]