[PATCH] Don't combine param and return value copies

Alan Modra amodra@gmail.com
Tue May 26 08:15:00 GMT 2015


On powerpc64le, modifying the way combine treats function parameters
and call arguments results in some regressions.

For instance, this testcase from varasm.c

extern int foo3 (void *, ...);
extern void foo4 (void *, const char *);
int
emit_tls_common (void *decl,
		 const char *name,
		 unsigned long size)
{
  foo3 (0, "\t%s\t", "..");
  foo4 (0, name);
  foo3 (0, ",%lu,%u\n", size, ((unsigned int *)decl)[88] / 8);
  return 1;
}

at -O2 produces for the prologue and first call

old				new
	mflr 0				mflr 0
	std 29,-24(1)			std 29,-24(1)
	std 30,-16(1)			std 30,-16(1)
	mr 29,4				addis 9,2,.LC0@toc@ha
	std 31,-8(1)			std 31,-8(1)
	addis 4,2,.LC1@toc@ha		addis 10,2,.LC1@toc@ha
	mr 31,5				addi 9,9,.LC0@toc@l
	addis 5,2,.LC0@toc@ha		addi 10,10,.LC1@toc@l
	mr 30,3				mr 30,3
	addi 5,5,.LC0@toc@l		mr 29,4
	addi 4,4,.LC1@toc@l		mr 31,5
	li 3,0				mr 4,10
	std 0,16(1)			mr 5,9
	stdu 1,-128(1)			std 0,16(1)
	bl foo3				stdu 1,-128(1)
	nop				li 3,0
					bl foo3
					nop

As you can see, we have some extra register shuffling from keeping a
pseudo for arg setup insns.  I guess the pseudos allow sched more
freedom to mess around..

On the positive side, I saw cases where keeping parameter pseudos
allowed shrink-wrap to occur.  varasm.c:decode_reg_name_and_count is
one of them.  More shrink-wrapping is a big win.

Here's a case where changes at the return result in poorer code
int
decl_readonly_section_1 (int category)
{
  switch (category)
    {
    case 1:
    case 2:
    case 3:
    case 4:
    case 5:
      return 1;
    default:
      return 0;
    }
}
old			new
	addi 9,3,-6		addi 9,3,-6
	neg 3,3			neg 3,3
	and 3,9,3		and 3,9,3
	rldicl 3,3,33,63	srwi 3,3,31
	blr			rldicl 3,3,0,32
				blr

Previously this:
(insn 35 34 36 2 (set (reg:SI 161)
        (lshiftrt:SI (reg:SI 164)
            (const_int 31 [0x1f]))) {lshrsi3})
(insn 36 35 23 2 (set (reg:DI 155 [ D.2441 ])
        (zero_extend:DI (reg:SI 161))) {zero_extendsidi2})
(insn 23 36 24 2 (set (reg/i:DI 3 3)
        (reg:DI 155 [ D.2441 ])) {*movdi_internal64})

is first combined to
(insn 35 34 36 2 (set (reg:SI 161)
        (lshiftrt:SI (reg:SI 164)
            (const_int 31 [0x1f]))) {lshrsi3})
(insn 23 35 24 2 (set (reg/i:DI 3 3)
	(and:DI (subreg:DI (reg:SI 161) 0)
	    (const_int 1 [0x1]))))
which is somewhat surprising, but from my previous forays into
combine.c I'd say happens due to known zero bits.  (Just looking at
dumps here, rather than in gdb.)

Then the above is further combined to
(insn 23 34 24 2 (set (reg/i:DI 3 3)
	(zero_extract:DI (subreg:DI (reg:SI 164) 0)
	    (const_int 1 [0x1])
	    (const_int 32 [0x20])))

Looks to me like a missed optimization opportunity that insns 35 and
36 aren't combined without first going through the intermediate step.


Anyway, here's the rewritten patch.  I've left in some knobs I used
when testing in case you want to see for yourself what happens with
various options.  Bootstrapped etc. powerpc64le-linux and
x86_64-linux.  One testsuite regression appears on powerpc64 which
should go away if I push on getting an old patch of mine committed
https://gcc.gnu.org/ml/gcc-patches/2013-08/msg00971.html

	* combine.c (set_return_regs): New function.
	(create_log_links): Exclude instructions copying parameter
	values from hard regs to pseudos, and instructions copying
	call argument and return value pseudos to hard regs.

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	(revision 223463)
+++ gcc/combine.c	(working copy)
@@ -1048,6 +1048,19 @@ can_combine_use_p (df_ref use)
   return true;
 }
 
+/* Used to build set of return value regs.  Add X to the set.  */
+
+static void
+set_return_regs (rtx x, void *arg)
+{
+  HARD_REG_SET *regs = (HARD_REG_SET *) arg;
+
+  add_to_hard_reg_set (regs, GET_MODE (x), REGNO (x));
+}
+
+#define DONT_COMBINE_PARAMS 1
+#define DONT_COMBINE_CALL_ARGS 1
+
 /* Fill in log links field for all insns.  */
 
 static void
@@ -1057,9 +1070,13 @@ create_log_links (void)
   rtx_insn **next_use;
   rtx_insn *insn;
   df_ref def, use;
+  HARD_REG_SET hard_regs;
 
   next_use = XCNEWVEC (rtx_insn *, max_reg_num ());
 
+  CLEAR_HARD_REG_SET (hard_regs);
+  diddle_return_value (set_return_regs, &hard_regs);
+
   /* Pass through each block from the end, recording the uses of each
      register and establishing log links when def is encountered.
      Note that we do not clear next_use array in order to save time,
@@ -1069,10 +1086,37 @@ create_log_links (void)
      usage -- these are taken from original flow.c did. Don't ask me why it is
      done this way; I don't know and if it works, I don't want to know.  */
 
-  FOR_EACH_BB_FN (bb, cfun)
+  bool in_parameters = false;
+  FOR_EACH_BB_REVERSE_FN (bb, cfun)
     {
       FOR_BB_INSNS_REVERSE (bb, insn)
         {
+	  if (DONT_COMBINE_PARAMS
+	      && NOTE_P (insn)
+	      && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG)
+	    {
+	      in_parameters = true;
+	      continue;
+	    }
+
+	  if (CALL_P (insn))
+	    {
+	      /* Add function args passed in hard regs to HARD_REGS.  */
+	      CLEAR_HARD_REG_SET (hard_regs);
+	      if (DONT_COMBINE_CALL_ARGS)
+		for (rtx link = CALL_INSN_FUNCTION_USAGE (insn);
+		     link;
+		     link = XEXP (link, 1))
+		  if (GET_CODE (XEXP (link, 0)) == USE)
+		    {
+		      rtx reg = XEXP (XEXP (link, 0), 0);
+		      if (REG_P (reg)
+			  && HARD_REGISTER_P (reg))
+			add_to_hard_reg_set (&hard_regs,
+					     GET_MODE (reg), REGNO (reg));
+		    }
+	    }
+
           if (!NONDEBUG_INSN_P (insn))
             continue;
 
@@ -1079,13 +1123,27 @@ create_log_links (void)
 	  /* Log links are created only once.  */
 	  gcc_assert (!LOG_LINKS (insn));
 
+	  /* Exclude certain reg,reg copies when one is a hard reg.
+	     Leaving these insns alone provides the register allocator
+	     useful information.  */
+	  rtx reg_reg = 0;
+	  int hard = 0;
+	  if (GET_CODE (PATTERN (insn)) == SET)
+	    {
+	      reg_reg = PATTERN (insn);
+	      if (REG_P (SET_DEST (reg_reg))
+		  && REG_P (SET_SRC (reg_reg)))
+		hard = (HARD_REGISTER_P (SET_DEST (reg_reg))
+			- HARD_REGISTER_P (SET_SRC (reg_reg)));
+	    }
+
 	  FOR_EACH_INSN_DEF (def, insn)
-            {
-              unsigned int regno = DF_REF_REGNO (def);
-              rtx_insn *use_insn;
+	    {
+	      unsigned int regno = DF_REF_REGNO (def);
+	      rtx_insn *use_insn;
 
-              if (!next_use[regno])
-                continue;
+	      if (!next_use[regno])
+		continue;
 
 	      if (!can_combine_def_p (def))
 		continue;
@@ -1096,6 +1154,14 @@ create_log_links (void)
 	      if (BLOCK_FOR_INSN (use_insn) != bb)
 		continue;
 
+	      if (in_parameters && hard < 0)
+		{
+		  /* This is a reg,reg copy from a hard reg to a pseudo,
+		     such as those copying parameter registers to
+		     pseudos.  Don't set up LOG_LINKS to this insn.  */
+		  continue;
+		}
+
 	      /* flow.c claimed:
 
 		 We don't build a LOG_LINK for hard registers contained
@@ -1110,18 +1176,36 @@ create_log_links (void)
 	      /* Don't add duplicate links between instructions.  */
 	      struct insn_link *links;
 	      FOR_EACH_LOG_LINK (links, use_insn)
-	        if (insn == links->insn && regno == links->regno)
+		if (insn == links->insn && regno == links->regno)
 		  break;
 
 	      if (!links)
 		LOG_LINKS (use_insn)
 		  = alloc_insn_link (insn, regno, LOG_LINKS (use_insn));
-            }
+	    }
 
 	  FOR_EACH_INSN_USE (use, insn)
-	    if (can_combine_use_p (use))
-	      next_use[DF_REF_REGNO (use)] = insn;
+	    {
+	      if (hard > 0
+		  && overlaps_hard_reg_set_p (hard_regs,
+					      GET_MODE (SET_DEST (reg_reg)),
+					      REGNO (SET_DEST (reg_reg))))
+		{
+		  /* This is a reg,reg copy to a hard register, such
+		     as those setting the function return value, or
+		     setting up arguments for a function call.  Don't
+		     provide LOG_LINKS from this insn.  This prevents
+		     the insn defining the pseudo from being combined
+		     into the reg,reg copy insn.  */
+		  remove_from_hard_reg_set (&hard_regs,
+					    GET_MODE (SET_DEST (reg_reg)),
+					    REGNO (SET_DEST (reg_reg)));
+		}
+	      else if (can_combine_use_p (use))
+		next_use[DF_REF_REGNO (use)] = insn;
+	    }
         }
+      CLEAR_HARD_REG_SET (hard_regs);
     }
 
   free (next_use);


-- 
Alan Modra
Australia Development Lab, IBM



More information about the Gcc-patches mailing list