This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

LRA patch for PR87718


  The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87718

  The patch adds a special treatment for moves with a hard register in register cost and class calculation.

  The patch was bootstrapped and tested on x86-64 and ppc64.

  I found two testsuite regressions because of the patch.  The expected generated code for PR82361 test is too specific.  GCC with the patch generates the same quality code but with a different hard register on x86-64.  So I just changed the test for  PR82361.

  Another test is for ppc64.  I think the expected generated code for this test is wrong.  I'll submit a changed test for a discussion later.

  Although I spent much time on the solution and I think it is the right one, the patch is in very sensitive area of RA and may affect expected code generation for many targets.  I am ready to work on the new regressions as soon as they are found.

  The patch was committed as rev. 260385.

Index: ChangeLog
===================================================================
--- ChangeLog	(revision 266384)
+++ ChangeLog	(working copy)
@@ -1,3 +1,10 @@
+2018-11-22  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/87718
+	* ira-costs.c: Remove trailing white-spaces.
+	(record_operand_costs): Add a special treatment for moves
+	involving a hard register.
+
 2018-11-22  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.c (ix86_avx_emit_vzeroupper): Remove.
Index: ira-costs.c
===================================================================
--- ira-costs.c	(revision 266155)
+++ ira-costs.c	(working copy)
@@ -1257,7 +1257,7 @@ record_address_regs (machine_mode mode,
 	    add_cost = (move_in_cost[i][rclass] * scale) / 2;
 	    if (INT_MAX - add_cost < pp_costs[k])
 	      pp_costs[k] = INT_MAX;
-	    else 
+	    else
 	      pp_costs[k] += add_cost;
 	  }
       }
@@ -1283,10 +1283,100 @@ record_operand_costs (rtx_insn *insn, en
 {
   const char *constraints[MAX_RECOG_OPERANDS];
   machine_mode modes[MAX_RECOG_OPERANDS];
-  rtx ops[MAX_RECOG_OPERANDS];
   rtx set;
   int i;
 
+  if ((set = single_set (insn)) != NULL_RTX
+      /* In rare cases the single set insn might have less 2 operands
+	 as the source can be a fixed special reg.  */
+      && recog_data.n_operands > 1
+      && recog_data.operand[0] == SET_DEST (set)
+      && recog_data.operand[1] == SET_SRC (set))
+    {
+      int regno, other_regno;
+      rtx dest = SET_DEST (set);
+      rtx src = SET_SRC (set);
+
+      if (GET_CODE (dest) == SUBREG
+	  && known_eq (GET_MODE_SIZE (GET_MODE (dest)),
+		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))
+	dest = SUBREG_REG (dest);
+      if (GET_CODE (src) == SUBREG
+	  && known_eq (GET_MODE_SIZE (GET_MODE (src)),
+		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))))
+	src = SUBREG_REG (src);
+      if (REG_P (src) && REG_P (dest)
+	  && (((regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER
+	       && (other_regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER)
+	      || ((regno = REGNO (dest)) >= FIRST_PSEUDO_REGISTER
+		  && (other_regno = REGNO (src)) < FIRST_PSEUDO_REGISTER)))
+	{
+	  machine_mode mode = GET_MODE (SET_SRC (set));
+	  cost_classes_t cost_classes_ptr = regno_cost_classes[regno];
+	  enum reg_class *cost_classes = cost_classes_ptr->classes;
+	  reg_class_t rclass, hard_reg_class, pref_class;
+	  int cost, k;
+	  bool dead_p = find_regno_note (insn, REG_DEAD, REGNO (src));
+
+	  hard_reg_class = REGNO_REG_CLASS (other_regno);
+	  i = regno == (int) REGNO (src) ? 1 : 0;
+	  for (k = cost_classes_ptr->num - 1; k >= 0; k--)
+	    {
+	      rclass = cost_classes[k];
+	      cost = ((i == 0
+		       ? ira_register_move_cost[mode][hard_reg_class][rclass]
+		       : ira_register_move_cost[mode][rclass][hard_reg_class])
+		      * frequency);
+	      op_costs[i]->cost[k] = cost;
+	      /* If we have assigned a class to this allocno in our
+		 first pass, add a cost to this alternative
+		 corresponding to what we would add if this allocno
+		 were not in the appropriate class.  */
+	      if (pref)
+		{
+		  if ((pref_class = pref[COST_INDEX (regno)]) == NO_REGS)
+		    op_costs[i]->cost[k]
+		      += ((i == 0 ? ira_memory_move_cost[mode][rclass][0] : 0)
+			  + (i == 1 ? ira_memory_move_cost[mode][rclass][1] : 0)
+			  * frequency);
+		  else if (ira_reg_class_intersect[pref_class][rclass]
+			   == NO_REGS)
+		    op_costs[i]->cost[k]
+		      += (ira_register_move_cost[mode][pref_class][rclass]
+			  * frequency);
+		}
+	      /* If this insn is a single set copying operand 1 to
+		 operand 0 and one operand is an allocno with the
+		 other a hard reg or an allocno that prefers a hard
+		 register that is in its own register class then we
+		 may want to adjust the cost of that register class to
+		 -1.
+
+		 Avoid the adjustment if the source does not die to
+		 avoid stressing of register allocator by preferencing
+		 two colliding registers into single class.  */
+	      if (dead_p
+		  && TEST_HARD_REG_BIT (reg_class_contents[rclass], other_regno)
+		  && (reg_class_size[(int) rclass]
+		      == (ira_reg_class_max_nregs
+			  [(int) rclass][(int) GET_MODE(src)])))
+		{
+		  if (reg_class_size[rclass] == 1)
+		    op_costs[i]->cost[k] = -frequency;
+		  else if (in_hard_reg_set_p (reg_class_contents[rclass],
+					      GET_MODE(src), other_regno))
+		    op_costs[i]->cost[k] = -frequency;
+		}
+	    }
+	  op_costs[i]->mem_cost
+	    = ira_memory_move_cost[mode][hard_reg_class][i] * frequency;
+	  if (pref && (pref_class = pref[COST_INDEX (regno)]) != NO_REGS)
+	    op_costs[i]->mem_cost
+	      += ira_memory_move_cost[mode][pref_class][i] * frequency;
+	  return;
+	}
+    }
+
   for (i = 0; i < recog_data.n_operands; i++)
     {
       constraints[i] = recog_data.constraints[i];
@@ -1302,7 +1392,6 @@ record_operand_costs (rtx_insn *insn, en
     {
       memcpy (op_costs[i], init_cost, struct_costs_size);
 
-      ops[i] = recog_data.operand[i];
       if (GET_CODE (recog_data.operand[i]) == SUBREG)
 	recog_data.operand[i] = SUBREG_REG (recog_data.operand[i]);
 
@@ -1318,7 +1407,7 @@ record_operand_costs (rtx_insn *insn, en
 			     recog_data.operand[i], 0, ADDRESS, SCRATCH,
 			     frequency * 2);
     }
-  
+
   /* Check for commutative in a separate loop so everything will have
      been initialized.  We must do this even if one operand is a
      constant--see addsi3 in m68k.md.  */
@@ -1328,8 +1417,8 @@ record_operand_costs (rtx_insn *insn, en
 	const char *xconstraints[MAX_RECOG_OPERANDS];
 	int j;
 
-	/* Handle commutative operands by swapping the constraints.
-	   We assume the modes are the same.  */
+	/* Handle commutative operands by swapping the
+	   constraints.  We assume the modes are the same.  */
 	for (j = 0; j < recog_data.n_operands; j++)
 	  xconstraints[j] = constraints[j];
 
@@ -1342,69 +1431,6 @@ record_operand_costs (rtx_insn *insn, en
   record_reg_classes (recog_data.n_alternatives, recog_data.n_operands,
 		      recog_data.operand, modes,
 		      constraints, insn, pref);
-
-  /* If this insn is a single set copying operand 1 to operand 0 and
-     one operand is an allocno with the other a hard reg or an allocno
-     that prefers a hard register that is in its own register class
-     then we may want to adjust the cost of that register class to -1.
-
-     Avoid the adjustment if the source does not die to avoid
-     stressing of register allocator by preferencing two colliding
-     registers into single class.
-
-     Also avoid the adjustment if a copy between hard registers of the
-     class is expensive (ten times the cost of a default copy is
-     considered arbitrarily expensive).  This avoids losing when the
-     preferred class is very expensive as the source of a copy
-     instruction.  */
-  if ((set = single_set (insn)) != NULL_RTX
-      /* In rare cases the single set insn might have less 2 operands
-	 as the source can be a fixed special reg.  */
-      && recog_data.n_operands > 1
-      && ops[0] == SET_DEST (set) && ops[1] == SET_SRC (set))
-    {
-      int regno, other_regno;
-      rtx dest = SET_DEST (set);
-      rtx src = SET_SRC (set);
-
-      if (GET_CODE (dest) == SUBREG
-	  && known_eq (GET_MODE_SIZE (GET_MODE (dest)),
-		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))
-	dest = SUBREG_REG (dest);
-      if (GET_CODE (src) == SUBREG
-	  && known_eq (GET_MODE_SIZE (GET_MODE (src)),
-		       GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))))
-	src = SUBREG_REG (src);
-      if (REG_P (src) && REG_P (dest)
-	  && find_regno_note (insn, REG_DEAD, REGNO (src))
-	  && (((regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER
-	       && (other_regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER)
-	      || ((regno = REGNO (dest)) >= FIRST_PSEUDO_REGISTER
-		  && (other_regno = REGNO (src)) < FIRST_PSEUDO_REGISTER)))
-	{
-	  machine_mode mode = GET_MODE (src);
-	  cost_classes_t cost_classes_ptr = regno_cost_classes[regno];
-	  enum reg_class *cost_classes = cost_classes_ptr->classes;
-	  reg_class_t rclass;
-	  int k;
-
-	  i = regno == (int) REGNO (src) ? 1 : 0;
-	  for (k = cost_classes_ptr->num - 1; k >= 0; k--)
-	    {
-	      rclass = cost_classes[k];
-	      if (TEST_HARD_REG_BIT (reg_class_contents[rclass], other_regno)
-		  && (reg_class_size[(int) rclass]
-		      == ira_reg_class_max_nregs [(int) rclass][(int) mode]))
-		{
-		  if (reg_class_size[rclass] == 1)
-		    op_costs[i]->cost[k] = -frequency;
-		  else if (in_hard_reg_set_p (reg_class_contents[rclass],
-					      mode, other_regno))
-		    op_costs[i]->cost[k] = -frequency;
-		}
-	    }
-	}
-    }
 }
 
 
@@ -1457,7 +1483,7 @@ scan_one_insn (rtx_insn *insn)
 
   /* If this insn loads a parameter from its stack slot, then it
      represents a savings, rather than a cost, if the parameter is
-     stored in memory.  Record this fact. 
+     stored in memory.  Record this fact.
 
      Similarly if we're loading other constants from memory (constant
      pool, TOC references, small data areas, etc) and this is the only
@@ -1468,7 +1494,7 @@ scan_one_insn (rtx_insn *insn)
      mem_cost might result in it being loaded using the specialized
      instruction into a register, then stored into stack and loaded
      again from the stack.  See PR52208.
-     
+
      Don't do this if SET_SRC (set) has side effect.  See PR56124.  */
   if (set != 0 && REG_P (SET_DEST (set)) && MEM_P (SET_SRC (set))
       && (note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) != NULL_RTX
@@ -1766,7 +1792,7 @@ find_costs_and_classes (FILE *dump_file)
 		   a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
 		{
 		  int *a_costs, *p_costs;
-		      
+
 		  a_num = ALLOCNO_NUM (a);
 		  if ((flag_ira_region == IRA_REGION_ALL
 		       || flag_ira_region == IRA_REGION_MIXED)
@@ -1936,7 +1962,7 @@ find_costs_and_classes (FILE *dump_file)
 	      int a_num = ALLOCNO_NUM (a);
 	      int *total_a_costs = COSTS (total_allocno_costs, a_num)->cost;
 	      int *a_costs = COSTS (costs, a_num)->cost;
-	
+
 	      if (aclass == NO_REGS)
 		best = NO_REGS;
 	      else
@@ -1998,7 +2024,7 @@ find_costs_and_classes (FILE *dump_file)
 		}
 	    }
 	}
-      
+
       if (internal_flag_ira_verbose > 4 && dump_file)
 	{
 	  if (allocno_p)
@@ -2081,7 +2107,7 @@ process_bb_node_for_hard_reg_moves (ira_
 	int cost;
 	enum reg_class hard_reg_class;
 	machine_mode mode;
-	
+
 	mode = ALLOCNO_MODE (a);
 	hard_reg_class = REGNO_REG_CLASS (hard_regno);
 	ira_init_register_move_cost_if_necessary (mode);
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog	(revision 266384)
+++ testsuite/ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2018-11-22  Vladimir Makarov  <vmakarov@redhat.com>
+
+	PR rtl-optimization/87718
+	* gcc.target/i386/pr82361-1.c: Check only the first operand of
+	moves.
+
 2018-11-22  Thomas Preud'homme  <thomas.preudhomme@linaro.org>
 
 	* gcc.target/arm/pr85434.c: New test.
Index: testsuite/gcc.target/i386/pr82361-1.c
===================================================================
--- testsuite/gcc.target/i386/pr82361-1.c	(revision 266155)
+++ testsuite/gcc.target/i386/pr82361-1.c	(working copy)
@@ -6,7 +6,7 @@
 /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
 /* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
    one.  */
-/* { dg-final { scan-assembler-times "movl\t%edx, %edx" 2 } } */
+/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
 
 void
 f1 (unsigned int a, unsigned int b)

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]