This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
Re: [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c

From: Alan Modra <amodra at gmail dot com>
To: gcc-patches at gcc dot gnu dot org
Cc: Segher Boessenkool <segher at kernel dot crashing dot org>
Date: Wed, 8 May 2019 15:02:48 +0930
Subject: Re: [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c
References: <20190327134006.GA3194@bubble.grove.modra.org>
This is https://gcc.gnu.org/ml/gcc-patches/2019-03/msg01299.html with
the fixes Segher requested, plus a few more:
- delete PREFERRED_RELOAD_CLASS changes
- adjust for recent register renumbering
- use defines rather than hard coding register numbers
- flip altivec/float test when dealing with moves within vsx regs,
  so that the altivec hard reg count is preferred over the fp hard reg
  count when both reg types are possible.
- use 2 for power9 direct move cost, and remove more '?'s from insns.
- use reg_class_subset_p in the test for slow LR/CTR moves

Bootstrapped and regression tested powerpc64le-linux.  OK for mainline?

	PR target/89271
	* config/rs6000/rs6000.h (enum reg_class, REG_CLASS_NAMES),
	(REG_CLASS_CONTENTS): Add GEN_OR_VSX_REGS class.
	* config/rs6000/rs6000.c (rs6000_register_move_cost): Correct
	cost for general <-> vsx when direct moves are available.
	Cost union classes at minimal cost for any reg in the class.
	Correct calculation for moves between vsx, float, and altivec.
	Don't return a low cost for moves between special regs.  Don't
	use hard coded register numbers.
	(TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS): Define.
	(rs6000_ira_change_pseudo_allocno_class): New function.
	* config/rs6000/rs6000.md (movsi_internal1, mov<mode>_internal),
	(movdi_internal32, movdi_internal64): Remove '*' from vsx register
	alternatives.
	(movsi_internal1): Don't disparage vector alternatives.
	(mov<mode>_internal): Likewise, excepting alternative that
	will be split.
	* config/rs6000/vsx.md (vsx_splat_<mode>_reg): Don't disparage
	we <- b alternative.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5d5765d89b2..e7c63c263ae 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1729,6 +1729,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
 #undef TARGET_MEMORY_MOVE_COST
 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
+  rs6000_ira_change_pseudo_allocno_class
 #undef TARGET_CANNOT_COPY_INSN_P
 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
 #undef TARGET_RTX_COSTS
@@ -34648,22 +34651,54 @@ rs6000_register_move_cost (machine_mode mode,
 			   reg_class_t from, reg_class_t to)
 {
   int ret;
+  reg_class_t rclass;
 
   if (TARGET_DEBUG_COST)
     dbg_cost_ctrl++;
 
+  /* If we have VSX, we can easily move between FPR or Altivec registers,
+     otherwise we can only easily move within classes.
+     Do this first so we give best-case answers for union classes
+     containing both gprs and vsx regs.  */
+  HARD_REG_SET to_vsx, from_vsx;
+  COPY_HARD_REG_SET (to_vsx, reg_class_contents[to]);
+  AND_HARD_REG_SET (to_vsx, reg_class_contents[VSX_REGS]);
+  COPY_HARD_REG_SET (from_vsx, reg_class_contents[from]);
+  AND_HARD_REG_SET (from_vsx, reg_class_contents[VSX_REGS]);
+  if (!hard_reg_set_empty_p (to_vsx)
+      && !hard_reg_set_empty_p (from_vsx)
+      && (TARGET_VSX
+	  || hard_reg_set_intersect_p (to_vsx, from_vsx)))
+    {
+      int reg = FIRST_FPR_REGNO;
+      if (TARGET_VSX
+	  || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
+	      && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
+	reg = FIRST_ALTIVEC_REGNO;
+      ret = 2 * hard_regno_nregs (reg, mode);
+    }
+
   /*  Moves from/to GENERAL_REGS.  */
-  if (reg_classes_intersect_p (to, GENERAL_REGS)
-      || reg_classes_intersect_p (from, GENERAL_REGS))
+  else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
+	   || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
     {
-      reg_class_t rclass = from;
-
-      if (! reg_classes_intersect_p (to, GENERAL_REGS))
-	rclass = to;
-
       if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
-	ret = (rs6000_memory_move_cost (mode, rclass, false)
-	       + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+	{
+	  if (TARGET_DIRECT_MOVE)
+	    {
+	      if (rs6000_tune != PROCESSOR_POWER9)
+		ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
+	      else
+		ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
+	      /* SFmode requires a conversion when moving between gprs
+		 and vsx.  */
+	      if (mode == SFmode)
+		ret += 2;
+	    }
+	  else
+	    ret = (rs6000_memory_move_cost (mode, rclass, false)
+		   + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
+	}
 
       /* It's more expensive to move CR_REGS than CR0_REGS because of the
 	 shift.  */
@@ -34676,24 +34711,14 @@ rs6000_register_move_cost (machine_mode mode,
 		|| rs6000_tune == PROCESSOR_POWER7
 		|| rs6000_tune == PROCESSOR_POWER8
 		|| rs6000_tune == PROCESSOR_POWER9)
-	       && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
-        ret = 6 * hard_regno_nregs (0, mode);
+	       && reg_class_subset_p (rclass, SPECIAL_REGS))
+        ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
 
       else
 	/* A move will cost one instruction per GPR moved.  */
-	ret = 2 * hard_regno_nregs (0, mode);
+	ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
     }
 
-  /* If we have VSX, we can easily move between FPR or Altivec registers.  */
-  else if (VECTOR_MEM_VSX_P (mode)
-	   && reg_classes_intersect_p (to, VSX_REGS)
-	   && reg_classes_intersect_p (from, VSX_REGS))
-    ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
-
-  /* Moving between two similar registers is just one instruction.  */
-  else if (reg_classes_intersect_p (to, from))
-    ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
-
   /* Everything else has to go through GENERAL_REGS.  */
   else
     ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
@@ -34746,6 +34771,64 @@ rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
   return ret;
 }
 
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
+
+   The register allocator chooses GEN_OR_VSX_REGS for the allocno
+   class if GENERAL_REGS and VSX_REGS cost is lower than the memory
+   cost.  This happens a lot when TARGET_DIRECT_MOVE makes the register
+   move cost between GENERAL_REGS and VSX_REGS low.
+
+   It might seem reasonable to use a union class.  After all, if usage
+   of vsr is low and gpr high, it might make sense to spill gpr to vsr
+   rather than memory.  However, in cases where register pressure of
+   both is high, like the cactus_adm spec test, allowing
+   GEN_OR_VSX_REGS as the allocno class results in bad decisions in
+   the first scheduling pass.  This is partly due to an allocno of
+   GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
+   class, which gives too high a pressure for GENERAL_REGS and too low
+   for VSX_REGS.  So, force a choice of the subclass here.
+
+   The best class is also the union if GENERAL_REGS and VSX_REGS have
+   the same cost.  In that case we do use GEN_OR_VSX_REGS as the
+   allocno class, since trying to narrow down the class by regno mode
+   is prone to error.  For example, SImode is allowed in VSX regs and
+   in some cases (eg. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
+   it would be wrong to choose an allocno of GENERAL_REGS based on
+   SImode.  */
+
+static reg_class_t
+rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
+					reg_class_t allocno_class,
+					reg_class_t best_class)
+{
+  switch (allocno_class)
+    {
+    case GEN_OR_VSX_REGS:
+      /* best_class must be a subset of allocno_class.  */
+      gcc_checking_assert (best_class == GEN_OR_VSX_REGS
+			   || best_class == GEN_OR_FLOAT_REGS
+			   || best_class == VSX_REGS
+			   || best_class == ALTIVEC_REGS
+			   || best_class == FLOAT_REGS
+			   || best_class == GENERAL_REGS
+			   || best_class == BASE_REGS);
+      /* Use best_class but choose wider classes when copying from the
+	 wider class to best_class is cheap.  This mimics IRA choice
+	 of allocno class.  */
+      if (best_class == BASE_REGS)
+	return GENERAL_REGS;
+      if (TARGET_VSX
+	  && (best_class == FLOAT_REGS || best_class == ALTIVEC_REGS))
+	return VSX_REGS;
+      return best_class;
+
+    default:
+      break;
+    }
+
+  return allocno_class;
+}
+
 /* Returns a code for a target-specific builtin that implements
    reciprocal of the function, or NULL_TREE if not available.  */
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 14a9e199bc8..30a72dd5e55 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1141,6 +1141,7 @@ enum reg_class
   VRSAVE_REGS,
   VSCR_REGS,
   GEN_OR_FLOAT_REGS,
+  GEN_OR_VSX_REGS,
   LINK_REGS,
   CTR_REGS,
   LINK_OR_CTR_REGS,
@@ -1169,6 +1170,7 @@ enum reg_class
   "VRSAVE_REGS",							\
   "VSCR_REGS",								\
   "GEN_OR_FLOAT_REGS",							\
+  "GEN_OR_VSX_REGS",							\
   "LINK_REGS",								\
   "CTR_REGS",								\
   "LINK_OR_CTR_REGS",							\
@@ -1205,6 +1207,8 @@ enum reg_class
   { 0x00000000, 0x00000000, 0x00000000, 0x00002000 },			\
   /* GEN_OR_FLOAT_REGS.  */						\
   { 0xffffffff, 0xffffffff, 0x00000000, 0x00004008 },			\
+  /* GEN_OR_VSX_REGS.  */						\
+  { 0xffffffff, 0xffffffff, 0xffffffff, 0x00004008 },			\
   /* LINK_REGS.  */							\
   { 0x00000000, 0x00000000, 0x00000000, 0x00000001 },			\
   /* CTR_REGS.  */							\
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 411d7f0d352..8da7aba4080 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6830,10 +6830,10 @@ (define_insn "movsi_low"
 ;;		MF%1         MT%0         NOP
 (define_insn "*movsi_internal1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-		"=r,         r,           r,           ?*wI,        ?*wH,
-		 m,          ?Z,          ?Z,          r,           r,
-		 r,          ?*wIwH,      ?*wJwK,      ?*wJwK,      ?*wu,
-		 ?*wJwK,     ?*wH,        ?*wK,        ?*wIwH,      ?r,
+		"=r,         r,           r,           wI,          wH,
+		 m,          Z,           Z,           r,           r,
+		 r,          wIwH,        wJwK,        wJwK,        wu,
+		 wJwK,       wH,          wK,          wIwH,        r,
 		 r,          *h,          *h")
 
 	(match_operand:SI 1 "input_operand"
@@ -7104,13 +7104,13 @@ (define_expand "mov<mode>"
 ;;		MTVSRWZ     MF%1       MT%1       NOP
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:QHI 0 "nonimmediate_operand"
-		"=r,        r,         ?*wJwK,    m,         Z,         r,
-		 ?*wJwK,    ?*wJwK,    ?*wJwK,    ?*wK,      ?*wK,      r,
-		 ?*wJwK,    r,         *c*l,      *h")
+		"=r,        r,         wJwK,      m,         Z,         r,
+		 wJwK,      wJwK,      wJwK,      wK,        ?wK,       r,
+		 wJwK,      r,         *c*l,      *h")
 
 	(match_operand:QHI 1 "input_operand"
 		"r,         m,         Z,         r,         wJwK,      i,
-		 wJwK,      O,         wM,        wB,        wS,        ?*wJwK,
+		 wJwK,      O,         wM,        wB,        wS,        wJwK,
 		 r,         *h,        r,         0"))]
 
   "gpc_reg_operand (operands[0], <MODE>mode)
@@ -8671,8 +8671,8 @@ (define_insn "*movdi_internal32"
   [(set (match_operand:DI 0 "nonimmediate_operand"
          "=Y,        r,         r,         m,         ^d,        ^d,
           r,         wY,        Z,         ^wb,       $wv,       ^wi,
-          *wo,       *wo,       *wv,       *wi,       *wi,       *wv,
-          *wv")
+          wo,        wo,        wv,        wi,        *i,        wv,
+          wv")
 
 	(match_operand:DI 1 "input_operand"
          "r,         Y,         r,         ^d,        m,         ^d,
@@ -8751,9 +8751,9 @@ (define_insn "*movdi_internal64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
                "=YZ,       r,         r,         r,         r,          r,
                 m,         ^d,        ^d,        wY,        Z,          $wb,
-                $wv,       ^wi,       *wo,       *wo,       *wv,        *wi,
-                *wi,       *wv,       *wv,       r,         *h,         *h,
-                ?*r,       ?*wg,      ?*r,       ?*wj")
+                $wv,       ^wi,       wo,        wo,        wv,         wi,
+                wi,        wv,        wv,        r,         *h,         *h,
+                ?r,        ?wg,       ?r,        ?wj")
 
 	(match_operand:DI 1 "input_operand"
                "r,         YZ,        r,         I,         L,          nF,
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 607c0cd33f2..80434d10247 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4106,7 +4106,7 @@ (define_expand "vsx_splat_<mode>"
 })
 
 (define_insn "vsx_splat_<mode>_reg"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we")
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,we")
 	(vec_duplicate:VSX_D
 	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))]
   "VECTOR_MEM_VSX_P (<MODE>mode)"

-- 
Alan Modra
Australia Development Lab, IBM
Follow-Ups:
- Re: [RS6000] PR89271, gcc.target/powerpc/vsx-simode2.c
  - From: Segher Boessenkool
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]