This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[committed] Put ACC_REGS before GR_REGS in the MIPS REG_ALLOC_ORDER


Pretty much as the subject says.  As it stands, if accumulators and GPRs
have equal cost, we prefer to use GPRs, whereas in most cases we ought
to prefer accumulators instead.  E.g. a division or multiplication
result is better allocated to LO, so that we put the MFLO at the point
of use instead of at the point of definition.  It's also needed if
we're to take advantage of the extra accumulators available with
-mdspr2.

This patch fixes the dspr2-MULT{,U}.c regressions, in combination
with the IRA patch I posted last weekend.

Of course, this isn't going to help in all cases.  E.g. if a division
result is used twice, I imagine we'll still prefer to allocate it to
a GPR, and thus put the MFLO immediately after the division (at which
point we rely on the post-reload scheduler to try to move the MFLO as
late as possible).  But it's still a nice easy way of getting what we
want in common cases.

gcc.target/mips/dspr2-MULT{,U}.c currently have four multiplications
and expect three different accumulators to be used.  But there are
so many other instructions (loads, stores, MFLOs and MFHIs) that we
don't really need 3 accumulators to get a good schedule.  I've changed
the tests to check for only 2 accumulators, and reduced each of them
to 2 multiplications to match.

Tested on mipsisa64-elfoabi and applied.

Richard


gcc/
	* config/mips/mips.h (REG_ALLOC_ORDER): Put accumulators first.
	Tweak formatting.
	* config/mips/mips.c (mips_ira_cover_classes): Don't use accumulator
	registers when not optimizing.

gcc/testsuite/
	* gcc.target/mips/dspr2-MULT.c: Just check for $ac1 and $ac2.
	* gcc.target/mips/dspr2-MULTU.c: Likewise.

Index: gcc/config/mips/mips.h
===================================================================
--- gcc/config/mips/mips.h	2008-11-17 22:19:16.000000000 +0000
+++ gcc/config/mips/mips.h	2008-11-17 22:53:01.000000000 +0000
@@ -1925,8 +1925,16 @@ #define SMALL_REGISTER_CLASSES (TARGET_M
    call-saved ones.  (IRA expects this.)  */
 
 #define REG_ALLOC_ORDER							\
-{ /* Call-clobbered GPRs.  */						\
-   1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,		\
+{ /* Accumulator registers.  When GPRs and accumulators have equal	\
+     cost, we generally prefer to use accumulators.  For example,	\
+     a division of multiplication result is better allocated to LO,	\
+     so that we put the MFLO at the point of use instead of at the	\
+     point of definition.  It's also needed if we're to take advantage	\
+     of the extra accumulators available with -mdspr2.  In some cases,	\
+     it can also help to reduce register pressure.  */			\
+  64, 65,176,177,178,179,180,181,					\
+  /* Call-clobbered GPRs.  */						\
+  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,		\
   24, 25, 31,								\
   /* The global pointer.  This is call-clobbered for o32 and o64	\
      abicalls, call-saved for n32 and n64 abicalls, and a program	\
@@ -1936,7 +1944,7 @@ #define REG_ALLOC_ORDER							\
   /* Call-saved GPRs.  */						\
   16, 17, 18, 19, 20, 21, 22, 23, 30,					\
   /* GPRs that can never be exposed to the register allocator.  */	\
-   0, 26, 27, 29,							\
+  0,  26, 27, 29,							\
   /* Call-clobbered FPRs.  */						\
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,	\
   48, 49, 50, 51,							\
@@ -1949,14 +1957,14 @@ #define REG_ALLOC_ORDER							\
   52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,			\
   /* None of the remaining classes have defined call-saved		\
      registers.  */							\
-  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,	\
+  66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,		\
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,	\
   96, 97, 98, 99, 100,101,102,103,104,105,106,107,108,109,110,111,	\
   112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,	\
   128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,	\
   144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,	\
   160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,	\
-  176,177,178,179,180,181,182,183,184,185,186,187			\
+  182,183,184,185,186,187						\
 }
 
 /* ORDER_REGS_FOR_LOCAL_ALLOC is a macro which permits reg_alloc_order
Index: gcc/config/mips/mips.c
===================================================================
--- gcc/config/mips/mips.c	2008-11-17 22:19:16.000000000 +0000
+++ gcc/config/mips/mips.c	2008-11-17 22:56:51.000000000 +0000
@@ -9886,8 +9886,12 @@ mips_ira_cover_classes (void)
   };
 
   /* Don't allow the register allocators to use LO and HI in MIPS16 mode,
-     which has no MTLO or MTHI instructions.  */
-  return TARGET_MIPS16 ? no_acc_classes : acc_classes;
+     which has no MTLO or MTHI instructions.  Also, using GR_AND_ACC_REGS
+     as a cover class only works well when we keep per-register costs.
+     Using it when not optimizing can cause us to think accumulators
+     have the same cost as GPRs in cases where GPRs are actually much
+     cheaper.  */
+  return TARGET_MIPS16 || !optimize ? no_acc_classes : acc_classes;
 }
 
 /* Return the register class required for a secondary register when
Index: gcc/testsuite/gcc.target/mips/dspr2-MULT.c
===================================================================
--- gcc/testsuite/gcc.target/mips/dspr2-MULT.c	2008-11-17 22:19:16.000000000 +0000
+++ gcc/testsuite/gcc.target/mips/dspr2-MULT.c	2008-11-17 22:49:27.000000000 +0000
@@ -5,17 +5,11 @@
 /* { dg-final { scan-assembler "\tmult\t" } } */
 /* { dg-final { scan-assembler "ac1" } } */
 /* { dg-final { scan-assembler "ac2" } } */
-/* { dg-final { scan-assembler "ac3" } } */
 
 typedef long long a64;
-a64 a[4];
-int b[4], c[4];
 
-NOMIPS16 void test ()
+NOMIPS16 a64 test (a64 *a, int *b, int *c)
 {
   a[0] = (a64) b[0] * c[0];
   a[1] = (a64) b[1] * c[1];
-  a[2] = (a64) b[2] * c[2];
-  a[3] = (a64) b[3] * c[3];
 }
-
Index: gcc/testsuite/gcc.target/mips/dspr2-MULTU.c
===================================================================
--- gcc/testsuite/gcc.target/mips/dspr2-MULTU.c	2008-11-17 22:19:16.000000000 +0000
+++ gcc/testsuite/gcc.target/mips/dspr2-MULTU.c	2008-11-17 22:49:27.000000000 +0000
@@ -5,17 +5,11 @@
 /* { dg-final { scan-assembler "\tmultu\t" } } */
 /* { dg-final { scan-assembler "ac1" } } */
 /* { dg-final { scan-assembler "ac2" } } */
-/* { dg-final { scan-assembler "ac3" } } */
 
-typedef long long a64;
-a64 a[4];
-unsigned int b[4], c[4];
+typedef unsigned long long a64;
 
-NOMIPS16 void test ()
+NOMIPS16 a64 test (a64 *a, unsigned int *b, unsigned int *c)
 {
   a[0] = (a64) b[0] * c[0];
   a[1] = (a64) b[1] * c[1];
-  a[2] = (a64) b[2] * c[2];
-  a[3] = (a64) b[3] * c[3];
 }
-


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]