This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] PR/19653: fld used for reloads in sse mode


Applied on top of the regclass patch at http://gcc.gnu.org/ml/gcc-patches/2005-07/msg00914.html, and after removing the `#' regclass hints from the MD file, this patch is enough to remove all fld's from Richard Guenther's libsse2. The hints can be removed with

sed -i 's/#[^",\][^",]*\([",]\)/\1/g' i386.md

All three patches are necessary.

---

Luckily, no reload surgery is needed, most of the changes are localized in the i386 target.

The patch is composed of several parts:

1) it modifies PREFERRED_RELOAD_CLASS to return NO_REGS for floating-point constants if math will be done in SSE registers: this mimics what is done for SSE vector constants.

2) it modifies PREFERRED_RELOAD_CLASS to return a subclass of SSE_REGS for floating-point rtx's that are not constant, if math will be done in SSE registers.

3) it defines a PREFERRED_OUTPUT_RELOAD_CLASS macro that constrains registers to the correct class, depending on the -mfpmath setting.

4) it makes find_reloads obey PREFERRED_OUTPUT_RELOAD_CLASS to compute the badness of an alternative.

Finally, this patch also includes a fix to push_reload, so that it does not obey PREFERRED_*_RELOAD_CLASS when the macro returns NO_REGS but NO_REGS is not a valid answer for that reload. I think it is correct, because ix86_preferred_reload_class is very liberal in returning NO_REGS. It was enough to move

   if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
     return NO_REGS;
   if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
     return NO_REGS;

near the beginning of ix86_preferred_reload_class, to trigger the assertion near line 1300 of reload.c.

Do you think this is the right approach?

Paolo
2005-07-14  Paolo Bonzini  <bonzini@gnu.org>

	* config/i386/i386.c (ix86_preferred_reload_class): Force
	using SSE registers (and return NO_REGS for floating-point
	constants) if math is done with SSE.
	(ix86_preferred_output_reload_class): New.
	* config/i386/i386-protos.h (ix86_preferred_output_reload_class): New.
	* config/i386/i386.h (PREFERRED_OUTPUT_RELOAD_CLASS): New.
	* reload.c (find_reloads): Take PREFERRED_OUTPUT_RELOAD_CLASS
	into account.
	(push_reload): Allow PREFERRED_*_RELOAD_CLASS to liberally
	return NO_REGS.

Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v
retrieving revision 1.841
diff -c -r1.841 i386.c
*** config/i386/i386.c	11 Jul 2005 09:35:16 -0000	1.841
--- config/i386/i386.c	14 Jul 2005 08:07:55 -0000
***************
*** 15315,15329 ****
  enum reg_class
  ix86_preferred_reload_class (rtx x, enum reg_class class)
  {
    /* We're only allowed to return a subclass of CLASS.  Many of the 
       following checks fail for NO_REGS, so eliminate that early.  */
    if (class == NO_REGS)
      return NO_REGS;
  
    /* All classes can load zeros.  */
!   if (x == CONST0_RTX (GET_MODE (x)))
      return class;
  
    /* Floating-point constants need more complex checks.  */
    if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
      {
--- 15315,15353 ----
  enum reg_class
  ix86_preferred_reload_class (rtx x, enum reg_class class)
  {
+   enum machine_mode mode = GET_MODE (x);
+   bool is_sse_math_mode;
+ 
    /* We're only allowed to return a subclass of CLASS.  Many of the 
       following checks fail for NO_REGS, so eliminate that early.  */
    if (class == NO_REGS)
      return NO_REGS;
  
    /* All classes can load zeros.  */
!   if (x == CONST0_RTX (mode))
      return class;
  
+   /* Let reload make the decision if we are loading: a) a vector constant into
+      an MMX or SSE register b) a floating-point constant into an SSE register
+      that will be used for math.  This is because there are no MMX/SSE
+      load-from-constant instructions.  */
+ 
+   is_sse_math_mode =
+     TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode);
+ 
+   if (CONSTANT_P (x))
+     {
+       if (MAYBE_MMX_CLASS_P (class))
+         return NO_REGS;
+       if (MAYBE_SSE_CLASS_P (class)
+ 	  && (VECTOR_MODE_P (mode) || mode == TImode || is_sse_math_mode))
+         return NO_REGS;
+     }
+ 
+   /* Prefer SSE regs if we only use them from math.  */
+   if (is_sse_math_mode)
+     return reg_class_subset_p (SSE_REGS, class) ? SSE_REGS : NO_REGS;
+ 
    /* Floating-point constants need more complex checks.  */
    if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
      {
***************
*** 15335,15342 ****
  	 zero above.  We only want to wind up preferring 80387 registers if
  	 we plan on doing computation with them.  */
        if (TARGET_80387
! 	  && (TARGET_MIX_SSE_I387 
! 	      || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
  	  && standard_80387_constant_p (x))
  	{
  	  /* Limit class to non-sse.  */
--- 15359,15365 ----
  	 zero above.  We only want to wind up preferring 80387 registers if
  	 we plan on doing computation with them.  */
        if (TARGET_80387
! 	  && TARGET_MIX_SSE_I387 
  	  && standard_80387_constant_p (x))
  	{
  	  /* Limit class to non-sse.  */
***************
*** 15352,15361 ****
  
        return NO_REGS;
      }
-   if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
-     return NO_REGS;
-   if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
-     return NO_REGS;
  
    /* Generally when we see PLUS here, it's the function invariant
       (plus soft-fp const_int).  Which can only be computed into general
--- 15375,15380 ----
***************
*** 15376,15381 ****
--- 15395,15437 ----
  
    return class;
  }
+ 
+ /* Discourage putting floating-point values in SSE registers unless
+    SSE math is being used, and likewise for the 387 registers.  */
+ enum reg_class
+ ix86_preferred_output_reload_class (rtx x, enum reg_class class)
+ {
+   enum reg_class desired_class;
+ 
+   if (class == NO_REGS)
+     return NO_REGS;
+ 
+   if (!FLOAT_MODE_P (GET_MODE (x)) || TARGET_MIX_SSE_I387)
+     return class;
+ 
+   /* Restrict the output reload class to the register bank that
+      we are doing math on.  */
+   if (TARGET_80387
+       && (TARGET_MIX_SSE_I387 
+           || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x)))))
+     {
+       if (class == FP_TOP_SSE_REGS)
+ 	desired_class = FP_TOP_REG;
+       else if (class == FP_SECOND_SSE_REGS)
+ 	desired_class = FP_SECOND_REG;
+       else
+ 	desired_class = FLOAT_REGS;
+     }
+   else
+     desired_class = SSE_REGS;
+ 
+   /* If we would like not to return a subset of CLASS, fail.  If reload
+      knows better, it will override our choice.  */
+   if (!reg_class_subset_p (desired_class, class))
+     return NO_REGS;
+   else
+     return desired_class;
+ }
  
  /* If we are copying between general and FP registers, we need a memory
     location. The same is true for SSE and MMX registers.
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v
retrieving revision 1.440
diff -c -r1.440 i386.h
*** config/i386/i386.h	26 Jun 2005 05:18:34 -0000	1.440
--- config/i386/i386.h	14 Jul 2005 08:07:55 -0000
***************
*** 1294,1299 ****
--- 1294,1305 ----
  #define PREFERRED_RELOAD_CLASS(X, CLASS) \
     ix86_preferred_reload_class ((X), (CLASS))
  
+ /* Discourage putting floating-point values in SSE registers unless
+    SSE math is being used, and likewise for the 387 registers.  */
+ 
+ #define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \
+    ix86_preferred_output_reload_class ((X), (CLASS))
+ 
  /* If we are copying between general and FP registers, we need a memory
     location. The same is true for SSE and MMX registers.  */
  #define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
Index: config/i386/i386-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/i386/i386-protos.h,v
retrieving revision 1.143
diff -c -r1.143 i386-protos.h
*** config/i386/i386-protos.h	29 Jun 2005 17:27:16 -0000	1.143
--- config/i386/i386-protos.h	14 Jul 2005 08:07:55 -0000
***************
*** 188,193 ****
--- 188,194 ----
  extern bool ix86_cannot_change_mode_class (enum machine_mode,
  					   enum machine_mode, enum reg_class);
  extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
+ extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
  extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
  extern int ix86_mode_needed (int, rtx);
  extern void emit_i387_cw_initialization (int);
Index: reload.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/reload.c,v
retrieving revision 1.273
diff -c -r1.273 reload.c
*** reload.c	25 Jun 2005 02:00:53 -0000	1.273
--- reload.c	14 Jul 2005 08:07:56 -0000
***************
*** 1231,1245 ****
  
    /* Narrow down the class of register wanted if that is
       desirable on this machine for efficiency.  */
!   if (in != 0)
!     class = PREFERRED_RELOAD_CLASS (in, class);
  
    /* Output reloads may need analogous treatment, different in detail.  */
  #ifdef PREFERRED_OUTPUT_RELOAD_CLASS
!   if (out != 0)
!     class = PREFERRED_OUTPUT_RELOAD_CLASS (out, class);
  #endif
  
    /* Make sure we use a class that can handle the actual pseudo
       inside any subreg.  For example, on the 386, QImode regs
       can appear within SImode subregs.  Although GENERAL_REGS
--- 1231,1254 ----
  
    /* Narrow down the class of register wanted if that is
       desirable on this machine for efficiency.  */
!   {
!     enum reg_class preferred_class = class;
! 
!     if (in != 0)
!       preferred_class = PREFERRED_RELOAD_CLASS (in, class);
  
    /* Output reloads may need analogous treatment, different in detail.  */
  #ifdef PREFERRED_OUTPUT_RELOAD_CLASS
!     if (out != 0)
!       preferred_class = PREFERRED_OUTPUT_RELOAD_CLASS (out, preferred_class);
  #endif
  
+     /* Discard what the target said if we cannot do it.  */
+     if (preferred_class != NO_REGS
+ 	|| (optional && type == RELOAD_FOR_OUTPUT))
+       class = preferred_class;
+   }
+ 
    /* Make sure we use a class that can handle the actual pseudo
       inside any subreg.  For example, on the 386, QImode regs
       can appear within SImode subregs.  Although GENERAL_REGS
***************
*** 3443,3457 ****
  
  	      /* If we can't reload this value at all, reject this
  		 alternative.  Note that we could also lose due to
! 		 LIMIT_RELOAD_RELOAD_CLASS, but we don't check that
  		 here.  */
  
  	      if (! CONSTANT_P (operand)
! 		  && (enum reg_class) this_alternative[i] != NO_REGS
! 		  && (PREFERRED_RELOAD_CLASS (operand,
! 					      (enum reg_class) this_alternative[i])
! 		      == NO_REGS))
! 		bad = 1;
  
  	      /* Alternative loses if it requires a type of reload not
  		 permitted for this insn.  We can always reload SCRATCH
--- 3452,3477 ----
  
  	      /* If we can't reload this value at all, reject this
  		 alternative.  Note that we could also lose due to
! 		 LIMIT_RELOAD_CLASS, but we don't check that
  		 here.  */
  
  	      if (! CONSTANT_P (operand)
! 		  && (enum reg_class) this_alternative[i] != NO_REGS)
! 		{
! 		  if (PREFERRED_RELOAD_CLASS
! 			(operand, (enum reg_class) this_alternative[i])
! 		      == NO_REGS)
! 		    bad = 1;
! 
! #ifdef PREFERRED_OUTPUT_RELOAD_CLASS
! 		  if (operand_type[i] == RELOAD_FOR_OUTPUT
! 		      && PREFERRED_OUTPUT_RELOAD_CLASS
! 			   (operand, (enum reg_class) this_alternative[i])
! 		         == NO_REGS)
! 		    bad = 1;
! #endif
! 		}
! 
  
  	      /* Alternative loses if it requires a type of reload not
  		 permitted for this insn.  We can always reload SCRATCH

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]