[committed] Encourage use of VR5500 madd/macc instructions

Richard Sandiford rsandifo@redhat.com
Sun Apr 25 08:30:00 GMT 2004


This patch improves the pre-reload scheduling of multiply-accumulate
instructions.  At the moment it's enabled only for the VR5500, but I'll
soon be submitting other patches related to the VR4120 and VR4130 as well.
See the comments in the patch for the intended effect.

The patch implements two more scheduling hooks, TARGET_SCHED_REORDER
and TARGET_SCHED_VARIABLE_ISSUE.  It's likely that these hooks will want
to do other optimisations in future, so I've tried to structure the code
so that the meat of the work is done by subroutines, with the hooks
themselves just providing a simple framework.

Tested on mips64vrel-elf.  Bootstrapped & regression tested on
mips64{,el}-linux-gnu as a sanity check.  Applied to HEAD.

Richard


	* config/mips/mips-protos.h (mips_linked_madd_p): Declare.
	* config/mips/mips.h (TUNE_MACC_CHAINS): New macro.
	* config/mips/mips.c (TARGET_SCHED_REORDER): Define.
	(TARGET_SCHED_VARIABLE_ISSUE): Define.
	(mips_adjust_cost): Move later in file, next to other sched hooks.
	(mips_macc_chains_last_hilo): New variable.
	(mips_linked_madd_p, mips_macc_chains_record, mips_macc_chains_reorder)
	(mips_promote_ready, mips_sched_reorder, mips_variable_issue): New.
	* config/mips/mips.md (may_clobber_hilo): New attribute.

Index: config/mips/mips-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips-protos.h,v
retrieving revision 1.69
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.69 mips-protos.h
*** config/mips/mips-protos.h	22 Apr 2004 07:02:57 -0000	1.69
--- config/mips/mips-protos.h	25 Apr 2004 07:59:33 -0000
*************** extern const char *mips_output_condition
*** 201,206 ****
--- 201,207 ----
  						   int, int);
  extern const char *mips_output_division (const char *, rtx *);
  extern unsigned int mips_hard_regno_nregs (int, enum machine_mode);
+ extern bool mips_linked_madd_p (rtx, rtx);
  extern const char *mips_emit_prefetch (rtx *);
  
  extern void irix_asm_output_align (FILE *, unsigned);
Index: config/mips/mips.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips.h,v
retrieving revision 1.336
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.336 mips.h
*** config/mips/mips.h	22 Apr 2004 07:02:59 -0000	1.336
--- config/mips/mips.h	25 Apr 2004 07:59:33 -0000
*************** #define TUNE_MIPS7000               (mip
*** 340,345 ****
--- 340,378 ----
  #define TUNE_MIPS9000               (mips_tune == PROCESSOR_R9000)
  #define TUNE_SB1                    (mips_tune == PROCESSOR_SB1)
  
+ /* True if the pre-reload scheduler should try to create chains of
+    multiply-add or multiply-subtract instructions.  For example,
+    suppose we have:
+ 
+ 	t1 = a * b
+ 	t2 = t1 + c * d
+ 	t3 = e * f
+ 	t4 = t3 - g * h
+ 
+    t1 will have a higher priority than t2 and t3 will have a higher
+    priority than t4.  However, before reload, there is no dependence
+    between t1 and t3, and they can often have similar priorities.
+    The scheduler will then tend to prefer:
+ 
+ 	t1 = a * b
+ 	t3 = e * f
+ 	t2 = t1 + c * d
+ 	t4 = t3 - g * h
+ 
+    which stops us from making full use of macc/madd-style instructions.
+    This sort of situation occurs frequently in Fourier transforms and
+    in unrolled loops.
+ 
+    To counter this, the TUNE_MACC_CHAINS code will reorder the ready
+    queue so that chained multiply-add and multiply-subtract instructions
+    appear ahead of any other instruction that is likely to clobber lo.
+    In the example above, if t2 and t3 become ready at the same time,
+    the code ensures that t2 is scheduled first.
+ 
+    Multiply-accumulate instructions are a bigger win for some targets
+    than others, so this macro is defined on an opt-in basis.  */
+ #define TUNE_MACC_CHAINS	    TUNE_MIPS5500
+ 
  #define TARGET_OLDABI		    (mips_abi == ABI_32 || mips_abi == ABI_O64)
  #define TARGET_NEWABI		    (mips_abi == ABI_N32 || mips_abi == ABI_64)
  
Index: config/mips/mips.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips.c,v
retrieving revision 1.406
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.406 mips.c
*** config/mips/mips.c	22 Apr 2004 07:02:57 -0000	1.406
--- config/mips/mips.c	25 Apr 2004 07:59:35 -0000
*************** static bool mips_strict_matching_cpu_nam
*** 226,234 ****
  static bool mips_matching_cpu_name_p (const char *, const char *);
  static const struct mips_cpu_info *mips_parse_cpu (const char *, const char *);
  static const struct mips_cpu_info *mips_cpu_info_from_isa (int);
- static int mips_adjust_cost (rtx, rtx, rtx, int);
  static bool mips_return_in_memory (tree, tree);
  static bool mips_strict_argument_naming (CUMULATIVE_ARGS *);
  static int mips_issue_rate (void);
  static int mips_use_dfa_pipeline_interface (void);
  static int mips_multipass_dfa_lookahead (void);
--- 226,239 ----
  static bool mips_matching_cpu_name_p (const char *, const char *);
  static const struct mips_cpu_info *mips_parse_cpu (const char *, const char *);
  static const struct mips_cpu_info *mips_cpu_info_from_isa (int);
  static bool mips_return_in_memory (tree, tree);
  static bool mips_strict_argument_naming (CUMULATIVE_ARGS *);
+ static void mips_macc_chains_record (rtx);
+ static void mips_macc_chains_reorder (rtx *, int);
+ static void mips_promote_ready (rtx *, int, int);
+ static int mips_sched_reorder (FILE *, int, rtx *, int *, int);
+ static int mips_variable_issue (FILE *, int, rtx, int);
+ static int mips_adjust_cost (rtx, rtx, rtx, int);
  static int mips_issue_rate (void);
  static int mips_use_dfa_pipeline_interface (void);
  static int mips_multipass_dfa_lookahead (void);
*************** #define TARGET_ASM_FUNCTION_EPILOGUE mip
*** 670,675 ****
--- 675,684 ----
  #undef TARGET_ASM_SELECT_RTX_SECTION
  #define TARGET_ASM_SELECT_RTX_SECTION mips_select_rtx_section
  
+ #undef TARGET_SCHED_REORDER
+ #define TARGET_SCHED_REORDER mips_sched_reorder
+ #undef TARGET_SCHED_VARIABLE_ISSUE
+ #define TARGET_SCHED_VARIABLE_ISSUE mips_variable_issue
  #undef TARGET_SCHED_ADJUST_COST
  #define TARGET_SCHED_ADJUST_COST mips_adjust_cost
  #undef TARGET_SCHED_ISSUE_RATE
*************** mips_cpu_info_from_isa (int isa)
*** 9152,9171 ****
    return 0;
  }
  
- /* Adjust the cost of INSN based on the relationship between INSN that
-    is dependent on DEP_INSN through the dependence LINK.  The default
-    is to make no adjustment to COST.
- 
-    On the MIPS, ignore the cost of anti- and output-dependencies.  */
- static int
- mips_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link,
- 		  rtx dep ATTRIBUTE_UNUSED, int cost)
- {
-   if (REG_NOTE_KIND (link) != 0)
-     return 0;	/* Anti or output dependence.  */
-   return cost;
- }
- 
  /* Implement HARD_REGNO_NREGS.  The size of FP registers are controlled
     by UNITS_PER_FPREG.  All other registers are word sized.  */
  
--- 9161,9166 ----
*************** mips_strict_argument_naming (CUMULATIVE_
*** 9200,9205 ****
--- 9195,9334 ----
    return !TARGET_OLDABI;
  }
  
+ /* Return true if INSN is a multiply-add or multiply-subtract
+    instruction and PREV assigns to the accumulator operand.  */
+ 
+ bool
+ mips_linked_madd_p (rtx prev, rtx insn)
+ {
+   rtx x;
+ 
+   x = single_set (insn);
+   if (x == 0)
+     return false;
+ 
+   x = SET_SRC (x);
+ 
+   if (GET_CODE (x) == PLUS
+       && GET_CODE (XEXP (x, 0)) == MULT
+       && reg_set_p (XEXP (x, 1), prev))
+     return true;
+ 
+   if (GET_CODE (x) == MINUS
+       && GET_CODE (XEXP (x, 1)) == MULT
+       && reg_set_p (XEXP (x, 0), prev))
+     return true;
+ 
+   return false;
+ }
+ 
+ /* Used by TUNE_MACC_CHAINS to record the last scheduled instruction
+    that may clobber hi or lo.  */
+ 
+ static rtx mips_macc_chains_last_hilo;
+ 
+ /* A TUNE_MACC_CHAINS helper function.  Record that instruction INSN has
+    been scheduled, updating mips_macc_chains_last_hilo appropriately.  */
+ 
+ static void
+ mips_macc_chains_record (rtx insn)
+ {
+   if (get_attr_may_clobber_hilo (insn))
+     mips_macc_chains_last_hilo = insn;
+ }
+ 
+ /* A TUNE_MACC_CHAINS helper function.  Search ready queue READY, which
+    has NREADY elements, looking for a multiply-add or multiply-subtract
+    instruction that is cumulative with mips_macc_chains_last_hilo.
+    If there is one, promote it ahead of anything else that might
+    clobber hi or lo.  */
+ 
+ static void
+ mips_macc_chains_reorder (rtx *ready, int nready)
+ {
+   int i, j;
+ 
+   if (mips_macc_chains_last_hilo != 0)
+     for (i = nready - 1; i >= 0; i--)
+       if (mips_linked_madd_p (mips_macc_chains_last_hilo, ready[i]))
+ 	{
+ 	  for (j = nready - 1; j > i; j--)
+ 	    if (recog_memoized (ready[j]) >= 0
+ 		&& get_attr_may_clobber_hilo (ready[j]))
+ 	      {
+ 		mips_promote_ready (ready, i, j);
+ 		break;
+ 	      }
+ 	  break;
+ 	}
+ }
+ 
+ /* Remove the instruction at index LOWER from ready queue READY and
+    reinsert it in front of the instruction at index HIGHER.  LOWER must
+    be <= HIGHER.  */
+ 
+ static void
+ mips_promote_ready (rtx *ready, int lower, int higher)
+ {
+   rtx new_head;
+   int i;
+ 
+   new_head = ready[lower];
+   for (i = lower; i < higher; i++)
+     ready[i] = ready[i + 1];
+   ready[i] = new_head;
+ }
+ 
+ /* Implement TARGET_SCHED_REORDER.  */
+ 
+ static int
+ mips_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ 		    rtx *ready, int *nreadyp, int cycle)
+ {
+   if (!reload_completed && TUNE_MACC_CHAINS)
+     {
+       if (cycle == 0)
+ 	mips_macc_chains_last_hilo = 0;
+       if (*nreadyp > 0)
+ 	mips_macc_chains_reorder (ready, *nreadyp);
+     }
+   return mips_issue_rate ();
+ }
+ 
+ /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
+ 
+ static int
+ mips_variable_issue (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
+ 		     rtx insn, int more)
+ {
+   switch (GET_CODE (PATTERN (insn)))
+     {
+     case USE:
+     case CLOBBER:
+       /* Don't count USEs and CLOBBERs against the issue rate.  */
+       break;
+ 
+     default:
+       more--;
+       if (!reload_completed && TUNE_MACC_CHAINS)
+ 	mips_macc_chains_record (insn);
+       break;
+     }
+   return more;
+ }
+ 
+ /* Implement TARGET_SCHED_ADJUST_COST.  We assume that anti and output
+    dependencies have no cost.  */
+ 
+ static int
+ mips_adjust_cost (rtx insn ATTRIBUTE_UNUSED, rtx link,
+ 		  rtx dep ATTRIBUTE_UNUSED, int cost)
+ {
+   if (REG_NOTE_KIND (link) != 0)
+     return 0;
+   return cost;
+ }
+ 
  /* Return the number of instructions that can be issued per cycle.  */
  
  static int
Index: config/mips/mips.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/mips/mips.md,v
retrieving revision 1.235
diff -c -p -F^\([(a-zA-Z0-9_]\|#define\) -r1.235 mips.md
*** config/mips/mips.md	22 Apr 2004 07:03:00 -0000	1.235
--- config/mips/mips.md	25 Apr 2004 07:59:37 -0000
*************** (define_attr "branch_likely" "no,yes"
*** 277,282 ****
--- 277,289 ----
  		 (const_string "yes")
  		 (const_string "no"))))
  
+ ;; True if an instruction might assign to hi or lo when reloaded.
+ ;; This is used by the TUNE_MACC_CHAINS code.
+ (define_attr "may_clobber_hilo" "no,yes"
+   (if_then_else (eq_attr "type" "imul,imadd,idiv,mthilo")
+ 		(const_string "yes")
+ 		(const_string "no")))
+ 
  ;; Describe a user's asm statement.
  (define_asm_attributes
    [(set_attr "type" "multi")])



More information about the Gcc-patches mailing list