This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

ia64: Better scheduling for shifts


The Itanium pipeline has a feature which requires some additional scheduling
code.  If shifts (or other MM insns) are scheduled less than 4 cycles away
from the insn that uses the result, there's a 10-cycle stall.  This means we
have to insert NOPs in certain cases.

Bootstrapped on ia64-linux (up to the usual libffi build failure), and tested
with SPEC95 (1-2% improvement).


Bernd

	* ia64.h (MD_SCHED_REORDER, MD_SCHED_REORDER2): Pass CLOCK to called
	function.
	* ia64-protos.h (ia64_sched_reorder): Additional arg for clock.
	* ia64.c (nop_cycles_until): New function.
	(prev_cycle, prev_first, last_issued): New static variables.
	(ia64_sched_reorder): Additional arg for clock.
	On final scheduling pass, emit extra NOPs as needed.
	Set prev_first and prev_cycle.
	(ia64_sched_reorder2): Pass clock arg down to ia64_sched_reorder.
	(ia64_variable_issue): Set last_issued.

Index: config/ia64/ia64-protos.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.27
diff -u -p -r1.27 ia64-protos.h
--- ia64-protos.h	2001/01/24 04:30:47	1.27
+++ ia64-protos.h	2001/04/20 11:51:19
@@ -97,7 +97,7 @@ extern int ia64_issue_rate PARAMS ((void
 extern int ia64_adjust_cost PARAMS ((rtx, rtx, rtx, int));
 extern void ia64_sched_init PARAMS ((FILE *, int, int));
 extern void ia64_sched_finish PARAMS ((FILE *, int));
-extern int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
+extern int ia64_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int, int));
 extern int ia64_sched_reorder2 PARAMS ((FILE *, int, rtx *, int *, int));
 extern int ia64_variable_issue PARAMS ((FILE *, int, rtx, int));
 #endif /* RTX_CODE */
Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.c,v
retrieving revision 1.89
diff -u -p -r1.89 ia64.c
--- ia64.c	2001/04/14 03:49:46	1.89
+++ ia64.c	2001/04/20 11:51:20
@@ -4901,6 +4901,7 @@ static void maybe_rotate PARAMS ((FILE *
 static void finish_last_head PARAMS ((FILE *, int));
 static void rotate_one_bundle PARAMS ((FILE *));
 static void rotate_two_bundles PARAMS ((FILE *));
+static void nop_cycles_until PARAMS ((int, FILE *));
 static void cycle_end_fill_slots PARAMS ((FILE *));
 static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
 static int get_split PARAMS ((const struct ia64_packet *, int));
@@ -5780,16 +5781,125 @@ maybe_rotate (dump)
   sched_data.first_slot = sched_data.cur;
 }

+/* The clock cycle when ia64_sched_reorder was last called.  */
+static int prev_cycle;
+
+/* The first insn scheduled in the previous cycle.  This is the saved
+   value of sched_data.first_slot.  */
+static int prev_first;
+
+/* The last insn that has been scheduled.  At the start of a new cycle
+   we know that we can emit new insns after it; the main scheduling code
+   has already emitted a cycle_display insn after it and is using that
+   as its current last insn.  */
+static rtx last_issued;
+
+/* Emit NOPs to fill the delay between PREV_CYCLE and CLOCK_VAR.  Used to
+   pad out the delay between MM (shifts, etc.) and integer operations.  */
+
+static void
+nop_cycles_until (clock_var, dump)
+     int clock_var;
+     FILE *dump;
+{
+  int prev_clock = prev_cycle;
+  int cycles_left = clock_var - prev_clock;
+
+  /* Finish the previous cycle; pad it out with NOPs.  */
+  if (sched_data.cur == 3)
+    {
+      rtx t = gen_insn_group_barrier (GEN_INT (3));
+      last_issued = emit_insn_after (t, last_issued);
+      maybe_rotate (dump);
+    }
+  else if (sched_data.cur > 0)
+    {
+      int need_stop = 0;
+      int split = itanium_split_issue (sched_data.packet, prev_first);
+
+      if (sched_data.cur < 3 && split > 3)
+	{
+	  split = 3;
+	  need_stop = 1;
+	}
+
+      if (split > sched_data.cur)
+	{
+	  int i;
+	  for (i = sched_data.cur; i < split; i++)
+	    {
+	      rtx t;
+
+	      t = gen_nop_type (sched_data.packet->t[i]);
+	      last_issued = emit_insn_after (t, last_issued);
+	      sched_data.types[i] = sched_data.packet->t[sched_data.cur];
+	      sched_data.insns[i] = last_issued;
+	      sched_data.stopbit[i] = 0;
+	    }
+	  sched_data.cur = split;
+	}
+
+      if (! need_stop && sched_data.cur > 0 && sched_data.cur < 6
+	  && cycles_left > 1)
+	{
+	  int i;
+	  for (i = sched_data.cur; i < 6; i++)
+	    {
+	      rtx t;
+
+	      t = gen_nop_type (sched_data.packet->t[i]);
+	      last_issued = emit_insn_after (t, last_issued);
+	      sched_data.types[i] = sched_data.packet->t[sched_data.cur];
+	      sched_data.insns[i] = last_issued;
+	      sched_data.stopbit[i] = 0;
+	    }
+	  sched_data.cur = 6;
+	  cycles_left--;
+	  need_stop = 1;
+	}
+
+      if (need_stop || sched_data.cur == 6)
+	{
+	  rtx t = gen_insn_group_barrier (GEN_INT (3));
+	  last_issued = emit_insn_after (t, last_issued);
+	}
+      maybe_rotate (dump);
+    }
+
+  cycles_left--;
+  while (cycles_left > 0)
+    {
+      rtx t = gen_bundle_selector (GEN_INT (0));
+      last_issued = emit_insn_after (t, last_issued);
+      t = gen_nop_type (TYPE_M);
+      last_issued = emit_insn_after (t, last_issued);
+      t = gen_nop_type (TYPE_I);
+      last_issued = emit_insn_after (t, last_issued);
+      if (cycles_left > 1)
+	{
+	  t = gen_insn_group_barrier (GEN_INT (2));
+	  last_issued = emit_insn_after (t, last_issued);
+	  cycles_left--;
+	}
+      t = gen_nop_type (TYPE_I);
+      last_issued = emit_insn_after (t, last_issued);
+      t = gen_insn_group_barrier (GEN_INT (3));
+      last_issued = emit_insn_after (t, last_issued);
+      cycles_left--;
+    }
+}
+
 /* We are about to being issuing insns for this clock cycle.
    Override the default sort algorithm to better slot instructions.  */

 int
-ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
+ia64_sched_reorder (dump, sched_verbose, ready, pn_ready,
+		    reorder_type, clock_var)
      FILE *dump ATTRIBUTE_UNUSED;
      int sched_verbose ATTRIBUTE_UNUSED;
      rtx *ready;
      int *pn_ready;
-     int reorder_type;
+     int reorder_type, clock_var;
 {
   int n_ready = *pn_ready;
   rtx *e_ready = ready + n_ready;
@@ -5802,6 +5912,38 @@ ia64_sched_reorder (dump, sched_verbose,
       dump_current_packet (dump);
     }

+  if (reorder_type == 0 && clock_var > 0 && ia64_final_schedule)
+    {
+      for (insnp = ready; insnp < e_ready; insnp++)
+	{
+	  rtx insn = *insnp;
+	  enum attr_itanium_class t = ia64_safe_itanium_class (insn);
+	  if (t == ITANIUM_CLASS_IALU || t == ITANIUM_CLASS_ISHF
+	      || t == ITANIUM_CLASS_ILOG
+	      || t == ITANIUM_CLASS_LD || t == ITANIUM_CLASS_ST)
+	    {
+	      rtx link;
+	      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+		if (REG_NOTE_KIND (link) != REG_DEP_OUTPUT
+		    && REG_NOTE_KIND (link) != REG_DEP_ANTI)
+		  {
+		    rtx other = XEXP (link, 0);
+		    enum attr_itanium_class t0 = ia64_safe_itanium_class (other);
+		    if (t0 == ITANIUM_CLASS_MMSHF
+			|| t0 == ITANIUM_CLASS_MMMUL)
+		      {
+			nop_cycles_until (clock_var, sched_verbose ? dump : NULL);
+			goto out;
+		      }
+		  }
+	    }
+	}
+    }
+ out:
+
+  prev_first = sched_data.first_slot;
+  prev_cycle = clock_var;
+
   if (reorder_type == 0)
     maybe_rotate (sched_verbose ? dump : NULL);

@@ -5893,7 +6035,7 @@ ia64_sched_reorder2 (dump, sched_verbose
      int sched_verbose ATTRIBUTE_UNUSED;
      rtx *ready;
      int *pn_ready;
-     int clock_var ATTRIBUTE_UNUSED;
+     int clock_var;
 {
   if (sched_data.last_was_stop)
     return 0;
@@ -5977,7 +6119,8 @@ ia64_sched_reorder2 (dump, sched_verbose

   if (*pn_ready > 0)
     {
-      int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
+      int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1,
+				     clock_var);
       if (more)
 	return more;
       /* Did we schedule a stop?  If so, finish this cycle.  */
@@ -6005,6 +6148,8 @@ ia64_variable_issue (dump, sched_verbose
      int can_issue_more ATTRIBUTE_UNUSED;
 {
   enum attr_type t = ia64_safe_type (insn);
+
+  last_issued = insn;

   if (sched_data.last_was_stop)
     {
Index: config/ia64/ia64.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.h,v
retrieving revision 1.61
diff -u -p -r1.61 ia64.h
--- ia64.h	2001/03/16 05:21:42	1.61
+++ ia64.h	2001/04/20 11:51:21
@@ -2827,10 +2827,10 @@ do {									\
   ia64_sched_init (DUMP, SCHED_VERBOSE, MAX_READY)

 #define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
-  (CIM) = ia64_sched_reorder (DUMP, SCHED_VERBOSE, READY, &N_READY, 0)
+  (CIM) = ia64_sched_reorder (DUMP, SCHED_VERBOSE, READY, &N_READY, 0, CLOCK)

 #define MD_SCHED_REORDER2(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
-  (CIM) = ia64_sched_reorder2 (DUMP, SCHED_VERBOSE, READY, &N_READY, 1)
+  (CIM) = ia64_sched_reorder2 (DUMP, SCHED_VERBOSE, READY, &N_READY, CLOCK)

 #define MD_SCHED_FINISH(DUMP, SCHED_VERBOSE) \
   ia64_sched_finish (DUMP, SCHED_VERBOSE)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]