This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[itanium-sched-branch] Patch for one more improvement of itanium2 gcc


  The following patch significantly increases compilation speed for
Itanium2.  I've just committed it to the branch.

With this patch, Itanium2 gcc on itanium-sched-branch compiles 55%
faster than the one at itanium-sched-branchpoint (the speedup was 20%
before the patch).  The current state of the compiler on the branch is:

                                Itanium (733Mhz)

                             branchpoint          branch        speedup
SPECInt2000 (without eon)
compilation (user) time      28m30.674s         19m24.990s      46.8%

SPECINT2000               branchpoint                    branch
-------------------------------------------------------------------------
164.gzip        1400       797       176*     1400       792      177*
175.vpr         1400       731       192*     1400       689      203*
176.gcc         1100       467       236*     1100       468      235*
181.mcf         1800      1271       142*     1800      1253      144*
186.crafty      1000       403       248*     1000       411      243*
197.parser      1800      1074       168*     1800      1053      171*
252.eon         1300       870       149*     1300       887      147*
253.perlbmk     1800       895       201*     1800       871      207*
254.gap         1100       675       163*     1100       660      167*
255.vortex      1900       819       232*     1900       814      233*
256.bzip2       1500       823       182*     1500       799      188*
300.twolf       3000      1212       247*     3000      1131      265*
Est. SPECint_base2000                191
Est. SPECint2000                                                  195




                                Itanium2 (900Mhz)

                             branchpoint          branch        speedup
SPECInt2000 (without eon)
compilation (user) time      14m1.021s            9m3.559s      54.7%

SPECINT2000               branchpoint       branch(-mcpu=itanium2)
-------------------------------------------------------------------------
164.gzip        1400       406       345*     1400       388      361*
175.vpr         1400       316       444*     1400       308      454*
176.gcc         1100       239       460*     1100       231      477*
181.mcf         1800       714       252*     1800       722      249*
186.crafty      1000       208       480*     1000       201      497*
197.parser      1800       492       366*     1800       489      368*
252.eon         1300       475       274*     1300       476      273*
253.perlbmk     1800       401       449*     1800       388      463*
254.gap         1100       337       326*     1100       332      331*
255.vortex      1900       373       509*     1900       371      512*
256.bzip2       1500       414       362*     1500       399      376*
300.twolf       3000       592       506*     3000       536      559*
Est. SPECint_base2000                388
Est. SPECint2000                                                  399


  Gcc for Itanium2 is still slower than the Intel compiler (the Intel
compiler compiles the SPECInt2000 tests, except eon, in 8m45.485s),
but it is very close.

  I don't see any further possibilities for speeding up insn scheduling
and bundling.  I'll prepare the branch for review and for merging into
the mainline.

2002-11-19  Vladimir Makarov  <vmakarov@redhat.com>

        * haifa-sched.c (choice_entry): New structure.
        (choice_stack, cycle_issued_insns): New variables.
        (max_issue): Rewrite it.
        (choose_ready): Set up ready_try for unknown insns too.
        (schedule_block): Allocate and free choice_stack.  Set up
        and modify cycle_issued_insns.

        * config/ia64/ia64.c (issue_nops_and_insn): Combine insn issue
        with and without filling the bundle.
        (bundling): Combine calls of issue_nops_and_insn.
        
Vlad
Index: haifa-sched.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/haifa-sched.c,v
retrieving revision 1.211.10.3
diff -c -p -r1.211.10.3 haifa-sched.c
*** haifa-sched.c	2 Oct 2002 14:27:55 -0000	1.211.10.3
--- haifa-sched.c	19 Nov 2002 19:58:13 -0000
*************** static rtx move_insn PARAMS ((rtx, rtx))
*** 364,370 ****
     on the first cycle.  It is used only for DFA based scheduler.  */
  static rtx ready_element PARAMS ((struct ready_list *, int));
  static rtx ready_remove PARAMS ((struct ready_list *, int));
! static int max_issue PARAMS ((struct ready_list *, state_t, int *));
  
  static rtx choose_ready PARAMS ((struct ready_list *));
  
--- 364,370 ----
     on the first cycle.  It is used only for DFA based scheduler.  */
  static rtx ready_element PARAMS ((struct ready_list *, int));
  static rtx ready_remove PARAMS ((struct ready_list *, int));
! static int max_issue PARAMS ((struct ready_list *, int *));
  
  static rtx choose_ready PARAMS ((struct ready_list *));
  
*************** ready_element (ready, index)
*** 1015,1022 ****
--- 1015,1024 ----
       struct ready_list *ready;
       int index;
  {
+ #ifdef ENABLE_CHECKING
    if (ready->n_ready == 0 || index >= ready->n_ready)
      abort ();
+ #endif
    return ready->vec[ready->first - index];
  }
  
*************** move_insn (insn, last)
*** 1757,1843 ****
    return retval;
  }
  
  /* The following function returns maximal (or close to maximal) number
     of insns which can be issued on the same cycle and one of which
!    insns is insns with the best rank (the last insn in READY).  To
     make this function tries different samples of ready insns.  READY
     is current queue `ready'.  Global array READY_TRY reflects what
!    insns are already issued in this try.  STATE is current processor
!    state.  If the function returns nonzero, INDEX will contain index
     of the best insn in READY.  The following function is used only for
     first cycle multipass scheduling.  */
- 
  static int
! max_issue (ready, state, index)
!      struct ready_list *ready;
!      state_t state;
!      int *index;
  {
!   int i, best, n, temp_index, delay;
!   state_t temp_state;
    rtx insn;
-   int max_lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) ();
  
!   if (state_dead_lock_p (state))
!     return 0;
! 
!   temp_state = alloca (dfa_state_size);
    best = 0;
!   
!   for (i = 0; i < ready->n_ready; i++)
      if (!ready_try [i])
!       {
! 	insn = ready_element (ready, i);
! 	
! 	if (INSN_CODE (insn) < 0)
! 	  continue;
! 	
! 	memcpy (temp_state, state, dfa_state_size);
! 	
! 	delay = state_transition (temp_state, insn);
! 	
! 	if (delay == 0)
! 	  {
! 	    if (!targetm.sched.dfa_bubble)
! 	      continue;
! 	    else
! 	      {
! 		int j;
! 		rtx bubble;
! 		
! 		for (j = 0;
! 		     (bubble = (*targetm.sched.dfa_bubble) (j)) != NULL_RTX;
! 		     j++)
! 		  if (state_transition (temp_state, bubble) < 0
! 		      && state_transition (temp_state, insn) < 0)
! 		    break;
! 		
! 		if (bubble == NULL_RTX)
! 		  continue;
! 	      }
! 	  }
! 	else if (delay > 0)
! 	  continue;
! 	
! 	--max_lookahead;
! 	
! 	if (max_lookahead < 0)
! 	  break;
! 	
! 	ready_try [i] = 1;
! 
! 	n = max_issue (ready, temp_state, &temp_index);
! 	if (n > 0 || ready_try[0])
! 	  n += 1;
! 
! 	if (best < n)
! 	  {
! 	    best = n;
! 	    *index = i;
! 	  }
! 	ready_try [i] = 0;
!       }
!   
    return best;
  }
  
--- 1759,1862 ----
    return retval;
  }
  
+ /* The following structure describe an entry of the stack of choices.  */
+ struct choice_entry
+ {
+   /* Ordinal number of the issued insn in the ready queue.  */
+   int index;
+   /* The number of the rest insns whose issues we should try.  */
+   int rest;
+   /* The number of issued essential insns.  */
+   int n;
+   /* State after issuing the insn.  */
+   state_t state;
+ };
+ 
+ /* The following array is used to implement a stack of choices used in
+    function max_issue.  */
+ static struct choice_entry *choice_stack;
+ 
+ /* The following variable value is number of essential insns issued on
+    the current cycle.  An insn is essential one if it changes the
+    processors state.  */
+ static int cycle_issued_insns;
+ 
  /* The following function returns maximal (or close to maximal) number
     of insns which can be issued on the same cycle and one of which
!    insns is insns with the best rank (the first insn in READY).  To
     make this function tries different samples of ready insns.  READY
     is current queue `ready'.  Global array READY_TRY reflects what
!    insns are already issued in this try.  INDEX will contain index
     of the best insn in READY.  The following function is used only for
     first cycle multipass scheduling.  */
  static int
! max_issue (ready, index)
!   struct ready_list *ready;
!   int *index;
  {
!   int n, i, all, n_ready, lookahead, best, delay;
!   struct choice_entry *top;
    rtx insn;
  
!   lookahead = (*targetm.sched.first_cycle_multipass_dfa_lookahead) ();
    best = 0;
!   memcpy (choice_stack->state, curr_state, dfa_state_size);
!   top = choice_stack;
!   top->rest = lookahead;
!   top->n = 0;
!   n_ready = ready->n_ready;
!   for (all = i = 0; i < n_ready; i++)
      if (!ready_try [i])
!       all++;
!   i = 0;
!   for (;;)
!     {
!       if (top->rest == 0 || i >= n_ready)
! 	{
! 	  if (top == choice_stack)
! 	    break;
! 	  if (best < top - choice_stack && ready_try [0])
! 	    {
! 	      best = top - choice_stack;
! 	      *index = choice_stack [1].index;
! 	      if (top->n == issue_rate - cycle_issued_insns || best == all)
! 		break;
! 	    }
! 	  i = top->index;
! 	  ready_try [i] = 0;
! 	  top--;
! 	  memcpy (curr_state, top->state, dfa_state_size);
! 	}
!       else if (!ready_try [i])
! 	{
! 	  insn = ready_element (ready, i);
! 	  delay = state_transition (curr_state, insn);
! 	  if (delay < 0)
! 	    {
! 	      if (state_dead_lock_p (curr_state))
! 		top->rest = 0;
! 	      else
! 		top->rest--;
! 	      n = top->n;
! 	      if (memcmp (top->state, curr_state, dfa_state_size) != 0)
! 		n++;
! 	      top++;
! 	      top->rest = lookahead;
! 	      top->index = i;
! 	      top->n = n;
! 	      memcpy (top->state, curr_state, dfa_state_size);
! 	      ready_try [i] = 1;
! 	      i = -1;
! 	    }
! 	}
!       i++;
!     }
!   while (top != choice_stack)
!     {
!       ready_try [top->index] = 0;
!       top--;
!     }
!   memcpy (curr_state, choice_stack->state, dfa_state_size);
    return best;
  }
  
*************** choose_ready (ready)
*** 1858,1872 ****
        int index, i;
        rtx insn;
  
!       if (targetm.sched.first_cycle_multipass_dfa_lookahead_guard)
! 	for (i = 1; i < ready->n_ready; i++)
! 	  {
! 	    insn = ready_element (ready, i);
! 	    ready_try [i]
! 	      = !((*targetm.sched.first_cycle_multipass_dfa_lookahead_guard)
! 		  (insn));
! 	  }
!       if (max_issue (ready, curr_state, &index) == 0)
  	return ready_remove_first (ready);
        else
  	return ready_remove (ready, index);
--- 1877,1894 ----
        int index, i;
        rtx insn;
  
!       insn = ready_element (ready, 0);
!       if (INSN_CODE (insn) < 0)
! 	return ready_remove_first (ready);
!       for (i = 1; i < ready->n_ready; i++)
! 	{
! 	  insn = ready_element (ready, i);
! 	  ready_try [i]
! 	    = (INSN_CODE (insn) < 0
! 	       || (targetm.sched.first_cycle_multipass_dfa_lookahead_guard
! 		   && !(*targetm.sched.first_cycle_multipass_dfa_lookahead_guard) (insn)));
! 	}
!       if (max_issue (ready, &index) == 0)
  	return ready_remove_first (ready);
        else
  	return ready_remove (ready, index);
*************** schedule_block (b, rgn_n_insns)
*** 1894,1900 ****
       int rgn_n_insns;
  {
    struct ready_list ready;
!   int first_cycle_insn_p;
    int can_issue_more;
    state_t temp_state = NULL;  /* It is used for multipass scheduling.  */
    int sort_p;
--- 1916,1922 ----
       int rgn_n_insns;
  {
    struct ready_list ready;
!   int i, first_cycle_insn_p;
    int can_issue_more;
    state_t temp_state = NULL;  /* It is used for multipass scheduling.  */
    int sort_p;
*************** schedule_block (b, rgn_n_insns)
*** 1949,1954 ****
--- 1971,1981 ----
        temp_state = alloca (dfa_state_size);
        ready_try = (char *) xmalloc ((rgn_n_insns + 1) * sizeof (char));
        memset (ready_try, 0, (rgn_n_insns + 1) * sizeof (char));
+       choice_stack
+ 	= (struct choice_entry *) xmalloc ((rgn_n_insns + 1)
+ 					   * sizeof (struct choice_entry));
+       for (i = 0; i <= rgn_n_insns; i++)
+ 	choice_stack[i].state = (state_t) xmalloc (dfa_state_size);
      }
  
    (*current_sched_info->init_ready_list) (&ready);
*************** schedule_block (b, rgn_n_insns)
*** 2023,2028 ****
--- 2050,2056 ----
  	can_issue_more = issue_rate;
  
        first_cycle_insn_p = 1;
+       cycle_issued_insns = 0;
        for (;;)
  	{
  	  rtx insn;
*************** schedule_block (b, rgn_n_insns)
*** 2170,2176 ****
  
  	  if (targetm.sched.use_dfa_pipeline_interface
  	      && (*targetm.sched.use_dfa_pipeline_interface) ())
! 	    memcpy (curr_state, temp_state, dfa_state_size);
  	    
  	  if (targetm.sched.variable_issue)
  	    can_issue_more =
--- 2198,2208 ----
  
  	  if (targetm.sched.use_dfa_pipeline_interface
  	      && (*targetm.sched.use_dfa_pipeline_interface) ())
! 	    {
! 	      if (memcmp (curr_state, temp_state, dfa_state_size) != 0)
! 		cycle_issued_insns++;
! 	      memcpy (curr_state, temp_state, dfa_state_size);
! 	    }
  	    
  	  if (targetm.sched.variable_issue)
  	    can_issue_more =
*************** schedule_block (b, rgn_n_insns)
*** 2286,2292 ****
  
    if (targetm.sched.use_dfa_pipeline_interface
        && (*targetm.sched.use_dfa_pipeline_interface) ())
!     free (ready_try);
  }
  
  /* Set_priorities: compute priority of each insn in the block.  */
--- 2318,2329 ----
  
    if (targetm.sched.use_dfa_pipeline_interface
        && (*targetm.sched.use_dfa_pipeline_interface) ())
!     {
!       free (ready_try);
!       for (i = 0; i <= rgn_n_insns; i++)
! 	free (choice_stack [i].state);
!       free (choice_stack);
!     }
  }
  
  /* Set_priorities: compute priority of each insn in the block.  */
Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.184.4.9
diff -c -p -r1.184.4.9 ia64.c
*** config/ia64/ia64.c	17 Oct 2002 22:28:37 -0000	1.184.4.9
--- config/ia64/ia64.c	19 Nov 2002 19:58:13 -0000
*************** static void finish_bundle_state_table PA
*** 193,199 ****
  static int try_issue_nops PARAMS ((struct bundle_state *, int));
  static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
  static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
! 					     rtx, int));
  static int get_max_pos PARAMS ((state_t));
  static int get_template PARAMS ((state_t, int));
  
--- 193,199 ----
  static int try_issue_nops PARAMS ((struct bundle_state *, int));
  static int try_issue_insn PARAMS ((struct bundle_state *, rtx));
  static void issue_nops_and_insn PARAMS ((struct bundle_state *, int,
! 					 rtx, int));
  static int get_max_pos PARAMS ((state_t));
  static int get_template PARAMS ((state_t, int));
  
*************** insert_bundle_state (bundle_state)
*** 6043,6049 ****
        *bundle_state = temp;
      }
    return FALSE;
- 
  }
  
  /* Start work with the hash table.  */
--- 6043,6048 ----
*************** try_issue_insn (curr_state, insn)
*** 6109,6117 ****
  
  /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
     starting with ORIGINATOR without advancing processor cycle.  If
!    TRY_BUNDLE_END_P is TRUE, the function tries to issue nops to fill
!    all bundle. If it was successful, the function creates new bundle
!    state and insert into the hash table and into
     `index_to_bundle_states'.  */
  
  static void
--- 6108,6116 ----
  
  /* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
     starting with ORIGINATOR without advancing processor cycle.  If
!    TRY_BUNDLE_END_P is TRUE, the function also tries to issue nops to
!    fill all bundle. If it was successful, the function creates new
!    bundle state and insert into the hash table and into
     `index_to_bundle_states'.  */
  
  static void
*************** issue_nops_and_insn (originator, before_
*** 6182,6193 ****
    if (ia64_safe_type (insn) == TYPE_B)
      curr_state->branch_deviation
        += 2 - (curr_state->accumulated_insns_num - 1) % 3;
!   if (try_bundle_end_p)
      {
!       if (curr_state->accumulated_insns_num % 3 == 0)
  	{
! 	  free_bundle_state (curr_state);
! 	  return;
  	}
        if (!try_issue_nops (curr_state,
  			   3 - curr_state->accumulated_insns_num % 3))
--- 6181,6203 ----
    if (ia64_safe_type (insn) == TYPE_B)
      curr_state->branch_deviation
        += 2 - (curr_state->accumulated_insns_num - 1) % 3;
!   if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
      {
!       if (insert_bundle_state (curr_state))
  	{
! 	  state_t dfa_state;
! 	  struct bundle_state *curr_state1;
! 	  struct bundle_state *allocated_states_chain;
! 
! 	  curr_state1 = get_free_bundle_state ();
! 	  dfa_state = curr_state1->dfa_state;
! 	  allocated_states_chain = curr_state1->allocated_states_chain;
! 	  *curr_state1 = *curr_state;
! 	  curr_state1->dfa_state = dfa_state;
! 	  curr_state1->allocated_states_chain = allocated_states_chain;
! 	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
! 		  dfa_state_size);
! 	  curr_state = curr_state1;
  	}
        if (!try_issue_nops (curr_state,
  			   3 - curr_state->accumulated_insns_num % 3))
*************** issue_nops_and_insn (originator, before_
*** 6198,6207 ****
  	+= 3 - curr_state->accumulated_insns_num % 3;
      }
    if (!insert_bundle_state (curr_state))
!     {
!       free_bundle_state (curr_state);
!       return;
!     }
  }
  
  /* The following function returns position in the two window bundle
--- 6208,6215 ----
  	+= 3 - curr_state->accumulated_insns_num % 3;
      }
    if (!insert_bundle_state (curr_state))
!     free_bundle_state (curr_state);
!   return;
  }
  
  /* The following function returns position in the two window bundle
*************** bundling (dump, verbose, prev_head_insn,
*** 6322,6331 ****
    struct bundle_state *curr_state, *next_state, *best_state;
    rtx insn, next_insn;
    int insn_num;
!   int i;
    int pos, max_pos, template0, template1;
    rtx b;
    rtx nop;
  
    insn_num = 0;
    for (insn = NEXT_INSN (prev_head_insn);
--- 6330,6340 ----
    struct bundle_state *curr_state, *next_state, *best_state;
    rtx insn, next_insn;
    int insn_num;
!   int i, bundle_end_p;
    int pos, max_pos, template0, template1;
    rtx b;
    rtx nop;
+   enum attr_type type;
  
    insn_num = 0;
    for (insn = NEXT_INSN (prev_head_insn);
*************** bundling (dump, verbose, prev_head_insn,
*** 6392,6415 ****
  	   curr_state != NULL;
  	   curr_state = next_state)
  	{
  	  next_state = curr_state->next;
! 	  if (next_insn == NULL_RTX
! 	      || (GET_MODE (next_insn) == TImode
! 		  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier))
! 	    {
! 	      if (ia64_safe_type (insn) == TYPE_F
! 		  || ia64_safe_type (insn) == TYPE_L)
! 		issue_nops_and_insn (curr_state, 2, insn, TRUE);
! 	      issue_nops_and_insn (curr_state, 1, insn, TRUE);
! 	      issue_nops_and_insn (curr_state, 0, insn, TRUE);
! 	    }
! 	  if (ia64_safe_type (insn) == TYPE_F
! 	      || ia64_safe_type (insn) == TYPE_B
! 	      || ia64_safe_type (insn) == TYPE_L
! 	      || ia64_safe_type (insn) == TYPE_S)
! 	    issue_nops_and_insn (curr_state, 2, insn, FALSE);
! 	  issue_nops_and_insn (curr_state, 1, insn, FALSE);
! 	  issue_nops_and_insn (curr_state, 0, insn, FALSE);
  	}
        if (index_to_bundle_states [insn_num] == NULL)
  	abort ();
--- 6401,6418 ----
  	   curr_state != NULL;
  	   curr_state = next_state)
  	{
+ 	  pos = curr_state->accumulated_insns_num % 3;
+ 	  type = ia64_safe_type (insn);
  	  next_state = curr_state->next;
! 	  bundle_end_p
! 	    = (next_insn == NULL_RTX
! 	       || (GET_MODE (next_insn) == TImode
! 		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
! 	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
! 	      || type == TYPE_S)
! 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p);
! 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p);
! 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p);
  	}
        if (index_to_bundle_states [insn_num] == NULL)
  	abort ();





Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]