This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] Fix PR45352


Hello,

Now that we have agreed on the scheduler not issuing more than issue_rate insns, the PR can actually be solved more easily. I only need the loop that resets sched cycles to be in line with the rest of the scheduler and to advance the state when we have issued issue_rate insns. The only bits from the old patch that are still needed are not setting need_stall to 1 when no variable_issue hook exists, and properly synchronizing the rescheduling regions with the regions whose sched cycles are reset. These do not make us try to issue more than issue_rate insns.

The patch fixes all test cases and is in the process of being bootstrapped and tested on ia64. OK if it passes?

Andrey

2010-10-22 Andrey Belevantsev <abel@ispras.ru>

    PR rtl-optimization/45352
    * sel-sched.c (find_best_expr): Do not set pneed_stall when
    the variable_issue hook is not implemented.
    (fill_insns): Remove dead variable stall_iterations.
    (init_seqno_1): Force EBB start for resetting sched cycles on any
    successor blocks of the rescheduled region.
    (sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit.
    (reset_sched_cycles_in_current_ebb): Add debug printing.
    New variable issued_insns.  Advance state when we have issued
    issue_rate insns.
	
    * gcc.dg/pr45352.c, gcc.dg/pr45352-1.c, gcc.dg/pr45352-2.c: New tests.
    * gcc.target/i386/pr45352.c, gcc.target/i386/pr45352-1.c,
    gcc.target/i386/pr45352-2.c: New tests.
Index: gcc/testsuite/gcc.target/i386/pr45352-2.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr45352-2.c	(revision 0)
--- gcc/testsuite/gcc.target/i386/pr45352-2.c	(revision 0)
***************
*** 0 ****
--- 1,108 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O1 -mtune=amdfam10 -fexpensive-optimizations -fgcse -foptimize-register-move -freorder-blocks -fschedule-insns2 -funswitch-loops -fgcse-las -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+ 
+ typedef char uint8_t;
+ typedef uint32_t;
+ typedef vo_frame_t;
+ struct vo_frame_s
+ {
+     uint8_t base[3];
+   int pitches[3];};
+ typedef struct
+ {
+ void
+     (*proc_macro_block)
+     (void);
+ }
+ xine_xvmc_t;
+ typedef struct
+ {
+   uint8_t ref[2][3];
+ int pmv;
+ }
+ motion_t;
+ typedef struct
+ {
+   uint32_t bitstream_buf;
+   int bitstream_bits;
+     uint8_t * bitstream_ptr;
+     uint8_t dest[3];
+   int pitches[3];
+   int offset;
+     motion_t b_motion;
+     motion_t f_motion;
+   int v_offset;
+   int coded_picture_width;
+   int picture_structure;
+ struct vo_frame_s *current_frame;}
+ picture_t;
+ typedef struct
+ {
+ int xvmc_last_slice_code;}
+ mpeg2dec_accel_t;
+ static bitstream_init (picture_t * picture, void *start)
+ {
+   picture->bitstream_ptr = start;
+ }
+ static slice_xvmc_init (picture_t * picture, int code)
+ {
+   int offset;
+   struct vo_frame_s *forward_reference_frame;
+   offset = picture->picture_structure == 2;
+   picture->pitches[0] = picture->current_frame->pitches[0];
+   picture->pitches[1] = picture->current_frame->pitches[1];
+   if (picture)
+     picture->f_motion.ref
+       [0]
+       [0]
+       = forward_reference_frame->base + (offset ? picture->pitches[0] : 0);
+   picture->f_motion.ref[0][1] = (offset);
+   if (picture->picture_structure)
+       picture->pitches[0] <<= picture->pitches[1] <<= 1;
+   offset = 0;
+   while (1)
+     {
+       if (picture->bitstream_buf >= 0x08000000)
+ 	  break;
+       switch (picture->bitstream_buf >> 12)
+ 	{
+ 	case 8:
+ 	  offset += 33;
+ 		picture->bitstream_buf
+ 		  |=
+ 		  picture->bitstream_ptr[1] << picture->bitstream_bits;
+ 	}
+     }
+   picture->offset = (offset);
+   while (picture->offset - picture->coded_picture_width >= 0)
+     {
+       picture->offset -= picture->coded_picture_width;
+       if (picture->current_frame)
+ 	{
+ 	  picture->dest[0] += picture->pitches[0];
+ 	  picture->dest[1] += picture->pitches[1];
+ 	}
+       picture->v_offset += 16;
+     }
+ }
+ 
+ void
+ mpeg2_xvmc_slice
+   (mpeg2dec_accel_t * accel, picture_t * picture, int code, uint8_t buffer,int mba_inc)
+ {
+   xine_xvmc_t * xvmc = bitstream_init (picture, buffer);
+   slice_xvmc_init (picture, code);
+     while (1)
+       {
+ 	if (picture)
+ 	    break;
+ 	switch (picture->bitstream_buf)
+ 	  {
+ 	  case 8:
+ 	    mba_inc += accel->xvmc_last_slice_code = code;
+ 		  xvmc->proc_macro_block   ();
+ 	    while (mba_inc)
+ 	      ;
+ 	  }
+       }
+ }
Index: gcc/testsuite/gcc.target/i386/pr45352.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr45352.c	(revision 0)
--- gcc/testsuite/gcc.target/i386/pr45352.c	(revision 0)
***************
*** 0 ****
--- 1,25 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O3 -march=amdfam10 -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+ 
+ struct S
+ {
+   struct
+   {
+     int i;
+   } **p;
+   int x;
+   int y;
+ };
+ 
+ extern int baz (void);
+ extern int bar (void *, int, int);
+ 
+ void
+ foo (struct S *s)
+ {
+   int i;
+   for (i = 0; i < s->x; i++)
+     bar (s->p[i], baz (), s->y);
+   for (i = 0; i < s->x; i++)
+     s->p[i]->i++;
+ }
Index: gcc/testsuite/gcc.target/i386/pr45352-1.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr45352-1.c	(revision 0)
--- gcc/testsuite/gcc.target/i386/pr45352-1.c	(revision 0)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-mtune=amdfam10 -O3 -fpeel-loops -fselective-scheduling2 -fsel-sched-pipelining -fPIC" } */
+ 
+ static int FIR_Tab_16[16][16];
+ 
+ void
+ V_Pass_Avrg_16_C_ref (int *Dst, int *Src, int W, int BpS, int Rnd)
+ {
+   while (W-- > 0)
+     {
+       int i, k;
+       int Sums[16] = { };
+       for (i = 0; i < 16; ++i)
+ 	for (k = 0; k < 16; ++k)
+ 	  Sums[k] += FIR_Tab_16[i][k] * Src[i];
+       for (i = 0; i < 16; ++i)
+ 	Dst[i] = Sums[i] + Src[i];
+     }
+ }
Index: gcc/testsuite/gcc.dg/pr45352-1.c
===================================================================
*** gcc/testsuite/gcc.dg/pr45352-1.c	(revision 0)
--- gcc/testsuite/gcc.dg/pr45352-1.c	(revision 0)
***************
*** 0 ****
--- 1,13 ----
+ /* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+ /* { dg-options "-O3 -fschedule-insns -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining -funroll-loops -fprefetch-loop-arrays" } */
+ 
+ void main1 (float *pa, float *pc)
+ {
+   int i;
+   float b[256];
+   float c[256];
+   for (i = 0; i < 256; i++)
+     b[i] = c[i] = pc[i];
+   for (i = 0; i < 256; i++)
+     pa[i] = b[i] * c[i];
+ }
Index: gcc/testsuite/gcc.dg/pr45352-2.c
===================================================================
*** gcc/testsuite/gcc.dg/pr45352-2.c	(revision 0)
--- gcc/testsuite/gcc.dg/pr45352-2.c	(revision 0)
***************
*** 0 ****
--- 1,17 ----
+ /* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+ /* { dg-options "-O1 -freorder-blocks -fschedule-insns2 -funswitch-loops -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+ void
+ foo1 (int *s)
+ {
+   s[0] = s[1];
+   while (s[6] - s[8])
+     {
+       s[6] -= s[8];
+       if (s[8] || s[0])
+ 	{
+ 	  s[3] += s[0];
+ 	  s[4] += s[1];
+ 	}
+       s[7]++;
+     }
+ }
Index: gcc/testsuite/gcc.dg/pr45352.c
===================================================================
*** gcc/testsuite/gcc.dg/pr45352.c	(revision 0)
--- gcc/testsuite/gcc.dg/pr45352.c	(revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+ /* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate" } */
+ 
+ static inline void
+ bmp_iter_next (int *bi, int *bit_no)
+ {
+   *bi >>= 1;
+   *bit_no += 1;
+ }
+ 
+ int bmp_iter_set (int *bi, int *bit_no);
+ void bitmap_initialize_stat (int, ...);
+ void bitmap_clear (void);
+ 
+ void
+ df_md_alloc (int bi, int bb_index, void *bb_info)
+ {
+   for (; bmp_iter_set (&bi, &bb_index); bmp_iter_next (&bi, &bb_index))
+ 
+     if (bb_info)
+       bitmap_clear ();
+     else
+       bitmap_initialize_stat (0);
+ }
Index: gcc/sel-sched.c
===================================================================
*** gcc/sel-sched.c	(revision 166235)
--- gcc/sel-sched.c	(working copy)
*************** find_best_expr (av_set_t *av_vliw_ptr, b
*** 4403,4409 ****
      {
        can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
                                                 can_issue_more);
!       if (can_issue_more == 0)
          *pneed_stall = 1;
      }
  
--- 4403,4410 ----
      {
        can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
                                                 can_issue_more);
!       if (targetm.sched.variable_issue
! 	  && can_issue_more == 0)
          *pneed_stall = 1;
      }
  
*************** fill_insns (fence_t fence, int seqno, il
*** 5511,5517 ****
        blist_t *bnds_tailp1, *bndsp;
        expr_t expr_vliw;
        int need_stall;
!       int was_stall = 0, scheduled_insns = 0, stall_iterations = 0;
        int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
        int max_stall = pipelining_p ? 1 : 3;
        bool last_insn_was_debug = false;
--- 5512,5518 ----
        blist_t *bnds_tailp1, *bndsp;
        expr_t expr_vliw;
        int need_stall;
!       int was_stall = 0, scheduled_insns = 0;
        int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
        int max_stall = pipelining_p ? 1 : 3;
        bool last_insn_was_debug = false;
*************** fill_insns (fence_t fence, int seqno, il
*** 5530,5545 ****
        do
          {
            expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
!           if (!expr_vliw && need_stall)
              {
                /* All expressions required a stall.  Do not recompute av sets
                   as we'll get the same answer (modulo the insns between
                   the fence and its boundary, which will not be available for
!                  pipelining).  */
!               gcc_assert (! expr_vliw && stall_iterations < 2);
!               was_stall++;
! 	      /* If we are going to stall for too long, break to recompute av
  		 sets and bring more insns for pipelining.  */
  	      if (need_stall <= 3)
  		stall_for_cycles (fence, need_stall);
  	      else
--- 5531,5545 ----
        do
          {
            expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
!           if (! expr_vliw && need_stall)
              {
                /* All expressions required a stall.  Do not recompute av sets
                   as we'll get the same answer (modulo the insns between
                   the fence and its boundary, which will not be available for
!                  pipelining).
! 		 If we are going to stall for too long, break to recompute av
  		 sets and bring more insns for pipelining.  */
+               was_stall++;
  	      if (need_stall <= 3)
  		stall_for_cycles (fence, need_stall);
  	      else
*************** init_seqno_1 (basic_block bb, sbitmap vi
*** 6712,6717 ****
--- 6712,6719 ----
  
  	  init_seqno_1 (succ, visited_bbs, blocks_to_reschedule);
  	}
+       else if (blocks_to_reschedule)
+         bitmap_set_bit (forced_ebb_heads, succ->index);
      }
  
    for (insn = BB_END (bb); insn != note; insn = PREV_INSN (insn))
*************** reset_sched_cycles_in_current_ebb (void)
*** 6966,6971 ****
--- 6968,6974 ----
    int last_clock = 0;
    int haifa_last_clock = -1;
    int haifa_clock = 0;
+   int issued_insns = 0;
    insn_t insn;
  
    if (targetm.sched.init)
*************** reset_sched_cycles_in_current_ebb (void)
*** 7020,7033 ****
            haifa_cost = cost;
            after_stall = 1;
          }
! 
        if (haifa_cost > 0)
  	{
  	  int i = 0;
  
  	  while (haifa_cost--)
  	    {
! 	      advance_state (curr_state);
                i++;
  
  	      if (sched_verbose >= 2)
--- 7023,7038 ----
            haifa_cost = cost;
            after_stall = 1;
          }
!       if (haifa_cost == 0
! 	  && issued_insns == issue_rate)
! 	haifa_cost = 1;
        if (haifa_cost > 0)
  	{
  	  int i = 0;
  
  	  while (haifa_cost--)
  	    {
! 	      advance_state (curr_state), issued_insns = 0;
                i++;
  
  	      if (sched_verbose >= 2)
*************** reset_sched_cycles_in_current_ebb (void)
*** 7047,7052 ****
--- 7052,7059 ----
  	    }
  
  	  haifa_clock += i;
+           if (sched_verbose >= 2)
+             sel_print ("haifa clock: %d\n", haifa_clock);
  	}
        else
  	gcc_assert (haifa_cost == 0);
*************** reset_sched_cycles_in_current_ebb (void)
*** 7059,7080 ****
  					    haifa_last_clock, haifa_clock,
  					    &sort_p))
  	  {
! 	    advance_state (curr_state);
  	    haifa_clock++;
  	    if (sched_verbose >= 2)
                {
                  sel_print ("advance_state (dfa_new_cycle)\n");
                  debug_state (curr_state);
                }
            }
  
        if (real_insn)
  	{
  	  cost = state_transition (curr_state, insn);
  
            if (sched_verbose >= 2)
!             debug_state (curr_state);
! 
  	  gcc_assert (cost < 0);
  	}
  
--- 7066,7092 ----
  					    haifa_last_clock, haifa_clock,
  					    &sort_p))
  	  {
! 	    advance_state (curr_state), issued_insns = 0;
  	    haifa_clock++;
  	    if (sched_verbose >= 2)
                {
                  sel_print ("advance_state (dfa_new_cycle)\n");
                  debug_state (curr_state);
+ 		sel_print ("haifa clock: %d\n", haifa_clock + 1);
                }
            }
  
        if (real_insn)
  	{
  	  cost = state_transition (curr_state, insn);
+ 	  issued_insns++;
  
            if (sched_verbose >= 2)
! 	    {
! 	      sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn),
! 			 haifa_clock + 1);
!               debug_state (curr_state);
! 	    }
  	  gcc_assert (cost < 0);
  	}
  
*************** sel_sched_region_1 (void)
*** 7518,7524 ****
                    continue;
                  }
  
!               if (bitmap_clear_bit (blocks_to_reschedule, bb->index))
                  {
                    flist_tail_init (new_fences);
  
--- 7530,7536 ----
                    continue;
                  }
  
!               if (bitmap_bit_p (blocks_to_reschedule, bb->index))
                  {
                    flist_tail_init (new_fences);
  

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]