This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix PR45352
- From: Andrey Belevantsev <abel at ispras dot ru>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: "Vladimir N. Makarov" <vmakarov at redhat dot com>
- Date: Wed, 03 Nov 2010 15:18:21 +0300
- Subject: Re: [PATCH] Fix PR45352
- References: <4CC13E91.2000800@ispras.ru>
Hello,
Now that we have agreed that the scheduler should not issue more than
issue_rate insns, the PR can actually be solved more easily. I only need the
loop resetting sched cycles to be in line with the rest of the scheduler and
to advance the state when we have issued issue_rate insns. The only bits from
the old patch that are still needed are not setting need_stall to 1 when no
variable_issue hook exists, and properly synchronizing the regions being
rescheduled with the regions whose cycles are reset. These changes do not make
us try to issue more than issue_rate insns.
The patch fixes all the test cases and is in the process of bootstrap/test on
ia64. OK if it passes?
Andrey
2010-10-22 Andrey Belevantsev <abel@ispras.ru>
PR rtl-optimization/45352
* sel-sched.c (find_best_expr): Do not set pneed_stall when
the variable_issue hook is not implemented.
(fill_insns): Remove dead variable stall_iterations.
(init_seqno_1): Force EBB start for resetting sched cycles on any
successor blocks of the rescheduled region.
(sel_sched_region_1): Use bitmap_bit_p instead of bitmap_clear_bit.
(reset_sched_cycles_in_current_ebb): Add debug printing.
New variable issued_insns. Advance state when we have issued
issue_rate insns.
* gcc.dg/pr45352.c, gcc.dg/pr45352-1.c, gcc.dg/pr45352-2.c: New tests.
* gcc.target/i386/pr45352.c, gcc.target/i386/pr45352-1.c,
gcc.target/i386/pr45352-2.c: New tests.
Index: gcc/testsuite/gcc.target/i386/pr45352-2.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr45352-2.c (revision 0)
--- gcc/testsuite/gcc.target/i386/pr45352-2.c (revision 0)
***************
*** 0 ****
--- 1,108 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O1 -mtune=amdfam10 -fexpensive-optimizations -fgcse -foptimize-register-move -freorder-blocks -fschedule-insns2 -funswitch-loops -fgcse-las -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+
+ typedef char uint8_t;
+ typedef uint32_t;
+ typedef vo_frame_t;
+ struct vo_frame_s
+ {
+ uint8_t base[3];
+ int pitches[3];};
+ typedef struct
+ {
+ void
+ (*proc_macro_block)
+ (void);
+ }
+ xine_xvmc_t;
+ typedef struct
+ {
+ uint8_t ref[2][3];
+ int pmv;
+ }
+ motion_t;
+ typedef struct
+ {
+ uint32_t bitstream_buf;
+ int bitstream_bits;
+ uint8_t * bitstream_ptr;
+ uint8_t dest[3];
+ int pitches[3];
+ int offset;
+ motion_t b_motion;
+ motion_t f_motion;
+ int v_offset;
+ int coded_picture_width;
+ int picture_structure;
+ struct vo_frame_s *current_frame;}
+ picture_t;
+ typedef struct
+ {
+ int xvmc_last_slice_code;}
+ mpeg2dec_accel_t;
+ static bitstream_init (picture_t * picture, void *start)
+ {
+ picture->bitstream_ptr = start;
+ }
+ static slice_xvmc_init (picture_t * picture, int code)
+ {
+ int offset;
+ struct vo_frame_s *forward_reference_frame;
+ offset = picture->picture_structure == 2;
+ picture->pitches[0] = picture->current_frame->pitches[0];
+ picture->pitches[1] = picture->current_frame->pitches[1];
+ if (picture)
+ picture->f_motion.ref
+ [0]
+ [0]
+ = forward_reference_frame->base + (offset ? picture->pitches[0] : 0);
+ picture->f_motion.ref[0][1] = (offset);
+ if (picture->picture_structure)
+ picture->pitches[0] <<= picture->pitches[1] <<= 1;
+ offset = 0;
+ while (1)
+ {
+ if (picture->bitstream_buf >= 0x08000000)
+ break;
+ switch (picture->bitstream_buf >> 12)
+ {
+ case 8:
+ offset += 33;
+ picture->bitstream_buf
+ |=
+ picture->bitstream_ptr[1] << picture->bitstream_bits;
+ }
+ }
+ picture->offset = (offset);
+ while (picture->offset - picture->coded_picture_width >= 0)
+ {
+ picture->offset -= picture->coded_picture_width;
+ if (picture->current_frame)
+ {
+ picture->dest[0] += picture->pitches[0];
+ picture->dest[1] += picture->pitches[1];
+ }
+ picture->v_offset += 16;
+ }
+ }
+
+ void
+ mpeg2_xvmc_slice
+ (mpeg2dec_accel_t * accel, picture_t * picture, int code, uint8_t buffer,int mba_inc)
+ {
+ xine_xvmc_t * xvmc = bitstream_init (picture, buffer);
+ slice_xvmc_init (picture, code);
+ while (1)
+ {
+ if (picture)
+ break;
+ switch (picture->bitstream_buf)
+ {
+ case 8:
+ mba_inc += accel->xvmc_last_slice_code = code;
+ xvmc->proc_macro_block ();
+ while (mba_inc)
+ ;
+ }
+ }
+ }
Index: gcc/testsuite/gcc.target/i386/pr45352.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr45352.c (revision 0)
--- gcc/testsuite/gcc.target/i386/pr45352.c (revision 0)
***************
*** 0 ****
--- 1,25 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O3 -march=amdfam10 -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+
+ struct S
+ {
+ struct
+ {
+ int i;
+ } **p;
+ int x;
+ int y;
+ };
+
+ extern int baz (void);
+ extern int bar (void *, int, int);
+
+ void
+ foo (struct S *s)
+ {
+ int i;
+ for (i = 0; i < s->x; i++)
+ bar (s->p[i], baz (), s->y);
+ for (i = 0; i < s->x; i++)
+ s->p[i]->i++;
+ }
Index: gcc/testsuite/gcc.target/i386/pr45352-1.c
===================================================================
*** gcc/testsuite/gcc.target/i386/pr45352-1.c (revision 0)
--- gcc/testsuite/gcc.target/i386/pr45352-1.c (revision 0)
***************
*** 0 ****
--- 1,19 ----
+ /* { dg-do compile } */
+ /* { dg-options "-mtune=amdfam10 -O3 -fpeel-loops -fselective-scheduling2 -fsel-sched-pipelining -fPIC" } */
+
+ static int FIR_Tab_16[16][16];
+
+ void
+ V_Pass_Avrg_16_C_ref (int *Dst, int *Src, int W, int BpS, int Rnd)
+ {
+ while (W-- > 0)
+ {
+ int i, k;
+ int Sums[16] = { };
+ for (i = 0; i < 16; ++i)
+ for (k = 0; k < 16; ++k)
+ Sums[k] += FIR_Tab_16[i][k] * Src[i];
+ for (i = 0; i < 16; ++i)
+ Dst[i] = Sums[i] + Src[i];
+ }
+ }
Index: gcc/testsuite/gcc.dg/pr45352-1.c
===================================================================
*** gcc/testsuite/gcc.dg/pr45352-1.c (revision 0)
--- gcc/testsuite/gcc.dg/pr45352-1.c (revision 0)
***************
*** 0 ****
--- 1,13 ----
+ /* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+ /* { dg-options "-O3 -fschedule-insns -fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining -funroll-loops -fprefetch-loop-arrays" } */
+
+ void main1 (float *pa, float *pc)
+ {
+ int i;
+ float b[256];
+ float c[256];
+ for (i = 0; i < 256; i++)
+ b[i] = c[i] = pc[i];
+ for (i = 0; i < 256; i++)
+ pa[i] = b[i] * c[i];
+ }
Index: gcc/testsuite/gcc.dg/pr45352-2.c
===================================================================
*** gcc/testsuite/gcc.dg/pr45352-2.c (revision 0)
--- gcc/testsuite/gcc.dg/pr45352-2.c (revision 0)
***************
*** 0 ****
--- 1,17 ----
+ /* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+ /* { dg-options "-O1 -freorder-blocks -fschedule-insns2 -funswitch-loops -fselective-scheduling2 -fsel-sched-pipelining -funroll-all-loops" } */
+ void
+ foo1 (int *s)
+ {
+ s[0] = s[1];
+ while (s[6] - s[8])
+ {
+ s[6] -= s[8];
+ if (s[8] || s[0])
+ {
+ s[3] += s[0];
+ s[4] += s[1];
+ }
+ s[7]++;
+ }
+ }
Index: gcc/testsuite/gcc.dg/pr45352.c
===================================================================
*** gcc/testsuite/gcc.dg/pr45352.c (revision 0)
--- gcc/testsuite/gcc.dg/pr45352.c (revision 0)
***************
*** 0 ****
--- 1,24 ----
+ /* { dg-do compile { target powerpc*-*-* ia64-*-* x86_64-*-* } } */
+ /* { dg-options "-Os -fselective-scheduling2 -fsel-sched-pipelining -fprofile-generate" } */
+
+ static inline void
+ bmp_iter_next (int *bi, int *bit_no)
+ {
+ *bi >>= 1;
+ *bit_no += 1;
+ }
+
+ int bmp_iter_set (int *bi, int *bit_no);
+ void bitmap_initialize_stat (int, ...);
+ void bitmap_clear (void);
+
+ void
+ df_md_alloc (int bi, int bb_index, void *bb_info)
+ {
+ for (; bmp_iter_set (&bi, &bb_index); bmp_iter_next (&bi, &bb_index))
+
+ if (bb_info)
+ bitmap_clear ();
+ else
+ bitmap_initialize_stat (0);
+ }
Index: gcc/sel-sched.c
===================================================================
*** gcc/sel-sched.c (revision 166235)
--- gcc/sel-sched.c (working copy)
*************** find_best_expr (av_set_t *av_vliw_ptr, b
*** 4403,4409 ****
{
can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
can_issue_more);
! if (can_issue_more == 0)
*pneed_stall = 1;
}
--- 4403,4410 ----
{
can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best),
can_issue_more);
! if (targetm.sched.variable_issue
! && can_issue_more == 0)
*pneed_stall = 1;
}
*************** fill_insns (fence_t fence, int seqno, il
*** 5511,5517 ****
blist_t *bnds_tailp1, *bndsp;
expr_t expr_vliw;
int need_stall;
! int was_stall = 0, scheduled_insns = 0, stall_iterations = 0;
int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
int max_stall = pipelining_p ? 1 : 3;
bool last_insn_was_debug = false;
--- 5512,5518 ----
blist_t *bnds_tailp1, *bndsp;
expr_t expr_vliw;
int need_stall;
! int was_stall = 0, scheduled_insns = 0;
int max_insns = pipelining_p ? issue_rate : 2 * issue_rate;
int max_stall = pipelining_p ? 1 : 3;
bool last_insn_was_debug = false;
*************** fill_insns (fence_t fence, int seqno, il
*** 5530,5545 ****
do
{
expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
! if (!expr_vliw && need_stall)
{
/* All expressions required a stall. Do not recompute av sets
as we'll get the same answer (modulo the insns between
the fence and its boundary, which will not be available for
! pipelining). */
! gcc_assert (! expr_vliw && stall_iterations < 2);
! was_stall++;
! /* If we are going to stall for too long, break to recompute av
sets and bring more insns for pipelining. */
if (need_stall <= 3)
stall_for_cycles (fence, need_stall);
else
--- 5531,5545 ----
do
{
expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall);
! if (! expr_vliw && need_stall)
{
/* All expressions required a stall. Do not recompute av sets
as we'll get the same answer (modulo the insns between
the fence and its boundary, which will not be available for
! pipelining).
! If we are going to stall for too long, break to recompute av
sets and bring more insns for pipelining. */
+ was_stall++;
if (need_stall <= 3)
stall_for_cycles (fence, need_stall);
else
*************** init_seqno_1 (basic_block bb, sbitmap vi
*** 6712,6717 ****
--- 6712,6719 ----
init_seqno_1 (succ, visited_bbs, blocks_to_reschedule);
}
+ else if (blocks_to_reschedule)
+ bitmap_set_bit (forced_ebb_heads, succ->index);
}
for (insn = BB_END (bb); insn != note; insn = PREV_INSN (insn))
*************** reset_sched_cycles_in_current_ebb (void)
*** 6966,6971 ****
--- 6968,6974 ----
int last_clock = 0;
int haifa_last_clock = -1;
int haifa_clock = 0;
+ int issued_insns = 0;
insn_t insn;
if (targetm.sched.init)
*************** reset_sched_cycles_in_current_ebb (void)
*** 7020,7033 ****
haifa_cost = cost;
after_stall = 1;
}
!
if (haifa_cost > 0)
{
int i = 0;
while (haifa_cost--)
{
! advance_state (curr_state);
i++;
if (sched_verbose >= 2)
--- 7023,7038 ----
haifa_cost = cost;
after_stall = 1;
}
! if (haifa_cost == 0
! && issued_insns == issue_rate)
! haifa_cost = 1;
if (haifa_cost > 0)
{
int i = 0;
while (haifa_cost--)
{
! advance_state (curr_state), issued_insns = 0;
i++;
if (sched_verbose >= 2)
*************** reset_sched_cycles_in_current_ebb (void)
*** 7047,7052 ****
--- 7052,7059 ----
}
haifa_clock += i;
+ if (sched_verbose >= 2)
+ sel_print ("haifa clock: %d\n", haifa_clock);
}
else
gcc_assert (haifa_cost == 0);
*************** reset_sched_cycles_in_current_ebb (void)
*** 7059,7080 ****
haifa_last_clock, haifa_clock,
&sort_p))
{
! advance_state (curr_state);
haifa_clock++;
if (sched_verbose >= 2)
{
sel_print ("advance_state (dfa_new_cycle)\n");
debug_state (curr_state);
}
}
if (real_insn)
{
cost = state_transition (curr_state, insn);
if (sched_verbose >= 2)
! debug_state (curr_state);
!
gcc_assert (cost < 0);
}
--- 7066,7092 ----
haifa_last_clock, haifa_clock,
&sort_p))
{
! advance_state (curr_state), issued_insns = 0;
haifa_clock++;
if (sched_verbose >= 2)
{
sel_print ("advance_state (dfa_new_cycle)\n");
debug_state (curr_state);
+ sel_print ("haifa clock: %d\n", haifa_clock + 1);
}
}
if (real_insn)
{
cost = state_transition (curr_state, insn);
+ issued_insns++;
if (sched_verbose >= 2)
! {
! sel_print ("scheduled insn %d, clock %d\n", INSN_UID (insn),
! haifa_clock + 1);
! debug_state (curr_state);
! }
gcc_assert (cost < 0);
}
*************** sel_sched_region_1 (void)
*** 7518,7524 ****
continue;
}
! if (bitmap_clear_bit (blocks_to_reschedule, bb->index))
{
flist_tail_init (new_fences);
--- 7530,7536 ----
continue;
}
! if (bitmap_bit_p (blocks_to_reschedule, bb->index))
{
flist_tail_init (new_fences);