gcc/ 2010-12-18 H.J. Lu PR target/46519 * config/i386/i386.c (block_info_def): Remove referenced, count and rescanned. (move_or_delete_vzeroupper_2): Updated. (move_or_delete_vzeroupper_1): Rewritten to avoid recursive call. (rescan_move_or_delete_vzeroupper): Removed. (move_or_delete_vzeroupper): Repeat processing all basic blocks until no basic block state is changed to used at exit. gcc/testsuite/ 2010-12-18 H.J. Lu PR target/46519 * gfortran.dg/pr46519-2.f90: New. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 40999c8..28b26ef 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -68,14 +68,8 @@ typedef struct block_info_def { /* State of the upper 128bits of any AVX registers at exit. */ enum upper_128bits_state state; - /* If the upper 128bits of any AVX registers are referenced. */ - enum upper_128bits_state referenced; - /* Number of vzerouppers in this block. */ - unsigned int count; /* TRUE if block has been processed. */ bool processed; - /* TRUE if block has been rescanned. */ - bool rescanned; } *block_info; #define BLOCK_INFO(B) ((block_info) (B)->aux) @@ -127,8 +121,6 @@ move_or_delete_vzeroupper_2 (basic_block bb, rtx vzeroupper_insn = NULL_RTX; rtx pat; int avx256; - enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced; - int count = BLOCK_INFO (bb)->count; if (dump_file) fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n", @@ -191,24 +183,20 @@ move_or_delete_vzeroupper_2 (basic_block bb, /* Delete pending vzeroupper insertion. */ if (vzeroupper_insn) { - count--; delete_insn (vzeroupper_insn); vzeroupper_insn = NULL_RTX; } } - else if (state != used && referenced != unused) + else if (state != used) { /* No need to call note_stores if the upper 128bits of AVX registers are never referenced. */ note_stores (pat, check_avx256_stores, &state); - if (state == used) - referenced = used; } continue; } /* Process vzeroupper intrinsic. */ - count++; avx256 = INTVAL (XVECEXP (pat, 0, 0)); if (state == unused) @@ -226,7 +214,6 @@ move_or_delete_vzeroupper_2 (basic_block bb, fprintf (dump_file, "Delete redundant vzeroupper:\n"); print_rtl_single (dump_file, insn); } - count--; delete_insn (insn); } else @@ -246,7 +233,6 @@ move_or_delete_vzeroupper_2 (basic_block bb, fprintf (dump_file, "Delete callee pass vzeroupper:\n"); print_rtl_single (dump_file, insn); } - count--; delete_insn (insn); } else @@ -256,30 +242,22 @@ move_or_delete_vzeroupper_2 (basic_block bb, BLOCK_INFO (bb)->state = state; - if (BLOCK_INFO (bb)->referenced == unknown) - { - /* The upper 128bits of AVX registers are never referenced if - REFERENCED isn't updated. */ - if (referenced == unknown) - referenced = unused; - BLOCK_INFO (bb)->referenced = referenced; - BLOCK_INFO (bb)->count = count; - } - if (dump_file) fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n", bb->index, state); } /* Helper function for move_or_delete_vzeroupper. Process vzeroupper - in BLOCK and its predecessor blocks recursively. */ + in BLOCK and check its predecessor blocks. Treat UNKNOWN state + as USED if UNKNOWN_IS_UNUSED is true. */ static void -move_or_delete_vzeroupper_1 (basic_block block) +move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused) { edge e; edge_iterator ei; - enum upper_128bits_state state; + enum upper_128bits_state state, old_state, new_state; + bool seen_unknown; if (dump_file) fprintf (dump_file, " Process [bb %i]: status: %d\n", @@ -288,83 +266,42 @@ move_or_delete_vzeroupper_1 (basic_block block) if (BLOCK_INFO (block)->processed) return; - BLOCK_INFO (block)->processed = true; - - state = unknown; + state = unused; - /* Process all predecessor edges of this block. */ + /* Check all predecessor edges of this block. */ + seen_unknown = false; FOR_EACH_EDGE (e, ei, block->preds) { if (e->src == block) continue; - move_or_delete_vzeroupper_1 (e->src); switch (BLOCK_INFO (e->src)->state) { case unknown: - if (state == unused) - state = unknown; + if (!unknown_is_unused) + seen_unknown = true; + case unused: break; case used: state = used; - break; - case unused: - break; + goto done; } } - /* If state of any predecessor edges is unknown, we need to rescan. */ - if (state == unknown) - cfun->machine->rescan_vzeroupper_p = 1; + if (seen_unknown) + state = unknown; - /* Process this block. */ +done: + old_state = BLOCK_INFO (block)->state; move_or_delete_vzeroupper_2 (block, state); -} - -/* Helper function for move_or_delete_vzeroupper. Rescan vzeroupper - in BLOCK and its predecessor blocks recursively. */ - -static void -rescan_move_or_delete_vzeroupper (basic_block block) -{ - edge e; - edge_iterator ei; - enum upper_128bits_state state; - - if (dump_file) - fprintf (dump_file, " Rescan [bb %i]: status: %d\n", - block->index, BLOCK_INFO (block)->rescanned); - - if (BLOCK_INFO (block)->rescanned) - return; - - BLOCK_INFO (block)->rescanned = true; - - state = unused; + new_state = BLOCK_INFO (block)->state; - /* Rescan all predecessor edges of this block. */ - FOR_EACH_EDGE (e, ei, block->preds) - { - if (e->src == block) - continue; - rescan_move_or_delete_vzeroupper (e->src); - /* For rescan, UKKNOWN state is treated as UNUSED. */ - if (BLOCK_INFO (e->src)->state == used) - state = used; - } + if (state != unknown || new_state == used) + BLOCK_INFO (block)->processed = true; - /* Rescan this block only if there are vzerouppers or the upper - 128bits of AVX registers are referenced. */ - if (BLOCK_INFO (block)->count == 0 - && (state == used || BLOCK_INFO (block)->referenced != used)) - { - if (state == used) - BLOCK_INFO (block)->state = state; - if (dump_file) - fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n", - block->index, BLOCK_INFO (block)->state); - } - else - move_or_delete_vzeroupper_2 (block, state); + /* Need to rescan if the upper 128bits of AVX registers are changed + to USED at exit. */ + if (new_state != old_state && new_state == used) + cfun->machine->rescan_vzeroupper_p = 1; } /* Go through the instruction stream looking for vzeroupper. Delete @@ -377,7 +314,7 @@ move_or_delete_vzeroupper (void) edge e; edge_iterator ei; basic_block bb; - unsigned int count = 0; + unsigned int count; /* Set up block info for each basic block. */ alloc_aux_for_blocks (sizeof (struct block_info_def)); @@ -392,28 +329,30 @@ move_or_delete_vzeroupper (void) cfun->machine->caller_pass_avx256_p ? used : unused); BLOCK_INFO (e->dest)->processed = true; - BLOCK_INFO (e->dest)->rescanned = true; } /* Process all basic blocks. */ - if (dump_file) - fprintf (dump_file, "Process all basic blocks\n"); - - FOR_EACH_BB (bb) - { - move_or_delete_vzeroupper_1 (bb); - count += BLOCK_INFO (bb)->count; - } - - /* Rescan all basic blocks if needed. */ - if (count && cfun->machine->rescan_vzeroupper_p) + count = 0; + do { if (dump_file) - fprintf (dump_file, "Rescan all basic blocks\n"); - + fprintf (dump_file, "Process all basic blocks: trip %d\n", + count); + cfun->machine->rescan_vzeroupper_p = 0; FOR_EACH_BB (bb) - rescan_move_or_delete_vzeroupper (bb); + move_or_delete_vzeroupper_1 (bb, false); } + while (cfun->machine->rescan_vzeroupper_p && count++ < 20); + + /* FIXME: Is 20 big enough? */ + if (count >= 20) + gcc_unreachable (); + + if (dump_file) + fprintf (dump_file, "Process all basic blocks\n"); + + FOR_EACH_BB (bb) + move_or_delete_vzeroupper_1 (bb, true); free_aux_for_blocks (); } diff --git a/gcc/testsuite/gfortran.dg/pr46519-2.f90 b/gcc/testsuite/gfortran.dg/pr46519-2.f90 new file mode 100644 index 0000000..b4d6980 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr46519-2.f90 @@ -0,0 +1,31 @@ +! { dg-do compile { target i?86-*-* x86_64-*-* } } +! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" } + + SUBROUTINE func(kts, kte, qrz, qiz, rho) + IMPLICIT NONE + INTEGER, INTENT(IN) :: kts, kte + REAL, DIMENSION(kts:kte), INTENT(INOUT) :: qrz, qiz, rho + INTEGER :: k + REAL, DIMENSION(kts:kte) :: praci, vtiold + REAL :: fluxout + INTEGER :: min_q, max_q, var + do k=kts,kte + praci(k)=0.0 + enddo + min_q=kte + max_q=kts-1 + DO var=1,20 + do k=max_q,min_q,-1 + fluxout=rho(k)*qrz(k) + enddo + qrz(min_q-1)=qrz(min_q-1)+fluxout + ENDDO + DO var=1,20 + do k=kts,kte-1 + vtiold(k)= (rho(k))**0.16 + enddo + ENDDO + STOP + END SUBROUTINE func + +! { dg-final { scan-assembler "avx_vzeroupper" } }