Index: cfgloopmanip.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/cfgloopmanip.c,v retrieving revision 1.40 diff -c -p -r1.40 cfgloopmanip.c *** cfgloopmanip.c 16 Dec 2004 20:01:25 -0000 1.40 --- cfgloopmanip.c 27 Dec 2004 15:23:58 -0000 *************** static void fix_bb_placements (struct lo *** 46,52 **** static void place_new_loop (struct loops *, struct loop *); static void scale_loop_frequencies (struct loop *, int, int); static void scale_bbs_frequencies (basic_block *, int, int, int); ! static basic_block create_preheader (struct loop *, int); static void fix_irreducible_loops (basic_block); static void unloop (struct loops *, struct loop *); --- 46,52 ---- static void place_new_loop (struct loops *, struct loop *); static void scale_loop_frequencies (struct loop *, int, int); static void scale_bbs_frequencies (basic_block *, int, int, int); ! basic_block create_preheader (struct loop *, int); static void fix_irreducible_loops (basic_block); static void unloop (struct loops *, struct loop *); *************** mfb_update_loops (basic_block jump) *** 1134,1140 **** entry; otherwise we also force preheader block to have only one successor. The function also updates dominators. */ ! static basic_block create_preheader (struct loop *loop, int flags) { edge e, fallthru; --- 1134,1140 ---- entry; otherwise we also force preheader block to have only one successor. The function also updates dominators. */ ! basic_block create_preheader (struct loop *loop, int flags) { edge e, fallthru; Index: common.opt =================================================================== RCS file: /cvs/gcc/gcc/gcc/common.opt,v retrieving revision 1.59 diff -c -p -r1.59 common.opt *** common.opt 28 Oct 2004 03:42:22 -0000 1.59 --- common.opt 27 Dec 2004 15:23:59 -0000 *************** fmodulo-sched *** 523,528 **** --- 523,532 ---- Common Report Var(flag_modulo_sched) Perform SMS based modulo scheduling before the first scheduling pass + frtl-loop-hc + Common Report Var(flag_rtl_loop_hc) + Perform loop header copying at RTL level. + fmove-loop-invariants Common Report Var(flag_move_loop_invariants) Move loop invariant computations out of loops Index: loop-unroll.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/loop-unroll.c,v retrieving revision 1.24 diff -c -p -r1.24 loop-unroll.c *** loop-unroll.c 22 Nov 2004 12:23:53 -0000 1.24 --- loop-unroll.c 27 Dec 2004 15:23:59 -0000 *************** free_opt_info (struct opt_info *opt_info *** 2171,2173 **** --- 2171,2373 ---- } free (opt_info); } + + #define SIMPLE_LOOP_P(loop) ((loop->num_nodes < 3 ) \ + && (EDGE_COUNT (loop->latch->preds) == 1) \ + && (EDGE_COUNT (loop->latch->succs) == 1)) + + /* Return true if all the BBs of the loop are empty except the + loop header. */ + static bool + rtl_loop_single_full_bb_p (struct loop *loop) + { + unsigned i; + basic_block *bbs = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes ; i++) + { + rtx head, tail; + bool empty_bb = true; + + if (bbs[i] == loop->header) + continue; + + /* Make sure that basic blocks other than the header + have only notes labels or jumps. */ + tail = BB_END (bbs[i]); + for (head = BB_HEAD (bbs[i]) ; head != NEXT_INSN (tail); head = NEXT_INSN (head)) + { + if (NOTE_P (head) || LABEL_P (head) + || (INSN_P (head) && JUMP_P (head))) + continue; + empty_bb = false; + break; + } + + if (! empty_bb) + { + free (bbs); + return false; + } + } + free (bbs); + return true; + } + + static bool + rtl_loop_ch_appropriate_p (struct loop *loop) + { + unsigned nexits; + rtx head; + int n_insns = 0; + + if (loop->inner || ! loop->outer) + return false; + + get_loop_exit_edges (loop, &nexits); + if (nexits > 1) + return false; + + if (! SIMPLE_LOOP_P (loop) || rtl_loop_single_full_bb_p (loop)) + return false; + + for (head = BB_HEAD (loop->header); + head != NEXT_INSN (BB_END (loop->header)); + head = NEXT_INSN (head)) + { + if (NOTE_P (head) || LABEL_P (head)) + continue; + n_insns++; + } + if (n_insns > LOOP_CH_MAX_SIZE) + return false; + + return true; + } + + /* The same tree_duplicate_sese_region but for RTL, no need to update ssa. */ + static bool + cfg_duplicate_sese_region (edge entry, edge exit, + basic_block *region, unsigned n_region, + basic_block *region_copy) + { + unsigned i, n_doms; + bool free_region_copy = false, copying_header = false; + struct loop *loop = entry->dest->loop_father; + edge exit_copy; + basic_block *doms; + edge redirected; + + if (!can_copy_bbs_p (region, n_region)) + return false; + + /* Some sanity checking. Note that we do not check for all possible + missuses of the functions. I.e. if you ask to copy something weird, + it will work, but the state of structures probably will not be + correct. */ + + for (i = 0; i < n_region; i++) + { + /* We do not handle subloops, i.e. all the blocks must belong to the + same loop. */ + if (region[i]->loop_father != loop) + return false; + + if (region[i] != entry->dest + && region[i] == loop->header) + return false; + } + + loop->copy = loop; + + /* In case the function is used for loop header copying (which is the primary + use), ensure that EXIT and its copy will be new latch and entry edges. */ + if (loop->header == entry->dest) + { + copying_header = true; + loop->copy = loop->outer; + + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, exit->src)) + return false; + + for (i = 0; i < n_region; i++) + if (region[i] != exit->src + && dominated_by_p (CDI_DOMINATORS, region[i], exit->src)) + return false; + } + + if (!region_copy) + { + region_copy = xmalloc (sizeof (basic_block) * n_region); + free_region_copy = true; + } + + gcc_assert (!any_marked_for_rewrite_p ()); + + /* Record blocks outside the region that are duplicated by something + inside. */ + doms = xmalloc (sizeof (basic_block) * n_basic_blocks); + n_doms = get_dominated_by_region (CDI_DOMINATORS, region, n_region, doms); + + copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop); + + if (copying_header) + { + loop->header = exit->dest; + loop->latch = exit->src; + } + + /* Redirect the entry and add the phi node arguments. */ + redirected = redirect_edge_and_branch (entry, entry->dest->rbi->copy); + gcc_assert (redirected != NULL); + + /* Concerning updating of dominators: We must recount dominators + for entry block and its copy. Anything that is outside of the region, but + was dominated by something inside needs recounting as well. */ + set_immediate_dominator (CDI_DOMINATORS, entry->dest, entry->src); + doms[n_doms++] = entry->dest->rbi->original; + iterate_fix_dominators (CDI_DOMINATORS, doms, n_doms); + free (doms); + } + + /* This is loop_copy_header does but in RTL level. + Having a loop that executes N times where the loop header + executes N times and the latch executes N - 1 times we transform + it to a loop that executes N - 1 times (both header and latch) + and a single iteration of the loop header to the entry of the + loop. Thie perpose fromt this transformation is to make + the scheduling (and SMS) of loops more efficient. */ + void + rtl_loop_copy_header (struct loops *loops) + { + unsigned i; + edge exit_e; + sbitmap wont_exit = sbitmap_alloc (2); + + sbitmap_zero (wont_exit); + for (i = 0; i < loops->num; i++) + { + struct loop *loop = loops->parray[i]; + edge e; + basic_block *bbs; + + if (! rtl_loop_ch_appropriate_p (loop)) + continue; + + create_preheader (loop, CP_SIMPLE_PREHEADERS); + bbs = get_loop_body (loop); + + if (EDGE_PRED (loop->header, 0)->src != loop->latch) + e = EDGE_PRED (loop->header, 0); + else + e = EDGE_PRED (loop->header, 1); + if (EDGE_SUCC (loop->header, 0)->dest == loop->latch) + exit_e = EDGE_SUCC (loop->header, 0); + else + exit_e = EDGE_SUCC (loop->header, 1); + cfg_duplicate_sese_region (e, exit_e, bbs, 1, NULL); + } + + free_dominance_info (CDI_DOMINATORS); + cleanup_cfg (CLEANUP_CFGLAYOUT); + } Index: params.def =================================================================== RCS file: /cvs/gcc/gcc/gcc/params.def,v retrieving revision 1.51 diff -c -p -r1.51 params.def *** params.def 18 Dec 2004 20:22:51 -0000 1.51 --- params.def 27 Dec 2004 15:23:59 -0000 *************** DEFPARAM (PARAM_INTEGER_SHARE_LIMIT, *** 416,421 **** --- 416,426 ---- "The upper bound for sharing integer constants", 256, 2, 2) + DEFPARAM (PARAM_LOOP_CH_MAX_SIZE, + "loop-ch-max-size", + "The maximum number of instructions in a loop to perfrom loop-ch on it", + 100, 0, 0) + /* Local variables: mode:c Index: params.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/params.h,v retrieving revision 1.25 diff -c -p -r1.25 params.h *** params.h 1 Dec 2004 16:46:25 -0000 1.25 --- params.h 27 Dec 2004 15:23:59 -0000 *************** typedef enum compiler_param *** 135,138 **** --- 135,140 ---- PARAM_VALUE (PARAM_MAX_ALIASED_VOPS) #define INTEGER_SHARE_LIMIT \ PARAM_VALUE (PARAM_INTEGER_SHARE_LIMIT) + #define LOOP_CH_MAX_SIZE \ + PARAM_VALUE (PARAM_LOOP_CH_MAX_SIZE) #endif /* ! GCC_PARAMS_H */ Index: passes.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/passes.c,v retrieving revision 2.60 diff -c -p -r2.60 passes.c *** passes.c 14 Dec 2004 01:30:31 -0000 2.60 --- passes.c 27 Dec 2004 15:23:59 -0000 *************** rest_of_handle_loop2 (void) *** 1193,1198 **** --- 1193,1207 ---- /* Initialize structures for layout changes. */ cfg_layout_initialize (0); + if (flag_rtl_loop_hc) + { + loops = loop_optimizer_init (dump_file); + if (loops) + { + rtl_loop_copy_header (loops); + loop_optimizer_finalize (loops, dump_file); + } + } loops = loop_optimizer_init (dump_file); if (loops)