diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index 4317c9f..9702ebe 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -1361,7 +1361,10 @@ insn_cost (rtx insn)
       if (recog_memoized (insn) < 0)
 	return 0;
 
-      cost = insn_default_latency (insn);
+      if (sched_load)
+	cost = 0;
+      else
+	cost = insn_default_latency (insn);
       if (cost < 0)
 	cost = 0;
 
@@ -1383,7 +1386,10 @@ insn_cost (rtx insn)
 	}
       else
 	{
-	  cost = insn_default_latency (insn);
+	  if (sched_load)
+	    cost = 0;
+	  else
+	    cost = insn_default_latency (insn);
 	  if (cost < 0)
 	    cost = 0;
 
@@ -1568,6 +1574,27 @@ dep_list_size (rtx insn, sd_list_types_def list)
   return nodbgcount;
 }
 
+static int saved_priority;
+bool sched_load;
+
+
+static int
+pri (bool wasload, int regno, HOST_WIDE_INT offp) {
+  int p;
+  int off = (int)offp;
+  int m = /* ((1<<14)-1) */ 0x3fff;
+  off = off + (1<<13);
+  off = off & m;
+  /* loads go first, then stores */
+  p = wasload*(INT_MAX/4+1) + (INT_MAX/4+1);
+  /* then we sort upon the base register, increasing regnos. */
+  p -= (regno & 0xfff) <<14;
+  /* then we sort on the offset, increasing offsets. */
+  p -= off;
+  gcc_assert (p>0);
+  return p;
+}
+
 /* Compute the priority number for INSN.  */
 static int
 priority (rtx insn)
@@ -1582,7 +1609,43 @@ priority (rtx insn)
     {
       int this_priority = -1;
 
-      if (dep_list_size (insn, SD_LIST_FORW) == 0)
+      if (sched_load)
+	{
+	  rtx x;
+	  if (! INSN_P (insn))
+	    this_priority = 0;
+	  else if (GET_CODE (x=PATTERN (insn)) != SET)
+	    this_priority = saved_priority;
+	  else if (GET_CODE (SET_DEST (x)) == REG
+		   && GET_CODE (SET_SRC (x)) == MEM
+		   && GET_CODE (XEXP (SET_SRC (x), 0)) == REG)
+	    /* we have a recognized load: r = (mem r) */
+	    this_priority = pri (true, REGNO (XEXP (SET_SRC (x), 0)), 0);
+	  else if (GET_CODE (SET_DEST (x)) == REG
+		   && GET_CODE (SET_SRC (x)) == MEM
+		   && GET_CODE (XEXP (SET_SRC (x), 0)) == PLUS
+		   && GET_CODE (XEXP (XEXP (SET_SRC (x), 0), 0)) == REG
+		   && GET_CODE (XEXP (XEXP (SET_SRC (x), 0), 1)) == CONST_INT)
+	    /* we have a recognized load: r = (mem (+ r n)) */
+	    this_priority = pri (true, REGNO (XEXP (XEXP (SET_SRC (x), 0), 0)),
+				 INTVAL (XEXP (XEXP (SET_SRC (x), 0), 1)));
+	  else if (GET_CODE (SET_SRC (x)) == REG
+		   && GET_CODE (SET_DEST (x)) == MEM
+		   && GET_CODE (XEXP (SET_DEST (x), 0)) == REG)
+	    /* we have a recognized store: (mem r) = r */
+	    this_priority = pri (false, REGNO (XEXP (SET_DEST (x), 0)), 0);
+	  else if (GET_CODE (SET_SRC (x)) == REG
+		   && GET_CODE (SET_DEST (x)) == MEM
+		   && GET_CODE (XEXP (SET_DEST (x), 0)) == PLUS
+		   && GET_CODE (XEXP (XEXP (SET_DEST (x), 0), 0)) == REG
+		   && GET_CODE (XEXP (XEXP (SET_DEST (x), 0), 1)) == CONST_INT)
+	    /* we have a recognized store: (mem (+ r n)) = r */
+	    this_priority = pri (false, REGNO (XEXP (XEXP (SET_DEST (x), 0), 0)),
+				 INTVAL (XEXP (XEXP (SET_DEST (x), 0), 1)));
+	  else
+	    this_priority = saved_priority;
+	}
+      else if (dep_list_size (insn, SD_LIST_FORW) == 0)
 	/* ??? We should set INSN_PRIORITY to insn_cost when and insn has
 	   some forward deps but all of them are ignored by
 	   contributes_to_priority hook.  At the moment we set priority of
@@ -2513,6 +2576,8 @@ model_set_excess_costs (rtx *insns, int count)
     }
 }
 
+static int last_insn_pri;
+
 /* Returns a positive value if x is preferred; returns a negative value if
    y is preferred.  Should never return 0, since that will make the sort
    unstable.  */
@@ -2544,6 +2609,23 @@ rank_for_schedule (const void *x, const void *y)
   /* Make sure that priority of TMP and TMP2 are initialized.  */
   gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2));
 
+  if (sched_load)
+    {
+      /* The instruction that is closest after the last instruction
+	 is the instruction we want to pick next.  */
+      int a = last_insn_pri-INSN_PRIORITY (tmp);
+      int b = last_insn_pri-INSN_PRIORITY (tmp2);
+      if (a>=0)
+	{
+	  if (b<0)
+	    return -1;
+	  return a-b;
+	}
+      if (b<0)
+	return a-b;
+      return 1;
+    }
+
   if (sched_pressure != SCHED_PRESSURE_NONE)
     {
       int diff;
@@ -5620,6 +5702,7 @@ commit_schedule (rtx prev_head, rtx tail, basic_block *target_bb)
 {
   unsigned int i;
   rtx insn;
+  basic_block dirty_block = 0;
 
   last_scheduled_insn = prev_head;
   for (i = 0;
@@ -5645,6 +5728,19 @@ commit_schedule (rtx prev_head, rtx tail, basic_block *target_bb)
 
       if (current_sched_info->begin_move_insn)
 	(*current_sched_info->begin_move_insn) (insn, last_scheduled_insn);
+
+      /* We have to mark the block dirty so that df will rescan
+	 instructions whenever we move instructions around.  */
+      if (PREV_INSN (insn) != last_scheduled_insn)
+	{
+	  basic_block new_dirty_block = BLOCK_FOR_INSN (insn);
+	  if (new_dirty_block != dirty_block)
+	    {
+	      df_set_bb_dirty (new_dirty_block);
+	      dirty_block = new_dirty_block;
+	    }
+	}
+
       move_insn (insn, last_scheduled_insn,
 		 current_sched_info->next_tail);
       if (!DEBUG_INSN_P (insn))
@@ -5766,6 +5862,10 @@ prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
 	      reason = "shadow tick";
 	    }
 	}
+
+      if (cost > 1 && sched_load)
+	cost = 1;
+
       if (cost >= 1)
 	{
 	  if (SCHED_GROUP_P (insn) && cost > min_cost_group)
@@ -6652,12 +6752,19 @@ sched_init (void)
   curr_state = xmalloc (dfa_state_size);
 }
 
+bool free_hid = false;
+
 static void haifa_init_only_bb (basic_block, basic_block);
 
 /* Initialize data structures specific to the Haifa scheduler.  */
 void
 haifa_sched_init (void)
 {
+  if (free_hid)
+    {
+      free_hid = false;
+      haifa_finish_h_i_d ();
+    }
   setup_sched_dump ();
   sched_init ();
 
@@ -6698,6 +6805,9 @@ haifa_sched_init (void)
   after_recovery = 0;
 
   modulo_ii = 0;
+  /* normal instructions go first. */
+  saved_priority = INT_MAX/2 + 2;
+  last_insn_pri = INT_MAX/2 + 1;
 }
 
 /* Finish work with the data specific to the Haifa scheduler.  */
@@ -6745,8 +6855,10 @@
 void
 sched_finish (void)
 {
-  if (!flag_timing_info)
+  if (!flag_timing_info || !reload_completed)
     haifa_finish_h_i_d ();
+  else
+    free_hid = true;
   if (sched_pressure != SCHED_PRESSURE_NONE)
     {
       if (regstat_n_sets_and_refs != NULL)
@@ -8568,4 +8680,9 @@ get_ready_element (int i)
   return ready_element (&ready, i);
 }
 
+haifa_insn_data_def* (HID) (rtx insn)
+{
+  return HID(insn);
+}
+
 #endif /* INSN_SCHEDULING */
diff --git a/gcc/passes.c b/gcc/passes.c
index 2a5ea00..26e1455 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -1600,6 +1600,8 @@ init_optimization_passes (void)
 	  NEXT_PASS (pass_rtl_loop_done);
 	  *p = NULL;
 	}
+      NEXT_PASS (pass_sched0);
+      NEXT_PASS (pass_peephole1);
       NEXT_PASS (pass_web);
       NEXT_PASS (pass_rtl_cprop);
       NEXT_PASS (pass_cse2);
diff --git a/gcc/recog.c b/gcc/recog.c
index 3c56703..5cf76a8 100644
--- a/gcc/recog.c
+++ b/gcc/recog.c
@@ -72,7 +72,7 @@ static rtx split_insn (rtx);
 
 int volatile_ok;
 
-struct recog_data recog_data;
+struct Recog_data recog_data;
 
 /* Contains a vector of operand_alternative structures for every
    operand.  Set up by preprocess_constraints.  */
@@ -3104,6 +3104,9 @@ peep2_find_free_register (int from, int to, const char *class_str,
   cl = (class_str[0] == 'r' ? GENERAL_REGS
 	: REG_CLASS_FROM_CONSTRAINT (class_str[0], class_str));
 
+  if (can_create_pseudo_p ())
+    return gen_reg_rtx (mode);
+
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     {
       int raw_regno, regno, success, j;
@@ -3750,6 +3753,27 @@ rest_of_handle_peephole2 (void)
   return 0;
 }
 
+struct rtl_opt_pass pass_peephole1 =
+{
+ {
+  RTL_PASS,
+  "peephole1",                          /* name */
+  OPTGROUP_NONE,                        /* optinfo_flags */
+  gate_handle_peephole2,                /* gate */
+  rest_of_handle_peephole2,             /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_PEEPHOLE2,                         /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_df_finish | TODO_verify_rtl_sharing |
+  0                                     /* todo_flags_finish */
+ }
+};
+
 struct rtl_opt_pass pass_peephole2 =
 {
  {
diff --git a/gcc/recog.h b/gcc/recog.h
index 0e7d537..9b86964 100644
--- a/gcc/recog.h
+++ b/gcc/recog.h
@@ -180,7 +180,7 @@ extern int which_alternative;
 
 /* The following vectors hold the results from insn_extract.  */
 
-struct recog_data
+struct Recog_data
 {
   /* It is very tempting to make the 5 operand related arrays into a
      structure and index on that.  However, to be source compatible
@@ -246,7 +246,7 @@
   rtx insn;
 };
 
-extern struct recog_data recog_data;
+extern struct Recog_data recog_data;
 
 /* Contains a vector of operand_alternative structures for every
    operand.  Set up by preprocess_constraints.  */
diff --git a/gcc/reload.c b/gcc/reload.c
index 5fd43a3..e3d6eff 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -895,7 +895,7 @@ can_reload_into (rtx in, int regno, enum machine_mode mode)
 {
   rtx dst, test_insn;
   int r = 0;
-  struct recog_data save_recog_data;
+  struct Recog_data save_recog_data;
 
   /* For matching constraints, we often get notional input reloads where
      we want to use the original register as the reload register.  I.e.
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index c2c1f2f..444113e 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -1579,5 +1579,7 @@
 extern void sd_debug_lists (rtx, sd_list_types_def);
 
 #endif /* INSN_SCHEDULING */
 
+extern bool sched_load;
+
 #endif /* GCC_SCHED_INT_H */
diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c
index 46edff8..f97de39 100644
--- a/gcc/sched-rgn.c
+++ b/gcc/sched-rgn.c
@@ -3526,6 +3526,18 @@ gate_handle_sched (void)
 
 /* Run instruction scheduler.  */
 static unsigned int
+rest_of_handle_sched0 (void)
+{
+#ifdef INSN_SCHEDULING
+  sched_load = true;
+  schedule_insns ();
+  sched_load = false;
+#endif
+  return 0;
+}
+
+/* Run instruction scheduler.  */
+static unsigned int
 rest_of_handle_sched (void)
 {
 #ifdef INSN_SCHEDULING
@@ -3570,6 +3582,28 @@ rest_of_handle_sched2 (void)
   return 0;
 }
 
+struct rtl_opt_pass pass_sched0 =
+{
+ {
+  RTL_PASS,
+  "sched0",                             /* name */
+  OPTGROUP_NONE,                        /* optinfo_flags */
+  gate_handle_sched,                    /* gate */
+  rest_of_handle_sched0,                /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_SCHED,                             /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_df_finish | TODO_verify_rtl_sharing |
+  TODO_verify_flow |
+  TODO_ggc_collect                      /* todo_flags_finish */
+ }
+};
+
 struct rtl_opt_pass pass_sched =
 {
  {
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 4c67495..73412c4 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -5370,9 +5370,9 @@ static rtx
 simplify_immed_subreg (enum machine_mode outermode, rtx op,
 		       enum machine_mode innermode, unsigned int byte)
 {
-  /* We support up to 512-bit values (for V8DFmode).  */
+  /* We support up to 1024-bit values (for V16DFmode).  */
   enum {
-    max_bitsize = 512,
+    max_bitsize = 1024,
     value_bit = 8,
     value_mask = (1 << value_bit) - 1
   };
diff --git a/gcc/testsuite/gfortran.dg/select_type_4.f90 b/gcc/testsuite/gfortran.dg/select_type_4.f90
index 7e12d93..b58cc39 100644
--- a/gcc/testsuite/gfortran.dg/select_type_4.f90
+++ b/gcc/testsuite/gfortran.dg/select_type_4.f90
@@ -1,4 +1,7 @@
 ! { dg-do run }
+! This testcase seems to show a flaw with the alias set information when loads are
+! rescheduled earlier by sched0.  -fno-strict-aliasing seems to work around it.
+! { dg-options "-fno-strict-aliasing" }
 !
 ! Contributed by by Richard Maine
 ! http://coding.derkeiler.com/Archive/Fortran/comp.lang.fortran/2006-10/msg00104.html
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 7581cf3..b1e1c4f 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -425,6 +425,8 @@ extern struct rtl_opt_pass pass_rtl_unroll_and_peel_loops;
 extern struct rtl_opt_pass pass_rtl_doloop;
 extern struct rtl_opt_pass pass_rtl_loop_done;
 
+extern struct rtl_opt_pass pass_sched0;
+extern struct rtl_opt_pass pass_peephole1;
 extern struct rtl_opt_pass pass_web;
 extern struct rtl_opt_pass pass_cse2;
 extern struct rtl_opt_pass pass_df_initialize_opt;
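
The new pri () helper in haifa-sched.c packs "load vs. store", the base register number and the offset into a single priority value, so that during the sched0 pass loads sort before stores and references off the same base register sort by increasing offset.  The following standalone sketch (not part of the patch) illustrates that encoding; the name encode_priority and the sample register/offset values are purely illustrative and do not appear in the patch.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Mirror of the band/field layout used by pri () above: loads get the
   higher band, then the base register number, then the offset.  Larger
   values mean "schedule earlier".  */
static int
encode_priority (bool is_load, int base_regno, int offset)
{
  /* Bias the offset into a 14-bit window, as pri () does.  */
  int off = (offset + (1 << 13)) & ((1 << 14) - 1);
  /* Loads land in the upper band, stores in the lower one.  */
  int p = (is_load ? 2 : 1) * (INT_MAX / 4 + 1);
  /* Within a band, lower base registers and lower offsets keep the
     larger priority, so they are picked first.  */
  p -= (base_regno & 0xfff) << 14;
  p -= off;
  return p;
}

int
main (void)
{
  /* A load from (reg 3) + 0 outranks a load from (reg 3) + 8, which in
     turn outranks a store to (reg 3) + 8.  */
  printf ("%d %d %d\n",
          encode_priority (true, 3, 0),
          encode_priority (true, 3, 8),
          encode_priority (false, 3, 8));
  return 0;
}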