This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH PING]: Target support for IA-64 speculation
- From: Vladimir Makarov <vmakarov at redhat dot com>
- To: Maxim Kuvyrkov <mkuvyrkov at ispras dot ru>, Jim Wilson <wilson at specifixinc dot com>
- Cc: Andrey Belevantsev <abel at ispras dot ru>, "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 15 Mar 2006 12:50:45 -0500
- Subject: Re: [PATCH PING]: Target support for IA-64 speculation
- References: <440E4DBC.8020106@ispras.ru>
Maxim Kuvyrkov wrote:
Hi, Jim!
My name is Maxim and I am a part of the ISPRAS team, that is working
on the improvement of GCC for Itanium.
As Vlad is reviewing the last part of the machine-independent patch, I
am asking you if you could take a look at the ia64 port patch.
Please find attached the latest version of the ia64 port patch, as
well as cumulative machine-independent patch.
Here is the link to the original patch:
http://gcc.gnu.org/ml/gcc-patches/2005-12/msg01925.html .
Jim, I've looked at this patch (many ia64 changes needs to know
previous patches to review them and many of them are in code I wrote).
The patch is ok for me (with some comments below). But probably, you
might add more if you have time for this.
So if you don't object, they could submit the patch with changes I
wrote below. What do you think?
Maxim, my review is below.
Vlad
2006-02-11 Maxim Kuvyrkov <mkuvyrkov@ispras.ru>
...
* itanium1.md (1_fldc, 1_fldpc, 1_ldc, 1_chk_s_f, 1_chk_a,
1b_fldc, 1b_fldpc, 1b_ldc, 1b_chk_s_f, 1b_chk_a): New resource
constraints.
I'd add
(1_fld, 1_fldp, 1_ld, 1b_fld, 1b_fldp, 1b_ld): Add a condition
for speculation.
(1_chk_s, 1b_chk_s): Renamed to 1_chk_s_i, 1b_chk_s_i.
* itanium2.md (2_flda, 2_fldc, 2_fldpc, 2_ldc, 2_chk_s_f, 2_chk_a,
2b_flda, 2b_fldc, 2b_fldpc, 2b_ldc, 2b_chk_s_f, 2b_chk_a): New resource
constraints.
Again I'd add
(2_fld, 2_fldp, 2_ld, 2b_fld, 2b_fldp, 2b_ld): Add a condition
for speculation.
(2_chk_s, 2b_chk_s): Renamed to 2_chk_s_i, 2b_chk_s_i.
...
Index: config/ia64/ia64.opt
===================================================================
--- config/ia64/ia64.opt (.../insn-tick/gcc) (revision 2383)
+++ config/ia64/ia64.opt (.../recovery/gcc) (revision 2383)
@@ -96,3 +96,50 @@ mtune=
Target RejectNegative Joined
Schedule code for given CPU
+msched-br-data-spec
+Target Report Var(mflag_sched_br_data_spec) Init(0)
+Use data speculation before reload
+
+msched-ar-data-spec
+Target Report Var(mflag_sched_ar_data_spec) Init(1)
+Use data speculation after reload
+
+msched-control-spec
+Target Report Var(mflag_sched_control_spec) Init(0)
+Use control speculation
+
+msched-br-in-data-spec
+Target Report Var(mflag_sched_br_in_data_spec) Init(1)
+Use in block data speculation before reload
+
+msched-ar-in-data-spec
+Target Report Var(mflag_sched_ar_in_data_spec) Init(1)
+Use in block data speculation after reload
+
+msched-in-control-spec
+Target Report Var(mflag_sched_in_control_spec) Init(1)
+Use in block control speculation
+
+msched-ldc
+Target Report Var(mflag_sched_ldc) Init(1)
+Use simple data speculation check
+
+msched-control-ldc
+Target Report Var(mflag_control_ldc) Init(0)
+Use simple data speculation check for control speculation
+
+msched-spec-verbose
+Common Report Var(mflag_sched_spec_verbose) Init(0)
+Print information about speculative motions.
+
+msched-prefer-non-data-spec-insns
+Common Report Var(mflag_sched_prefer_non_data_spec_insns) Init(1)
+If non-zero, data speculative instructions will be choosen for schedule only if there are no other choices at the moment
+
+msched-prefer-non-control-spec-insns
+Common Report Var(mflag_sched_prefer_non_control_spec_insns) Init(1)
+If non-zero, control speculative instructions will be choosen for schedule only if there are no other choices at the moment
+
+msched-count-spec-in-critical-path
+Common Report Var(mflag_sched_count_spec_in_critical_path) Init(1)
+Count speculative dependencies while calculating priority of instructions
You should add description of the flags in the documentation too.
Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c (.../insn-tick/gcc) (revision 2383)
+++ config/ia64/ia64.c (.../recovery/gcc) (revision 2383)
...
@@ -200,8 +211,10 @@ static void ia64_output_function_epilogu
static void ia64_output_function_end_prologue (FILE *);
static int ia64_issue_rate (void);
-static int ia64_adjust_cost (rtx, rtx, rtx, int);
+static int ia64_adjust_cost_2 (rtx, enum reg_note, rtx, int);
static void ia64_sched_init (FILE *, int, int);
+static void ia64_sched_init_global (FILE *, int, int);
+static void ia64_sched_finish_global (FILE *, int);
There is no log entry for ia64_sched_finish_global.
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx *, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx *, int *, int);
@@ -323,6 +336,10 @@ static const struct attribute_spec ia64_
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
+#undef TARGET_SCHED_FINISH_GLOBAL
+#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
No log entry for TARGET_SCHED_FINISH_GLOBAL.
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
...
@@ -5465,18 +5503,29 @@ set_src_needs_barrier (rtx x, struct reg
/* X is a conditional branch. */
/* ??? This seems redundant, as the caller sets this bit for
all JUMP_INSNs. */
- flags.is_branch = 1;
+ if (!ia64_spec_check_src_p (src))
+ flags.is_branch = 1;
return rtx_needs_barrier (src, flags, pred);
}
- need_barrier = rtx_needs_barrier (src, flags, pred);
+ if (ia64_spec_check_src_p (src))
+ /* Avoid checking one register twice (in condition
+ and in 'then' section) for ldc pattern. */
+ {
+ gcc_assert (REG_P (XEXP (src, 2)));
+ need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
+
+ /* We process MEM below. */
+ src = XEXP (src, 1);
+ }
+
+ need_barrier |= rtx_needs_barrier (src, flags, pred);
dst = SET_DEST (x);
if (GET_CODE (dst) == ZERO_EXTRACT)
{
need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
- dst = XEXP (dst, 0);
}
return need_barrier;
}
There is no log entry for changes in set_src_needs_barrier.
@@ -5725,6 +5774,11 @@ rtx_needs_barrier (rtx x, struct reg_fla
case UNSPEC_SETF_EXP:
case UNSPEC_ADDP4:
case UNSPEC_FR_SQRT_RECIP_APPROX:
+ case UNSPEC_LDA:
+ case UNSPEC_LDS:
+ case UNSPEC_LDSA:
+ case UNSPEC_CHKACLR:
+ case UNSPEC_CHKS:
need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
break;
...
@@ -6293,6 +6360,25 @@ ia64_sched_init (FILE *dump ATTRIBUTE_UN
init_insn_group_barriers ();
}
+/* We're beginning a scheduling pass. Check assertion. */
+
+static void
+ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ gcc_assert (!pending_data_specs);
+}
+
+static void
+ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED)
There is no function description (you could write 'see comments for
the corresponding hook).
+{
+ free (spec_check_no);
+ spec_check_no = 0;
+ max_uid = 0;
+}
+
/* We are about to being issuing insns for this clock cycle.
Override the default sort algorithm to better slot instructions. */
...
@@ -6526,6 +6636,577 @@ ia64_dfa_new_cycle (FILE *dump, int verb
return 0;
}
+/* Implement targetm.sched.h_i_d_extended hook.
+ Extend internal data structures. */
+static void
+ia64_h_i_d_extended (void)
+{
+ if (current_sched_info->flags & DO_SPECULATION)
+ {
+ int new_max_uid = get_max_uid () + 1;
+
+ spec_check_no = xrecalloc (spec_check_no, new_max_uid,
+ max_uid, sizeof (*spec_check_no));
+ max_uid = new_max_uid;
+ }
+
+ if (stops_p != NULL)
+ {
+ int new_clocks_length = get_max_uid () + 1;
+
+ stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
+
+ if (ia64_tune == PROCESSOR_ITANIUM)
+ {
+ clocks = xrecalloc (clocks, new_clocks_length, clocks_length,
+ sizeof (int));
+ add_cycles = xrecalloc (add_cycles, new_clocks_length, clocks_length,
+ sizeof (int));
+ }
+
+ clocks_length = new_clocks_length;
+ }
+}
+
This is the same thing which I wrote in my previous patch review.
Extension by 1 is not effective. Although it is rare case probably it
should be fixed in the future.
...
+/* Return index of the MODE. */
+static int
+ia64_mode_to_int (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case BImode: return 0; /* SPEC_MODE_FIRST */
+ case QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST */
+ case HImode: return 2;
+ case SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST */
+ case DImode: return 4;
+ case SFmode: return 5;
+ case DFmode: return 6;
+ case XFmode: return 7;
+ case TImode:
+ /* This mode needs testing. Bypasses for ldfp8 instruction are not
+ mentioned in itanium[12].md. Predicate fp_register_operand is also
+ need to be defined. Overall: better disable for now. */
Usually such comments start with ??? (as item to do).
+ return SPEC_MODE_INVALID;
+ default: return SPEC_MODE_INVALID;
+ }
+}
+
...
+/* Implement targetm.sched.speculate_insn hook.
+ Check if INSN can be TS speculative.
+ If 'no' - return -1.
You should write what 0 means too.
+ If 'yes' - generate speculative pattern in NEW_PAT and return 1. */
+static int
+ia64_speculate_insn (rtx insn, ds_t ts, rtx *new_pat)
+{
+ rtx pat, reg, mem, mem_reg;
+ int mode_no, gen_p = 1;
+ bool extend_p;
+
+ gcc_assert (!(ts & ~BEGIN_SPEC) && ts);
+
+ pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == COND_EXEC)
+ pat = COND_EXEC_CODE (pat);
+
+ if (GET_CODE (pat) != SET)
+ return -1;
+ reg = SET_DEST (pat);
+ if (!REG_P (reg))
+ return -1;
+
+ mem = SET_SRC (pat);
+ if (GET_CODE (mem) == ZERO_EXTEND)
+ {
+ mem = XEXP (mem, 0);
+ extend_p = true;
+ }
+ else
+ extend_p = false;
+
+ if (GET_CODE (mem) == UNSPEC)
+ {
+ int code;
+
+ code = XINT (mem, 1);
+ if (code != UNSPEC_LDA && code != UNSPEC_LDS && code != UNSPEC_LDSA)
+ return -1;
+
+ if ((code == UNSPEC_LDA && !(ts & BEGIN_CONTROL))
+ || (code == UNSPEC_LDS && !(ts & BEGIN_DATA))
+ || code == UNSPEC_LDSA)
+ gen_p = 0;
+
+ mem = XVECEXP (mem, 0, 0);
+ gcc_assert (MEM_P (mem));
+ }
+ if (!MEM_P (mem))
+ return -1;
+ mem_reg = XEXP (mem, 0);
+ if (!REG_P (mem_reg))
+ return -1;
+
+ /* We should use MEM's mode since REG's mode in presence of ZERO_EXTEND
+ will always be DImode. */
+ mode_no = ia64_mode_to_int (GET_MODE (mem));
+
+ if (mode_no == SPEC_MODE_INVALID
+ || (extend_p
+ && !(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
+ && mode_no <= SPEC_MODE_FOR_EXTEND_LAST)))
+ return -1;
+
+ extract_insn_cached (insn);
+ gcc_assert (reg == recog_data.operand[0] && mem == recog_data.operand[1]);
+ *new_pat = ia64_gen_spec_insn (insn, ts, mode_no, gen_p != 0, extend_p);
+
+ return gen_p;
+}
+
...
@@ -7804,6 +8501,7 @@ ia64_reorg (void)
free (clocks);
}
free (stops_p);
+ stops_p = NULL;
emit_insn_group_barriers (dump_file);
There is no log entry for this change.
ia64_final_schedule = 0;
...
Index: config/ia64/ia64.md
===================================================================
--- config/ia64/ia64.md (.../insn-tick/gcc) (revision 2383)
+++ config/ia64/ia64.md (.../recovery/gcc) (revision 2383)
+(define_mode_macro MODE [BI QI HI SI DI SF DF XF TI])
+(define_mode_macro MODE_FOR_EXTEND [QI HI SI])
+
+(define_mode_attr output_a [
+ (BI "ld1.a %0 = %1%P1")
+ (QI "ld1.a %0 = %1%P1")
+ (HI "ld2.a %0 = %1%P1")
+ (SI "ld4.a %0 = %1%P1")
+ (DI
+ "@
+ ld8.a %0 = %1%P1
+ ldf8.a %0 = %1%P1")
+ (SF
+ "@
+ ldfs.a %0 = %1%P1
+ ld4.a %0 = %1%P1")
+ (DF
+ "@
+ ldfd.a %0 = %1%P1
+ ld8.a %0 = %1%P1")
+ (XF "ldfe.a %0 = %1%P1")
+ (TI "ldfp8.a %X0 = %1%P1")])
+
+(define_mode_attr output_s [
+ (BI "ld1.s %0 = %1%P1")
+ (QI "ld1.s %0 = %1%P1")
+ (HI "ld2.s %0 = %1%P1")
+ (SI "ld4.s %0 = %1%P1")
+ (DI
+ "@
+ ld8.s %0 = %1%P1
+ ldf8.s %0 = %1%P1")
+ (SF
+ "@
+ ldfs.s %0 = %1%P1
+ ld4.s %0 = %1%P1")
+ (DF
+ "@
+ ldfd.s %0 = %1%P1
+ ld8.s %0 = %1%P1")
+ (XF "ldfe.s %0 = %1%P1")
+ (TI "ldfp8.s %X0 = %1%P1")])
+
+(define_mode_attr output_sa [
+ (BI "ld1.sa %0 = %1%P1")
+ (QI "ld1.sa %0 = %1%P1")
+ (HI "ld2.sa %0 = %1%P1")
+ (SI "ld4.sa %0 = %1%P1")
+ (DI
+ "@
+ ld8.sa %0 = %1%P1
+ ldf8.sa %0 = %1%P1")
+ (SF
+ "@
+ ldfs.sa %0 = %1%P1
+ ld4.sa %0 = %1%P1")
+ (DF
+ "@
+ ldfd.sa %0 = %1%P1
+ ld8.sa %0 = %1%P1")
+ (XF "ldfe.sa %0 = %1%P1")
+ (TI "ldfp8.sa %X0 = %1%P1")])
+
+(define_mode_attr output_c_clr [
+ (BI "ld1.c.clr%O1 %0 = %1%P1")
+ (QI "ld1.c.clr%O1 %0 = %1%P1")
+ (HI "ld2.c.clr%O1 %0 = %1%P1")
+ (SI "ld4.c.clr%O1 %0 = %1%P1")
+ (DI
+ "@
+ ld8.c.clr%O1 %0 = %1%P1
+ ldf8.c.clr %0 = %1%P1")
+ (SF
+ "@
+ ldfs.c.clr %0 = %1%P1
+ ld4.c.clr%O1 %0 = %1%P1")
+ (DF
+ "@
+ ldfd.c.clr %0 = %1%P1
+ ld8.c.clr%O1 %0 = %1%P1")
+ (XF "ldfe.c.clr %0 = %1%P1")
+ (TI "ldfp8.c.clr %X0 = %1%P1")])
+
+(define_mode_attr ld_reg_constr [(BI "=*r") (QI "=r") (HI "=r") (SI "=r") (DI "=r,*f") (SF "=f,*r") (DF "=f,*r") (XF "=f") (TI "=*x")])
+(define_mode_attr ldc_reg_constr [(BI "+*r") (QI "+r") (HI "+r") (SI "+r") (DI "+r,*f") (SF "+f,*r") (DF "+f,*r") (XF "+f") (TI "+*x")])
+(define_mode_attr chk_reg_constr [(BI "*r") (QI "r") (HI "r") (SI "r") (DI "r,*f") (SF "f,*r") (DF "f,*r") (XF "f") (TI "*x")])
+
+(define_mode_attr mem_constr [(BI "*m") (QI "m") (HI "m") (SI "m") (DI "m,Q") (SF "Q,m") (DF "Q,m") (XF "m") (TI "Q")])
+
+(define_mode_attr reg_pred_prefix [(BI "gr") (QI "gr") (HI "gr") (SI "gr") (DI "grfr") (SF "grfr") (DF "grfr") (XF "fr") (TI "fr")])
+
+(define_mode_attr ld_class [(BI "ld") (QI "ld") (HI "ld") (SI "ld") (DI "ld,fld") (SF "fld,ld") (DF "fld,ld") (XF "fld") (TI "fldp")])
+(define_mode_attr chka_class [(BI "chk_a") (QI "chk_a") (HI "chk_a") (SI "chk_a") (DI "chk_a,chk_a") (SF "chk_a,chk_a") (DF "chk_a,chk_a") (XF "chk_a") (TI "chk_a")])
+(define_mode_attr chks_class [(BI "chk_s_i") (QI "chk_s_i") (HI "chk_s_i") (SI "chk_s_i") (DI "chk_s_i,chk_s_f") (SF "chk_s_f,chk_s_i") (DF "chk_s_f,chk_s_i") (XF "chk_s_f") (TI "chk_s_i")])
+
+(define_mode_attr attr_yes [(BI "yes") (QI "yes") (HI "yes") (SI "yes") (DI "yes,yes") (SF "yes,yes") (DF "yes,yes") (XF "yes") (TI "yes")])
+
I'd add log entry for these definitions:
(MODE, MODE_FOR_EXETNDED, output_a, ... , ld_reg_constr, ...,
attr_yes): Auxiliary definitions for the patterns.
after the entry
(advanced_load_check_clr_<mode>, speculation_check_<mode>):
New pattern for data and control speculation checks.