This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion
- From: Wei Mi <wmi at google dot com>
- To: Alexander Monakov <amonakov at ispras dot ru>
- Cc: Steven Bosscher <stevenb dot gcc at gmail dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>, David Li <davidxl at google dot com>
- Date: Fri, 13 Sep 2013 10:28:12 -0700
- Subject: Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion
- Authentication-results: sourceware.org; auth=none
- References: <CA+4CFy60EZ99xBGecerYLM9wD6w2v60SpJsfOGguA_PWLLupsA at mail dot gmail dot com> <alpine dot LNX dot 2 dot 00 dot 1309041253370 dot 5281 at monopod dot intra dot ispras dot ru> <CABu31nONs_DJ99YXDyWAE2a5iyoQWQyuGwLm0jrcb+mCx2M1Aw at mail dot gmail dot com> <CABtfrpCU1jkTZvA-cZ99bjhBnvQMTkNxHMDhhmkAa_k891T1zw at mail dot gmail dot com> <CA+4CFy5A2Nk-yUfQNZYQQd+O93mcJYBVty9S_5sd9VFinxmYdg at mail dot gmail dot com> <CA+4CFy4M8ckX3g8xCQEV+g4SdWgy742oBiGSep4GM74q3paVzA at mail dot gmail dot com> <alpine dot LNX dot 2 dot 00 dot 1309101543040 dot 2373 at monopod dot intra dot ispras dot ru> <CA+4CFy6nqWvmYsFSbPtQqX3MJvXFpMv3fMGXhr1-F05Qz42f8Q at mail dot gmail dot com> <alpine dot LNX dot 2 dot 00 dot 1309111358041 dot 2373 at monopod dot intra dot ispras dot ru> <CA+4CFy7Jsq_k65Nj63HA013KYuExkvtQcEKbVsMszyqVNtHC_Q at mail dot gmail dot com> <alpine dot LNX dot 2 dot 00 dot 1309112023220 dot 2373 at monopod dot intra dot ispras dot ru> <CA+4CFy7=nXH=nLcpZDT0Qv+48zCb2hp==zvQjcWJVgKuDM59Nw at mail dot gmail dot com> <alpine dot LNX dot 2 dot 00 dot 1309121340440 dot 2373 at monopod dot intra dot ispras dot ru> <CA+4CFy4ozQ-ViyzpXWr+0JDaiOkOk8GMWm5ZdaMpQKfTX_2yYw at mail dot gmail dot com> <alpine dot LNX dot 2 dot 00 dot 1309131331230 dot 2373 at monopod dot intra dot ispras dot ru>
> Thanks. At this point you need feedback from x86 and scheduler maintainers.
> I would recommend you to resubmit the patch with a Changelog text, and with
> the text of the patch inline in the email (your last mail has the patch as a
> binary attachment, which makes it harder to review and respond to). Please
> mention if the updated patch passes bootstrap and regtest.
Thanks! Here is the new patch. Bootstrap and regression tests pass. OK for trunk?
2013-09-13 Wei Mi <wmi@google.com>
* sched-rgn.c (add_branch_dependences): Keep insns in
a SCHED_GROUP at the end of bb at their original locations.
* config/i386/x86-tune.def (DEF_TUNE): Add m_COREI7 for
X86_TUNE_FUSE_CMP_AND_BRANCH.
* config/i386/i386.c (ix86_macro_fusion_p): New function.
(ix86_macro_fusion_pair_p): Ditto.
* doc/tm.texi.in: Add @hook entries for TARGET_SCHED_MACRO_FUSION_P
and TARGET_SCHED_MACRO_FUSION_PAIR_P.
* doc/tm.texi: Regenerated.
* target.def: Add two hooks: macro_fusion_p and
macro_fusion_pair_p.
* haifa-sched.c (try_group_insn): New function.
(group_insns_for_macro_fusion): New function.
(sched_init): Call group_insns_for_macro_fusion.
Index: sched-rgn.c
===================================================================
--- sched-rgn.c (revision 201963)
+++ sched-rgn.c (working copy)
@@ -2443,6 +2443,8 @@ add_branch_dependences (rtx head, rtx ta
cc0 setters remain at the end because they can't be moved away from
their cc0 user.
+ Predecessors of SCHED_GROUP_P instructions at the end remain at the end.
+
COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
@@ -2465,7 +2467,8 @@ add_branch_dependences (rtx head, rtx ta
#endif
|| (!reload_completed
&& sets_likely_spilled (PATTERN (insn)))))
- || NOTE_P (insn))
+ || NOTE_P (insn)
+ || (last != 0 && SCHED_GROUP_P (last)))
{
if (!NOTE_P (insn))
{
Index: config/i386/x86-tune.def
===================================================================
--- config/i386/x86-tune.def (revision 201963)
+++ config/i386/x86-tune.def (working copy)
@@ -196,7 +196,8 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS,
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
-DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER)
+DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch",
+ m_COREI7 | m_BDVER)
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM)
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 201963)
+++ config/i386/i386.c (working copy)
@@ -24850,6 +24850,99 @@ ia32_multipass_dfa_lookahead (void)
}
}
+/* Return true if target platform supports macro-fusion. */
+
+static bool
+ix86_macro_fusion_p ()
+{
+ if (TARGET_FUSE_CMP_AND_BRANCH)
+ return true;
+ else
+ return false;
+}
+
+/* Check whether the current microarchitecture supports macro fusion
+ for insn pair "CONDGEN + CONDJMP". Refer to
+ "Intel Architectures Optimization Reference Manual". */
+
+static bool
+ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
+{
+ rtx src;
+ if (!strcmp (ix86_tune_string, "corei7"))
+ {
+ /* For Nehalem. */
+ rtx single_set = single_set (condgen);
+ /* Nehalem doesn't support macro-fusion for add/sub+jmp. */
+ if (single_set == NULL_RTX)
+ return false;
+
+ src = SET_SRC (single_set);
+ if (GET_CODE (src) != COMPARE)
+ return false;
+
+ /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
+ insn pattern. */
+ if ((MEM_P (XEXP (src, 0))
+ && CONST_INT_P (XEXP (src, 1)))
+ || (MEM_P (XEXP (src, 1))
+ && CONST_INT_P (XEXP (src, 0))))
+ return false;
+
+ /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp. */
+ if (get_attr_type (condgen) != TYPE_TEST
+ && get_attr_type (condgen) != TYPE_ICMP)
+ return false;
+ return true;
+ }
+ else if (!strcmp (ix86_tune_string, "corei7-avx"))
+ {
+ /* For Sandybridge. */
+ enum rtx_code ccode;
+ rtx compare_set = NULL_RTX, test_if, cond;
+ rtx single_set = single_set (condgen);
+ if (single_set != NULL_RTX)
+ compare_set = single_set;
+ else
+ {
+ int i;
+ rtx pat = PATTERN (condgen);
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+ && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE)
+ compare_set = XVECEXP (pat, 0, i);
+ }
+
+ if (compare_set == NULL_RTX)
+ return false;
+ src = SET_SRC (compare_set);
+ if (GET_CODE (src) != COMPARE)
+ return false;
+
+ /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
+ insn pattern. */
+ if ((MEM_P (XEXP (src, 0))
+ && CONST_INT_P (XEXP (src, 1)))
+ || (MEM_P (XEXP (src, 1))
+ && CONST_INT_P (XEXP (src, 0))))
+ return false;
+
+ /* Sandybridge doesn't support macro-fusion for inc/dec +
+ unsigned comparison jmp. */
+ test_if = SET_SRC (pc_set (condjmp));
+ cond = XEXP (test_if, 0);
+ ccode = GET_CODE (cond);
+ if (get_attr_type (condgen) == TYPE_INCDEC
+ && (ccode == GEU
+ || ccode == GTU
+ || ccode == LEU
+ || ccode == LTU))
+ return false;
+ return true;
+ }
+ return false;
+}
+
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
execution. It is applied if
(1) IMUL instruction is on the top of list;
@@ -42982,6 +43075,10 @@ ix86_memmodel_check (unsigned HOST_WIDE_
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in (revision 201963)
+++ doc/tm.texi.in (working copy)
@@ -4940,6 +4940,10 @@ them: try the first ones in this list fi
@hook TARGET_SCHED_REORDER2
+@hook TARGET_SCHED_MACRO_FUSION_P
+
+@hook TARGET_SCHED_MACRO_FUSION_PAIR_P
+
@hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
@hook TARGET_SCHED_INIT
Index: doc/tm.texi
===================================================================
--- doc/tm.texi (revision 201963)
+++ doc/tm.texi (working copy)
@@ -6553,6 +6553,17 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_P (void)
+This hook is used to check whether target platform supports macro fusion.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_PAIR_P (rtx @var{condgen}, rtx @var{condjmp})
+This hook is used to check whether two insns could be macro fused for
+target microarchitecture. If this hook returns true for the given insn pair
+(@var{condgen} and @var{condjmp}), scheduler will put them into a sched
+group, and they will not be scheduled apart.
+@end deftypefn
+
@deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail})
This hook is called after evaluation forward dependencies of insns in
chain given by two parameter values (@var{head} and @var{tail}
Index: target.def
===================================================================
--- target.def (revision 201963)
+++ target.def (working copy)
@@ -1041,6 +1041,19 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.",
int, (FILE *file, int verbose, rtx *ready, int *n_readyp, int clock), NULL)
+DEFHOOK
+(macro_fusion_p,
+ "This hook is used to check whether target platform supports macro fusion.",
+ bool, (void), NULL)
+
+DEFHOOK
+(macro_fusion_pair_p,
+ "This hook is used to check whether two insns could be macro fused for\n\
+target microarchitecture. If this hook returns true for the given insn pair\n\
+(@var{condgen} and @var{condjmp}), scheduler will put them into a sched\n\
+group, and they will not be scheduled apart.",
+ bool, (rtx condgen, rtx condjmp), NULL)
+
/* The following member value is a pointer to a function called
after evaluation forward dependencies of insns in chain given
by two parameter values (head and tail correspondingly). */
Index: haifa-sched.c
===================================================================
--- haifa-sched.c (revision 201963)
+++ haifa-sched.c (working copy)
@@ -6519,6 +6519,44 @@ setup_sched_dump (void)
? stderr : dump_file);
}
+static void
+try_group_insn (rtx insn)
+{
+ unsigned int condreg1, condreg2;
+ rtx cc_reg_1;
+ rtx prev;
+
+ targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+ prev = prev_nonnote_nondebug_insn (insn);
+ if (!any_condjump_p (insn)
+ || !reg_referenced_p (cc_reg_1, PATTERN (insn))
+ || !prev
+ || !modified_in_p (cc_reg_1, prev))
+ return;
+
+ /* Different microarchitectures support macro fusions for different
+ combinations of insn pairs. */
+ if (!targetm.sched.macro_fusion_pair_p
+ || !targetm.sched.macro_fusion_pair_p (prev, insn))
+ return;
+
+ SCHED_GROUP_P (insn) = 1;
+}
+
+/* If the last cond jump and the cond register defining insn are consecutive
+ before scheduling, we want them to be in a schedule group. This is good
+ for performance on microarchitectures supporting macro-fusion. */
+
+static void
+group_insns_for_macro_fusion ()
+{
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ try_group_insn (BB_END (bb));
+}
+
/* Initialize some global state for the scheduler. This function works
with the common data shared between all the schedulers. It is called
from the scheduler specific initialization routine. */
@@ -6645,6 +6683,11 @@ sched_init (void)
}
curr_state = xmalloc (dfa_state_size);
+
+ /* Group compare and branch insns for macro-fusion. */
+ if (targetm.sched.macro_fusion_p
+ && targetm.sched.macro_fusion_p ())
+ group_insns_for_macro_fusion ();
}
static void haifa_init_only_bb (basic_block, basic_block);