Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion


SCHED_GROUP_P now works after I add a call to chain_to_prev_insn at
the end of add_branch_dependences, in order to chain control
dependences to the previous insn for the sched group. Here is the new
patch. Testing is in progress.
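
For a concrete picture of the pair being grouped, here is a small
illustrative example (not part of the patch; the exact assembly depends
on options and compiler version). With -O2 -mtune=corei7 the loop test
below normally compiles to a cmp/test immediately followed by a
conditional jump, and keeping that pair adjacent lets the hardware fuse
it into a single uop:

/* Illustrative only: the compare feeding the conditional jump plays the
   role of "condgen", and the jump itself is the "condjmp" that the new
   hooks inspect.  */
int
count_less_than (const int *a, int n, int key)
{
  int i, cnt = 0;
  for (i = 0; i < n; i++)
    if (a[i] < key)   /* typically cmp ...; jge/jl ... in the output  */
      cnt++;
  return cnt;
}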

Thanks,
Wei Mi.

2013-09-06  Wei Mi  <wmi@google.com>

        * config/i386/i386.c (ix86_macro_fusion_p): New function.
        (ix86_macro_fusion_pair_p): Ditto.
        * config/i386/x86-tune.def (X86_TUNE_FUSE_CMP_AND_BRANCH): Add
        m_COREI7.
        * sched-deps.c (group_insns_for_macro_fusion): New function.
        (sched_analyze_insn): Call group_insns_for_macro_fusion.
        (chain_to_prev_insn): Change it from static to extern.
        (chain_to_prev_insn_p): Ditto.
        * doc/tm.texi: Regenerated.
        * doc/tm.texi.in: Add TARGET_SCHED_MACRO_FUSION_P and
        TARGET_SCHED_MACRO_FUSION_PAIR_P.
        * sched-int.h (chain_to_prev_insn, chain_to_prev_insn_p): Declare.
        * sched-rgn.c (add_branch_dependences): Chain control
        dependences to the previous insn for the sched group.
        * target.def (macro_fusion_p, macro_fusion_pair_p): New hooks.
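
Since the patch introduces two new target hooks, here is a minimal
sketch of how a backend wires them up. The foo_* names are hypothetical
and only illustrate the hook signatures; the i386 implementation in the
diff below is the real one.

/* Hypothetical backend sketch, not part of this patch.  */

static bool
foo_macro_fusion_p (void)
{
  /* Report macro-fusion support; a real backend would key this off a
     tuning flag such as the i386 TARGET_FUSE_CMP_AND_BRANCH.  */
  return true;
}

static bool
foo_macro_fusion_pair_p (rtx condgen, rtx condjmp)
{
  /* Accept only a single-set compare feeding a conditional jump.  */
  rtx set = single_set (condgen);
  return set != NULL_RTX
         && GET_CODE (SET_SRC (set)) == COMPARE
         && any_condjump_p (condjmp);
}

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P foo_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P foo_macro_fusion_pair_p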

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 201963)
+++ config/i386/i386.c  (working copy)
@@ -24850,6 +24850,99 @@ ia32_multipass_dfa_lookahead (void)
     }
 }

+/* Return true if the target platform supports macro-fusion.  */
+
+static bool
+ix86_macro_fusion_p ()
+{
+  if (TARGET_FUSE_CMP_AND_BRANCH)
+    return true;
+  else
+    return false;
+}
+
+/* Check whether the current microarchitecture supports macro-fusion
+   for the insn pair CONDGEN + CONDJMP.  Refer to the
+   "Intel Architectures Optimization Reference Manual".  */
+
+static bool
+ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
+{
+  rtx src;
+  if (!strcmp (ix86_tune_string, "corei7"))
+    {
+      /* For Nehalem.  */
+      rtx set = single_set (condgen);
+      /* Nehalem doesn't support macro-fusion for add/sub+jmp.  */
+      if (set == NULL_RTX)
+        return false;
+
+      src = SET_SRC (set);
+      if (GET_CODE (src) != COMPARE)
+       return false;
+
+      /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
+        insn pattern.  */
+      if ((MEM_P (XEXP (src, 0))
+          && CONST_INT_P (XEXP (src, 1)))
+         || (MEM_P (XEXP (src, 1))
+             && CONST_INT_P (XEXP (src, 0))))
+       return false;
+
+      /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp.  */
+      if (get_attr_type (condgen) != TYPE_TEST
+         && get_attr_type (condgen) != TYPE_ICMP)
+       return false;
+      return true;
+    }
+  else if (!strcmp (ix86_tune_string, "corei7-avx"))
+    {
+      /* For Sandybridge.  */
+      enum rtx_code ccode;
+      rtx compare_set = NULL_RTX, test_if, cond;
+      rtx set = single_set (condgen);
+      if (set != NULL_RTX)
+        compare_set = set;
+      else
+       {
+         int i;
+         rtx pat = PATTERN (condgen);
+         for (i = 0; i < XVECLEN (pat, 0); i++)
+           if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+               && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE)
+             compare_set = XVECEXP (pat, 0, i);
+       }
+
+      if (compare_set == NULL_RTX)
+       return false;
+      src = SET_SRC (compare_set);
+      if (GET_CODE (src) != COMPARE)
+       return false;
+
+      /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
+        insn pattern.  */
+      if ((MEM_P (XEXP (src, 0))
+           && CONST_INT_P (XEXP (src, 1)))
+          || (MEM_P (XEXP (src, 1))
+              && CONST_INT_P (XEXP (src, 0))))
+        return false;
+
+      /* Sandybridge doesn't support macro-fusion for inc/dec +
+        unsigned comparison jmp.  */
+      test_if = SET_SRC (pc_set (condjmp));
+      cond = XEXP (test_if, 0);
+      ccode = GET_CODE (cond);
+      if (get_attr_type (condgen) == TYPE_INCDEC
+         && (ccode == GEU
+             || ccode == GTU
+             || ccode == LEU
+             || ccode == LTU))
+       return false;
+      return true;
+    }
+  return false;
+}
+
 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
    execution. It is applied if
    (1) IMUL instruction is on the top of list;
@@ -42982,6 +43075,10 @@ ix86_memmodel_check (unsigned HOST_WIDE_
 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   ia32_multipass_dfa_lookahead
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
Index: config/i386/x86-tune.def
===================================================================
--- config/i386/x86-tune.def    (revision 201963)
+++ config/i386/x86-tune.def    (working copy)
@@ -196,7 +196,8 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS,
 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
    with a subsequent conditional jump instruction into a single
    compare-and-branch uop.  */
-DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER)
+DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch",
+          m_COREI7 | m_BDVER)
 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
    will impact LEA instruction selection. */
 DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM)
Index: sched-deps.c
===================================================================
--- sched-deps.c        (revision 201963)
+++ sched-deps.c        (working copy)
@@ -487,7 +487,6 @@ static void add_dependence_list (rtx, rt
 static void add_dependence_list_and_free (struct deps_desc *, rtx,
                                          rtx *, int, enum reg_note, bool);
 static void delete_all_dependences (rtx);
-static void chain_to_prev_insn (rtx);

 static void flush_pending_lists (struct deps_desc *, rtx, int, int);
 static void sched_analyze_1 (struct deps_desc *, rtx, rtx);
@@ -1660,7 +1659,7 @@ delete_all_dependences (rtx insn)
    chains backwards. Then we add the dependencies for the group to
    the previous nonnote insn.  */

-static void
+void
 chain_to_prev_insn (rtx insn)
 {
   sd_iterator_def sd_it;
@@ -2821,6 +2820,35 @@ sched_analyze_2 (struct deps_desc *deps,
     sched_deps_info->finish_rhs ();
 }

+/* If the cond jump INSN and the insn that sets the condition register
+   are consecutive before scheduling, keep them in a sched group.  This
+   helps performance on microarchitectures supporting macro-fusion.  */
+
+static void
+group_insns_for_macro_fusion (rtx insn)
+{
+  unsigned int condreg1, condreg2;
+  rtx cc_reg_1;
+  rtx prev;
+
+  targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+  prev = prev_nonnote_nondebug_insn (insn);
+  if (!any_condjump_p (insn)
+      || !reg_referenced_p (cc_reg_1, PATTERN (insn))
+      || !prev
+      || !modified_in_p (cc_reg_1, prev))
+    return;
+
+  /* Different microarchitectures support macro fusion for different
+     combinations of insn pairs.  */
+  if (!targetm.sched.macro_fusion_pair_p
+      || !targetm.sched.macro_fusion_pair_p (prev, insn))
+    return;
+
+  SCHED_GROUP_P (insn) = 1;
+}
+
 /* Analyze an INSN with pattern X to find all dependencies.  */
 static void
 sched_analyze_insn (struct deps_desc *deps, rtx x, rtx insn)
@@ -2844,6 +2872,10 @@ sched_analyze_insn (struct deps_desc *de
   can_start_lhs_rhs_p = (NONJUMP_INSN_P (insn)
                         && code == SET);

+  if (targetm.sched.macro_fusion_p
+      && targetm.sched.macro_fusion_p ())
+    group_insns_for_macro_fusion (insn);
+
   if (may_trap_p (x))
     /* Avoid moving trapping instructions across function calls that might
        not always return.  */
@@ -3504,7 +3536,7 @@ call_may_noreturn_p (rtx insn)
    group, and if all INSN's dependencies should be moved to the first
    instruction of that group.  */

-static bool
+bool
 chain_to_prev_insn_p (rtx insn)
 {
   rtx prev, x;
Index: doc/tm.texi
===================================================================
--- doc/tm.texi (revision 201963)
+++ doc/tm.texi (working copy)
@@ -6553,6 +6553,17 @@ scheduling one insn causes other insns t
 cycle.  These other insns can then be taken into account properly.
 @end deftypefn

+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_P (void)
+This hook is used to check whether the target platform supports macro fusion.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_PAIR_P (rtx @var{condgen}, rtx @var{condjmp})
+This hook is used to check whether two insns can be macro-fused for the
+target microarchitecture.  If this hook returns true for the given insn pair
+(@var{condgen} and @var{condjmp}), the scheduler will put them into a sched
+group, and they will not be scheduled apart.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail})
 This hook is called after evaluation forward dependencies of insns in
 chain given by two parameter values (@var{head} and @var{tail}
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in      (revision 201963)
+++ doc/tm.texi.in      (working copy)
@@ -4940,6 +4940,10 @@ them: try the first ones in this list fi

 @hook TARGET_SCHED_REORDER2

+@hook TARGET_SCHED_MACRO_FUSION_P
+
+@hook TARGET_SCHED_MACRO_FUSION_PAIR_P
+
 @hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK

 @hook TARGET_SCHED_INIT
Index: sched-int.h
===================================================================
--- sched-int.h (revision 201963)
+++ sched-int.h (working copy)
@@ -1302,6 +1302,8 @@ extern void finish_deps_global (void);
 extern void deps_analyze_insn (struct deps_desc *, rtx);
 extern void remove_from_deps (struct deps_desc *, rtx);
 extern void init_insn_reg_pressure_info (rtx);
+extern bool chain_to_prev_insn_p (rtx insn);
+extern void chain_to_prev_insn (rtx);

 extern dw_t get_dep_weak (ds_t, ds_t);
 extern ds_t set_dep_weak (ds_t, ds_t, dw_t);
Index: sched-rgn.c
===================================================================
--- sched-rgn.c (revision 201963)
+++ sched-rgn.c (working copy)
@@ -2507,7 +2507,7 @@ add_branch_dependences (rtx head, rtx ta
       }

   if (!targetm.have_conditional_execution ())
-    return;
+    goto chain_to_prev_insn;

   /* Finally, if the block ends in a jump, and we are doing intra-block
      scheduling, make sure that the branch depends on any COND_EXEC insns
@@ -2543,7 +2543,7 @@ add_branch_dependences (rtx head, rtx ta
      could remove always-true predicates.  */

   if (!reload_completed || ! (JUMP_P (tail) || JUMP_TABLE_DATA_P (tail)))
-    return;
+    goto chain_to_prev_insn;

   insn = tail;
   while (insn != head)
@@ -2557,6 +2557,23 @@ add_branch_dependences (rtx head, rtx ta
       if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
        add_dependence (tail, insn, REG_DEP_ANTI);
     }
+
+ chain_to_prev_insn:
+  /* Control dependences also need to be chained to the previous insn
+     for the sched group.  */
+  insn = tail;
+  while (insn != head)
+    {
+      /* Fix up the dependencies in the sched group.  */
+      if (JUMP_P (insn)
+         && chain_to_prev_insn_p (insn)
+         && !sel_sched_p ())
+       chain_to_prev_insn (insn);
+
+      insn = PREV_INSN (insn);
+    }
+
+  return;
 }

 /* Data structures for the computation of data dependences in a regions.  We
Index: target.def
===================================================================
--- target.def  (revision 201963)
+++ target.def  (working copy)
@@ -1041,6 +1041,19 @@ scheduling one insn causes other insns t
 cycle.  These other insns can then be taken into account properly.",
  int, (FILE *file, int verbose, rtx *ready, int *n_readyp, int clock), NULL)

+DEFHOOK
+(macro_fusion_p,
+ "This hook is used to check whether the target platform supports macro fusion.",
+ bool, (void), NULL)
+
+DEFHOOK
+(macro_fusion_pair_p,
+ "This hook is used to check whether two insns can be macro-fused for the\n\
+target microarchitecture.  If this hook returns true for the given insn pair\n\
+(@var{condgen} and @var{condjmp}), the scheduler will put them into a sched\n\
+group, and they will not be scheduled apart.",
+ bool, (rtx condgen, rtx condjmp), NULL)
+
 /* The following member value is a pointer to a function called
    after evaluation forward dependencies of insns in chain given
    by two parameter values (head and tail correspondingly).  */

