Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

H.J. Lu hjl.tools@gmail.com
Fri Sep 13 18:29:00 GMT 2013


On Fri, Sep 13, 2013 at 10:28 AM, Wei Mi <wmi@google.com> wrote:
>> Thanks.  At this point you need feedback from x86 and scheduler maintainers.
>> I would recommend you to resubmit the patch with a Changelog text, and with
>> the text of the patch inline in the email (your last mail has the patch as a
>> binary attachment, which makes it harder to review and respond to).  Please
>> mention if the updated patch passes bootstrap and regtest.
>
> Thanks! Here is the new patch. bootstrap and regression pass. ok for trunk?
>
> 2013-09-13  Wei Mi  <wmi@google.com>
>
>         * sched-rgn.c (add_branch_dependences): Keep insns in
>         a SCHED_GROUP at the end of bb in their original locations.
>         * config/i386/x86-tune.def (DEF_TUNE): Add m_COREI7 for
>         X86_TUNE_FUSE_CMP_AND_BRANCH.
>         * config/i386/i386.c (ix86_macro_fusion_p): New Function.
>         (ix86_macro_fusion_pair_p): Ditto.
>         * doc/tm.texi.in: Generated.
>         * doc/tm.texi: Ditto.
>         * target.def: Add two hooks: macro_fusion_p and
>         macro_fusion_pair_p.
>         * haifa-sched.c (try_group_insn): New function.
>         (group_insns_for_macro_fusion): New function.
>         (sched_init): Call group_insns_for_macro_fusion.
>

> Index: config/i386/i386.c
> ===================================================================
> --- config/i386/i386.c  (revision 201963)
> +++ config/i386/i386.c  (working copy)
> @@ -24850,6 +24850,99 @@ ia32_multipass_dfa_lookahead (void)
>      }
>  }
>
> +/* Return true if target platform supports macro-fusion.  */
> +
> +static bool
> +ix86_macro_fusion_p ()
> +{
> +  if (TARGET_FUSE_CMP_AND_BRANCH)
> +    return true;
> +  else
> +    return false;
> +}
> +
> +/* Check whether the current microarchitecture supports macro-fusion
> +   for the insn pair "CONDGEN + CONDJMP".  Refer to the
> +   "Intel Architectures Optimization Reference Manual".  */
> +
> +static bool
> +ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
> +{
> +  rtx src;
> +  if (!strcmp (ix86_tune_string, "corei7"))
> +    {
> +      /* For Nehalem.  */
> +      rtx single_set = single_set (condgen);
> +      /* Nehalem doesn't support macro-fusion for add/sub+jmp.  */
> +      if (single_set == NULL_RTX)
> +        return false;
> +
> +      src = SET_SRC (single_set);
> +      if (GET_CODE (src) != COMPARE)
> +       return false;
> +
> +      /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
> +        insn pattern.  */
> +      if ((MEM_P (XEXP (src, 0))
> +          && CONST_INT_P (XEXP (src, 1)))
> +         || (MEM_P (XEXP (src, 1))
> +             && CONST_INT_P (XEXP (src, 0))))
> +       return false;
> +
> +      /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp.  */
> +      if (get_attr_type (condgen) != TYPE_TEST
> +         && get_attr_type (condgen) != TYPE_ICMP)
> +       return false;
> +      return true;
> +    }
> +  else if (!strcmp (ix86_tune_string, "corei7-avx"))
> +    {
> +      /* For Sandybridge.  */
> +      enum rtx_code ccode;
> +      rtx compare_set = NULL_RTX, test_if, cond;
> +      rtx single_set = single_set (condgen);
> +      if (single_set != NULL_RTX)
> +        compare_set = single_set;
> +      else
> +       {
> +         int i;
> +         rtx pat = PATTERN (condgen);
> +         for (i = 0; i < XVECLEN (pat, 0); i++)
> +           if (GET_CODE (XVECEXP (pat, 0, i)) == SET
> +               && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE)
> +             compare_set = XVECEXP (pat, 0, i);
> +       }
> +
> +      if (compare_set == NULL_RTX)
> +       return false;
> +      src = SET_SRC (compare_set);
> +      if (GET_CODE (src) != COMPARE)
> +       return false;
> +
> +      /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
> +        insn pattern.  */
> +      if ((MEM_P (XEXP (src, 0))
> +           && CONST_INT_P (XEXP (src, 1)))
> +          || (MEM_P (XEXP (src, 1))
> +              && CONST_INT_P (XEXP (src, 0))))
> +        return false;
> +
> +      /* Sandybridge doesn't support macro-fusion for inc/dec +
> +        unsigned comparison jmp.  */
> +      test_if = SET_SRC (pc_set (condjmp));
> +      cond = XEXP (test_if, 0);
> +      ccode = GET_CODE (cond);
> +      if (get_attr_type (condgen) == TYPE_INCDEC
> +         && (ccode == GEU
> +             || ccode == GTU
> +             || ccode == LEU
> +             || ccode == LTU))
> +       return false;
> +      return true;
> +    }
> +  return false;
> +}
> +

Checking for corei7/corei7-avx explicitly isn't a good idea.
Macro-fusion is also useful for Ivy Bridge and Haswell.  I think
you should use a variable to control it, similar to
TARGET_FUSE_CMP_AND_BRANCH.


-- 
H.J.



More information about the Gcc-patches mailing list