[PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)

Philipp Tomsich philipp.tomsich@vrull.eu
Wed Nov 17 19:44:12 GMT 2021


On Wed, 17 Nov 2021 at 20:40, Palmer Dabbelt <palmer@rivosinc.com> wrote:

> [This is my first time trying my Rivos address on the lists, so sorry if
> something goes off the rails.]
>
> On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:
> > Hi Philipp:
> >
> > Thanks for the patch, I like this approach, that can easily configure
> > different capabilities for each core :)
> >
> > So there are only a few minor comments for this patch.
> >
> > On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
> > <philipp.tomsich@vrull.eu> wrote:
> >>
> >> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
> >>
> >> The Ventana VT1 core supports quad-issue and instruction fusion.
> >> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> >> together and adds idiom matcheing for the supported fusion cases.
>
> There's a typo at "matcheing".
>
> >>
> >> gcc/ChangeLog:
> >>
> >>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
> >>         constants to identify supported fusion patterns.
> >>         (struct riscv_tune_param): Add fusible_op field.
> >>         (riscv_macro_fusion_p): Implement.
> >>         (riscv_fusion_enabled_p): Implement.
> >>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
> >>         idioms for Ventana VT1.
> >>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
> >>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to
> riscv_macro_fusion_pair_p.
> >>
> >> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
>
> This doesn't match the From (though admittedly I'm pretty new to the SoB
> stuff in GCC, so I'm not sure if that's even a rule here).
>

I noticed that I hadn't reset the authors and that patman had inserted a
Signed-off-by: for that reason, right after I sent this out.
Given that it's all me and there's both individual assignment paperwork and
company disclaimers on file for all of the email-addresses, this should be
fine.

>> ---
> >>
> >>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 196 insertions(+)
> >>
> >> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> >> index 6b918db65e9..8eac52101a3 100644
> >> --- a/gcc/config/riscv/riscv.c
> >> +++ b/gcc/config/riscv/riscv.c
> >> @@ -211,6 +211,19 @@ struct riscv_integer_op {
> >>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
> >>  #define RISCV_MAX_INTEGER_OPS 8
> >>
> >> +enum riscv_fusion_pairs
> >> +{
> >> +  RISCV_FUSE_NOTHING = 0,
> >> +  RISCV_FUSE_ZEXTW = (1 << 0),
> >> +  RISCV_FUSE_ZEXTH = (1 << 1),
> >> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> >> +  RISCV_FUSE_LDINDEXED = (1 << 3),
> >
> > RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED
> >
> > Could you add some comment for above enums, like that:
> > /* slli rx, rx, 32 + srli rx, rx, 32 */
> > RISCV_FUSE_ZEXTW
> >
> > So that we could know what kind of instruction will be funded for this
> enum.
> >
> >> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> >> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> >> +  RISCV_FUSE_LUI_LD = (1 << 6),
> >> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> >> +};
> >> +
> >>  /* Costs of various operations on the different architectures.  */
> >>
> >>  struct riscv_tune_param
> >> @@ -224,6 +237,7 @@ struct riscv_tune_param
> >>    unsigned short branch_cost;
> >>    unsigned short memory_cost;
> >>    bool slow_unaligned_access;
> >> +  unsigned int fusible_ops;
> >>  };
> >>
> >>  /* Information about one micro-arch we know about.  */
> >> @@ -289,6 +303,7 @@ static const struct riscv_tune_param
> rocket_tune_info = {
> >>    3,                                           /* branch_cost */
> >>    5,                                           /* memory_cost */
> >>    true,                                                /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
>
> There's some tab/space issues here (and in the below ones).  They align
> when merged, but the new lines are spaces-only and the old ones have
> internal spaces mixed with tabs (IIRC that's to the GCC style, if not we
> should fix these to at least be consistent).
>
> >>
> >>  /* Costs to use when optimizing for Sifive 7 Series.  */
> >> @@ -302,6 +317,7 @@ static const struct riscv_tune_param
> sifive_7_tune_info = {
> >>    4,                                           /* branch_cost */
> >>    3,                                           /* memory_cost */
> >>    true,                                                /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for T-HEAD c906.  */
> >> @@ -328,6 +344,7 @@ static const struct riscv_tune_param
> optimize_size_tune_info = {
> >>    1,                                           /* branch_cost */
> >>    2,                                           /* memory_cost */
> >>    false,                                       /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for Ventana Micro VT1.  */
> >> @@ -341,6 +358,10 @@ static const struct riscv_tune_param
> ventana_vt1_tune_info = {
> >>    4,                                           /* branch_cost */
> >>    5,                                           /* memory_cost */
> >>    false,                                       /*
> slow_unaligned_access */
> >> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
> >> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
> >> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
> >> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
> >>  };
> >>
> >>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int,
> bool *);
> >> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
> >>    return tune_param->issue_rate;
> >>  }
> >>
> >> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target
> supports
> >> +   instruction fusion of some sort.  */
> >> +
> >> +static bool
> >> +riscv_macro_fusion_p (void)
> >> +{
> >> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
> >> +}
> >> +
> >> +/* Return true iff the instruction fusion described by OP is enabled.
> */
> >> +
> >> +static bool
> >> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
> >
> > space between function name and parentheses.
> >
> > riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
> >
> >> +{
> >> +  return tune_param->fusible_ops & op;
> >> +}
> >> +
> >> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV
> and CURR
> >> +   should be kept together during scheduling.  */
> >> +
> >> +static bool
> >> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
> >> +{
> >> +  rtx prev_set = single_set (prev);
> >> +  rtx curr_set = single_set (curr);
> >> +  /* prev and curr are simple SET insns i.e. no flag setting or
> branching.  */
> >> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
> >> +
> >> +  if (!riscv_macro_fusion_p ())
> >> +    return false;
> >> +
> >> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
> >> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (slli) == (set (reg:DI rD)
> >> +                              (ashift:DI (reg:DI rS) (const_int 32)))
> >> +          curr (slri) == (set (reg:DI rD)
>
> This really jumps out as the sort of thing that should be in RTL, but
> I'm not sure that's feasible.  The short branch->cmov optimization we're
> doing for the SiFive 7 series is sort of fusion and done in RTL, but the
> mechanism is very different there so it wouldn't apply directly.  There
> are other ports doing it this way so I'm not opposed to merging this
> as-is, but if there's a straight-forward way to port this to RTL then
> it's probably be a lot saner to maintain in the long term.  I can't come
> up with anything sane, though.
>
> On the subject of maintainability: I don't see any tests for this.
> Given that we're exceedingly likely to end up with other targets that
> have front-end fusion and thus try to share this code, it'd be really
> nice to have either some documentation of what fusions this core
> performs or some tests that exercise the important ones.  I can't find
> any other references to "Ventana VT1" on the internet, but LMK if
> there's something I'm missing.
>
> I'm not sure lacking tests should block merging this (as it's not like
> we have much optimization-based testing for the RISC-V port), but
> without at least some public documentation it's going to be hard to
> produce those outside of Ventana -- essentially I'm worried about ending
> up with the same black-box pipeline problem we have for the C906.
>
> I seem to remember having tests for the SiFive optimization, but I
> couldn't find any when I looked.  Maybe Jim or Kito remembers where to
> find them, otherwise I'll throw some together.
>
> >
> > slri -> srli
> >
> >> +                              (lshiftrt:DI (reg:DI rD) (const_int
> <shift>)))
> >> +        with <shift> being either 32 for FUSE_ZEXTW, or
> >> +                        `less than 32 for FUSE_ZEXTWS. */
> >
> > `less <-- I guess the ` is kind of an accident?
> >
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> >> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> >> +         && REG_P (SET_DEST (prev_set))
> >> +         && REG_P (SET_DEST (curr_set))
> >> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST
> (curr_set))
> >> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
> >> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
> >> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
> >
> > space between function name and parentheses.
> >
> >> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
> >> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
> >
> > space between function name and parentheses.
> >
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (slli) == (set (reg:DI rD)
> >> +                              (ashift:DI (reg:DI rS) (const_int 48)))
> >> +          curr (slri) == (set (reg:DI rD)
> >> +                              (lshiftrt:DI (reg:DI rD) (const_int
> 48))) */
> >
> > slri -> srli
> >
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> >> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> >> +         && REG_P (SET_DEST (prev_set))
> >> +         && REG_P (SET_DEST (curr_set))
> >> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST
> (curr_set))
> >> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
> >> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (add) == (set (reg:DI rD)
> >> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (reg:DI rD))) */
> >> +
> >> +      if (MEM_P (SET_SRC (curr_set))
> >> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST
> (prev_set))
> >> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> >> +       return true;
> >> +
> >> +      /* We are trying to match the following:
> >> +          prev (add) == (set (reg:DI rD)
> >> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
> >> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
> >> +
> >> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
> >> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
> >> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
> >> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
> >> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO
> (SET_DEST (prev_set))
> >
> > This line is over 80 columns.
> >
> >
> >> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> >> +          curr (addi) == (set (reg:DI rD)
> >> +                              (plus:DI (reg:DI rD) (const_int IMM12)))
> */
> >> +
> >> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
> >> +         && CONST_INT_P (SET_SRC (prev_set))
> >> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...]
> UNSPEC_AUIPC))
> >> +          prev (addi)  == (set (reg:DI rD)
> >> +                               (plus:DI (reg:DI rD) (const_int
> IMM12))) */
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> >> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> >> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
> >> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (plus:DI (reg:DI rD) (const_int
> IMM12)))) */
> >> +
> >> +      if (CONST_INT_P (SET_SRC (prev_set))
> >> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
> >> +         && MEM_P (SET_SRC (curr_set))
> >> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...]
> UNSPEC_AUIPC))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (plus:DI (reg:DI rD) (const_int
> IMM12)))) */
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> >> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> >> +         && MEM_P (SET_SRC (curr_set))
> >> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> >> +       return true;
> >> +    }
> >> +
> >> +  return false;
> >> +}
> >> +
> >>  /* Auxiliary function to emit RISC-V ELF attribute. */
> >>  static void
> >>  riscv_emit_attribute ()
> >> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
> >>
> >>  #undef TARGET_SCHED_ISSUE_RATE
> >>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
> >> +#undef TARGET_SCHED_MACRO_FUSION_P
> >> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
> >> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
> >> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
> >>
> >>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
> >>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
> >> --
> >> 2.32.0
> >>
>


More information about the Gcc-patches mailing list