[PATCH] RISC-V: Enable overlap-by-pieces via tune param

Christoph Müllner cmuellner@gcc.gnu.org
Thu Jul 22 09:20:01 GMT 2021


On Thu, Jul 22, 2021 at 10:53 AM Kito Cheng <kito.cheng@gmail.com> wrote:
>
> It's my first time seeing this hook :p Did you mind describing when we
> need to set it to true?
> I mean when a CPU has some feature then we can/should set it to true?

The by-pieces infrastructure allows builtins to be inlined quite well and
uses slow_unaligned_access and overlap_op_by_pieces to tune the
emitted instruction sequence.

If unaligned accesses are fast, then overlap_op_by_pieces can reduce
the number of instructions (emitted by by-pieces for e.g. memset) at the cost
of overlapping memory accesses.

E.g. say you want to clear 15 bytes with memset (and unaligned accesses are cheap).
Without overlap_op_by_pieces you will get:
  8e:   00053023                sd      zero,0(a0)
  92:   00052423                sw      zero,8(a0)
  96:   00051623                sh      zero,12(a0)
  9a:   00050723                sb      zero,14(a0)
With overlap_op_by_pieces you will get (the second sd starts at offset 7 and
overlaps the first by one byte):
  7e:   00053023                sd      zero,0(a0)
  82:   000533a3                sd      zero,7(a0)

BR
Christoph

>
>
> On Thu, Jul 22, 2021 at 7:33 AM Christoph Muellner via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > This patch adds the field overlap_op_by_pieces to the struct
> > riscv_tune_param, which allows to enable the overlap_op_by_pieces
> > feature of the by-pieces infrastructure.
> >
> > gcc/ChangeLog:
> >
> >         * config/riscv/riscv.c (struct riscv_tune_param): New field.
> >         (riscv_overlap_op_by_pieces): New function.
> >         (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> >         riscv_overlap_op_by_pieces.
> >
> > Signed-off-by: Christoph Muellner <cmuellner@gcc.gnu.org>
> > ---
> >  gcc/config/riscv/riscv.c | 14 ++++++++++++++
> >  1 file changed, 14 insertions(+)
> >
> > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> > index 576960bb37c..824e930ef05 100644
> > --- a/gcc/config/riscv/riscv.c
> > +++ b/gcc/config/riscv/riscv.c
> > @@ -220,6 +220,7 @@ struct riscv_tune_param
> >    unsigned short branch_cost;
> >    unsigned short memory_cost;
> >    bool slow_unaligned_access;
> > +  bool overlap_op_by_pieces;
> >  };
> >
> >  /* Information about one micro-arch we know about.  */
> > @@ -285,6 +286,7 @@ static const struct riscv_tune_param rocket_tune_info = {
> >    3,                                           /* branch_cost */
> >    5,                                           /* memory_cost */
> >    true,                                                /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  /* Costs to use when optimizing for Sifive 7 Series.  */
> > @@ -298,6 +300,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
> >    4,                                           /* branch_cost */
> >    3,                                           /* memory_cost */
> >    true,                                                /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  /* Costs to use when optimizing for T-HEAD c906.  */
> > @@ -311,6 +314,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
> >    3,            /* branch_cost */
> >    5,            /* memory_cost */
> >    false,            /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  /* Costs to use when optimizing for size.  */
> > @@ -324,6 +328,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
> >    1,                                           /* branch_cost */
> >    2,                                           /* memory_cost */
> >    false,                                       /* slow_unaligned_access */
> > +  false,                                       /* overlap_op_by_pieces */
> >  };
> >
> >  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> > @@ -5201,6 +5206,12 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
> >    return riscv_slow_unaligned_access_p;
> >  }
> >
> > +static bool
> > +riscv_overlap_op_by_pieces (void)
> > +{
> > +  return tune_param->overlap_op_by_pieces;
> > +}
> > +
> >  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> >
> >  static bool
> > @@ -5525,6 +5536,9 @@ riscv_asan_shadow_offset (void)
> >  #undef TARGET_SLOW_UNALIGNED_ACCESS
> >  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> >
> > +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> > +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> > +
> >  #undef TARGET_SECONDARY_MEMORY_NEEDED
> >  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> >
> > --
> > 2.31.1
> >


More information about the Gcc-patches mailing list