Add value range support into memcpy/memset expansion

Xinliang David Li davidxl@google.com
Fri Sep 27 16:48:00 GMT 2013


Nice extension. Test cases would be great to have.

thanks,

David

On Fri, Sep 27, 2013 at 7:50 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch makes it possible to access value range info from setmem/movstr that
> I plan to use in i386 memcpy/memset expansion code.  It is all quite
> straighforward except that I need to deal with cases where max size does not
> fit in HOST_WIDE_INT where I use maximal value as a marker.  It is then
> translated as NULL pointer to the expander that is bit inconsistent with other
> places that use -1 as marker of unknown value.
>
> I also think we lose some cases because of TER replacing out the SSA_NAME by
> something else, but it seems to work in quite many cases. This can be probably
> tracked incrementally by disabling TER here or finally getting away from
> expanding calls via the generic route.
>
> Bootstrapped/regtested x86_64-linux, OK?
>
> Honza
>
>         * doc/md.texi (setmem, movstr): Update documentation.
>         * builtins.c (determine_block_size): New function.
>         (expand_builtin_memcpy): Use it and pass it to
>         emit_block_move_hints.
>         (expand_builtin_memset_args): Use it and pass it to
>         set_storage_via_setmem.
>         * expr.c (emit_block_move_via_movmem): Add min_size/max_size parameters;
>         update call to expander.
>         (emit_block_move_hints): Add min_size/max_size parameters.
>         (clear_storage_hints): Likewise.
>         (set_storage_via_setmem): Likewise.
>         (clear_storage): Update.
>         * expr.h (emit_block_move_hints, clear_storage_hints,
>         set_storage_via_setmem): Update prototype.
>
> Index: doc/md.texi
> ===================================================================
> --- doc/md.texi (revision 202968)
> +++ doc/md.texi (working copy)
> @@ -5198,6 +5198,9 @@ destination and source strings are opera
>  the expansion of this pattern should store in operand 0 the address in
>  which the @code{NUL} terminator was stored in the destination string.
>
> +This patern has also several optional operands that are same as in
> +@code{setmem}.
> +
>  @cindex @code{setmem@var{m}} instruction pattern
>  @item @samp{setmem@var{m}}
>  Block set instruction.  The destination string is the first operand,
> @@ -5217,6 +5220,8 @@ respectively.  The expected alignment di
>  in a way that the blocks are not required to be aligned according to it in
>  all cases. This expected alignment is also in bytes, just like operand 4.
>  Expected size, when unknown, is set to @code{(const_int -1)}.
> +Operand 7 is the minimal size of the block and operand 8 is the
> +maximal size of the block (NULL if it can not be represented as CONST_INT).
>
>  The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}.
>
> Index: builtins.c
> ===================================================================
> --- builtins.c  (revision 202968)
> +++ builtins.c  (working copy)
> @@ -3070,6 +3070,51 @@ builtin_memcpy_read_str (void *data, HOS
>    return c_readstr (str + offset, mode);
>  }
>
> +/* LEN specify length of the block of memcpy/memset operation.
> +   Figure out its range and put it into MIN_SIZE/MAX_SIZE.  */
> +
> +static void
> +determine_block_size (tree len, rtx len_rtx,
> +                     unsigned HOST_WIDE_INT *min_size,
> +                     unsigned HOST_WIDE_INT *max_size)
> +{
> +  if (CONST_INT_P (len_rtx))
> +    {
> +      *min_size = *max_size = UINTVAL (len_rtx);
> +      return;
> +    }
> +  else
> +    {
> +      double_int min, max;
> +      if (TREE_CODE (len) == SSA_NAME
> +         && get_range_info (len, &min, &max) == VR_RANGE)
> +       {
> +         if (min.fits_uhwi ())
> +           *min_size = min.to_uhwi ();
> +         else
> +           *min_size = 0;
> +         if (max.fits_uhwi ())
> +           *max_size = max.to_uhwi ();
> +         else
> +           *max_size = (HOST_WIDE_INT)-1;
> +       }
> +      else
> +       {
> +         if (host_integerp (TYPE_MIN_VALUE (TREE_TYPE (len)), 1))
> +           *min_size = tree_low_cst (TYPE_MIN_VALUE (TREE_TYPE (len)), 1);
> +         else
> +           *min_size = 0;
> +         if (host_integerp (TYPE_MAX_VALUE (TREE_TYPE (len)), 1))
> +           *max_size = tree_low_cst (TYPE_MAX_VALUE (TREE_TYPE (len)), 1);
> +         else
> +           *max_size = GET_MODE_MASK (GET_MODE (len_rtx));
> +       }
> +    }
> +  gcc_checking_assert (*max_size <=
> +                      (unsigned HOST_WIDE_INT)
> +                         GET_MODE_MASK (GET_MODE (len_rtx)));
> +}
> +
>  /* Expand a call EXP to the memcpy builtin.
>     Return NULL_RTX if we failed, the caller should emit a normal call,
>     otherwise try to get the result in TARGET, if convenient (and in
> @@ -3092,6 +3137,8 @@ expand_builtin_memcpy (tree exp, rtx tar
>        rtx dest_mem, src_mem, dest_addr, len_rtx;
>        HOST_WIDE_INT expected_size = -1;
>        unsigned int expected_align = 0;
> +      unsigned HOST_WIDE_INT min_size;
> +      unsigned HOST_WIDE_INT max_size;
>
>        /* If DEST is not a pointer type, call the normal function.  */
>        if (dest_align == 0)
> @@ -3111,6 +3158,7 @@ expand_builtin_memcpy (tree exp, rtx tar
>        dest_mem = get_memory_rtx (dest, len);
>        set_mem_align (dest_mem, dest_align);
>        len_rtx = expand_normal (len);
> +      determine_block_size (len, len_rtx, &min_size, &max_size);
>        src_str = c_getstr (src);
>
>        /* If SRC is a string constant and block move would be done
> @@ -3139,7 +3187,8 @@ expand_builtin_memcpy (tree exp, rtx tar
>        dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
>                                          CALL_EXPR_TAILCALL (exp)
>                                          ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
> -                                        expected_align, expected_size);
> +                                        expected_align, expected_size,
> +                                        min_size, max_size);
>
>        if (dest_addr == 0)
>         {
> @@ -3553,6 +3602,8 @@ expand_builtin_memset_args (tree dest, t
>    rtx dest_mem, dest_addr, len_rtx;
>    HOST_WIDE_INT expected_size = -1;
>    unsigned int expected_align = 0;
> +  unsigned HOST_WIDE_INT min_size;
> +  unsigned HOST_WIDE_INT max_size;
>
>    dest_align = get_pointer_alignment (dest);
>
> @@ -3581,6 +3632,7 @@ expand_builtin_memset_args (tree dest, t
>    len = builtin_save_expr (len);
>
>    len_rtx = expand_normal (len);
> +  determine_block_size (len, len_rtx, &min_size, &max_size);
>    dest_mem = get_memory_rtx (dest, len);
>    val_mode = TYPE_MODE (unsigned_char_type_node);
>
> @@ -3607,7 +3659,7 @@ expand_builtin_memset_args (tree dest, t
>         }
>        else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
>                                         dest_align, expected_align,
> -                                       expected_size))
> +                                       expected_size, min_size, max_size))
>         goto do_libcall;
>
>        dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
> @@ -3629,7 +3681,7 @@ expand_builtin_memset_args (tree dest, t
>        else if (!set_storage_via_setmem (dest_mem, len_rtx,
>                                         gen_int_mode (c, val_mode),
>                                         dest_align, expected_align,
> -                                       expected_size))
> +                                       expected_size, min_size, max_size))
>         goto do_libcall;
>
>        dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
> @@ -3641,7 +3693,8 @@ expand_builtin_memset_args (tree dest, t
>    dest_addr = clear_storage_hints (dest_mem, len_rtx,
>                                    CALL_EXPR_TAILCALL (orig_exp)
>                                    ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
> -                                  expected_align, expected_size);
> +                                  expected_align, expected_size,
> +                                  min_size, max_size);
>
>    if (dest_addr == 0)
>      {
> Index: expr.c
> ===================================================================
> --- expr.c      (revision 202968)
> +++ expr.c      (working copy)
> @@ -122,7 +122,8 @@ struct store_by_pieces_d
>  static void move_by_pieces_1 (insn_gen_fn, machine_mode,
>                               struct move_by_pieces_d *);
>  static bool block_move_libcall_safe_for_call_parm (void);
> -static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT);
> +static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT,
> +                                       unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
>  static tree emit_block_move_libcall_fn (int);
>  static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
>  static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
> @@ -1113,13 +1114,18 @@ move_by_pieces_1 (insn_gen_fn genfun, ma
>     SIZE is an rtx that says how long they are.
>     ALIGN is the maximum alignment we can assume they have.
>     METHOD describes what kind of copy this is, and what mechanisms may be used.
> +   MIN_SIZE is the minimal size of block to move
> +   MAX_SIZE is the maximal size of block to move, if it can not be represented
> +   in unsigned HOST_WIDE_INT, than it is mask of all ones.
>
>     Return the address of the new block, if memcpy is called and returns it,
>     0 otherwise.  */
>
>  rtx
>  emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
> -                      unsigned int expected_align, HOST_WIDE_INT expected_size)
> +                      unsigned int expected_align, HOST_WIDE_INT expected_size,
> +                      unsigned HOST_WIDE_INT min_size,
> +                      unsigned HOST_WIDE_INT max_size)
>  {
>    bool may_use_call;
>    rtx retval = 0;
> @@ -1175,7 +1181,8 @@ emit_block_move_hints (rtx x, rtx y, rtx
>    if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align))
>      move_by_pieces (x, y, INTVAL (size), align, 0);
>    else if (emit_block_move_via_movmem (x, y, size, align,
> -                                      expected_align, expected_size))
> +                                      expected_align, expected_size,
> +                                      min_size, max_size))
>      ;
>    else if (may_use_call
>            && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
> @@ -1205,7 +1212,13 @@ emit_block_move_hints (rtx x, rtx y, rtx
>  rtx
>  emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
>  {
> -  return emit_block_move_hints (x, y, size, method, 0, -1);
> +  unsigned HOST_WIDE_INT max, min = 0;
> +  if (GET_CODE (size) == CONST_INT)
> +    min = max = UINTVAL (size);
> +  else
> +    max = GET_MODE_MASK (GET_MODE (size));
> +  return emit_block_move_hints (x, y, size, method, 0, -1,
> +                               min, max);
>  }
>
>  /* A subroutine of emit_block_move.  Returns true if calling the
> @@ -1268,13 +1281,22 @@ block_move_libcall_safe_for_call_parm (v
>
>  static bool
>  emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
> -                           unsigned int expected_align, HOST_WIDE_INT expected_size)
> +                           unsigned int expected_align, HOST_WIDE_INT expected_size,
> +                           unsigned HOST_WIDE_INT min_size,
> +                           unsigned HOST_WIDE_INT max_size)
>  {
>    int save_volatile_ok = volatile_ok;
>    enum machine_mode mode;
>
>    if (expected_align < align)
>      expected_align = align;
> +  if (expected_size != -1)
> +    {
> +      if ((unsigned HOST_WIDE_INT)expected_size > max_size)
> +       expected_size = max_size;
> +      if ((unsigned HOST_WIDE_INT)expected_size < min_size)
> +       expected_size = min_size;
> +    }
>
>    /* Since this is a move insn, we don't care about volatility.  */
>    volatile_ok = 1;
> @@ -1296,9 +1318,10 @@ emit_block_move_via_movmem (rtx x, rtx y
>           && ((CONST_INT_P (size)
>                && ((unsigned HOST_WIDE_INT) INTVAL (size)
>                    <= (GET_MODE_MASK (mode) >> 1)))
> +             || max_size <= (GET_MODE_MASK (mode) >> 1)
>               || GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
>         {
> -         struct expand_operand ops[6];
> +         struct expand_operand ops[8];
>           unsigned int nops;
>
>           /* ??? When called via emit_block_move_for_call, it'd be
> @@ -1306,18 +1329,28 @@ emit_block_move_via_movmem (rtx x, rtx y
>              that it doesn't fail the expansion because it thinks
>              emitting the libcall would be more efficient.  */
>           nops = insn_data[(int) code].n_generator_args;
> -         gcc_assert (nops == 4 || nops == 6);
> +         gcc_assert (nops == 4 || nops == 6 || nops == 8);
>
>           create_fixed_operand (&ops[0], x);
>           create_fixed_operand (&ops[1], y);
>           /* The check above guarantees that this size conversion is valid.  */
>           create_convert_operand_to (&ops[2], size, mode, true);
>           create_integer_operand (&ops[3], align / BITS_PER_UNIT);
> -         if (nops == 6)
> +         if (nops >= 6)
>             {
>               create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
>               create_integer_operand (&ops[5], expected_size);
>             }
> +         if (nops == 8)
> +           {
> +             create_integer_operand (&ops[6], min_size);
> +             /* If we can not represent the maximal size,
> +                make parameter NULL.  */
> +             if ((HOST_WIDE_INT) max_size != -1)
> +               create_integer_operand (&ops[7], max_size);
> +             else
> +               create_fixed_operand (&ops[7], NULL);
> +           }
>           if (maybe_expand_insn (code, nops, ops))
>             {
>               volatile_ok = save_volatile_ok;
> @@ -2705,7 +2738,9 @@ store_by_pieces_2 (insn_gen_fn genfun, m
>
>  rtx
>  clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
> -                    unsigned int expected_align, HOST_WIDE_INT expected_size)
> +                    unsigned int expected_align, HOST_WIDE_INT expected_size,
> +                    unsigned HOST_WIDE_INT min_size,
> +                    unsigned HOST_WIDE_INT max_size)
>  {
>    enum machine_mode mode = GET_MODE (object);
>    unsigned int align;
> @@ -2746,7 +2781,8 @@ clear_storage_hints (rtx object, rtx siz
>        && CLEAR_BY_PIECES_P (INTVAL (size), align))
>      clear_by_pieces (object, INTVAL (size), align);
>    else if (set_storage_via_setmem (object, size, const0_rtx, align,
> -                                  expected_align, expected_size))
> +                                  expected_align, expected_size,
> +                                  min_size, max_size))
>      ;
>    else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
>      return set_storage_via_libcall (object, size, const0_rtx,
> @@ -2760,7 +2796,12 @@ clear_storage_hints (rtx object, rtx siz
>  rtx
>  clear_storage (rtx object, rtx size, enum block_op_methods method)
>  {
> -  return clear_storage_hints (object, size, method, 0, -1);
> +  unsigned HOST_WIDE_INT max, min = 0;
> +  if (GET_CODE (size) == CONST_INT)
> +    min = max = UINTVAL (size);
> +  else
> +    max = GET_MODE_MASK (GET_MODE (size));
> +  return clear_storage_hints (object, size, method, 0, -1, min, max);
>  }
>
>
> @@ -2857,7 +2898,9 @@ clear_storage_libcall_fn (int for_call)
>
>  bool
>  set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
> -                       unsigned int expected_align, HOST_WIDE_INT expected_size)
> +                       unsigned int expected_align, HOST_WIDE_INT expected_size,
> +                       unsigned HOST_WIDE_INT min_size,
> +                       unsigned HOST_WIDE_INT max_size)
>  {
>    /* Try the most limited insn first, because there's no point
>       including more than one in the machine description unless
> @@ -2867,6 +2910,13 @@ set_storage_via_setmem (rtx object, rtx
>
>    if (expected_align < align)
>      expected_align = align;
> +  if (expected_size != -1)
> +    {
> +      if ((unsigned HOST_WIDE_INT)expected_size > max_size)
> +       expected_size = max_size;
> +      if ((unsigned HOST_WIDE_INT)expected_size < min_size)
> +       expected_size = min_size;
> +    }
>
>    for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
>         mode = GET_MODE_WIDER_MODE (mode))
> @@ -2881,24 +2931,35 @@ set_storage_via_setmem (rtx object, rtx
>           && ((CONST_INT_P (size)
>                && ((unsigned HOST_WIDE_INT) INTVAL (size)
>                    <= (GET_MODE_MASK (mode) >> 1)))
> +             || max_size <= (GET_MODE_MASK (mode) >> 1)
>               || GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
>         {
> -         struct expand_operand ops[6];
> +         struct expand_operand ops[8];
>           unsigned int nops;
>
>           nops = insn_data[(int) code].n_generator_args;
> -         gcc_assert (nops == 4 || nops == 6);
> +         gcc_assert (nops == 4 || nops == 6 || nops == 8);
>
>           create_fixed_operand (&ops[0], object);
>           /* The check above guarantees that this size conversion is valid.  */
>           create_convert_operand_to (&ops[1], size, mode, true);
>           create_convert_operand_from (&ops[2], val, byte_mode, true);
>           create_integer_operand (&ops[3], align / BITS_PER_UNIT);
> -         if (nops == 6)
> +         if (nops >= 6)
>             {
>               create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
>               create_integer_operand (&ops[5], expected_size);
>             }
> +         if (nops == 8)
> +           {
> +             create_integer_operand (&ops[6], min_size);
> +             /* If we can not represent the maximal size,
> +                make parameter NULL.  */
> +             if ((HOST_WIDE_INT) max_size != -1)
> +               create_integer_operand (&ops[7], max_size);
> +             else
> +               create_fixed_operand (&ops[7], NULL);
> +           }
>           if (maybe_expand_insn (code, nops, ops))
>             return true;
>         }
> Index: expr.h
> ===================================================================
> --- expr.h      (revision 202968)
> +++ expr.h      (working copy)
> @@ -300,7 +300,9 @@ extern void init_block_clear_fn (const c
>  extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
>  extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
>  extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
> -                                 unsigned int, HOST_WIDE_INT);
> +                                 unsigned int, HOST_WIDE_INT,
> +                                 unsigned HOST_WIDE_INT,
> +                                 unsigned HOST_WIDE_INT);
>  extern bool emit_storent_insn (rtx to, rtx from);
>
>  /* Copy all or part of a value X into registers starting at REGNO.
> @@ -361,13 +363,17 @@ extern void use_group_regs (rtx *, rtx);
>     If OBJECT has BLKmode, SIZE is its length in bytes.  */
>  extern rtx clear_storage (rtx, rtx, enum block_op_methods);
>  extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods,
> -                               unsigned int, HOST_WIDE_INT);
> +                               unsigned int, HOST_WIDE_INT,
> +                               unsigned HOST_WIDE_INT,
> +                               unsigned HOST_WIDE_INT);
>  /* The same, but always output an library call.  */
>  rtx set_storage_via_libcall (rtx, rtx, rtx, bool);
>
>  /* Expand a setmem pattern; return true if successful.  */
>  extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
> -                                   unsigned int, HOST_WIDE_INT);
> +                                   unsigned int, HOST_WIDE_INT,
> +                                   unsigned HOST_WIDE_INT,
> +                                   unsigned HOST_WIDE_INT);
>
>  extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
>                                                      unsigned int,



More information about the Gcc-patches mailing list