Add value range support into memcpy/memset expansion
Xinliang David Li
davidxl@google.com
Fri Sep 27 16:48:00 GMT 2013
Nice extension. Test cases would be great to have.
thanks,
David
On Fri, Sep 27, 2013 at 7:50 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch makes it possible to access value range info from setmem/movstr that
> I plan to use in i386 memcpy/memset expansion code. It is all quite
> straighforward except that I need to deal with cases where max size does not
> fit in HOST_WIDE_INT where I use maximal value as a marker. It is then
> translated as NULL pointer to the expander that is bit inconsistent with other
> places that use -1 as marker of unknown value.
>
> I also think we lose some cases because of TER replacing out the SSA_NAME by
> something else, but it seems to work in quite many cases. This can be probably
> tracked incrementally by disabling TER here or finally getting away from
> expanding calls via the generic route.
>
> Bootstrapped/regtested x86_64-linux, OK?
>
> Honza
>
> * doc/md.texi (setmem, movstr): Update documentation.
> * builtins.c (determine_block_size): New function.
> (expand_builtin_memcpy): Use it and pass it to
> emit_block_move_hints.
> (expand_builtin_memset_args): Use it and pass it to
> set_storage_via_setmem.
> * expr.c (emit_block_move_via_movmem): Add min_size/max_size parameters;
> update call to expander.
> (emit_block_move_hints): Add min_size/max_size parameters.
> (clear_storage_hints): Likewise.
> (set_storage_via_setmem): Likewise.
> (clear_storage): Update.
> * expr.h (emit_block_move_hints, clear_storage_hints,
> set_storage_via_setmem): Update prototype.
>
> Index: doc/md.texi
> ===================================================================
> --- doc/md.texi (revision 202968)
> +++ doc/md.texi (working copy)
> @@ -5198,6 +5198,9 @@ destination and source strings are opera
> the expansion of this pattern should store in operand 0 the address in
> which the @code{NUL} terminator was stored in the destination string.
>
> +This patern has also several optional operands that are same as in
> +@code{setmem}.
> +
> @cindex @code{setmem@var{m}} instruction pattern
> @item @samp{setmem@var{m}}
> Block set instruction. The destination string is the first operand,
> @@ -5217,6 +5220,8 @@ respectively. The expected alignment di
> in a way that the blocks are not required to be aligned according to it in
> all cases. This expected alignment is also in bytes, just like operand 4.
> Expected size, when unknown, is set to @code{(const_int -1)}.
> +Operand 7 is the minimal size of the block and operand 8 is the
> +maximal size of the block (NULL if it can not be represented as CONST_INT).
>
> The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}.
>
> Index: builtins.c
> ===================================================================
> --- builtins.c (revision 202968)
> +++ builtins.c (working copy)
> @@ -3070,6 +3070,51 @@ builtin_memcpy_read_str (void *data, HOS
> return c_readstr (str + offset, mode);
> }
>
> +/* LEN specify length of the block of memcpy/memset operation.
> + Figure out its range and put it into MIN_SIZE/MAX_SIZE. */
> +
> +static void
> +determine_block_size (tree len, rtx len_rtx,
> + unsigned HOST_WIDE_INT *min_size,
> + unsigned HOST_WIDE_INT *max_size)
> +{
> + if (CONST_INT_P (len_rtx))
> + {
> + *min_size = *max_size = UINTVAL (len_rtx);
> + return;
> + }
> + else
> + {
> + double_int min, max;
> + if (TREE_CODE (len) == SSA_NAME
> + && get_range_info (len, &min, &max) == VR_RANGE)
> + {
> + if (min.fits_uhwi ())
> + *min_size = min.to_uhwi ();
> + else
> + *min_size = 0;
> + if (max.fits_uhwi ())
> + *max_size = max.to_uhwi ();
> + else
> + *max_size = (HOST_WIDE_INT)-1;
> + }
> + else
> + {
> + if (host_integerp (TYPE_MIN_VALUE (TREE_TYPE (len)), 1))
> + *min_size = tree_low_cst (TYPE_MIN_VALUE (TREE_TYPE (len)), 1);
> + else
> + *min_size = 0;
> + if (host_integerp (TYPE_MAX_VALUE (TREE_TYPE (len)), 1))
> + *max_size = tree_low_cst (TYPE_MAX_VALUE (TREE_TYPE (len)), 1);
> + else
> + *max_size = GET_MODE_MASK (GET_MODE (len_rtx));
> + }
> + }
> + gcc_checking_assert (*max_size <=
> + (unsigned HOST_WIDE_INT)
> + GET_MODE_MASK (GET_MODE (len_rtx)));
> +}
> +
> /* Expand a call EXP to the memcpy builtin.
> Return NULL_RTX if we failed, the caller should emit a normal call,
> otherwise try to get the result in TARGET, if convenient (and in
> @@ -3092,6 +3137,8 @@ expand_builtin_memcpy (tree exp, rtx tar
> rtx dest_mem, src_mem, dest_addr, len_rtx;
> HOST_WIDE_INT expected_size = -1;
> unsigned int expected_align = 0;
> + unsigned HOST_WIDE_INT min_size;
> + unsigned HOST_WIDE_INT max_size;
>
> /* If DEST is not a pointer type, call the normal function. */
> if (dest_align == 0)
> @@ -3111,6 +3158,7 @@ expand_builtin_memcpy (tree exp, rtx tar
> dest_mem = get_memory_rtx (dest, len);
> set_mem_align (dest_mem, dest_align);
> len_rtx = expand_normal (len);
> + determine_block_size (len, len_rtx, &min_size, &max_size);
> src_str = c_getstr (src);
>
> /* If SRC is a string constant and block move would be done
> @@ -3139,7 +3187,8 @@ expand_builtin_memcpy (tree exp, rtx tar
> dest_addr = emit_block_move_hints (dest_mem, src_mem, len_rtx,
> CALL_EXPR_TAILCALL (exp)
> ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
> - expected_align, expected_size);
> + expected_align, expected_size,
> + min_size, max_size);
>
> if (dest_addr == 0)
> {
> @@ -3553,6 +3602,8 @@ expand_builtin_memset_args (tree dest, t
> rtx dest_mem, dest_addr, len_rtx;
> HOST_WIDE_INT expected_size = -1;
> unsigned int expected_align = 0;
> + unsigned HOST_WIDE_INT min_size;
> + unsigned HOST_WIDE_INT max_size;
>
> dest_align = get_pointer_alignment (dest);
>
> @@ -3581,6 +3632,7 @@ expand_builtin_memset_args (tree dest, t
> len = builtin_save_expr (len);
>
> len_rtx = expand_normal (len);
> + determine_block_size (len, len_rtx, &min_size, &max_size);
> dest_mem = get_memory_rtx (dest, len);
> val_mode = TYPE_MODE (unsigned_char_type_node);
>
> @@ -3607,7 +3659,7 @@ expand_builtin_memset_args (tree dest, t
> }
> else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx,
> dest_align, expected_align,
> - expected_size))
> + expected_size, min_size, max_size))
> goto do_libcall;
>
> dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
> @@ -3629,7 +3681,7 @@ expand_builtin_memset_args (tree dest, t
> else if (!set_storage_via_setmem (dest_mem, len_rtx,
> gen_int_mode (c, val_mode),
> dest_align, expected_align,
> - expected_size))
> + expected_size, min_size, max_size))
> goto do_libcall;
>
> dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
> @@ -3641,7 +3693,8 @@ expand_builtin_memset_args (tree dest, t
> dest_addr = clear_storage_hints (dest_mem, len_rtx,
> CALL_EXPR_TAILCALL (orig_exp)
> ? BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL,
> - expected_align, expected_size);
> + expected_align, expected_size,
> + min_size, max_size);
>
> if (dest_addr == 0)
> {
> Index: expr.c
> ===================================================================
> --- expr.c (revision 202968)
> +++ expr.c (working copy)
> @@ -122,7 +122,8 @@ struct store_by_pieces_d
> static void move_by_pieces_1 (insn_gen_fn, machine_mode,
> struct move_by_pieces_d *);
> static bool block_move_libcall_safe_for_call_parm (void);
> -static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT);
> +static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
> static tree emit_block_move_libcall_fn (int);
> static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
> static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
> @@ -1113,13 +1114,18 @@ move_by_pieces_1 (insn_gen_fn genfun, ma
> SIZE is an rtx that says how long they are.
> ALIGN is the maximum alignment we can assume they have.
> METHOD describes what kind of copy this is, and what mechanisms may be used.
> + MIN_SIZE is the minimal size of block to move
> + MAX_SIZE is the maximal size of block to move, if it can not be represented
> + in unsigned HOST_WIDE_INT, than it is mask of all ones.
>
> Return the address of the new block, if memcpy is called and returns it,
> 0 otherwise. */
>
> rtx
> emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
> - unsigned int expected_align, HOST_WIDE_INT expected_size)
> + unsigned int expected_align, HOST_WIDE_INT expected_size,
> + unsigned HOST_WIDE_INT min_size,
> + unsigned HOST_WIDE_INT max_size)
> {
> bool may_use_call;
> rtx retval = 0;
> @@ -1175,7 +1181,8 @@ emit_block_move_hints (rtx x, rtx y, rtx
> if (CONST_INT_P (size) && MOVE_BY_PIECES_P (INTVAL (size), align))
> move_by_pieces (x, y, INTVAL (size), align, 0);
> else if (emit_block_move_via_movmem (x, y, size, align,
> - expected_align, expected_size))
> + expected_align, expected_size,
> + min_size, max_size))
> ;
> else if (may_use_call
> && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
> @@ -1205,7 +1212,13 @@ emit_block_move_hints (rtx x, rtx y, rtx
> rtx
> emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
> {
> - return emit_block_move_hints (x, y, size, method, 0, -1);
> + unsigned HOST_WIDE_INT max, min = 0;
> + if (GET_CODE (size) == CONST_INT)
> + min = max = UINTVAL (size);
> + else
> + max = GET_MODE_MASK (GET_MODE (size));
> + return emit_block_move_hints (x, y, size, method, 0, -1,
> + min, max);
> }
>
> /* A subroutine of emit_block_move. Returns true if calling the
> @@ -1268,13 +1281,22 @@ block_move_libcall_safe_for_call_parm (v
>
> static bool
> emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
> - unsigned int expected_align, HOST_WIDE_INT expected_size)
> + unsigned int expected_align, HOST_WIDE_INT expected_size,
> + unsigned HOST_WIDE_INT min_size,
> + unsigned HOST_WIDE_INT max_size)
> {
> int save_volatile_ok = volatile_ok;
> enum machine_mode mode;
>
> if (expected_align < align)
> expected_align = align;
> + if (expected_size != -1)
> + {
> + if ((unsigned HOST_WIDE_INT)expected_size > max_size)
> + expected_size = max_size;
> + if ((unsigned HOST_WIDE_INT)expected_size < min_size)
> + expected_size = min_size;
> + }
>
> /* Since this is a move insn, we don't care about volatility. */
> volatile_ok = 1;
> @@ -1296,9 +1318,10 @@ emit_block_move_via_movmem (rtx x, rtx y
> && ((CONST_INT_P (size)
> && ((unsigned HOST_WIDE_INT) INTVAL (size)
> <= (GET_MODE_MASK (mode) >> 1)))
> + || max_size <= (GET_MODE_MASK (mode) >> 1)
> || GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
> {
> - struct expand_operand ops[6];
> + struct expand_operand ops[8];
> unsigned int nops;
>
> /* ??? When called via emit_block_move_for_call, it'd be
> @@ -1306,18 +1329,28 @@ emit_block_move_via_movmem (rtx x, rtx y
> that it doesn't fail the expansion because it thinks
> emitting the libcall would be more efficient. */
> nops = insn_data[(int) code].n_generator_args;
> - gcc_assert (nops == 4 || nops == 6);
> + gcc_assert (nops == 4 || nops == 6 || nops == 8);
>
> create_fixed_operand (&ops[0], x);
> create_fixed_operand (&ops[1], y);
> /* The check above guarantees that this size conversion is valid. */
> create_convert_operand_to (&ops[2], size, mode, true);
> create_integer_operand (&ops[3], align / BITS_PER_UNIT);
> - if (nops == 6)
> + if (nops >= 6)
> {
> create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
> create_integer_operand (&ops[5], expected_size);
> }
> + if (nops == 8)
> + {
> + create_integer_operand (&ops[6], min_size);
> + /* If we can not represent the maximal size,
> + make parameter NULL. */
> + if ((HOST_WIDE_INT) max_size != -1)
> + create_integer_operand (&ops[7], max_size);
> + else
> + create_fixed_operand (&ops[7], NULL);
> + }
> if (maybe_expand_insn (code, nops, ops))
> {
> volatile_ok = save_volatile_ok;
> @@ -2705,7 +2738,9 @@ store_by_pieces_2 (insn_gen_fn genfun, m
>
> rtx
> clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
> - unsigned int expected_align, HOST_WIDE_INT expected_size)
> + unsigned int expected_align, HOST_WIDE_INT expected_size,
> + unsigned HOST_WIDE_INT min_size,
> + unsigned HOST_WIDE_INT max_size)
> {
> enum machine_mode mode = GET_MODE (object);
> unsigned int align;
> @@ -2746,7 +2781,8 @@ clear_storage_hints (rtx object, rtx siz
> && CLEAR_BY_PIECES_P (INTVAL (size), align))
> clear_by_pieces (object, INTVAL (size), align);
> else if (set_storage_via_setmem (object, size, const0_rtx, align,
> - expected_align, expected_size))
> + expected_align, expected_size,
> + min_size, max_size))
> ;
> else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
> return set_storage_via_libcall (object, size, const0_rtx,
> @@ -2760,7 +2796,12 @@ clear_storage_hints (rtx object, rtx siz
> rtx
> clear_storage (rtx object, rtx size, enum block_op_methods method)
> {
> - return clear_storage_hints (object, size, method, 0, -1);
> + unsigned HOST_WIDE_INT max, min = 0;
> + if (GET_CODE (size) == CONST_INT)
> + min = max = UINTVAL (size);
> + else
> + max = GET_MODE_MASK (GET_MODE (size));
> + return clear_storage_hints (object, size, method, 0, -1, min, max);
> }
>
>
> @@ -2857,7 +2898,9 @@ clear_storage_libcall_fn (int for_call)
>
> bool
> set_storage_via_setmem (rtx object, rtx size, rtx val, unsigned int align,
> - unsigned int expected_align, HOST_WIDE_INT expected_size)
> + unsigned int expected_align, HOST_WIDE_INT expected_size,
> + unsigned HOST_WIDE_INT min_size,
> + unsigned HOST_WIDE_INT max_size)
> {
> /* Try the most limited insn first, because there's no point
> including more than one in the machine description unless
> @@ -2867,6 +2910,13 @@ set_storage_via_setmem (rtx object, rtx
>
> if (expected_align < align)
> expected_align = align;
> + if (expected_size != -1)
> + {
> + if ((unsigned HOST_WIDE_INT)expected_size > max_size)
> + expected_size = max_size;
> + if ((unsigned HOST_WIDE_INT)expected_size < min_size)
> + expected_size = min_size;
> + }
>
> for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
> mode = GET_MODE_WIDER_MODE (mode))
> @@ -2881,24 +2931,35 @@ set_storage_via_setmem (rtx object, rtx
> && ((CONST_INT_P (size)
> && ((unsigned HOST_WIDE_INT) INTVAL (size)
> <= (GET_MODE_MASK (mode) >> 1)))
> + || max_size <= (GET_MODE_MASK (mode) >> 1)
> || GET_MODE_BITSIZE (mode) >= BITS_PER_WORD))
> {
> - struct expand_operand ops[6];
> + struct expand_operand ops[8];
> unsigned int nops;
>
> nops = insn_data[(int) code].n_generator_args;
> - gcc_assert (nops == 4 || nops == 6);
> + gcc_assert (nops == 4 || nops == 6 || nops == 8);
>
> create_fixed_operand (&ops[0], object);
> /* The check above guarantees that this size conversion is valid. */
> create_convert_operand_to (&ops[1], size, mode, true);
> create_convert_operand_from (&ops[2], val, byte_mode, true);
> create_integer_operand (&ops[3], align / BITS_PER_UNIT);
> - if (nops == 6)
> + if (nops >= 6)
> {
> create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
> create_integer_operand (&ops[5], expected_size);
> }
> + if (nops == 8)
> + {
> + create_integer_operand (&ops[6], min_size);
> + /* If we can not represent the maximal size,
> + make parameter NULL. */
> + if ((HOST_WIDE_INT) max_size != -1)
> + create_integer_operand (&ops[7], max_size);
> + else
> + create_fixed_operand (&ops[7], NULL);
> + }
> if (maybe_expand_insn (code, nops, ops))
> return true;
> }
> Index: expr.h
> ===================================================================
> --- expr.h (revision 202968)
> +++ expr.h (working copy)
> @@ -300,7 +300,9 @@ extern void init_block_clear_fn (const c
> extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
> extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
> extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
> - unsigned int, HOST_WIDE_INT);
> + unsigned int, HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT);
> extern bool emit_storent_insn (rtx to, rtx from);
>
> /* Copy all or part of a value X into registers starting at REGNO.
> @@ -361,13 +363,17 @@ extern void use_group_regs (rtx *, rtx);
> If OBJECT has BLKmode, SIZE is its length in bytes. */
> extern rtx clear_storage (rtx, rtx, enum block_op_methods);
> extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods,
> - unsigned int, HOST_WIDE_INT);
> + unsigned int, HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT);
> /* The same, but always output an library call. */
> rtx set_storage_via_libcall (rtx, rtx, rtx, bool);
>
> /* Expand a setmem pattern; return true if successful. */
> extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
> - unsigned int, HOST_WIDE_INT);
> + unsigned int, HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT,
> + unsigned HOST_WIDE_INT);
>
> extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
> unsigned int,
More information about the Gcc-patches
mailing list