This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [RFC] Handle memset in DSE (take 2)
Jakub Jelinek wrote:
> Hi!
>
> On Fri, Dec 12, 2008 at 06:28:57PM +0100, Jakub Jelinek wrote:
>
>> This (on top of http://gcc.gnu.org/ml/gcc-patches/2008-12/msg00751.html)
>> is a step toward fixing PR31150 in RTL DSE.
>>
>
>
You need to add the bugzilla number to the changelog. Aside from that,
approved for whatever release you think is appropriate (you are a
release manager).
Kenny
Kenny
> As discussed on IRC, this updated patch uses the unsigned HOST_WIDE_INT
> bitmask for positions_needed of <= HOST_BITS_PER_WIDE_INT bytes
> long stores (the most common case, all non-BLKmode stores and also many
> smaller BLKmode stores), but uses a real bitmap to track bytes that are not
> needed for larger BLKmode stores. It adds a couple of static inline
> accessor functions which hide this implementation detail from the callers.
>
> Bootstrapped/regtested on x86_64-linux.
>
> 2008-12-15 Jakub Jelinek <jakub@redhat.com>
>
> * dse.c (struct store_info): Add is_large bool field, change
> positions_needed into a union of a bitmask and bitmap + count.
> (free_store_info): Free bitmap if is_large.
> (set_usage_bits): Don't look at stores where
> offset + width >= MAX_OFFSET.
> (set_position_unneeded, set_all_positions_unneeded,
> any_positions_needed_p, all_positions_needed_p): New static inline
> functions.
> (record_store): Handle BLKmode stores of CONST_INT, if
> MEM_SIZE is set on the MEM. Use the new positions_needed
> accessor inlines.
> (replace_read): Handle reads from BLKmode CONST_INT stores.
> (check_mem_read_rtx): Use all_positions_needed_p function.
> (dse_step1): Free large positions_needed bitmaps and clear is_large.
>
> * dse.c (struct store_info): Change begin and end types to
> HOST_WIDE_INT.
>
> * dse.c (record_store): Fix check for unused store.
>
> * expr.c (block_clear_fn): No longer static.
> * expr.h (block_clear_fn): Declare.
> * dse.c (scan_insn): Memset and bzero can just read their
> arguments.
>
> --- gcc/expr.c.jj 2008-11-20 14:26:03.000000000 +0100
> +++ gcc/expr.c 2008-12-12 16:44:33.000000000 +0100
> @@ -2698,7 +2698,7 @@ set_storage_via_libcall (rtx object, rtx
> for the function we use for block clears. The first time FOR_CALL
> is true, we call assemble_external. */
>
> -static GTY(()) tree block_clear_fn;
> +tree block_clear_fn;
>
> void
> init_block_clear_fn (const char *asmspec)
> --- gcc/expr.h.jj 2008-12-11 11:40:19.000000000 +0100
> +++ gcc/expr.h 2008-12-12 16:44:20.000000000 +0100
> @@ -404,6 +404,7 @@ enum block_op_methods
> BLOCK_OP_TAILCALL
> };
>
> +extern tree GTY(()) block_clear_fn;
> extern void init_block_move_fn (const char *);
> extern void init_block_clear_fn (const char *);
>
> --- gcc/dse.c.jj 2008-11-27 17:08:26.000000000 +0100
> +++ gcc/dse.c 2008-12-15 11:19:21.000000000 +0100
> @@ -205,6 +205,9 @@ struct store_info
> /* False means this is a clobber. */
> bool is_set;
>
> + /* False if a single HOST_WIDE_INT bitmap is used for positions_needed. */
> + bool is_large;
> +
> /* The id of the mem group of the base address. If rtx_varies_p is
> true, this is -1. Otherwise, it is the index into the group
> table. */
> @@ -224,12 +227,26 @@ struct store_info
>
> /* The offset of the first and byte before the last byte associated
> with the operation. */
> - int begin, end;
> + HOST_WIDE_INT begin, end;
>
> - /* An bitmask as wide as the number of bytes in the word that
> - contains a 1 if the byte may be needed. The store is unused if
> - all of the bits are 0. */
> - unsigned HOST_WIDE_INT positions_needed;
> + union
> + {
> + /* A bitmask as wide as the number of bytes in the word that
> + contains a 1 if the byte may be needed. The store is unused if
> + all of the bits are 0. This is used if IS_LARGE is false. */
> + unsigned HOST_WIDE_INT small_bitmask;
> +
> + struct
> + {
> + /* A bitmap with one bit per byte. Cleared bit means the position
> + is needed. Used if IS_LARGE is true. */
> + bitmap bitmap;
> +
> + /* Number of set bits (i.e. unneeded bytes) in BITMAP. If it is
> + equal to END - BEGIN, the whole store is unused. */
> + int count;
> + } large;
> + } positions_needed;
>
> /* The next store info for this insn. */
> struct store_info *next;
> @@ -748,6 +765,8 @@ free_store_info (insn_info_t insn_info)
> while (store_info)
> {
> store_info_t next = store_info->next;
> + if (store_info->is_large)
> + BITMAP_FREE (store_info->positions_needed.large.bitmap);
> if (store_info->cse_base)
> pool_free (cse_store_info_pool, store_info);
> else
> @@ -899,7 +918,7 @@ set_usage_bits (group_info_t group, HOST
> {
> HOST_WIDE_INT i;
>
> - if ((offset > -MAX_OFFSET) && (offset < MAX_OFFSET))
> + if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET)
> for (i=offset; i<offset+width; i++)
> {
> bitmap store1;
> @@ -1161,6 +1180,75 @@ clear_rhs_from_active_local_stores (void
> }
>
>
> +/* Mark byte POS bytes from the beginning of store S_INFO as unneeded. */
> +
> +static inline void
> +set_position_unneeded (store_info_t s_info, int pos)
> +{
> + if (__builtin_expect (s_info->is_large, false))
> + {
> + if (!bitmap_bit_p (s_info->positions_needed.large.bitmap, pos))
> + {
> + s_info->positions_needed.large.count++;
> + bitmap_set_bit (s_info->positions_needed.large.bitmap, pos);
> + }
> + }
> + else
> + s_info->positions_needed.small_bitmask
> + &= ~(((unsigned HOST_WIDE_INT) 1) << pos);
> +}
> +
> +/* Mark the whole store S_INFO as unneeded. */
> +
> +static inline void
> +set_all_positions_unneeded (store_info_t s_info)
> +{
> + if (__builtin_expect (s_info->is_large, false))
> + {
> + int pos, end = s_info->end - s_info->begin;
> + for (pos = 0; pos < end; pos++)
> + bitmap_set_bit (s_info->positions_needed.large.bitmap, pos);
> + s_info->positions_needed.large.count = end;
> + }
> + else
> + s_info->positions_needed.small_bitmask = (unsigned HOST_WIDE_INT) 0;
> +}
> +
> +/* Return TRUE if any bytes from S_INFO store are needed. */
> +
> +static inline bool
> +any_positions_needed_p (store_info_t s_info)
> +{
> + if (__builtin_expect (s_info->is_large, false))
> + return (s_info->positions_needed.large.count
> + < s_info->end - s_info->begin);
> + else
> + return (s_info->positions_needed.small_bitmask
> + != (unsigned HOST_WIDE_INT) 0);
> +}
> +
> +/* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
> + store are needed. */
> +
> +static inline bool
> +all_positions_needed_p (store_info_t s_info, int start, int width)
> +{
> + if (__builtin_expect (s_info->is_large, false))
> + {
> + int end = start + width;
> + while (start < end)
> + if (bitmap_bit_p (s_info->positions_needed.large.bitmap, start++))
> + return false;
> + return true;
> + }
> + else
> + {
> + unsigned HOST_WIDE_INT mask = lowpart_bitmask (width) << start;
> + return (s_info->positions_needed.small_bitmask & mask) == mask;
> + }
> +}
> +
> +
> /* BODY is an instruction pattern that belongs to INSN. Return 1 if
> there is a candidate store, after adding it to the appropriate
> local store group if so. */
> @@ -1182,14 +1270,15 @@ record_store (rtx body, bb_info_t bb_inf
> if (GET_CODE (body) != SET && GET_CODE (body) != CLOBBER)
> return 0;
>
> + mem = SET_DEST (body);
> +
> /* If this is not used, then this cannot be used to keep the insn
> from being deleted. On the other hand, it does provide something
> that can be used to prove that another store is dead. */
> store_is_unused
> - = (find_reg_note (insn_info->insn, REG_UNUSED, body) != NULL);
> + = (find_reg_note (insn_info->insn, REG_UNUSED, mem) != NULL);
>
> /* Check whether that value is a suitable memory location. */
> - mem = SET_DEST (body);
> if (!MEM_P (mem))
> {
> /* If the set or clobber is unused, then it does not effect our
> @@ -1208,15 +1297,26 @@ record_store (rtx body, bb_info_t bb_inf
> fprintf (dump_file, " adding wild read for (clobber (mem:BLK (scratch))\n");
> add_wild_read (bb_info);
> insn_info->cannot_delete = true;
> + return 0;
> }
> - else if (!store_is_unused)
> - {
> - /* If the set or clobber is unused, then it does not effect our
> - ability to get rid of the entire insn. */
> - insn_info->cannot_delete = true;
> - clear_rhs_from_active_local_stores ();
> + /* Handle (set (mem:BLK (addr) [... S36 ...]) (const_int 0))
> + as memset (addr, 0, 36); */
> + else if (!MEM_SIZE (mem)
> + || !CONST_INT_P (MEM_SIZE (mem))
> + || GET_CODE (body) != SET
> + || INTVAL (MEM_SIZE (mem)) <= 0
> + || INTVAL (MEM_SIZE (mem)) > MAX_OFFSET
> + || !CONST_INT_P (SET_SRC (body)))
> + {
> + if (!store_is_unused)
> + {
> + /* If the set or clobber is unused, then it does not effect our
> + ability to get rid of the entire insn. */
> + insn_info->cannot_delete = true;
> + clear_rhs_from_active_local_stores ();
> + }
> + return 0;
> }
> - return 0;
> }
>
> /* We can still process a volatile mem, we just cannot delete it. */
> @@ -1229,12 +1329,20 @@ record_store (rtx body, bb_info_t bb_inf
> return 0;
> }
>
> - width = GET_MODE_SIZE (GET_MODE (mem));
> + if (GET_MODE (mem) == BLKmode)
> + width = INTVAL (MEM_SIZE (mem));
> + else
> + {
> + width = GET_MODE_SIZE (GET_MODE (mem));
> + gcc_assert ((unsigned) width <= HOST_BITS_PER_WIDE_INT);
> + }
>
> if (spill_alias_set)
> {
> bitmap store1 = clear_alias_group->store1_p;
> bitmap store2 = clear_alias_group->store2_p;
> +
> + gcc_assert (GET_MODE (mem) != BLKmode);
>
> if (bitmap_bit_p (store1, spill_alias_set))
> bitmap_set_bit (store2, spill_alias_set);
> @@ -1315,7 +1423,7 @@ record_store (rtx body, bb_info_t bb_inf
> && (GET_MODE (mem) == entry->mode))
> {
> del = true;
> - s_info->positions_needed = (unsigned HOST_WIDE_INT) 0;
> + set_all_positions_unneeded (s_info);
> }
> if (dump_file)
> fprintf (dump_file, " trying spill store in insn=%d alias_set=%d\n",
> @@ -1329,10 +1437,10 @@ record_store (rtx body, bb_info_t bb_inf
> fprintf (dump_file, " trying store in insn=%d gid=%d[%d..%d)\n",
> INSN_UID (ptr->insn), s_info->group_id,
> (int)s_info->begin, (int)s_info->end);
> - for (i = offset; i < offset+width; i++)
> - if (i >= s_info->begin && i < s_info->end)
> - s_info->positions_needed
> - &= ~(((unsigned HOST_WIDE_INT) 1) << (i - s_info->begin));
> + for (i = MAX (offset, s_info->begin);
> + i < offset + width && i < s_info->end;
> + i++)
> + set_position_unneeded (s_info, i - s_info->begin);
> }
> else if (s_info->rhs)
> /* Need to see if it is possible for this store to overwrite
> @@ -1348,9 +1456,9 @@ record_store (rtx body, bb_info_t bb_inf
>
> /* An insn can be deleted if every position of every one of
> its s_infos is zero. */
> - if (s_info->positions_needed != (unsigned HOST_WIDE_INT) 0)
> + if (any_positions_needed_p (s_info))
> del = false;
> -
> +
> if (del)
> {
> insn_info_t insn_to_delete = ptr;
> @@ -1368,8 +1476,6 @@ record_store (rtx body, bb_info_t bb_inf
> ptr = next;
> }
>
> - gcc_assert ((unsigned) width <= HOST_BITS_PER_WIDE_INT);
> -
> /* Finish filling in the store_info. */
> store_info->next = insn_info->store_rec;
> insn_info->store_rec = store_info;
> @@ -1377,7 +1483,17 @@ record_store (rtx body, bb_info_t bb_inf
> store_info->alias_set = spill_alias_set;
> store_info->mem_addr = get_addr (XEXP (mem, 0));
> store_info->cse_base = base;
> - store_info->positions_needed = lowpart_bitmask (width);
> + if (width > HOST_BITS_PER_WIDE_INT)
> + {
> + store_info->is_large = true;
> + store_info->positions_needed.large.count = 0;
> + store_info->positions_needed.large.bitmap = BITMAP_ALLOC (NULL);
> + }
> + else
> + {
> + store_info->is_large = false;
> + store_info->positions_needed.small_bitmask = lowpart_bitmask (width);
> + }
> store_info->group_id = group_id;
> store_info->begin = offset;
> store_info->end = offset + width;
> @@ -1580,7 +1696,9 @@ replace_read (store_info_t store_info, i
> /* To get here the read is within the boundaries of the write so
> shift will never be negative. Start out with the shift being in
> bytes. */
> - if (BYTES_BIG_ENDIAN)
> + if (store_mode == BLKmode)
> + shift = 0;
> + else if (BYTES_BIG_ENDIAN)
> shift = store_info->end - read_info->end;
> else
> shift = read_info->begin - store_info->begin;
> @@ -1608,6 +1726,31 @@ replace_read (store_info_t store_info, i
> if (shift)
> read_reg = find_shift_sequence (access_size, store_info, read_info, shift,
> optimize_bb_for_speed_p (BLOCK_FOR_INSN (read_insn->insn)));
> + else if (store_mode == BLKmode)
> + {
> + /* The store is a memset (addr, const_val, const_size). */
> + gcc_assert (CONST_INT_P (store_info->rhs));
> + store_mode = int_mode_for_mode (read_mode);
> + if (store_mode == BLKmode)
> + read_reg = NULL_RTX;
> + else if (store_info->rhs == const0_rtx)
> + read_reg = extract_low_bits (read_mode, store_mode, const0_rtx);
> + else if (GET_MODE_BITSIZE (store_mode) > HOST_BITS_PER_WIDE_INT)
> + read_reg = NULL_RTX;
> + else
> + {
> + unsigned HOST_WIDE_INT c
> + = INTVAL (store_info->rhs) & (BITS_PER_UNIT - 1);
> + int shift = BITS_PER_UNIT;
> + while (shift < HOST_BITS_PER_WIDE_INT)
> + {
> + c |= (c << shift);
> + shift <<= 1;
> + }
> + read_reg = GEN_INT (trunc_int_for_mode (c, store_mode));
> + read_reg = extract_low_bits (read_mode, store_mode, read_reg);
> + }
> + }
> else
> read_reg = extract_low_bits (read_mode, store_mode,
> copy_rtx (store_info->rhs));
> @@ -1833,18 +1976,15 @@ check_mem_read_rtx (rtx *loc, void *data
> else
> {
> if (store_info->rhs
> - && (offset >= store_info->begin)
> - && (offset + width <= store_info->end))
> - {
> - unsigned HOST_WIDE_INT mask
> - = (lowpart_bitmask (width)
> - << (offset - store_info->begin));
> -
> - if ((store_info->positions_needed & mask) == mask
> - && replace_read (store_info, i_ptr,
> - read_info, insn_info, loc))
> - return 0;
> - }
> + && offset >= store_info->begin
> + && offset + width <= store_info->end
> + && all_positions_needed_p (store_info,
> + offset - store_info->begin,
> + width)
> + && replace_read (store_info, i_ptr, read_info,
> + insn_info, loc))
> + return 0;
> +
> /* The bases are the same, just see if the offsets
> overlap. */
> if ((offset < store_info->end)
> @@ -1902,18 +2042,12 @@ check_mem_read_rtx (rtx *loc, void *data
> if (store_info->rhs
> && store_info->group_id == -1
> && store_info->cse_base == base
> - && (offset >= store_info->begin)
> - && (offset + width <= store_info->end))
> - {
> - unsigned HOST_WIDE_INT mask
> - = (lowpart_bitmask (width)
> - << (offset - store_info->begin));
> -
> - if ((store_info->positions_needed & mask) == mask
> - && replace_read (store_info, i_ptr,
> - read_info, insn_info, loc))
> - return 0;
> - }
> + && offset >= store_info->begin
> + && offset + width <= store_info->end
> + && all_positions_needed_p (store_info,
> + offset - store_info->begin, width)
> + && replace_read (store_info, i_ptr, read_info, insn_info, loc))
> + return 0;
>
> if (!store_info->alias_set)
> remove = canon_true_dependence (store_info->mem,
> @@ -1986,18 +2120,53 @@ scan_insn (bb_info_t bb_info, rtx insn)
>
> if (CALL_P (insn))
> {
> + bool const_call, memset_call = false;
> +
> insn_info->cannot_delete = true;
>
> /* Const functions cannot do anything bad i.e. read memory,
> however, they can read their parameters which may have
> - been pushed onto the stack. */
> - if (RTL_CONST_CALL_P (insn))
> + been pushed onto the stack.
> + memset and bzero don't read memory either. */
> + const_call = RTL_CONST_CALL_P (insn);
> + if (!const_call)
> + {
> + rtx call = PATTERN (insn);
> + if (GET_CODE (call) == PARALLEL)
> + call = XVECEXP (call, 0, 0);
> + if (GET_CODE (call) == SET)
> + call = SET_SRC (call);
> + if (GET_CODE (call) == CALL
> + && MEM_P (XEXP (call, 0))
> + && GET_CODE (XEXP (XEXP (call, 0), 0)) == SYMBOL_REF)
> + {
> + rtx symbol = XEXP (XEXP (call, 0), 0);
> + if (SYMBOL_REF_DECL (symbol)
> + && TREE_CODE (SYMBOL_REF_DECL (symbol)) == FUNCTION_DECL)
> + {
> + if (DECL_BUILT_IN_CLASS (SYMBOL_REF_DECL (symbol))
> + == BUILT_IN_NORMAL)
> + switch (DECL_FUNCTION_CODE (SYMBOL_REF_DECL (symbol)))
> + {
> + case BUILT_IN_MEMSET:
> + case BUILT_IN_BZERO:
> + memset_call = true;
> + default:
> + break;
> + }
> + else if (SYMBOL_REF_DECL (symbol) == block_clear_fn)
> + memset_call = true;
> + }
> + }
> + }
> + if (const_call || memset_call)
> {
> insn_info_t i_ptr = active_local_stores;
> insn_info_t last = NULL;
>
> if (dump_file)
> - fprintf (dump_file, "const call %d\n", INSN_UID (insn));
> + fprintf (dump_file, "%s call %d\n",
> + const_call ? "const" : "memset", INSN_UID (insn));
>
> /* See the head comment of the frame_read field. */
> if (reload_completed)
> @@ -2233,6 +2402,18 @@ dse_step1 (void)
> {
> if (ptr->contains_cselib_groups)
> free_store_info (ptr);
> + else
> + {
> + store_info_t s_info;
> +
> + /* Free at least positions_needed bitmaps. */
> + for (s_info = ptr->store_rec; s_info; s_info = s_info->next)
> + if (s_info->is_large)
> + {
> + BITMAP_FREE (s_info->positions_needed.large.bitmap);
> + s_info->is_large = false;
> + }
> + }
> ptr = ptr->prev_insn;
> }
>
>
>
> Jakub
>