This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Fix gimple store merging (PR tree-optimization/78436)
- From: Richard Biener <rguenther at suse dot de>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Kyrill Tkachov <kyrylo dot tkachov at foss dot arm dot com>, gcc-patches at gcc dot gnu dot org
- Date: Tue, 22 Nov 2016 08:58:00 +0100 (CET)
- Subject: Re: [PATCH] Fix gimple store merging (PR tree-optimization/78436)
- Authentication-results: sourceware.org; auth=none
- References: <20161121191532.GM3541@tucnak.redhat.com>
On Mon, 21 Nov 2016, Jakub Jelinek wrote:
> Hi!
>
> The
> if (!BYTES_BIG_ENDIAN)
> - shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> + {
> + shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> + if (shift_amnt == 0)
> + byte_size--;
> + }
> hunk below is the actual fix for the PR, where we originally store:
> 8-bit 0 at offset 24-bits followed by 24-bit negative value at offset 0,
> little endian. encode_tree_to_bitpos actually allocates 1 extra byte in the
> buffer and byte_size is also 1 byte longer, for the case where the
> bits need to be shifted (it only cares about shifts within bytes, so 0 to
> BITS_PER_UNIT - 1). If no shifting is done and there is no padding, we are
> also fine, because native_encode_expr will only actually write the size of
> TYPE_MODE bytes. But in this case padding is 1 byte, so native_encode_expr
> writes 4 bytes (the last one is 0xff), byte_size is initially 5, as padding
> is 1, it is decremented to 4. But we actually want to store just 3 bytes,
> not 4; when we store 4, we overwrite the earlier value of the following
> byte.
>
> The rest of the patch are just cleanups.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok.
Richard.
> 2016-11-21 Jakub Jelinek <jakub@redhat.com>
>
> PR tree-optimization/78436
> * gimple-ssa-store-merging.c (zero_char_buf): Removed.
> (shift_bytes_in_array, shift_bytes_in_array_right,
> merged_store_group::apply_stores): Formatting fixes.
> (clear_bit_region): Likewise. Use memset.
> (encode_tree_to_bitpos): Formatting fixes. Fix comment typos - EPXR
> instead of EXPR and inerted instead of inserted. Use memset instead
> of zero_char_buf. For !BYTES_BIG_ENDIAN decrease byte_size by 1
> if shift_amnt is 0.
>
> * gcc.c-torture/execute/pr78436.c: New test.
>
> --- gcc/gimple-ssa-store-merging.c.jj 2016-11-09 15:22:36.000000000 +0100
> +++ gcc/gimple-ssa-store-merging.c 2016-11-21 10:54:51.746090238 +0100
> @@ -199,17 +199,6 @@ dump_char_array (FILE *fd, unsigned char
> fprintf (fd, "\n");
> }
>
> -/* Fill a byte array PTR of SZ elements with zeroes. This is to be used by
> - encode_tree_to_bitpos to zero-initialize most likely small arrays but
> - with a non-compile-time-constant size. */
> -
> -static inline void
> -zero_char_buf (unsigned char *ptr, unsigned int sz)
> -{
> - for (unsigned int i = 0; i < sz; i++)
> - ptr[i] = 0;
> -}
> -
> /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
> bits between adjacent elements. AMNT should be within
> [0, BITS_PER_UNIT).
> @@ -224,14 +213,13 @@ shift_bytes_in_array (unsigned char *ptr
> return;
>
> unsigned char carry_over = 0U;
> - unsigned char carry_mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - amnt));
> + unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
> unsigned char clear_mask = (~0U) << amnt;
>
> for (unsigned int i = 0; i < sz; i++)
> {
> unsigned prev_carry_over = carry_over;
> - carry_over
> - = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
> + carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
>
> ptr[i] <<= amnt;
> if (i != 0)
> @@ -263,10 +251,9 @@ shift_bytes_in_array_right (unsigned cha
> for (unsigned int i = 0; i < sz; i++)
> {
> unsigned prev_carry_over = carry_over;
> - carry_over
> - = (ptr[i] & carry_mask);
> + carry_over = ptr[i] & carry_mask;
>
> - carry_over <<= ((unsigned char)BITS_PER_UNIT - amnt);
> + carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
> ptr[i] >>= amnt;
> ptr[i] |= prev_carry_over;
> }
> @@ -327,7 +314,7 @@ clear_bit_region (unsigned char *ptr, un
> /* Second base case. */
> else if ((start + len) <= BITS_PER_UNIT)
> {
> - unsigned char mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - len));
> + unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
> mask >>= BITS_PER_UNIT - (start + len);
>
> ptr[0] &= ~mask;
> @@ -346,8 +333,7 @@ clear_bit_region (unsigned char *ptr, un
> unsigned int nbytes = len / BITS_PER_UNIT;
> /* We could recurse on each byte but do the loop here to avoid
> recursing too deep. */
> - for (unsigned int i = 0; i < nbytes; i++)
> - ptr[i] = 0U;
> + memset (ptr, '\0', nbytes);
> /* Clear the remaining sub-byte region if there is one. */
> if (len % BITS_PER_UNIT != 0)
> clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
> @@ -362,7 +348,7 @@ clear_bit_region (unsigned char *ptr, un
>
> static bool
> encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
> - unsigned int total_bytes)
> + unsigned int total_bytes)
> {
> unsigned int first_byte = bitpos / BITS_PER_UNIT;
> tree tmp_int = expr;
> @@ -370,8 +356,8 @@ encode_tree_to_bitpos (tree expr, unsign
> || mode_for_size (bitlen, MODE_INT, 0) == BLKmode;
>
> if (!sub_byte_op_p)
> - return native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
> - != 0;
> + return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
> + != 0);
>
> /* LITTLE-ENDIAN
> We are writing a non byte-sized quantity or at a position that is not
> @@ -381,7 +367,7 @@ encode_tree_to_bitpos (tree expr, unsign
> xxx xxxxxxxx xxx< bp>
> |______EXPR____|
>
> - First native_encode_expr EPXR into a temporary buffer and shift each
> + First native_encode_expr EXPR into a temporary buffer and shift each
> byte in the buffer by 'bp' (carrying the bits over as necessary).
> |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
> <------bitlen---->< bp>
> @@ -400,7 +386,7 @@ encode_tree_to_bitpos (tree expr, unsign
> <bp >xxx xxxxxxxx xxx
> |_____EXPR_____|
>
> - First native_encode_expr EPXR into a temporary buffer and shift each
> + First native_encode_expr EXPR into a temporary buffer and shift each
> byte in the buffer to the right by (carrying the bits over as necessary).
> We shift by as much as needed to align the most significant bit of EXPR
> with bitpos:
> @@ -418,7 +404,7 @@ encode_tree_to_bitpos (tree expr, unsign
> /* Allocate an extra byte so that we have space to shift into. */
> unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
> unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
> - zero_char_buf (tmpbuf, byte_size);
> + memset (tmpbuf, '\0', byte_size);
> /* The store detection code should only have allowed constants that are
> accepted by native_encode_expr. */
> if (native_encode_expr (expr, tmpbuf, byte_size, 0) == 0)
> @@ -453,7 +439,7 @@ encode_tree_to_bitpos (tree expr, unsign
> }
>
> /* Clear the bit region in PTR where the bits from TMPBUF will be
> - inerted into. */
> + inserted into. */
> if (BYTES_BIG_ENDIAN)
> clear_bit_region_be (ptr + first_byte,
> BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
> @@ -493,7 +479,11 @@ encode_tree_to_bitpos (tree expr, unsign
>
> /* Create the shifted version of EXPR. */
> if (!BYTES_BIG_ENDIAN)
> - shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> + {
> + shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
> + if (shift_amnt == 0)
> + byte_size--;
> + }
> else
> {
> gcc_assert (BYTES_BIG_ENDIAN);
> @@ -648,8 +638,7 @@ merged_store_group::apply_stores ()
> /* Create a buffer of a size that is 2 times the number of bytes we're
> storing. That way native_encode_expr can write power-of-2-sized
> chunks without overrunning. */
> - buf_size
> - = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
> + buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
> val = XCNEWVEC (unsigned char, buf_size);
>
> FOR_EACH_VEC_ELT (stores, i, info)
> --- gcc/testsuite/gcc.c-torture/execute/pr78436.c.jj 2016-11-21 10:58:28.209378756 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr78436.c 2016-11-21 10:57:45.000000000 +0100
> @@ -0,0 +1,23 @@
> +/* PR tree-optimization/78436 */
> +
> +struct S
> +{
> + long int a : 24;
> + signed char b : 8;
> +} s;
> +
> +__attribute__((noinline, noclone)) void
> +foo ()
> +{
> + s.b = 0;
> + s.a = -1193165L;
> +}
> +
> +int
> +main ()
> +{
> + foo ();
> + if (s.b != 0)
> + __builtin_abort ();
> + return 0;
> +}
>
> Jakub
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)