[PATCH] Fix PR61306: improve handling of sign and cast in bswap
Richard Biener
richard.guenther@gmail.com
Wed Jun 11 08:32:00 GMT 2014
On Tue, Jun 10, 2014 at 10:49 AM, Thomas Preud'homme
<thomas.preudhomme@arm.com> wrote:
>> From: Richard Biener [mailto:richard.guenther@gmail.com]
>> Sent: Wednesday, June 04, 2014 5:39 PM
>> To: Thomas Preud'homme
>>
>
>>
>> I'm failing to get why you possibly need two casts ... you should
>> only need one, from the bswap/load result to the final type
>> (zero-extended as you say - so the load type should simply be
>> unsigned which it is already).
>
> You are right indeed. I failed to realize that the problems I
> encountered were caused by an initially wrong understanding of
> the reason behind PR61306. All this code is not necessary.
>
>>
>> So I think that the testcase in the patch is fixed already by
>> doing the n->type change (and a proper sign-extension detection).
>>
>> Can you please split that part out?
>
> Doing so I realize the patch was incomplete. Sign extension can
> be triggered in two distinct places in the code (right shift and cast)
> that can both lead to incorrect code being generated. With some
> efforts I managed to create two testcases that work both on
> GCC trunk but also GCC 4.9 and 4.8.
>
> ChangeLog entries are:
>
> *** gcc/ChangeLog ***
>
> 2014-06-05 Thomas Preud'homme <thomas.preudhomme@arm.com>
>
> PR tree-optimization/61306
> * tree-ssa-math-opts.c (struct symbolic_number): Store type of
> expression instead of its size.
> (do_shift_rotate): Adapt to change in struct symbolic_number. Return
> false to prevent optimization when the result is unpredictable due to
> arithmetic right shift of signed type with highest byte set.
> (verify_symbolic_number_p): Adapt to change in struct symbolic_number.
> (init_symbolic_number): Likewise.
> (find_bswap_or_nop_1): Likewise. Return NULL to prevent optimization
> when the result is unpredictable due to sign extension.
>
> *** gcc/testsuite/ChangeLog ***
>
> 2014-06-05 Thomas Preud'homme <thomas.preudhomme@arm.com>
>
> * gcc.c-torture/execute/pr61306-1.c: New test.
> * gcc.c-torture/execute/pr61306-2.c: Likewise.
> * gcc.c-torture/execute/pr61306-3.c: Likewise.
>
>
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr61306-1.c b/gcc/testsuite/gcc.c-torture/execute/pr61306-1.c
> new file mode 100644
> index 0000000..f6e8ff3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr61306-1.c
> @@ -0,0 +1,39 @@
> +#ifdef __INT32_TYPE__
> +typedef __INT32_TYPE__ int32_t;
> +#else
> +typedef int int32_t;
> +#endif
> +
> +#ifdef __UINT32_TYPE__
> +typedef __UINT32_TYPE__ uint32_t;
> +#else
> +typedef unsigned uint32_t;
> +#endif
> +
> +#define __fake_const_swab32(x) ((uint32_t)( \
> + (((uint32_t)(x) & (uint32_t)0x000000ffUL) << 24) | \
> + (((uint32_t)(x) & (uint32_t)0x0000ff00UL) << 8) | \
> + (((uint32_t)(x) & (uint32_t)0x00ff0000UL) >> 8) | \
> + (( (int32_t)(x) & (int32_t)0xff000000UL) >> 24)))
> +
> +/* Previous version of bswap optimization failed to consider sign extension
> + and as a result would replace an expression *not* doing a bswap by a
> + bswap. */
> +
> +__attribute__ ((noinline, noclone)) uint32_t
> +fake_bswap32 (uint32_t in)
> +{
> + return __fake_const_swab32 (in);
> +}
> +
> +int
> +main(void)
> +{
> + if (sizeof (int32_t) * __CHAR_BIT__ != 32)
> + return 0;
> + if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
> + return 0;
> + if (fake_bswap32 (0x87654321) != 0xffffff87)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr61306-2.c b/gcc/testsuite/gcc.c-torture/execute/pr61306-2.c
> new file mode 100644
> index 0000000..6cbbd19
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr61306-2.c
> @@ -0,0 +1,40 @@
> +#ifdef __INT16_TYPE__
> +typedef __INT16_TYPE__ int16_t;
> +#else
> +typedef short int16_t;
> +#endif
> +
> +#ifdef __UINT32_TYPE__
> +typedef __UINT32_TYPE__ uint32_t;
> +#else
> +typedef unsigned uint32_t;
> +#endif
> +
> +#define __fake_const_swab32(x) ((uint32_t)( \
> + (((uint32_t) (x) & (uint32_t)0x000000ffUL) << 24) | \
> + (((uint32_t)(int16_t)(x) & (uint32_t)0x00ffff00UL) << 8) | \
> + (((uint32_t) (x) & (uint32_t)0x00ff0000UL) >> 8) | \
> + (((uint32_t) (x) & (uint32_t)0xff000000UL) >> 24)))
> +
> +
> +/* Previous version of bswap optimization failed to consider sign extension
> + and as a result would replace an expression *not* doing a bswap by a
> + bswap. */
> +
> +__attribute__ ((noinline, noclone)) uint32_t
> +fake_bswap32 (uint32_t in)
> +{
> + return __fake_const_swab32 (in);
> +}
> +
> +int
> +main(void)
> +{
> + if (sizeof (uint32_t) * __CHAR_BIT__ != 32)
> + return 0;
> + if (sizeof (int16_t) * __CHAR_BIT__ != 16)
> + return 0;
> + if (fake_bswap32 (0x81828384) != 0xff838281)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.c-torture/execute/pr61306-3.c b/gcc/testsuite/gcc.c-torture/execute/pr61306-3.c
> new file mode 100644
> index 0000000..6086e27
> --- /dev/null
> +++ b/gcc/testsuite/gcc.c-torture/execute/pr61306-3.c
> @@ -0,0 +1,13 @@
> +short a = -1;
> +int b;
> +char c;
> +
> +int
> +main ()
> +{
> + c = a;
> + b = a | c;
> + if (b != -1)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
> index 658b341..d92c433 100644
> --- a/gcc/tree-ssa-math-opts.c
> +++ b/gcc/tree-ssa-math-opts.c
> @@ -1622,7 +1622,7 @@ make_pass_cse_sincos (gcc::context *ctxt)
>
> struct symbolic_number {
> uint64_t n;
> - int size;
> + tree type;
> tree base_addr;
> tree offset;
> HOST_WIDE_INT bytepos;
> @@ -1652,13 +1652,15 @@ do_shift_rotate (enum tree_code code,
> struct symbolic_number *n,
> int count)
> {
> + int bitsize = TYPE_PRECISION (n->type);
> +
> if (count % 8 != 0)
> return false;
>
> /* Zero out the extra bits of N in order to avoid them being shifted
> into the significant bits. */
> - if (n->size < (int)sizeof (int64_t))
> - n->n &= ((uint64_t)1 << (n->size * BITS_PER_UNIT)) - 1;
> + if (bitsize < 8 * (int)sizeof (int64_t))
> + n->n &= ((uint64_t)1 << bitsize) - 1;
>
> switch (code)
> {
> @@ -1666,20 +1668,23 @@ do_shift_rotate (enum tree_code code,
> n->n <<= count;
> break;
> case RSHIFT_EXPR:
> + /* Arithmetic shift of signed type: result is dependent on the value. */
> + if (!TYPE_UNSIGNED (n->type) && (n->n & (0xff << (bitsize - 8))))
> + return false;
> n->n >>= count;
> break;
> case LROTATE_EXPR:
> - n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
> + n->n = (n->n << count) | (n->n >> (bitsize - count));
> break;
> case RROTATE_EXPR:
> - n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
> + n->n = (n->n >> count) | (n->n << (bitsize - count));
> break;
> default:
> return false;
> }
> /* Zero unused bits for size. */
> - if (n->size < (int)sizeof (int64_t))
> - n->n &= ((uint64_t)1 << (n->size * BITS_PER_UNIT)) - 1;
> + if (bitsize < 8 * (int)sizeof (int64_t))
> + n->n &= ((uint64_t)1 << bitsize) - 1;
> return true;
> }
>
> @@ -1696,7 +1701,7 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
> if (TREE_CODE (lhs_type) != INTEGER_TYPE)
> return false;
>
> - if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
> + if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
> return false;
>
> return true;
> @@ -1708,20 +1713,23 @@ verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
> static bool
> init_symbolic_number (struct symbolic_number *n, tree src)
> {
> + int size;
> +
> n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
>
> /* Set up the symbolic number N by setting each byte to a value between 1 and
> the byte size of rhs1. The highest order byte is set to n->size and the
> lowest order byte to 1. */
> - n->size = TYPE_PRECISION (TREE_TYPE (src));
> - if (n->size % BITS_PER_UNIT != 0)
> + n->type = TREE_TYPE (src);
> + size = TYPE_PRECISION (n->type);
> + if (size % BITS_PER_UNIT != 0)
> return false;
> - n->size /= BITS_PER_UNIT;
> - n->range = n->size;
> + size /= BITS_PER_UNIT;
> + n->range = size;
> n->n = CMPNOP;
>
> - if (n->size < (int)sizeof (int64_t))
> - n->n &= ((uint64_t)1 << (n->size * BITS_PER_UNIT)) - 1;
> + if (size < (int)sizeof (int64_t))
> + n->n &= ((uint64_t)1 << (size * BITS_PER_UNIT)) - 1;
>
> return true;
> }
> @@ -1857,12 +1865,12 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
> {
> case BIT_AND_EXPR:
> {
> - int i;
> + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
> uint64_t val = int_cst_value (rhs2);
> uint64_t tmp = val;
>
> /* Only constants masking full bytes are allowed. */
> - for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
> + for (i = 0; i < size; i++, tmp >>= BITS_PER_UNIT)
> if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
> return NULL_TREE;
>
> @@ -1878,21 +1886,30 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
> break;
> CASE_CONVERT:
> {
> - int type_size;
> + int type_size, old_type_size;
> + tree type;
>
> - type_size = TYPE_PRECISION (gimple_expr_type (stmt));
> + type = gimple_expr_type (stmt);
> + type_size = TYPE_PRECISION (type);
> if (type_size % BITS_PER_UNIT != 0)
> return NULL_TREE;
>
> + /* Sign extension: result is dependent on the value. */
> + old_type_size = TYPE_PRECISION (n->type);
> + if (!TYPE_UNSIGNED (n->type)
> + && type_size > old_type_size
> + && n->n & (0xff << (old_type_size - 8)))
> + return NULL_TREE;
> +
> if (type_size / BITS_PER_UNIT < (int)(sizeof (int64_t)))
> {
> /* If STMT casts to a smaller type mask out the bits not
> belonging to the target type. */
> n->n &= ((uint64_t)1 << type_size) - 1;
> }
> - n->size = type_size / BITS_PER_UNIT;
> + n->type = type;
> if (!n->base_addr)
> - n->range = n->size;
> + n->range = type_size / BITS_PER_UNIT;
> }
> break;
> default:
> @@ -1905,7 +1922,7 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
>
> if (rhs_class == GIMPLE_BINARY_RHS)
> {
> - int i;
> + int i, size;
> struct symbolic_number n1, n2;
> uint64_t mask;
> tree source_expr2;
> @@ -1931,7 +1948,7 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
> if (!source_expr2)
> return NULL_TREE;
>
> - if (n1.size != n2.size)
> + if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
> return NULL_TREE;
>
> if (!n1.vuse != !n2.vuse ||
> @@ -1997,8 +2014,9 @@ find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
> n->base_addr = n1.base_addr;
> n->offset = n1.offset;
> n->bytepos = n1.bytepos;
> - n->size = n1.size;
> - for (i = 0, mask = 0xff; i < n->size; i++, mask <<= BITS_PER_UNIT)
> + n->type = n1.type;
> + size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
> + for (i = 0, mask = 0xff; i < size; i++, mask <<= BITS_PER_UNIT)
> {
> uint64_t masked1, masked2;
>
>
> Is this OK for trunk? Does this bug qualify for a backport patch to
> 4.8 and 4.9 branches?
This is ok for trunk and also for backporting (after a short while to
see if there is any fallout).
Thanks,
Richard.
> Best regards,
>
> Thomas
>
>
More information about the Gcc-patches
mailing list