[PATCH] Improve integer bit test on atomic builtin return
Richard Biener
rguenther@suse.de
Tue Oct 5 10:07:30 GMT 2021
On Mon, 4 Oct 2021, H.J. Lu wrote:
> commit adedd5c173388ae505470df152b9cb3947339566
> Author: Jakub Jelinek <jakub@redhat.com>
> Date: Tue May 3 13:37:25 2016 +0200
>
> re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
>
> optimized bit test on atomic builtin return with lock bts/btr/btc. But
> it works only for unsigned integers since atomic builtins operate on the
> 'uintptr_t' type. It fails on bool:
>
> _1 = atomic builtin;
> _4 = (_Bool) _1;
>
> and signed integers:
>
> _1 = atomic builtin;
> _2 = (int) _1;
> _5 = _2 & (1 << N);
>
> Improve bit test on atomic builtin return by converting:
>
> _1 = atomic builtin;
> _4 = (_Bool) _1;
>
> to
>
> _1 = atomic builtin;
> _5 = _1 & (1 << 0);
> _4 = (_Bool) _5;
>
> and converting:
>
> _1 = atomic builtin;
> _2 = (int) _1;
> _5 = _2 & (1 << N);
>
> to
>
> _1 = atomic builtin;
> _6 = _1 & (1 << N);
> _5 = (int) _6;
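For reference, a minimal standalone reproducer for the _Bool form above is
essentially the new pr102566-2.c test further down, just spelled with the
plain __atomic builtin:

#include <stdbool.h>

/* At -O2 the "& 1" is folded into the boolean conversion, so only the
   (_Bool) conversion of the builtin's result remains, matching the GIMPLE
   quoted above.  */
bool
tbit0 (int *v)
{
  return __atomic_fetch_or (v, 1, __ATOMIC_RELAXED) & 1;
}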
Why not do this last transform (the integer-cast case) with match.pd
patterns, independently of whether _1 is defined by an atomic builtin?
The first suggested transform is likely going to be undone by folding, no?
Richard.
> gcc/
>
> PR middle-end/102566
> * tree-ssa-ccp.c (optimize_atomic_bit_test_and): Handle cast
> between atomic builtin and bit test.
>
> gcc/testsuite/
>
> PR middle-end/102566
> * g++.target/i386/pr102566-1.C: New test.
> * gcc.target/i386/pr102566-1a.c: Likewise.
> * gcc.target/i386/pr102566-1b.c: Likewise.
> * gcc.target/i386/pr102566-2.c: Likewise.
> ---
> gcc/testsuite/g++.target/i386/pr102566-1.C | 12 ++
> gcc/testsuite/gcc.target/i386/pr102566-1a.c | 188 ++++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr102566-1b.c | 107 +++++++++++
> gcc/testsuite/gcc.target/i386/pr102566-2.c | 14 ++
> gcc/tree-ssa-ccp.c | 136 +++++++++++++-
> 5 files changed, 452 insertions(+), 5 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr102566-1.C
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1a.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-1b.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr102566-2.c
>
> diff --git a/gcc/testsuite/g++.target/i386/pr102566-1.C b/gcc/testsuite/g++.target/i386/pr102566-1.C
> new file mode 100644
> index 00000000000..6e33298d8bf
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr102566-1.C
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target c++11 } } */
> +/* { dg-options "-O2" } */
> +
> +#include <atomic>
> +
> +bool tbit(std::atomic<int> &i)
> +{
> + return i.fetch_or(1, std::memory_order_relaxed) & 1;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1a.c b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> new file mode 100644
> index 00000000000..a915de354e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1a.c
> @@ -0,0 +1,188 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +void bar (void);
> +
> +__attribute__((noinline, noclone)) int
> +f1 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f2 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
> + int t2 = t1 & mask;
> + return t2 != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f3 (long int *a, int bit)
> +{
> + long int mask = 1l << bit;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f4 (int *a)
> +{
> + int mask = 1 << 7;
> + return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f5 (int *a)
> +{
> + int mask = 1 << 13;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f6 (int *a)
> +{
> + int mask = 1 << 0;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
> + bar ();
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
> + bar ();
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f9 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f10 (int *a)
> +{
> + int mask = 1 << 7;
> + return (__sync_fetch_and_xor (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f11 (int *a)
> +{
> + int mask = 1 << 13;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f12 (int *a)
> +{
> + int mask = 1 << 0;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f13 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f14 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f15 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f16 (int *a)
> +{
> + int mask = 1 << 7;
> + return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f17 (int *a)
> +{
> + int mask = 1 << 13;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f18 (int *a)
> +{
> + int mask = 1 << 0;
> + return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f19 (long int *a, int bit)
> +{
> + long int mask = 1l << bit;
> + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f20 (long int *a)
> +{
> + long int mask = 1l << 7;
> + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
> +}
> +
> +__attribute__((noinline, noclone)) int
> +f21 (int *a, int bit)
> +{
> + int mask = 1 << bit;
> + return (__sync_fetch_and_or (a, mask) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f22 (long int *a)
> +{
> + long int mask = 1l << 7;
> + return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) long int
> +f23 (long int *a)
> +{
> + long int mask = 1l << 7;
> + return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f24 (short int *a)
> +{
> + short int mask = 1 << 7;
> + return (__sync_fetch_and_or (a, mask) & mask) != 0;
> +}
> +
> +__attribute__((noinline, noclone)) short int
> +f25 (short int *a)
> +{
> + short int mask = 1 << 7;
> + return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-1b.c b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> new file mode 100644
> index 00000000000..c4dab8135c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-1b.c
> @@ -0,0 +1,107 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -g" } */
> +
> +int cnt;
> +
> +__attribute__((noinline, noclone)) void
> +bar (void)
> +{
> + cnt++;
> +}
> +
> +#include "pr102566-1a.c"
> +
> +int a;
> +long int b;
> +unsigned long int c;
> +unsigned short int d;
> +
> +int
> +main ()
> +{
> + __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
> + if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
> + || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
> + __builtin_abort ();
> + if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
> + || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
> + __builtin_abort ();
> + __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
> + if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
> + || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
> + __builtin_abort ();
> + __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
> + if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
> + || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
> + __builtin_abort ();
> + if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> + || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
> + __builtin_abort ();
> + if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
> + || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (cnt != 0
> + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> + || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
> + || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
> + || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
> + __builtin_abort ();
> + if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> + __builtin_abort ();
> + if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> + || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> + || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
> + if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
> + || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
> + __builtin_abort ();
> + if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
> + || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
> + || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> + || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
> + || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
> + __builtin_abort ();
> + __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
> + if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
> + || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
> + __builtin_abort ();
> + __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
> + if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> + || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
> + || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
> + __builtin_abort ();
> + if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
> + || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
> + __builtin_abort ();
> + __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
> + if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> + || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
> + || cnt != 2)
> + __builtin_abort ();
> + return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102566-2.c b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> new file mode 100644
> index 00000000000..d1c30315353
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102566-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +#include <stdatomic.h>
> +#include <stdbool.h>
> +
> +bool
> +foo (_Atomic int *v)
> +{
> + return atomic_fetch_or_explicit (v, 1, memory_order_relaxed) & 1;
> +}
> +
> +/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btsl" 1 } } */
> +/* { dg-final { scan-assembler-not "cmpxchg" } } */
> diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
> index 70ce6a4d5b8..a3f7b7f233e 100644
> --- a/gcc/tree-ssa-ccp.c
> +++ b/gcc/tree-ssa-ccp.c
> @@ -3279,10 +3279,115 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
> || !single_imm_use (lhs, &use_p, &use_stmt)
> || !is_gimple_assign (use_stmt)
> - || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
> || !gimple_vdef (call))
> return;
>
> + mask = gimple_call_arg (call, 1);
> + tree_code rhs_code = gimple_assign_rhs_code (use_stmt);
> + if (rhs_code != BIT_AND_EXPR)
> + {
> + if (rhs_code != NOP_EXPR)
> + return;
> +
> + tree nop_lhs = gimple_assign_lhs (use_stmt);
> + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nop_lhs))
> + return;
> +
> + tree nop_rhs = gimple_assign_rhs1 (use_stmt);
> +
> + gimple *g;
> + gimple_stmt_iterator gsi;
> + tree var;
> +
> + if (TREE_CODE (TREE_TYPE (nop_lhs)) == BOOLEAN_TYPE)
> + {
> + /* Convert
> + _1 = atomic bit op;
> + _4 = (_Bool) _1;
> + to
> + _1 = atomic bit op;
> + _5 = _1 & 1;
> + _4 = (_Bool) _5;
> + */
> + var = make_ssa_name (TREE_TYPE (nop_rhs));
> + replace_uses_by (nop_rhs, var);
> + g = gimple_build_assign (var, BIT_AND_EXPR, nop_rhs,
> + build_int_cst (TREE_TYPE (lhs), 1));
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_insert_before (&gsi, g, GSI_NEW_STMT);
> + use_stmt = g;
> + }
> + else if (TYPE_PRECISION (TREE_TYPE (nop_lhs))
> + == TYPE_PRECISION (TREE_TYPE (nop_rhs)))
> + {
> + gimple *use_nop_stmt;
> + if (!single_imm_use (nop_lhs, &use_p, &use_nop_stmt)
> + || !is_gimple_assign (use_nop_stmt)
> + || gimple_assign_rhs_code (use_nop_stmt) != BIT_AND_EXPR)
> + return;
> +
> + tree op_mask = mask;
> + if (TREE_CODE (op_mask) == SSA_NAME)
> + {
> + g = SSA_NAME_DEF_STMT (op_mask);
> + if (gimple_assign_rhs_code (g) == NOP_EXPR)
> + {
> + tree mask_nop_lhs = gimple_assign_lhs (g);
> +
> + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (mask_nop_lhs))
> + return;
> +
> + tree mask_nop_rhs = gimple_assign_rhs1 (g);
> + if (TYPE_PRECISION (TREE_TYPE (mask_nop_lhs))
> + != TYPE_PRECISION (TREE_TYPE (mask_nop_rhs)))
> + return;
> + op_mask = mask_nop_rhs;
> + g = SSA_NAME_DEF_STMT (op_mask);
> + }
> +
> + if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
> + {
> + if (!is_gimple_assign (g)
> + || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
> + return;
> + tree reset_mask = gimple_assign_rhs1 (g);
> + if (TREE_CODE (op_mask) != SSA_NAME)
> + return;
> + g = SSA_NAME_DEF_STMT (reset_mask);
> + }
> +
> + if (!is_gimple_assign (g)
> + || gimple_assign_rhs_code (g) != LSHIFT_EXPR
> + || !integer_onep (gimple_assign_rhs1 (g)))
> + return;
> + }
> +
> + /* Convert
> + _1 = atomic bit op;
> + _2 = (int) _1;
> + _5 = _2 & N;
> + to
> + _1 = atomic bit op;
> + _6 = _1 & N;
> + _5 = (int) _6;
> + */
> + replace_uses_by (nop_lhs, lhs);
> + tree use_nop_lhs = gimple_assign_lhs (use_nop_stmt);
> + var = make_ssa_name (TREE_TYPE (use_nop_lhs));
> + gimple_assign_set_lhs (use_nop_stmt, var);
> + gsi = gsi_for_stmt (use_stmt);
> + gsi_remove (&gsi, true);
> + release_defs (use_stmt);
> + gsi_remove (gsip, true);
> + var = build1 (NOP_EXPR, TREE_TYPE (use_nop_lhs), var);
> + gsi = gsi_for_stmt (use_nop_stmt);
> + g = gimple_build_assign (use_nop_lhs, var);
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + use_stmt = use_nop_stmt;
> + mask = op_mask;
> + }
> + }
> +
> switch (fn)
> {
> case IFN_ATOMIC_BIT_TEST_AND_SET:
> @@ -3301,7 +3406,6 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
> return;
>
> - mask = gimple_call_arg (call, 1);
> tree use_lhs = gimple_assign_lhs (use_stmt);
> if (!use_lhs)
> return;
> @@ -3434,18 +3538,40 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
> of the specified bit after the atomic operation (makes only sense
> for xor, otherwise the bit content is compile time known),
> we need to invert the bit. */
> + tree mask_convert = mask;
> + gimple *g_convert = nullptr;
> + if (!use_bool && TREE_TYPE (lhs) != TREE_TYPE (mask))
> + {
> + mask_convert = make_ssa_name (TREE_TYPE (lhs));
> + tree var = build1 (NOP_EXPR, TREE_TYPE (lhs), mask);
> + g_convert = gimple_build_assign (mask_convert, var);
> + }
> g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
> BIT_XOR_EXPR, new_lhs,
> use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
> - : mask);
> + : mask_convert);
> new_lhs = gimple_assign_lhs (g);
> if (throws)
> {
> - gsi_insert_on_edge_immediate (e, g);
> + if (g_convert)
> + {
> + gsi_insert_on_edge_immediate (e, g_convert);
> + gsi = gsi_for_stmt (g_convert);
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + }
> + else
> + gsi_insert_on_edge_immediate (e, g);
> gsi = gsi_for_stmt (g);
> }
> else
> - gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + {
> + if (g_convert)
> + {
> + gsi_insert_after (&gsi, g_convert, GSI_NEW_STMT);
> + gsi = gsi_for_stmt (g_convert);
> + }
> + gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> + }
> }
> if (use_bool && has_debug_uses)
> {
>
--
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)