[PATCH] sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582]
Jakub Jelinek
jakub@redhat.com
Thu Feb 13 07:52:00 GMT 2020
Hi!
The following patch is first step towards fixing PR93582.
vn_reference_lookup_3 right now punts on anything that isn't byte aligned,
so to be able to lookup a constant bitfield store, one needs to use
the exact same COMPONENT_REF, otherwise it isn't found.
This patch lifts up that that restriction if the bits to be loaded are
covered by a single store of a constant (keeps the restriction so far
for the multiple store case, can tweak that incrementally, but I think
for bisection etc. it is worth to do it one step at a time).
Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
and powerpc64-linux (the last one regtested with \{-m32,-m64\}).
Additionally, I've bootstrapped/regtested it on those with statistics
gathering on when val was non-NULL (i.e. we managed to see something
through) when any of offseti, offset2i, maxsizei or size2i wasn't multiple
of BITS_PER_UNIT (i.e. when this optimization triggered). Below is
a list of the unique cases where it triggered, the first column says on
which of the 5 ABIs it triggered, qmath stands for those that enable
libquadmath, i.e. ia32,x86_64,ppc64le.
Ok for trunk?
all ../../gcc/gimple-ssa-backprop.c {anonymous}::backprop::process_var
all gcc/gcc/testsuite/gcc.c-torture/compile/20191015-1.c f
all gcc/gcc/testsuite/gcc.c-torture/compile/20191015-2.c f
all gcc/gcc/testsuite/gcc.c-torture/compile/20200105-1.c g
all gcc/gcc/testsuite/gcc.c-torture/compile/20200105-2.c g
all gcc/gcc/testsuite/gcc.c-torture/compile/20200105-3.c g
all gcc/gcc/testsuite/gcc.c-torture/execute/20181120-1.c main
all gcc/gcc/testsuite/gcc.c-torture/execute/921204-1.c main
all gcc/gcc/testsuite/gcc.c-torture/execute/pr58726.c main
all gcc/gcc/testsuite/gcc.c-torture/execute/pr86492.c foo
all gcc/gcc/testsuite/gcc.dg/store_merging_24.c foo
all gcc/gcc/testsuite/gcc.dg/store_merging_25.c foo
all gcc/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c foo
all gcc/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c foo
all gcc/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c foo
all gcc/gcc/testsuite/g++.dg/other/pr89692.C foo
all ../../../libgcc/unwind-dw2-fde-dip.c init_object
all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info
all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info_bases
all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info_table
all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_info_table_bases
all ../../../libgcc/unwind-dw2-fde-dip.c __register_frame_table
all ../../../libgcc/unwind-dw2-fde-dip.c search_object
all ../../../libgcc/unwind-dw2-fde-dip.c _Unwind_IteratePhdrCallback
ia32 /tmp/921204-1.exe.NdZMiZ.ltrans0.o main
ia32 /tmp/ccK7HiiN.o main
ia32 /tmp/ccWtcuID.o foo
ia32 /tmp/pr86492.exe.IKs2SA.ltrans0.o foo
lp64 gcc/gcc/testsuite/gnat.dg/pack22.adb pack22
ppc32 /tmp/921204-1.exe.1GoDpE.ltrans0.o main
ppc32 /tmp/ccivx4sg.o foo
ppc32 /tmp/ccLFNqjQ.o main
ppc32 /tmp/pr86492.exe.VkJDwY.ltrans0.o foo
ppc32 /tmp/pr88739.exe.y33n0X.ltrans0.o main
ppc64le cd2a32a.adb cd2a32a
ppc64le /tmp/921204-1.exe.la923O.ltrans0.o main
ppc64le /tmp/ccH3NcwE.o main
ppc64le /tmp/ccmHysWx.o foo
ppc64le /tmp/pr86492.exe.EYd24l.ltrans0.o foo
ppc64le /tmp/pr88739.exe.uAT6pA.ltrans0.o main
ppc64 /tmp/921204-1.exe.vtoTSo.ltrans0.o main
ppc64 /tmp/ccm4RGtK.o main
ppc64 /tmp/cczZpRCD.o foo
ppc64 /tmp/pr86492.exe.UdbN8I.ltrans0.o foo
ppc64 /tmp/pr88739.exe.DtQe1H.ltrans0.o main
qmath ../../../libquadmath/math/expq.c expq
qmath ../../../libquadmath/math/fmaq.c fmaq
qmath ../../../libquadmath/math/nanq.c nanq
qmath ../../../libquadmath/strtod/strtoflt128.c ____strtoflt128_internal
x86_64 cd2a32a.adb cd2a32a
x86_64 /tmp/921204-1.exe.0059fB.ltrans0.o main
x86_64 /tmp/cci9lHhD.o main
x86_64 /tmp/ccTlWSbV.o foo
x86_64 /tmp/pr86492.exe.NE0yez.ltrans0.o foo
x86_64 /tmp/pr88739.exe.PKCG2M.ltrans0.o main
x86 gcc/gcc/testsuite/gcc.dg/torture/pr45017.c main
x86 gcc/gcc/testsuite/gcc.target/i386/pr37870.c main
2020-02-13 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/93582
* fold-const.h (shift_bytes_in_array_left,
shift_bytes_in_array_right): Declare.
* fold-const.c (shift_bytes_in_array_left,
shift_bytes_in_array_right): New function, moved from
gimple-ssa-store-merging.c, no longer static.
* gimple-ssa-store-merging.c (shift_bytes_in_array): Move
to gimple-ssa-store-merging.c and rename to shift_bytes_in_array_left.
(shift_bytes_in_array_right): Move to gimple-ssa-store-merging.c.
(encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
shift_bytes_in_array.
(verify_shift_bytes_in_array): Rename to ...
(verify_shift_bytes_in_array_left): ... this. Use
shift_bytes_in_array_left instead of shift_bytes_in_array.
(store_merging_c_tests): Call verify_shift_bytes_in_array_left
instead of verify_shift_bytes_in_array.
* tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr
/ native_interpret_expr where the store covers all needed bits,
punt on PDP-endian, otherwise allow all involved offsets and sizes
not to be byte-aligned.
* gcc.dg/tree-ssa/pr93582-1.c: New test.
* gcc.dg/tree-ssa/pr93582-2.c: New test.
* gcc.dg/tree-ssa/pr93582-3.c: New test.
--- gcc/fold-const.h.jj 2020-02-12 11:43:57.533684956 +0100
+++ gcc/fold-const.h 2020-02-12 15:19:38.737897631 +0100
@@ -30,6 +30,10 @@ extern int native_encode_initializer (tr
int off = -1);
extern tree native_interpret_expr (tree, const unsigned char *, int);
extern bool can_native_interpret_type_p (tree);
+extern void shift_bytes_in_array_left (unsigned char *, unsigned int,
+ unsigned int);
+extern void shift_bytes_in_array_right (unsigned char *, unsigned int,
+ unsigned int);
/* Fold constants as much as possible in an expression.
Returns the simplified expression.
--- gcc/fold-const.c.jj 2020-02-12 11:43:57.532684971 +0100
+++ gcc/fold-const.c 2020-02-12 15:19:38.740897586 +0100
@@ -8354,6 +8354,70 @@ can_native_interpret_type_p (tree type)
}
}
+/* Routines for manipulation of native_encode_expr encoded data if the encoded
+ or extracted constant positions and/or sizes aren't byte aligned. */
+
+/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
+ bits between adjacent elements. AMNT should be within
+ [0, BITS_PER_UNIT).
+ Example, AMNT = 2:
+ 00011111|11100000 << 2 = 01111111|10000000
+ PTR[1] | PTR[0] PTR[1] | PTR[0]. */
+
+void
+shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz,
+ unsigned int amnt)
+{
+ if (amnt == 0)
+ return;
+
+ unsigned char carry_over = 0U;
+ unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
+ unsigned char clear_mask = (~0U) << amnt;
+
+ for (unsigned int i = 0; i < sz; i++)
+ {
+ unsigned prev_carry_over = carry_over;
+ carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
+
+ ptr[i] <<= amnt;
+ if (i != 0)
+ {
+ ptr[i] &= clear_mask;
+ ptr[i] |= prev_carry_over;
+ }
+ }
+}
+
+/* Like shift_bytes_in_array_left but for big-endian.
+ Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
+ bits between adjacent elements. AMNT should be within
+ [0, BITS_PER_UNIT).
+ Example, AMNT = 2:
+ 00011111|11100000 >> 2 = 00000111|11111000
+ PTR[0] | PTR[1] PTR[0] | PTR[1]. */
+
+void
+shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
+ unsigned int amnt)
+{
+ if (amnt == 0)
+ return;
+
+ unsigned char carry_over = 0U;
+ unsigned char carry_mask = ~(~0U << amnt);
+
+ for (unsigned int i = 0; i < sz; i++)
+ {
+ unsigned prev_carry_over = carry_over;
+ carry_over = ptr[i] & carry_mask;
+
+ carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
+ ptr[i] >>= amnt;
+ ptr[i] |= prev_carry_over;
+ }
+}
+
/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
directly on the VECTOR_CST encoding, in a way that works for variable-
length vectors. Return the resulting VECTOR_CST on success or null
--- gcc/gimple-ssa-store-merging.c.jj 2020-02-12 11:43:57.558684581 +0100
+++ gcc/gimple-ssa-store-merging.c 2020-02-12 15:19:38.742897557 +0100
@@ -1475,66 +1475,6 @@ dump_char_array (FILE *fd, unsigned char
fprintf (fd, "\n");
}
-/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
- bits between adjacent elements. AMNT should be within
- [0, BITS_PER_UNIT).
- Example, AMNT = 2:
- 00011111|11100000 << 2 = 01111111|10000000
- PTR[1] | PTR[0] PTR[1] | PTR[0]. */
-
-static void
-shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
-{
- if (amnt == 0)
- return;
-
- unsigned char carry_over = 0U;
- unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
- unsigned char clear_mask = (~0U) << amnt;
-
- for (unsigned int i = 0; i < sz; i++)
- {
- unsigned prev_carry_over = carry_over;
- carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
-
- ptr[i] <<= amnt;
- if (i != 0)
- {
- ptr[i] &= clear_mask;
- ptr[i] |= prev_carry_over;
- }
- }
-}
-
-/* Like shift_bytes_in_array but for big-endian.
- Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
- bits between adjacent elements. AMNT should be within
- [0, BITS_PER_UNIT).
- Example, AMNT = 2:
- 00011111|11100000 >> 2 = 00000111|11111000
- PTR[0] | PTR[1] PTR[0] | PTR[1]. */
-
-static void
-shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
- unsigned int amnt)
-{
- if (amnt == 0)
- return;
-
- unsigned char carry_over = 0U;
- unsigned char carry_mask = ~(~0U << amnt);
-
- for (unsigned int i = 0; i < sz; i++)
- {
- unsigned prev_carry_over = carry_over;
- carry_over = ptr[i] & carry_mask;
-
- carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
- ptr[i] >>= amnt;
- ptr[i] |= prev_carry_over;
- }
-}
-
/* Clear out LEN bits starting from bit START in the byte array
PTR. This clears the bits to the *right* from START.
START must be within [0, BITS_PER_UNIT) and counts starting from
@@ -1793,7 +1733,7 @@ encode_tree_to_bitpos (tree expr, unsign
/* Create the shifted version of EXPR. */
if (!BYTES_BIG_ENDIAN)
{
- shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
+ shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
if (shift_amnt == 0)
byte_size--;
}
@@ -5092,11 +5032,11 @@ verify_array_eq (unsigned char *x, unsig
}
}
-/* Test shift_bytes_in_array and that it carries bits across between
+/* Test shift_bytes_in_array_left and that it carries bits across between
bytes correctly. */
static void
-verify_shift_bytes_in_array (void)
+verify_shift_bytes_in_array_left (void)
{
/* byte 1 | byte 0
00011111 | 11100000. */
@@ -5105,13 +5045,13 @@ verify_shift_bytes_in_array (void)
memcpy (in, orig, sizeof orig);
unsigned char expected[2] = { 0x80, 0x7f };
- shift_bytes_in_array (in, sizeof (in), 2);
+ shift_bytes_in_array_left (in, sizeof (in), 2);
verify_array_eq (in, expected, sizeof (in));
memcpy (in, orig, sizeof orig);
memcpy (expected, orig, sizeof orig);
/* Check that shifting by zero doesn't change anything. */
- shift_bytes_in_array (in, sizeof (in), 0);
+ shift_bytes_in_array_left (in, sizeof (in), 0);
verify_array_eq (in, expected, sizeof (in));
}
@@ -5196,7 +5136,7 @@ verify_clear_bit_region_be (void)
void
store_merging_c_tests (void)
{
- verify_shift_bytes_in_array ();
+ verify_shift_bytes_in_array_left ();
verify_shift_bytes_in_array_right ();
verify_clear_bit_region ();
verify_clear_bit_region_be ();
--- gcc/tree-ssa-sccvn.c.jj 2020-02-12 11:43:57.604683889 +0100
+++ gcc/tree-ssa-sccvn.c 2020-02-12 18:35:14.311359233 +0100
@@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree
&& is_gimple_reg_type (vr->type)
&& !contains_storage_order_barrier_p (vr->operands)
&& gimple_assign_single_p (def_stmt)
- && CHAR_BIT == 8 && BITS_PER_UNIT == 8
+ && CHAR_BIT == 8
+ && BITS_PER_UNIT == 8
+ && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
/* native_encode and native_decode operate on arrays of bytes
and so fundamentally need a compile-time size and offset. */
&& maxsize.is_constant (&maxsizei)
- && maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti)
- && offseti % BITS_PER_UNIT == 0
&& (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
|| (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
&& is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
@@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree
&& !reverse
&& !storage_order_barrier_p (lhs)
&& known_eq (maxsize2, size2)
- && multiple_p (size2, BITS_PER_UNIT)
- && multiple_p (offset2, BITS_PER_UNIT)
&& adjust_offsets_for_equal_base_address (base, &offset,
base2, &offset2)
&& offset.is_constant (&offseti)
@@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree
&& known_subrange_p (offseti, maxsizei, offset2, size2))
{
/* We support up to 512-bit values (for V8DFmode). */
- unsigned char buffer[64];
+ unsigned char buffer[65];
int len;
tree rhs = gimple_assign_rhs1 (def_stmt);
if (TREE_CODE (rhs) == SSA_NAME)
rhs = SSA_VAL (rhs);
- unsigned pad = 0;
- if (BYTES_BIG_ENDIAN
- && is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
- {
- /* On big-endian the padding is at the 'front' so
- just skip the initial bytes. */
- fixed_size_mode mode
- = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
- pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
- }
len = native_encode_expr (rhs,
- buffer, sizeof (buffer),
- ((offseti - offset2i) / BITS_PER_UNIT
- + pad));
+ buffer, sizeof (buffer) - 1,
+ (offseti - offset2i) / BITS_PER_UNIT);
if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
{
tree type = vr->type;
+ unsigned char *buf = buffer;
+ unsigned int amnt = 0;
/* Make sure to interpret in a type that has a range
covering the whole access size. */
if (INTEGRAL_TYPE_P (vr->type)
&& maxsizei != TYPE_PRECISION (vr->type))
type = build_nonstandard_integer_type (maxsizei,
TYPE_UNSIGNED (type));
- tree val = native_interpret_expr (type, buffer,
- maxsizei / BITS_PER_UNIT);
+ if (BYTES_BIG_ENDIAN)
+ {
+ /* For big-endian native_encode_expr stored the rhs
+ such that the LSB of it is the LSB of buffer[len - 1].
+ That bit is stored into memory at position
+ offset2 + size2 - 1, i.e. in byte
+ base + (offset2 + size2 - 1) / BITS_PER_UNIT.
+ E.g. for offset2 1 and size2 14, rhs -1 and memory
+ previously cleared that is:
+ 0 1
+ 01111111|11111110
+ Now, if we want to extract offset 2 and size 12 from
+ it using native_interpret_expr (which actually works
+ for integral bitfield types in terms of byte size of
+ the mode), the native_encode_expr stored the value
+ into buffer as
+ XX111111|11111111
+ and returned len 2 (the X bits are outside of
+ precision).
+ Let sz be maxsize / BITS_PER_UNIT if not extracting
+ a bitfield, and GET_MODE_SIZE otherwise.
+ We need to align the LSB of the value we want to
+ extract as the LSB of buf[sz - 1].
+ The LSB from memory we need to read is at position
+ offset + maxsize - 1. */
+ HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
+ if (INTEGRAL_TYPE_P (type))
+ sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+ amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+ - offseti - maxsizei) % BITS_PER_UNIT;
+ if (amnt)
+ shift_bytes_in_array_right (buffer, len, amnt);
+ amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+ - offseti - maxsizei - amnt) / BITS_PER_UNIT;
+ if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
+ len = 0;
+ else
+ {
+ buf = buffer + len - sz - amnt;
+ len -= (buf - buffer);
+ }
+ }
+ else
+ {
+ amnt = ((unsigned HOST_WIDE_INT) offset2i
+ - offseti) % BITS_PER_UNIT;
+ if (amnt)
+ {
+ buffer[len] = 0;
+ shift_bytes_in_array_left (buffer, len + 1, amnt);
+ buf = buffer + 1;
+ }
+ }
+ tree val = native_interpret_expr (type, buf, len);
/* If we chop off bits because the types precision doesn't
match the memory access size this is ok when optimizing
reads but not when called from the DSE code during
@@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
return data->finish (get_alias_set (lhs), val);
}
}
- else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+ else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
+ size2i)
+ && maxsizei % BITS_PER_UNIT == 0
+ && offseti % BITS_PER_UNIT == 0
+ && size2i % BITS_PER_UNIT == 0
+ && offset2i % BITS_PER_UNIT == 0)
{
pd_data pd;
tree rhs = gimple_assign_rhs1 (def_stmt);
--- gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c.jj 2020-02-12 18:23:02.522285857 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c 2020-02-12 18:24:51.284661882 +0100
@@ -0,0 +1,17 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
+
+union U {
+ struct S { int a : 1, b : 4, c : 27; } s;
+ struct T { int d : 2; int e : 2; int f : 28; } t;
+};
+
+int
+foo (void)
+{
+ union U u;
+ u.s.b = 10;
+ return u.t.e;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c.jj 2020-02-12 18:23:05.608239781 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c 2020-02-12 18:31:12.443970643 +0100
@@ -0,0 +1,17 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */
+
+union U {
+ struct S { int a : 1, b : 14, c : 17; } s;
+ struct T { int d : 2; int e : 12; int f : 18; } t;
+};
+
+int
+foo (void)
+{
+ union U u;
+ u.s.b = -7005;
+ return u.t.e;
+}
--- gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c.jj 2020-02-12 18:30:55.372225545 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c 2020-02-12 18:31:20.136855767 +0100
@@ -0,0 +1,18 @@
+/* PR tree-optimization/93582 */
+/* { dg-do compile { target int32 } } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */
+/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */
+
+union U {
+ struct S { int a : 1, b : 14, c : 17; } s;
+ struct T { int d : 10; int e : 4; int f : 18; } t;
+};
+
+int
+foo (void)
+{
+ union U u;
+ u.s.b = -7005;
+ return u.t.e;
+}
Jakub
More information about the Gcc-patches
mailing list