[PATCH] Add a new option "-ftree-bitfield-merge" (patch / doc inside)
Richard Biener
rguenther@suse.de
Fri Nov 8 14:11:00 GMT 2013
On Fri, 8 Nov 2013, Richard Biener wrote:
> > Hello,
> > This is new patch version.
> > Comments from Bernhard Reutner-Fischer review applied.
> > Also, test case bitfildmrg2.c modified - it is now execute test.
> >
> >
> > Example:
> >
> > Original code:
> > <unnamed-unsigned:3> D.1351;
> > <unnamed-unsigned:9> D.1350;
> > <unnamed-unsigned:7> D.1349;
> > D.1349_2 = p1_1(D)->f1;
> > p2_3(D)->f1 = D.1349_2;
> > D.1350_4 = p1_1(D)->f2;
> > p2_3(D)->f2 = D.1350_4;
> > D.1351_5 = p1_1(D)->f3;
> > p2_3(D)->f3 = D.1351_5;
> >
> > Optimized code:
> > <unnamed-unsigned:19> D.1358;
> > _16 = pr1_2(D)->_field0;
> > pr2_4(D)->_field0 = _16;
> >
> > The algorithm works at the basic-block level and consists of the
> > following 3 major steps:
> > 1. Go through the basic block's statement list. If there are statement
> > pairs that implement a copy of bit-field content from one memory
> > location to another, record the statement pointers and other necessary
> > data in a corresponding data structure.
> > 2. Identify records that represent adjacent bit-field accesses and
> > mark them as merged.
> > 3. Modify trees accordingly.
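> >
> > For illustration, source of this shape is what step 1 matches (a
> > minimal sketch; the field widths are chosen to match the example
> > above):
> >
> > struct S { unsigned f1:7; unsigned f2:9; unsigned f3:3; };
> >
> > void copy (struct S *p1, struct S *p2)
> > {
> >   /* Step 1 records each load/store pair; step 2 marks the three
> >      adjacent accesses as merged; step 3 rewrites them into the
> >      single 19-bit (7+9+3) access shown above.  */
> >   p2->f1 = p1->f1;
> >   p2->f2 = p1->f2;
> >   p2->f3 = p1->f3;
> > }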
> >
> > A new command-line option "-ftree-bitfield-merge" is introduced.
> >
> > Tested - passed gcc regression tests.
>
> Comments inline (sorry for the late reply...)
>
> > Changelog -
> >
> > gcc/ChangeLog:
> > 2013-09-24 Zoran Jovanovic <zoran.jovanovic@imgtec.com>
> > * Makefile.in: Add tree-sra.c to GTFILES.
> > * common.opt (ftree-bitfield-merge): New option.
> > * doc/invoke.texi: Add reference to "-ftree-bitfield-merge".
> > * tree-sra.c (ssa_bitfield_merge): New function.
> > Entry for (-ftree-bitfield-merge).
> > (bitfield_stmt_access_pair_htab_hash): New function.
> > (bitfield_stmt_access_pair_htab_eq): New function.
> > (cmp_access): New function.
> > (create_and_insert_access): New function.
> > (get_bit_offset): New function.
> > (get_merged_bit_field_size): New function.
> > (add_stmt_access_pair): New function.
> > * dwarf2out.c (simple_type_size_in_bits): Move to tree.c.
> > (field_byte_offset): Move declaration to tree.h; remove static.
> > * testsuite/gcc.dg/tree-ssa/bitfldmrg1.c: New test.
> > * testsuite/gcc.dg/tree-ssa/bitfldmrg2.c: New test.
> > * tree-ssa-sccvn.c (expressions_equal_p): Move to tree.c.
> > * tree-ssa-sccvn.h (expressions_equal_p): Move declaration to tree.h.
> > * tree.c (expressions_equal_p): Move from tree-ssa-sccvn.c.
> > (simple_type_size_in_bits): Move from dwarf2out.c.
> > * tree.h (expressions_equal_p): Add declaration.
> > (field_byte_offset): Add declaration.
> >
> > Patch -
> >
> > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > index a2e3f7a..54aa8e7 100644
> > --- a/gcc/Makefile.in
> > +++ b/gcc/Makefile.in
> > @@ -3847,6 +3847,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h
> > $(srcdir)/coretypes.h \
> > $(srcdir)/asan.c \
> > $(srcdir)/ubsan.c \
> > $(srcdir)/tsan.c $(srcdir)/ipa-devirt.c \
> > + $(srcdir)/tree-sra.c \
> > @all_gtfiles@
> >
> > # Compute the list of GT header files from the corresponding C sources,
> > diff --git a/gcc/common.opt b/gcc/common.opt
> > index 202e169..afac514 100644
> > --- a/gcc/common.opt
> > +++ b/gcc/common.opt
> > @@ -2164,6 +2164,10 @@ ftree-sra
> > Common Report Var(flag_tree_sra) Optimization
> > Perform scalar replacement of aggregates
> >
> > +ftree-bitfield-merge
> > +Common Report Var(flag_tree_bitfield_merge) Init(0) Optimization
> > +Enable bit-field merge on trees
> > +
>
> Drop the tree- prefix for new options; it doesn't tell users anything.
> I suggest
>
> fmerge-bitfields
> Common Report Var(flag_tree_bitfield_merge) Init(0) Optimization
> Merge loads and stores of consecutive bitfields
>
> and adjust the docs with regard to the flag name change, of course.
>
> > ftree-ter
> > Common Report Var(flag_tree_ter) Optimization
> > Replace temporary expressions in the SSA->normal pass
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index aa0f4ed..e588cae 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -412,7 +412,7 @@ Objective-C and Objective-C++ Dialects}.
> > -fsplit-ivs-in-unroller -fsplit-wide-types -fstack-protector @gol
> > -fstack-protector-all -fstack-protector-strong -fstrict-aliasing @gol
> > -fstrict-overflow -fthread-jumps -ftracer -ftree-bit-ccp @gol
> > --ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
> > +-ftree-bitfield-merge -ftree-builtin-call-dce -ftree-ccp -ftree-ch @gol
> > -ftree-coalesce-inline-vars -ftree-coalesce-vars -ftree-copy-prop @gol
> > -ftree-copyrename -ftree-dce -ftree-dominator-opts -ftree-dse @gol
> > -ftree-forwprop -ftree-fre -ftree-loop-if-convert @gol
> > @@ -7679,6 +7679,11 @@ pointer alignment information.
> > This pass only operates on local scalar variables and is enabled by default
> > at @option{-O} and higher. It requires that @option{-ftree-ccp} is enabled.
> >
> > +@item -ftree-bitfield-merge
> > +@opindex ftree-bitfield-merge
> > +Combines several adjacent bit-field accesses that copy values
> > +from one memory location to another into a single bit-field access.
> > +
> > @item -ftree-ccp
> > @opindex ftree-ccp
> > Perform sparse conditional constant propagation (CCP) on trees. This
> > diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
> > index 95049e4..e74db17 100644
> > --- a/gcc/dwarf2out.c
> > +++ b/gcc/dwarf2out.c
> > @@ -3108,8 +3108,6 @@ static HOST_WIDE_INT ceiling (HOST_WIDE_INT,
> > unsigned int);
> > static tree field_type (const_tree);
> > static unsigned int simple_type_align_in_bits (const_tree);
> > static unsigned int simple_decl_align_in_bits (const_tree);
> > -static unsigned HOST_WIDE_INT simple_type_size_in_bits (const_tree);
> > -static HOST_WIDE_INT field_byte_offset (const_tree);
> > static void add_AT_location_description (dw_die_ref, enum dwarf_attribute,
> > dw_loc_list_ref);
> > static void add_data_member_location_attribute (dw_die_ref, tree);
> > @@ -10149,25 +10147,6 @@ is_base_type (tree type)
> > return 0;
> > }
> >
> > -/* Given a pointer to a tree node, assumed to be some kind of a ..._TYPE
> > - node, return the size in bits for the type if it is a constant, or else
> > - return the alignment for the type if the type's size is not constant, or
> > - else return BITS_PER_WORD if the type actually turns out to be an
> > - ERROR_MARK node. */
> > -
> > -static inline unsigned HOST_WIDE_INT
> > -simple_type_size_in_bits (const_tree type)
> > -{
> > - if (TREE_CODE (type) == ERROR_MARK)
> > - return BITS_PER_WORD;
> > - else if (TYPE_SIZE (type) == NULL_TREE)
> > - return 0;
> > - else if (host_integerp (TYPE_SIZE (type), 1))
> > - return tree_low_cst (TYPE_SIZE (type), 1);
> > - else
> > - return TYPE_ALIGN (type);
> > -}
> > -
> > /* Similarly, but return a double_int instead of UHWI. */
> >
> > static inline double_int
> > @@ -14521,7 +14500,7 @@ round_up_to_align (double_int t, unsigned int align)
> > because the offset is actually variable. (We can't handle the latter case
> > just yet). */
> >
> > -static HOST_WIDE_INT
> > +HOST_WIDE_INT
> > field_byte_offset (const_tree decl)
> > {
> > double_int object_offset_in_bits;
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitfldmrg1.c b/gcc/testsuite/gcc.dg/tree-ssa/bitfldmrg1.c
> > new file mode 100644
> > index 0000000..e9e96b7
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitfldmrg1.c
> > @@ -0,0 +1,30 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -ftree-bitfield-merge -fdump-tree-esra" } */
> > +
> > +struct S
> > +{
> > + unsigned f1:7;
> > + unsigned f2:9;
> > + unsigned f3:3;
> > + unsigned f4:5;
> > + unsigned f5:1;
> > + unsigned f6:2;
> > +};
> > +
> > +unsigned
> > +foo (struct S *p1, struct S *p2, int *ptr)
> > +{
> > + p2->f1 = p1->f1;
> > + p2->f2 = p1->f2;
> > + p2->f3 = p1->f3;
> > + *ptr = 7;
> > + p2->f4 = p1->f4;
> > + p2->f5 = p1->f5;
> > + p2->f6 = p1->f6;
> > + return 0;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump "19" "esra" } } */
> > +/* { dg-final { scan-tree-dump "8" "esra"} } */
>
> That's an awfully unspecific dump scan ;) Please make it more
> specific and add a comment saying what code generation you expect.
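>
> Something like this, say (a sketch - the exact patterns depend on how
> the merged accesses show up in the esra dump):
>
> /* The two copy groups should become one 19-bit (7+9+3) and one
>    8-bit (5+1+2) access.  */
> /* { dg-final { scan-tree-dump "<unnamed-unsigned:19>" "esra" } } */
> /* { dg-final { scan-tree-dump "<unnamed-unsigned:8>" "esra" } } */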
>
> > +/* { dg-final { cleanup-tree-dump "esra" } } */
> > +
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitfldmrg2.c b/gcc/testsuite/gcc.dg/tree-ssa/bitfldmrg2.c
> > new file mode 100644
> > index 0000000..653e904
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/bitfldmrg2.c
> > @@ -0,0 +1,90 @@
> > +/* Check whether use of -ftree-bitfield-merge in the presence of
> > + overlapping unions results in incorrect code. */
> > +/* { dg-options "-O2 -ftree-bitfield-merge" } */
> > +/* { dg-do run } */
> > +#include <stdio.h>
> > +extern void abort (void);
> > +
> > +struct s1
> > +{
> > + unsigned f1:4;
> > + unsigned f2:4;
> > + unsigned f3:4;
> > +};
> > +
> > +struct s2
> > +{
> > + unsigned char c;
> > + unsigned f1:4;
> > + unsigned f2:4;
> > + unsigned f3:4;
> > +};
> > +
> > +struct s3
> > +{
> > + unsigned f1:3;
> > + unsigned f2:3;
> > + unsigned f3:3;
> > +};
> > +
> > +struct s4
> > +{
> > + unsigned f0:3;
> > + unsigned f1:3;
> > + unsigned f2:3;
> > + unsigned f3:3;
> > +};
> > +
> > +union un_1
> > +{
> > + struct s1 a;
> > + struct s2 b;
> > +};
> > +
> > +union un_2
> > +{
> > + struct s3 a;
> > + struct s4 b;
> > +};
> > +
> > +void f1 (union un_1 *p1, union un_1 *p2)
> > +{
> > + p2->a.f3 = p1->b.f3;
> > + p2->a.f2 = p1->b.f2;
> > + p2->a.f1 = p1->b.f1;
> > +
> > + if (p1->b.f1 != 3)
> > + abort ();
> > +}
> > +
> > +void f2 (union un_2 *p1, union un_2 *p2)
> > +{
> > + p2->b.f1 = p1->a.f1;
> > + p2->b.f2 = p1->a.f2;
> > + p2->b.f3 = p1->a.f3;
> > +
> > + if (p2->b.f1 != 0 || p2->b.f2 != 0 || p2->b.f3 != 0)
> > + abort ();
> > +}
> > +
> > +int main ()
> > +{
> > + union un_1 u1;
> > + union un_2 u2;
> > +
> > + u1.b.f1 = 1;
> > + u1.b.f2 = 2;
> > + u1.b.f3 = 3;
> > + u1.b.c = 0;
> > +
> > + f1 (&u1, &u1);
> > +
> > + u2.b.f0 = 0;
> > + u2.b.f1 = 1;
> > + u2.b.f2 = 2;
> > + u2.b.f3 = 3;
> > +
> > + f2 (&u2, &u2);
> > +
> > + return 0;
> > +}
> > diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> > index 58c7565..610245b 100644
> > --- a/gcc/tree-sra.c
> > +++ b/gcc/tree-sra.c
> > @@ -92,6 +92,7 @@ along with GCC; see the file COPYING3. If not see
> > #include "gimple-pretty-print.h"
> > #include "ipa-inline.h"
> > #include "ipa-utils.h"
> > +#include "ggc.h"
>
> You shouldn't need that I think.
>
> > /* Enumeration of all aggregate reductions we can do. */
> > enum sra_mode { SRA_MODE_EARLY_IPA, /* early call regularization */
> > @@ -3424,12 +3425,476 @@ perform_intra_sra (void)
> > return ret;
> > }
> >
> > +/* This optimization combines several adjacent bit-field accesses that copy
> > + values from one memory location to another into one single bit-field
> > + access. */
> > +
> > +/* Data for single bit-field read/write sequence. */
> > +struct GTY (()) bitfield_access_d {
> > + gimple load_stmt; /* Bit-field load statement. */
> > + gimple store_stmt; /* Bit-field store statement. */
> > + unsigned src_offset_words; /* Bit-field offset at src in words. */
> > + unsigned src_bit_offset; /* Bit-field offset inside source word. */
> > + unsigned src_bit_size; /* Size of bit-field in source word. */
> > + unsigned dst_offset_words; /* Bit-field offset at dst in words. */
> > + unsigned dst_bit_offset; /* Bit-field offset inside destination
> > + word. */
> > + unsigned src_field_offset; /* Source field offset. */
> > + unsigned dst_bit_size; /* Size of bit-field in destination word. */
> > + tree src_addr; /* Address of source memory access. */
> > + tree dst_addr; /* Address of destination memory access. */
> > + bool merged; /* True if access is merged with another
> > + one. */
> > + bool modified; /* True if bit-field size is modified. */
> > + bool is_barrier; /* True if access is barrier (call or mem
> > + access). */
> > + struct bitfield_access_d *next; /* Access with which this one is merged. */
> > + tree bitfield_type; /* Field type. */
> > + tree bitfield_representative; /* Bit-field representative of original
> > + declaration. */
> > + tree field_decl_context; /* Context of original bit-field
> > + declaration. */
> > +};
> > +
> > +typedef struct bitfield_access_d bitfield_access_o;
> > +typedef struct bitfield_access_d *bitfield_access;
> > +
> > +/* Connecting register with bit-field access sequence that defines value in
> > + that register. */
> > +struct GTY (()) bitfield_stmt_access_pair_d
> > +{
> > + gimple stmt;
> > + bitfield_access access;
> > +};
> > +
> > +typedef struct bitfield_stmt_access_pair_d bitfield_stmt_access_pair_o;
> > +typedef struct bitfield_stmt_access_pair_d *bitfield_stmt_access_pair;
> > +
> > +static GTY ((param_is (struct bitfield_stmt_access_pair_d)))
> > + htab_t bitfield_stmt_access_htab;
>
> Nor does this need to be registered with the garbage collector. Its
> lifetime is not greater than that of the SRA pass, right?
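>
> I.e. a plain hashtable would do (a sketch, assuming the lifetime
> really is pass-local and the entries are allocated with XCNEW
> instead of ggc_alloc):
>
>   bitfield_stmt_access_htab
>     = htab_create (128, bitfield_stmt_access_pair_htab_hash,
>                    bitfield_stmt_access_pair_htab_eq, free);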
>
> > +/* Hash table callbacks for bitfield_stmt_access_htab. */
> > +
> > +static hashval_t
> > +bitfield_stmt_access_pair_htab_hash (const void *p)
> > +{
> > + const bitfield_stmt_access_pair entry = (const bitfield_stmt_access_pair)p;
> > + return (hashval_t) (uintptr_t) entry->stmt;
> > +}
> > +
> > +static int
> > +bitfield_stmt_access_pair_htab_eq (const void *p1, const void *p2)
> > +{
> > + const struct bitfield_stmt_access_pair_d *entry1 =
> > + (const bitfield_stmt_access_pair)p1;
> > + const struct bitfield_stmt_access_pair_d *entry2 =
> > + (const bitfield_stmt_access_pair)p2;
> > + return entry1->stmt == entry2->stmt;
> > +}
> > +
> > +/* Counter used for generating unique names for new fields. */
> > +static unsigned new_field_no;
>
> Must be stale...?
>
> > +/* Compare two bit-field access records. */
> > +
> > +static int
> > +cmp_access (const void *p1, const void *p2)
> > +{
> > + const bitfield_access a1 = (*(const bitfield_access*)p1);
> > + const bitfield_access a2 = (*(const bitfield_access*)p2);
> > +
> > + if (a1->bitfield_representative - a2->bitfield_representative)
> > + return a1->bitfield_representative - a2->bitfield_representative;
>
> Subtracting two unrelated pointers is undefined - I suggest you
> use DECL_UID (a1->bitfield_representative).
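>
> E.g. (a sketch):
>
>   if (DECL_UID (a1->bitfield_representative)
>       != DECL_UID (a2->bitfield_representative))
>     return (DECL_UID (a1->bitfield_representative)
>             < DECL_UID (a2->bitfield_representative)) ? -1 : 1;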
>
> > + if (!expressions_equal_p (a1->src_addr, a2->src_addr))
> > + return a1 - a2;
>
> Same - the comparison ends up depending on memory layout, that's bad.
>
> > +
> > + if (!expressions_equal_p (a1->dst_addr, a2->dst_addr))
> > + return a1 - a2;
> > + if (a1->src_offset_words - a2->src_offset_words)
> > + return a1->src_offset_words - a2->src_offset_words;
> > +
> > + return a1->src_bit_offset - a2->src_bit_offset;
> > +}
> > +
> > +/* Create a new bit-field access structure and add it to the given
> > + bitfield_accesses vector. */
> > +
> > +static bitfield_access
> > +create_and_insert_access (vec<bitfield_access>
> > + *bitfield_accesses)
> > +{
> > + bitfield_access access = ggc_alloc_bitfield_access_d ();
> > + memset (access, 0, sizeof (struct bitfield_access_d));
> > + bitfield_accesses->safe_push (access);
> > + return access;
> > +}
> > +
> > +/* Slightly modified add_bit_offset_attribute from dwarf2out.c. */
> > +
> > +static inline HOST_WIDE_INT
> > +get_bit_offset (tree decl)
> > +{
> > + tree type = DECL_BIT_FIELD_TYPE (decl);
> > + HOST_WIDE_INT bitpos_int;
> > +
> > + /* Must be a field and a bit-field. */
> > + gcc_assert (type && TREE_CODE (decl) == FIELD_DECL);
> > + /* Bit position and decl size should be integer constants that can be
> > + represented in a single HOST_WIDE_INT. */
> > + if (! host_integerp (bit_position (decl), 0)
> > + || ! host_integerp (DECL_SIZE (decl), 1))
> > + return -1;
> > +
> > + bitpos_int = int_bit_position (decl);
> > + return bitpos_int;
> > +}
> > +
> > +/* Return the size of the combined bit-fields. The size cannot be larger
> > + than the size of the largest directly accessible memory unit. */
> > +
> > +static int
> > +get_merged_bit_field_size (bitfield_access access)
> > +{
> > + bitfield_access tmp_access = access;
> > + int size = 0;
> > +
> > + while (tmp_access)
> > + {
> > + size += tmp_access->src_bit_size;
> > + tmp_access = tmp_access->next;
> > + }
> > + return size;
> > +}
> > +
> > +/* Add a new pair consisting of a statement and the bit-field access
> > + structure that contains it. */
> > +
> > +static bool add_stmt_access_pair (bitfield_access access, gimple stmt)
> > +{
> > + bitfield_stmt_access_pair new_pair;
> > + void **slot;
> > + if (!bitfield_stmt_access_htab)
> > + bitfield_stmt_access_htab =
> > + htab_create_ggc (128, bitfield_stmt_access_pair_htab_hash,
> > + bitfield_stmt_access_pair_htab_eq, NULL);
> > + new_pair = ggc_alloc_bitfield_stmt_access_pair_o ();
> > + new_pair->stmt = stmt;
> > + new_pair->access = access;
> > + slot = htab_find_slot (bitfield_stmt_access_htab, new_pair, INSERT);
> > + if (*slot == HTAB_EMPTY_ENTRY)
> > + {
> > + *slot = new_pair;
> > + return true;
> > + }
> > + return false;
> > +}
> > +
> > +/* Return true if the given COMPONENT_REF is part of a union. */
> > +
> > +static bool part_of_union_p (tree component)
> > +{
> > + tree tmp = component;
> > + bool res = false;
> > + while (TREE_CODE (tmp) == COMPONENT_REF)
> > + {
> > + if (TREE_CODE (TREE_TYPE (tmp)) == UNION_TYPE)
> > + {
> > + res = true;
> > + break;
> > + }
> > + tmp = TREE_OPERAND (tmp, 0);
> > + }
> > + if (tmp && (TREE_CODE (TREE_TYPE (tmp)) == UNION_TYPE))
> > + res = true;
> > + return res;
> > +}
> > +
> > +/* Main entry point for the bit-field merge optimization. */
>
> Ok, I'm skipping to here (I was wondering what you need all the above
> functions for)
>
> > +static unsigned int
> > +ssa_bitfield_merge (void)
> > +{
> > + basic_block bb;
> > + unsigned int todoflags = 0;
> > + vec<bitfield_access> bitfield_accesses;
> > + int ix, iy;
> > + bitfield_access access;
> > + bool cfg_changed = false;
> > +
> > + /* In the strict volatile bitfields case, doing code changes here may prevent
> > + other optimizations, in particular in a SLOW_BYTE_ACCESS setting. */
> > + if (flag_strict_volatile_bitfields > 0)
> > + return 0;
>
> Hmm, I'm not sure we should care ... - but well, I don't care ;)
>
> > + FOR_EACH_BB (bb)
> > + {
> > + gimple_stmt_iterator gsi;
> > + vec<bitfield_access> bitfield_accesses_merge = vNULL;
> > + tree prev_representative = NULL_TREE;
> > + bitfield_accesses.create (0);
> > +
> > + /* Identify all bitfield copy sequences in the basic-block. */
> > + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
> > + {
> > + gimple stmt = gsi_stmt (gsi);
> > + tree lhs, rhs;
> > + void **slot;
> > + struct bitfield_stmt_access_pair_d asdata;
> > +
> > + if (!is_gimple_assign (stmt))
>
> Instead of checking TREE_THIS_VOLATILE below check here
> || gimple_has_volatile_ops (stmt)
>
> also you can narrow the assigns to visit by checking for
>
> !gimple_assign_single_p (stmt)
>
> instead
>
> > + {
> > + gsi_next (&gsi);
> > + continue;
> > + }
> > +
> > + lhs = gimple_assign_lhs (stmt);
> > + rhs = gimple_assign_rhs1 (stmt);
> > +
> > + if (TREE_CODE (rhs) == COMPONENT_REF)
> > + {
> > + use_operand_p use;
> > + gimple use_stmt;
> > + tree op0 = TREE_OPERAND (rhs, 0);
> > + tree op1 = TREE_OPERAND (rhs, 1);
> > +
> > + if (TREE_CODE (op1) == FIELD_DECL && DECL_BIT_FIELD_TYPE (op1)
>
> op1 is always a FIELD_DECL
>
> > + && !TREE_THIS_VOLATILE (op1) && !part_of_union_p (rhs))
>
> I wonder what's the issue with unions ... sure, for
>
> union {
> int field1 : 3;
> int field2 : 2;
> };
>
> but for
>
> union {
> struct {
> int field1 : 3;
> int field2 : 2;
> } a;
> ...
> };
>
> ? Thus I'd simply check
>
> && TREE_CODE (DECL_CONTEXT (op1)) != UNION_TYPE
> && TREE_CODE (DECL_CONTEXT (op1)) != QUAL_UNION_TYPE
>
> maybe introduce
>
> #define UNION_TYPE_P(TYPE) \
> (TREE_CODE (TYPE) == UNION_TYPE \
> || TREE_CODE (TYPE) == QUAL_UNION_TYPE)
>
> alongside RECORD_OR_UNION_TYPE_P in tree.h.
>
> > + {
> > + if (single_imm_use (lhs, &use, &use_stmt)
> > + && is_gimple_assign (use_stmt))
> > + {
> > + tree use_lhs = gimple_assign_lhs (use_stmt);
> > + if (TREE_CODE (use_lhs) == COMPONENT_REF)
>
> I'm not sure I follow the logic here, but it seems that you match
> a very specific pattern only - a bitfield copy.
>
> I'd have applied the lowering (this is really a lowering pass
> with a cost model you make up here) if the same
> DECL_BIT_FIELD_REPRESENTATIVE is used more than once in a BB
> on the same underlying base object. That is, we are reasonably
> sure we can remove a redundant load and eventually redundant stores.
>
> Thus, very simplistically, you'd just record an op0,
> DECL_BIT_FIELD_REPRESENTATIVE pair into the hashtable, using
> iterative_hash_expr for op0 and DECL_UID for the representative,
> counting the number of times you see them. Make sure to apply
> the same for stores to bitfields, of course.
>
> > + {
> > + tree use_op0 = TREE_OPERAND (use_lhs, 0);
> > + tree use_op1 = TREE_OPERAND (use_lhs, 1);
> > + tree tmp_repr = DECL_BIT_FIELD_REPRESENTATIVE (op1);
> > +
> > + if (TREE_CODE (use_op1) == FIELD_DECL
> > + && DECL_BIT_FIELD_TYPE (use_op1)
> > + && !TREE_THIS_VOLATILE (use_op1))
> > + {
> > + if (prev_representative
> > + && (prev_representative != tmp_repr))
> > + {
> > + /* If previous access has different
> > + representative then barrier is needed
> > + between it and new access. */
> > + access = create_and_insert_access
> > + (&bitfield_accesses);
> > + access->is_barrier = true;
> > + }
> > + prev_representative = tmp_repr;
> > + /* Create new bit-field access structure. */
> > + access = create_and_insert_access
> > + (&bitfield_accesses);
> > + /* Collect access data - load instruction. */
> > + access->src_bit_size = tree_low_cst
> > + (DECL_SIZE (op1), 1);
> > + access->src_bit_offset = get_bit_offset (op1);
> > + access->src_offset_words =
> > + field_byte_offset (op1) / UNITS_PER_WORD;
> > + access->src_field_offset =
> > + tree_low_cst (DECL_FIELD_OFFSET (op1),1);
> > + access->src_addr = op0;
> > + access->load_stmt = gsi_stmt (gsi);
> > + /* Collect access data - store instruction. */
> > + access->dst_bit_size =
> > + tree_low_cst (DECL_SIZE (use_op1), 1);
> > + access->dst_bit_offset =
> > + get_bit_offset (use_op1);
> > + access->dst_offset_words =
> > + field_byte_offset (use_op1) / UNITS_PER_WORD;
> > + access->dst_addr = use_op0;
> > + access->store_stmt = use_stmt;
> > + add_stmt_access_pair (access, stmt);
> > + add_stmt_access_pair (access, use_stmt);
> > + access->bitfield_type
> > + = DECL_BIT_FIELD_TYPE (use_op1);
> > + access->bitfield_representative = tmp_repr;
> > + access->field_decl_context =
> > + DECL_FIELD_CONTEXT (op1);
> > + }
> > + }
> > + }
> > + }
> > + }
> > +
> > + /* Insert barrier for merging if statement is function call or memory
> > + access. */
> > + if (bitfield_stmt_access_htab)
> > + {
> > + asdata.stmt = stmt;
> > + slot
> > + = htab_find_slot (bitfield_stmt_access_htab, &asdata,
> > + NO_INSERT);
> > + if (!slot
> > + && ((gimple_code (stmt) == GIMPLE_CALL)
> > + || (gimple_has_mem_ops (stmt))))
> > + {
> > + /* Create new bit-field access structure. */
> > + access = create_and_insert_access (&bitfield_accesses);
> > + /* Mark it as barrier. */
> > + access->is_barrier = true;
> > + }
> > + }
> > + gsi_next (&gsi);
> > + }
> > +
> > + /* If there are fewer than two accesses, go to the next basic block. */
> > + if (bitfield_accesses.length () <= 1)
> > + {
> > + bitfield_accesses.release ();
> > + continue;
> > + }
> > +
> > + /* Find bit-field accesses that can be merged. */
> > + for (ix = 0; bitfield_accesses.iterate (ix, &access); ix++)
> > + {
> > + bitfield_access head_access;
> > + bitfield_access mrg_access;
> > + bitfield_access prev_access;
> > + if (!bitfield_accesses_merge.exists ())
> > + bitfield_accesses_merge.create (0);
> > +
> > + bitfield_accesses_merge.safe_push (access);
> > +
> > + if (!access->is_barrier
> > + && !(access == bitfield_accesses.last ()
> > + && !bitfield_accesses_merge.is_empty ()))
> > + continue;
> > +
> > + bitfield_accesses_merge.qsort (cmp_access);
> > +
> > + head_access = prev_access = NULL;
> > + for (iy = 0; bitfield_accesses_merge.iterate (iy, &mrg_access); iy++)
> > + {
> > + if (head_access
> > + && expressions_equal_p (head_access->src_addr,
> > + mrg_access->src_addr)
> > + && expressions_equal_p (head_access->dst_addr,
> > + mrg_access->dst_addr)
> > + && prev_access->src_offset_words
> > + == mrg_access->src_offset_words
> > + && prev_access->dst_offset_words
> > + == mrg_access->dst_offset_words
> > + && prev_access->src_bit_offset + prev_access->src_bit_size
> > + == mrg_access->src_bit_offset
> > + && prev_access->dst_bit_offset + prev_access->dst_bit_size
> > + == mrg_access->dst_bit_offset
> > + && prev_access->bitfield_representative
> > + == mrg_access->bitfield_representative)
> > + {
> > + /* Merge conditions are satisfied - merge accesses. */
> > + mrg_access->merged = true;
> > + prev_access->next = mrg_access;
> > + head_access->modified = true;
> > + prev_access = mrg_access;
> > + }
> > + else
> > + head_access = prev_access = mrg_access;
> > + }
> > + bitfield_accesses_merge.release ();
> > + bitfield_accesses_merge = vNULL;
> > + }
> > +
> > + /* Modify generated code. */
>
> Ick, so you are actually applying an optimization instead of just
> lowering the accesses to DECL_BIT_FIELD_REPRESENTATIVE accesses
> and letting CSE and DSE do their job. Hmm. I don't think this is
> a good idea.
>
> Instead what I'd like to see is more something like the following
> (bah, I never merged BIT_FIELD_EXPR which performs the bit-field-insert
> in a single stmt); quickly hacked together, without a cost model,
> just the lowering piece (likely not endian clean - but who knows).
>
> Index: gcc/tree-sra.c
> ===================================================================
> --- gcc/tree-sra.c (revision 204561)
> +++ gcc/tree-sra.c (working copy)
> @@ -3445,10 +3445,121 @@ perform_intra_sra (void)
> return ret;
> }
>
> +static void
> +lower_bitfields (void)
> +{
> + basic_block bb;
> + FOR_EACH_BB (bb)
> + for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
> + !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> + gimple stmt = gsi_stmt (gsi);
> + if (!gimple_assign_single_p (stmt)
> + || gimple_has_volatile_ops (stmt))
> + continue;
> +
> + /* Lower a bitfield read. */
> + tree ref = gimple_assign_rhs1 (stmt);
> + if (TREE_CODE (ref) == COMPONENT_REF
> + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (ref, 1))
> + && DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1)))
> + {
> + tree field = TREE_OPERAND (ref, 1);
> + tree rep = DECL_BIT_FIELD_REPRESENTATIVE (field);
> + unsigned HOST_WIDE_INT off;
> + if (host_integerp (DECL_FIELD_OFFSET (field), 1)
> + && host_integerp (DECL_FIELD_OFFSET (rep), 1))
> + off = (tree_low_cst (DECL_FIELD_OFFSET (field), 1)
> + - tree_low_cst (DECL_FIELD_OFFSET (rep), 1)) * BITS_PER_UNIT;
> + else
> + off = 0;
> + off += (tree_low_cst (DECL_FIELD_BIT_OFFSET (field), 1)
> + - tree_low_cst (DECL_FIELD_BIT_OFFSET (rep), 1));
> + tree loadres = make_ssa_name (TREE_TYPE (rep), NULL);
> + gimple load
> + = gimple_build_assign (loadres,
> + build3 (COMPONENT_REF, TREE_TYPE (rep),
> + TREE_OPERAND (ref, 0), rep,
> + NULL_TREE));
> + gimple_set_vuse (load, gimple_vuse (stmt));
> + gsi_insert_before (&gsi, load, GSI_SAME_STMT);
> + gimple_assign_set_rhs1 (stmt,
> + build3 (BIT_FIELD_REF, TREE_TYPE (ref),
> + loadres,
> + DECL_SIZE (field),
> + bitsize_int (off)));
> + update_stmt (stmt);
> + }
> + ref = gimple_assign_lhs (stmt);
> + if (TREE_CODE (ref) == COMPONENT_REF
> + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (ref, 1))
> + && DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1)))
> + {
> + tree field = TREE_OPERAND (ref, 1);
> + tree rep = DECL_BIT_FIELD_REPRESENTATIVE (field);
> + unsigned HOST_WIDE_INT off;
> + if (host_integerp (DECL_FIELD_OFFSET (field), 1)
> + && host_integerp (DECL_FIELD_OFFSET (rep), 1))
> + off = (tree_low_cst (DECL_FIELD_OFFSET (field), 1)
> + - tree_low_cst (DECL_FIELD_OFFSET (rep), 1)) * BITS_PER_UNIT;
> + else
> + off = 0;
> + off += (tree_low_cst (DECL_FIELD_BIT_OFFSET (field), 1)
> + - tree_low_cst (DECL_FIELD_BIT_OFFSET (rep), 1));
> + tree loadres = make_ssa_name (TREE_TYPE (rep), NULL);
> + gimple load
> + = gimple_build_assign (loadres,
> + build3 (COMPONENT_REF, TREE_TYPE (rep),
> + unshare_expr (TREE_OPERAND (ref, 0)),
> + rep,
> + NULL_TREE));
> + gimple_set_vuse (load, gimple_vuse (stmt));
> + gsi_insert_before (&gsi, load, GSI_SAME_STMT);
> + /* FIXME: BIT_FIELD_EXPR. */
> + /* Mask out bits. */
> + tree masked = make_ssa_name (TREE_TYPE (rep), NULL);
> + gimple tems
> + = gimple_build_assign_with_ops (BIT_AND_EXPR,
> + masked, loadres,
> + double_int_to_tree (TREE_TYPE (rep),
> + double_int::mask (TREE_INT_CST_LOW (DECL_SIZE (field))).lshift (off)));
> + gsi_insert_before (&gsi, tems, GSI_SAME_STMT);
> + /* Zero-extend the value to representative size. */
> + tree tem2 = make_ssa_name (unsigned_type_for (TREE_TYPE (field)), NULL);
> + tems = gimple_build_assign_with_ops (NOP_EXPR, tem2,
> + gimple_assign_rhs1 (stmt),
> + NULL_TREE);
> + gsi_insert_before (&gsi, tems, GSI_SAME_STMT);
> + tree tem = make_ssa_name (TREE_TYPE (rep), NULL);
> + tems = gimple_build_assign_with_ops (NOP_EXPR, tem, tem2, NULL_TREE);
> + gsi_insert_before (&gsi, tems, GSI_SAME_STMT);
> + /* Shift the value into place. */
> + tem2 = make_ssa_name (TREE_TYPE (rep), NULL);
> + tems = gimple_build_assign_with_ops (LSHIFT_EXPR, tem2, tem,
> + size_int (off));
> + gsi_insert_before (&gsi, tems, GSI_SAME_STMT);
> + /* Merge masked loaded value and value. */
> + tree modres = make_ssa_name (TREE_TYPE (rep), NULL);
> + gimple mod
> + = gimple_build_assign_with_ops (BIT_IOR_EXPR, modres,
> + masked, tem2);
> + gsi_insert_before (&gsi, mod, GSI_SAME_STMT);
> + /* Finally adjust the store. */
> + gimple_assign_set_rhs1 (stmt, modres);
> + gimple_assign_set_lhs (stmt,
> + build3 (COMPONENT_REF, TREE_TYPE (rep),
> + TREE_OPERAND (ref, 0), rep,
> + NULL_TREE));
> + update_stmt (stmt);
> + }
> + }
> +}
> +
> /* Perform early intraprocedural SRA. */
> static unsigned int
> early_intra_sra (void)
> {
> + lower_bitfields ();
> sra_mode = SRA_MODE_EARLY_INTRA;
> return perform_intra_sra ();
> }
>
>
> The idea is that this lowering then makes ESRA able to handle
> it (you can see that for example in the result from
> gcc.c-torture/execute/20000113-1.c which is miscompiled by
> the above, eh ... what did I say about not testing it??).
>
> I'll try to get the above working and cleaned up a bit today
> and maybe early next week. So stay tuned, I'll hand it over
> to make you test if it works as advertised for you.
Just forgot to invert the mask. So, the simple cost model I
suggested plus the lowering I suggested shows that for your
copying testcase we fail to optimize this because of aliasing
issues ... we get
_18 = p1_2(D)->D.1790;
_3 = BIT_FIELD_REF <_18, 7, 0>;
_19 = p2_4(D)->D.1790;
_20 = _19 & 4294967168;
_21 = (unsigned int) _3;
_22 = _21 | _20;
p2_4(D)->D.1790 = _22;
_23 = p1_2(D)->D.1790;
_6 = BIT_FIELD_REF <_23, 9, 7>;
_24 = _22 & 4294901887;
_25 = (unsigned int) _6;
_26 = _25 << 7;
_27 = _24 | _26;
p2_4(D)->D.1790 = _27;
and because p1 and p2 may alias, the now bigger memory accesses
conflict - the reload of p1's representative word after the store
through p2 cannot be eliminated.
Interesting issue ... ;)
Code generation quality with lowering to shifts and masks for bitfield
writes is another issue (BIT_FIELD_EXPR, or BIT_FIELD_COMPOSE as
I renamed it IIRC, would somewhat make that easier to fix).
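
For reference, per the tree.def comment in the attached patch,
BIT_FIELD_COMPOSE <word, value, bitsize, bitpos> yields word with the
bitsize bits at bitpos replaced by value - roughly the following
shift-and-mask sequence (a sketch in C, assuming an unsigned word
type and bitsize less than the word's precision):

  /* Hypothetical C equivalent of
     BIT_FIELD_COMPOSE <word, value, bitsize, bitpos>.  */
  mask = (((unsigned HOST_WIDE_INT) 1 << bitsize) - 1) << bitpos;
  res = (word & ~mask)
        | (((unsigned HOST_WIDE_INT) value << bitpos) & mask);

This is also what fold_quaternary_loc in the patch expands it to for
(partially) constant operands.
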
Updated and somewhat tested patch (with cost model as I proposed)
below. Old BIT_FIELD_COMPOSE patch attached.
Richard.
Index: gcc/tree-sra.c
===================================================================
--- gcc/tree-sra.c (revision 204561)
+++ gcc/tree-sra.c (working copy)
@@ -3445,10 +3445,279 @@ perform_intra_sra (void)
return ret;
}
+/* Bitfield access and hashtable support commoning same base and
+ representative. */
+
+struct bfaccess
+{
+ bfaccess (tree r) : ref (r), count (1) {}
+
+ tree ref;
+ unsigned count;
+
+ /* hash_table support */
+ typedef bfaccess value_type;
+ typedef bfaccess compare_type;
+ static inline hashval_t hash (const bfaccess *);
+ static inline int equal (const bfaccess*, const bfaccess *);
+ static inline void remove (bfaccess*);
+};
+
+hashval_t
+bfaccess::hash (const bfaccess *a)
+{
+ return iterative_hash_hashval_t
+ (iterative_hash_expr (TREE_OPERAND (a->ref, 0), 0),
+ DECL_UID (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (a->ref, 1))));
+}
+
+int
+bfaccess::equal (const bfaccess *a, const bfaccess *b)
+{
+ return ((DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (a->ref, 1))
+ == DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (b->ref, 1)))
+ && operand_equal_p (TREE_OPERAND (a->ref, 0),
+ TREE_OPERAND (b->ref, 0), 0));
+}
+
+void
+bfaccess::remove (bfaccess *a)
+{
+ delete a;
+}
+
+/* Return whether REF is a bitfield access, storing the bit offset of the
+ bitfield within the representative in *OFF if that is not NULL. */
+
+static bool
+bitfield_access_p (tree ref, unsigned HOST_WIDE_INT *off)
+{
+ if (TREE_CODE (ref) != COMPONENT_REF)
+ return false;
+
+ tree field = TREE_OPERAND (ref, 1);
+ if (!DECL_BIT_FIELD_TYPE (field))
+ return false;
+
+ tree rep = DECL_BIT_FIELD_REPRESENTATIVE (field);
+ if (!rep)
+ return false;
+
+ if (!off)
+ return true;
+
+ if (host_integerp (DECL_FIELD_OFFSET (field), 1)
+ && host_integerp (DECL_FIELD_OFFSET (rep), 1))
+ *off = (tree_low_cst (DECL_FIELD_OFFSET (field), 1)
+ - tree_low_cst (DECL_FIELD_OFFSET (rep), 1)) * BITS_PER_UNIT;
+ else
+ *off = 0;
+ *off += (tree_low_cst (DECL_FIELD_BIT_OFFSET (field), 1)
+ - tree_low_cst (DECL_FIELD_BIT_OFFSET (rep), 1));
+
+ return true;
+}
+
+
+/* Lower the bitfield read at *GSI; the offset of the bitfield
+ relative to the bitfield representative is OFF bits. */
+
+static void
+lower_bitfield_read (gimple_stmt_iterator *gsi, unsigned HOST_WIDE_INT off)
+{
+ gimple stmt = gsi_stmt (*gsi);
+ tree ref = gimple_assign_rhs1 (stmt);
+ tree field = TREE_OPERAND (ref, 1);
+ tree rep = DECL_BIT_FIELD_REPRESENTATIVE (field);
+
+ tree loadres = make_ssa_name (TREE_TYPE (rep), NULL);
+ gimple load
+ = gimple_build_assign (loadres,
+ build3 (COMPONENT_REF, TREE_TYPE (rep),
+ TREE_OPERAND (ref, 0), rep,
+ NULL_TREE));
+ gimple_set_vuse (load, gimple_vuse (stmt));
+ gsi_insert_before (gsi, load, GSI_SAME_STMT);
+ gimple_assign_set_rhs1 (stmt,
+ build3 (BIT_FIELD_REF, TREE_TYPE (ref),
+ loadres,
+ DECL_SIZE (field),
+ bitsize_int (off)));
+ update_stmt (stmt);
+}
+
+/* Lower the bitfield write at *GSI; the offset of the bitfield
+ relative to the bitfield representative is OFF bits. */
+
+static void
+lower_bitfield_write (gimple_stmt_iterator *gsi, unsigned HOST_WIDE_INT off)
+{
+ gimple stmt = gsi_stmt (*gsi);
+ tree ref = gimple_assign_lhs (stmt);
+ tree field = TREE_OPERAND (ref, 1);
+ tree rep = DECL_BIT_FIELD_REPRESENTATIVE (field);
+
+ tree loadres = make_ssa_name (TREE_TYPE (rep), NULL);
+ gimple load
+ = gimple_build_assign (loadres,
+ build3 (COMPONENT_REF, TREE_TYPE (rep),
+ unshare_expr
+ (TREE_OPERAND (ref, 0)),
+ rep,
+ NULL_TREE));
+ gimple_set_vuse (load, gimple_vuse (stmt));
+ gsi_insert_before (gsi, load, GSI_SAME_STMT);
+ /* FIXME: BIT_FIELD_EXPR. */
+ /* Mask out bits. */
+ tree masked = make_ssa_name (TREE_TYPE (rep), NULL);
+ tree mask
+ = double_int_to_tree (TREE_TYPE (rep),
+ ~double_int::mask
+ (TREE_INT_CST_LOW (DECL_SIZE (field)))
+ .lshift (off));
+ gimple tems
+ = gimple_build_assign_with_ops (BIT_AND_EXPR,
+ masked, loadres, mask);
+ gsi_insert_before (gsi, tems, GSI_SAME_STMT);
+ /* Zero-extend the value to representative size. */
+ tree tem2;
+ if (!TYPE_UNSIGNED (TREE_TYPE (field)))
+ {
+ tem2 = make_ssa_name (unsigned_type_for (TREE_TYPE (field)),
+ NULL);
+ tems = gimple_build_assign_with_ops (NOP_EXPR, tem2,
+ gimple_assign_rhs1 (stmt),
+ NULL_TREE);
+ gsi_insert_before (gsi, tems, GSI_SAME_STMT);
+ }
+ else
+ tem2 = gimple_assign_rhs1 (stmt);
+ tree tem = make_ssa_name (TREE_TYPE (rep), NULL);
+ tems = gimple_build_assign_with_ops (NOP_EXPR, tem,
+ tem2, NULL_TREE);
+ gsi_insert_before (gsi, tems, GSI_SAME_STMT);
+ /* Shift the value into place. */
+ if (off != 0)
+ {
+ tem2 = make_ssa_name (TREE_TYPE (rep), NULL);
+ tems = gimple_build_assign_with_ops (LSHIFT_EXPR, tem2, tem,
+ size_int (off));
+ gsi_insert_before (gsi, tems, GSI_SAME_STMT);
+ }
+ else
+ tem2 = tem;
+ /* Merge masked loaded value and value. */
+ tree modres = make_ssa_name (TREE_TYPE (rep), NULL);
+ gimple mod
+ = gimple_build_assign_with_ops (BIT_IOR_EXPR, modres,
+ masked, tem2);
+ gsi_insert_before (gsi, mod, GSI_SAME_STMT);
+ /* Finally adjust the store. */
+ gimple_assign_set_rhs1 (stmt, modres);
+ gimple_assign_set_lhs (stmt,
+ build3 (COMPONENT_REF, TREE_TYPE (rep),
+ TREE_OPERAND (ref, 0), rep,
+ NULL_TREE));
+ update_stmt (stmt);
+}
+
+/* Lower bitfield accesses to accesses of their
+ DECL_BIT_FIELD_REPRESENTATIVE. */
+
+static void
+lower_bitfields (bool all)
+{
+ basic_block bb;
+
+ hash_table <bfaccess> bf;
+ bf.create (1);
+
+ FOR_EACH_BB (bb)
+ {
+ bool any = false;
+ bf.empty ();
+
+ /* We do two passes, the first one identifying interesting
+ bitfield accesses and the second one actually lowering them. */
+ if (!all)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!gimple_assign_single_p (stmt)
+ || gimple_has_volatile_ops (stmt))
+ continue;
+
+ tree ref = gimple_assign_rhs1 (stmt);
+ if (bitfield_access_p (ref, NULL))
+ {
+ bfaccess a(ref);
+ bfaccess **slot = bf.find_slot (&a, INSERT);
+ if (*slot)
+ (*slot)->count++;
+ else
+ *slot = new bfaccess(a);
+ if ((*slot)->count > 1)
+ any = true;
+ }
+
+ ref = gimple_assign_lhs (stmt);
+ if (bitfield_access_p (ref, NULL))
+ {
+ bfaccess a(ref);
+ bfaccess **slot = bf.find_slot (&a, INSERT);
+ if (*slot)
+ (*slot)->count++;
+ else
+ *slot = new bfaccess(a);
+ if ((*slot)->count > 1)
+ any = true;
+ }
+ }
+
+ if (!all && !any)
+ continue;
+
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (!gimple_assign_single_p (stmt)
+ || gimple_has_volatile_ops (stmt))
+ continue;
+
+ tree ref;
+ unsigned HOST_WIDE_INT off;
+
+ /* Lower a bitfield read. */
+ ref = gimple_assign_rhs1 (stmt);
+ if (bitfield_access_p (ref, &off))
+ {
+ bfaccess a(ref);
+ bfaccess *aa = bf.find (&a);
+ if (all || (aa->count > 1))
+ lower_bitfield_read (&gsi, off);
+ }
+ /* Lower a bitfield write to a read-modify-write cycle. */
+ ref = gimple_assign_lhs (stmt);
+ if (bitfield_access_p (ref, &off))
+ {
+ bfaccess a(ref);
+ bfaccess *aa = bf.find (&a);
+ if (all || (aa->count > 1))
+ lower_bitfield_write (&gsi, off);
+ }
+ }
+ }
+
+ bf.dispose ();
+}
+
/* Perform early intraprocedural SRA. */
static unsigned int
early_intra_sra (void)
{
+ lower_bitfields (false);
sra_mode = SRA_MODE_EARLY_INTRA;
return perform_intra_sra ();
}
-------------- next part --------------
2011-06-16 Richard Guenther <rguenther@suse.de>
* expr.c (expand_expr_real_1): Handle BIT_FIELD_COMPOSE.
* fold-const.c (operand_equal_p): Likewise.
(build_bit_mask): New function.
(fold_quaternary_loc): Likewise.
(fold): Call it.
(fold_build4_stat_loc): New function.
* gimplify.c (gimplify_expr): Handle BIT_FIELD_COMPOSE.
* tree-inline.c (estimate_operator_cost): Likewise.
* tree-pretty-print.c (dump_generic_node): Likewise.
* tree-ssa-operands.c (get_expr_operands): Likewise.
* tree.def (BIT_FIELD_COMPOSE): New tree code.
* tree.h (build_bit_mask): Declare.
(fold_quaternary): Define.
(fold_quaternary_loc): Declare.
(fold_build4): Define.
(fold_build4_loc): Likewise.
(fold_build4_stat_loc): Declare.
* gimple.c (gimple_rhs_class_table): Handle BIT_FIELD_COMPOSE.
Index: trunk/gcc/expr.c
===================================================================
*** trunk.orig/gcc/expr.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/expr.c 2011-07-05 14:15:25.000000000 +0200
*************** expand_expr_real_1 (tree exp, rtx target
*** 8693,8698 ****
--- 8693,8710 ----
return expand_constructor (exp, target, modifier, false);
+ case BIT_FIELD_COMPOSE:
+ {
+ unsigned bitpos = (unsigned) TREE_INT_CST_LOW (TREE_OPERAND (exp, 3));
+ unsigned bitsize = (unsigned) TREE_INT_CST_LOW (TREE_OPERAND (exp, 2));
+ rtx op0 = expand_normal (TREE_OPERAND (exp, 0));
+ rtx op1 = expand_normal (TREE_OPERAND (exp, 1));
+ rtx dst = gen_reg_rtx (mode);
+ emit_move_insn (dst, op0);
+ store_bit_field (dst, bitsize, bitpos, mode, op1);
+ return dst;
+ }
+
case TARGET_MEM_REF:
{
addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (exp));
Index: trunk/gcc/fold-const.c
===================================================================
*** trunk.orig/gcc/fold-const.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/fold-const.c 2011-07-05 14:15:25.000000000 +0200
*************** operand_equal_p (const_tree arg0, const_
*** 2667,2672 ****
--- 2667,2675 ----
case DOT_PROD_EXPR:
return OP_SAME (0) && OP_SAME (1) && OP_SAME (2);
+ case BIT_FIELD_COMPOSE:
+ return OP_SAME (0) && OP_SAME (1) && OP_SAME (2) && OP_SAME (3);
+
default:
return 0;
}
*************** contains_label_p (tree st)
*** 13236,13241 ****
--- 13239,13257 ----
(walk_tree_without_duplicates (&st, contains_label_1 , NULL) != NULL_TREE);
}
+ /* Builds and returns a mask of integral type TYPE for masking out
+ BITSIZE bits at bit position BITPOS in a word of type TYPE.
+ The mask has the bits set from bit BITPOS to BITPOS + BITSIZE - 1. */
+
+ tree
+ build_bit_mask (tree type, unsigned int bitsize, unsigned int bitpos)
+ {
+ tree mask = double_int_to_tree (type, double_int_mask (bitsize));
+ mask = const_binop (LSHIFT_EXPR, mask, size_int (bitpos));
+
+ return mask;
+ }
+
/* Fold a ternary expression of code CODE and type TYPE with operands
OP0, OP1, and OP2. Return the folded expression if folding is
successful. Otherwise, return NULL_TREE. */
*************** fold_ternary_loc (location_t loc, enum t
*** 13573,13578 ****
--- 13589,13606 ----
}
}
+ /* Perform constant folding. */
+ if (TREE_CODE (op0) == INTEGER_CST)
+ {
+ unsigned bitpos = (unsigned) TREE_INT_CST_LOW (op2);
+ unsigned bitsize = (unsigned) TREE_INT_CST_LOW (op1);
+ double_int res;
+ res = double_int_rshift (tree_to_double_int (op0), bitpos,
+ HOST_BITS_PER_DOUBLE_INT, false);
+ res = double_int_ext (res, bitsize, TYPE_UNSIGNED (type));
+ return double_int_to_tree (type, res);
+ }
+
/* A bit-field-ref that referenced the full argument can be stripped. */
if (INTEGRAL_TYPE_P (TREE_TYPE (arg0))
&& TYPE_PRECISION (TREE_TYPE (arg0)) == tree_low_cst (arg1, 1)
*************** fold_ternary_loc (location_t loc, enum t
*** 13597,13602 ****
--- 13625,13701 ----
} /* switch (code) */
}
+ /* Fold a quaternary expression of code CODE and type TYPE with operands
+ OP0, OP1, OP2, and OP3. Return the folded expression if folding is
+ successful. Otherwise, return NULL_TREE. */
+
+ tree
+ fold_quaternary_loc (location_t loc, enum tree_code code, tree type,
+ tree op0, tree op1, tree op2, tree op3)
+ {
+ tree arg0 = NULL_TREE, arg1 = NULL_TREE;
+ enum tree_code_class kind = TREE_CODE_CLASS (code);
+
+ gcc_assert (IS_EXPR_CODE_CLASS (kind)
+ && TREE_CODE_LENGTH (code) == 4);
+
+ /* Strip any conversions that don't change the mode. This is safe
+ for every expression, except for a comparison expression because
+ its signedness is derived from its operands. So, in the latter
+ case, only strip conversions that don't change the signedness.
+
+ Note that this is done as an internal manipulation within the
+ constant folder, in order to find the simplest representation of
+ the arguments so that their form can be studied. In any cases,
+ the appropriate type conversions should be put back in the tree
+ that will get out of the constant folder. */
+ if (op0)
+ {
+ arg0 = op0;
+ STRIP_NOPS (arg0);
+ }
+
+ if (op1)
+ {
+ arg1 = op1;
+ STRIP_NOPS (arg1);
+ }
+
+ switch (code)
+ {
+ case BIT_FIELD_COMPOSE:
+ /* Perform (partial) constant folding of BIT_FIELD_COMPOSE. */
+ if (TREE_CODE (arg0) == INTEGER_CST
+ || TREE_CODE (arg1) == INTEGER_CST)
+ {
+ unsigned bitpos = (unsigned) TREE_INT_CST_LOW (op3);
+ unsigned bitsize = (unsigned) TREE_INT_CST_LOW (op2);
+ tree bits, mask;
+ /* build a mask to mask/clear the bits in the word. */
+ mask = build_bit_mask (type, bitsize, bitpos);
+ /* extend the bits to the word type, shift them to the right
+ place and mask the bits. */
+ bits = fold_convert_loc (loc, type, arg1);
+ bits = fold_build2_loc (loc, BIT_AND_EXPR, type,
+ fold_build2_loc (loc, LSHIFT_EXPR, type,
+ bits, size_int (bitpos)),
+ mask);
+ /* switch to clear mask and do the composition. */
+ mask = fold_build1_loc (loc, BIT_NOT_EXPR, type, mask);
+ return fold_build2_loc (loc, BIT_IOR_EXPR, type,
+ fold_build2_loc (loc, BIT_AND_EXPR, type,
+ fold_convert (type, arg0),
+ mask),
+ bits);
+ }
+
+ return NULL_TREE;
+
+ default:
+ return NULL_TREE;
+ }
+ }
+
/* Perform constant folding and related simplification of EXPR.
The related simplifications include x*1 => x, x*0 => 0, etc.,
and application of the associative law.
*************** fold (tree expr)
*** 13638,13644 ****
if (IS_EXPR_CODE_CLASS (kind))
{
tree type = TREE_TYPE (t);
! tree op0, op1, op2;
switch (TREE_CODE_LENGTH (code))
{
--- 13737,13743 ----
if (IS_EXPR_CODE_CLASS (kind))
{
tree type = TREE_TYPE (t);
! tree op0, op1, op2, op3;
switch (TREE_CODE_LENGTH (code))
{
*************** fold (tree expr)
*** 13657,13662 ****
--- 13756,13768 ----
op2 = TREE_OPERAND (t, 2);
tem = fold_ternary_loc (loc, code, type, op0, op1, op2);
return tem ? tem : expr;
+ case 4:
+ op0 = TREE_OPERAND (t, 0);
+ op1 = TREE_OPERAND (t, 1);
+ op2 = TREE_OPERAND (t, 2);
+ op3 = TREE_OPERAND (t, 3);
+ tem = fold_quaternary_loc (loc, code, type, op0, op1, op2, op3);
+ return tem ? tem : expr;
default:
break;
}
*************** fold_build3_stat_loc (location_t loc, en
*** 14113,14118 ****
--- 14219,14310 ----
return tem;
}
+ /* Fold a quaternary tree expression with code CODE of type TYPE with
+ operands OP0, OP1, OP2, and OP3. Return a folded expression if
+ successful. Otherwise, return a tree expression with code CODE of
+ type TYPE with operands OP0, OP1, OP2, and OP3. */
+
+ tree
+ fold_build4_stat_loc (location_t loc, enum tree_code code, tree type,
+ tree op0, tree op1, tree op2, tree op3 MEM_STAT_DECL)
+ {
+ tree tem;
+ #ifdef ENABLE_FOLD_CHECKING
+ unsigned char checksum_before_op0[16],
+ checksum_before_op1[16],
+ checksum_before_op2[16],
+ checksum_before_op3[16],
+ checksum_after_op0[16],
+ checksum_after_op1[16],
+ checksum_after_op2[16],
+ checksum_after_op3[16];
+ struct md5_ctx ctx;
+ htab_t ht;
+
+ ht = htab_create (32, htab_hash_pointer, htab_eq_pointer, NULL);
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op0, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_before_op0);
+ htab_empty (ht);
+
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op1, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_before_op1);
+ htab_empty (ht);
+
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op2, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_before_op2);
+ htab_empty (ht);
+
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op3, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_before_op3);
+ htab_empty (ht);
+ #endif
+
+ gcc_assert (TREE_CODE_CLASS (code) != tcc_vl_exp);
+ tem = fold_quaternary_loc (loc, code, type, op0, op1, op2, op3);
+ if (!tem)
+ tem = build4_stat_loc (loc, code, type, op0, op1, op2, op3 PASS_MEM_STAT);
+
+ #ifdef ENABLE_FOLD_CHECKING
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op0, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_after_op0);
+ htab_empty (ht);
+
+ if (memcmp (checksum_before_op0, checksum_after_op0, 16))
+ fold_check_failed (op0, tem);
+
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op1, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_after_op1);
+ htab_empty (ht);
+
+ if (memcmp (checksum_before_op1, checksum_after_op1, 16))
+ fold_check_failed (op1, tem);
+
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op2, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_after_op2);
+ htab_empty (ht);
+
+ if (memcmp (checksum_before_op2, checksum_after_op2, 16))
+ fold_check_failed (op2, tem);
+
+ md5_init_ctx (&ctx);
+ fold_checksum_tree (op3, &ctx, ht);
+ md5_finish_ctx (&ctx, checksum_after_op3);
+ htab_delete (ht);
+
+ if (memcmp (checksum_before_op3, checksum_after_op3, 16))
+ fold_check_failed (op3, tem);
+ #endif
+ return tem;
+ }
+
+
/* Fold a CALL_EXPR expression of type TYPE with operands FN and NARGS
arguments in ARGARRAY, and a null static chain.
Return a folded expression if successful. Otherwise, return a CALL_EXPR
Index: trunk/gcc/gimplify.c
===================================================================
*** trunk.orig/gcc/gimplify.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/gimplify.c 2011-07-05 14:15:25.000000000 +0200
*************** gimplify_expr (tree *expr_p, gimple_seq
*** 7239,7244 ****
--- 7239,7248 ----
/* Classified as tcc_expression. */
goto expr_3;
+ case BIT_FIELD_COMPOSE:
+ /* Arguments 3 and 4 are constants. */
+ goto expr_2;
+
case POINTER_PLUS_EXPR:
/* Convert ((type *)A)+offset into &A->field_of_type_and_offset.
The second is gimple immediate saving a need for extra statement.
Index: trunk/gcc/tree-inline.c
===================================================================
*** trunk.orig/gcc/tree-inline.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/tree-inline.c 2011-07-05 14:15:25.000000000 +0200
*************** estimate_operator_cost (enum tree_code c
*** 3414,3419 ****
--- 3414,3423 ----
return weights->div_mod_cost;
return 1;
+ /* Bit-field insertion needs several shift and mask operations. */
+ case BIT_FIELD_COMPOSE:
+ return 3;
+
default:
/* We expect a copy assignment with no operator. */
gcc_assert (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS);
Index: trunk/gcc/tree-pretty-print.c
===================================================================
*** trunk.orig/gcc/tree-pretty-print.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/tree-pretty-print.c 2011-07-05 14:15:25.000000000 +0200
*************** dump_generic_node (pretty_printer *buffe
*** 1217,1222 ****
--- 1217,1234 ----
pp_string (buffer, ">");
break;
+ case BIT_FIELD_COMPOSE:
+ pp_string (buffer, "BIT_FIELD_COMPOSE <");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 3), spc, flags, false);
+ pp_string (buffer, ">");
+ break;
+
case ARRAY_REF:
case ARRAY_RANGE_REF:
op0 = TREE_OPERAND (node, 0);
Index: trunk/gcc/tree-ssa-operands.c
===================================================================
*** trunk.orig/gcc/tree-ssa-operands.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/tree-ssa-operands.c 2011-07-05 14:15:25.000000000 +0200
*************** get_expr_operands (gimple stmt, tree *ex
*** 974,979 ****
--- 974,984 ----
get_expr_operands (stmt, &TREE_OPERAND (expr, 0), flags);
return;
+ case BIT_FIELD_COMPOSE:
+ gcc_assert (TREE_CODE (TREE_OPERAND (expr, 2)) == INTEGER_CST
+ && TREE_CODE (TREE_OPERAND (expr, 3)) == INTEGER_CST);
+ /* Fallthru. */
+
case TRUTH_AND_EXPR:
case TRUTH_OR_EXPR:
case TRUTH_XOR_EXPR:
Index: trunk/gcc/tree.def
===================================================================
*** trunk.orig/gcc/tree.def 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/tree.def 2011-07-05 14:15:25.000000000 +0200
*************** DEFTREECODE (ADDR_EXPR, "addr_expr", tcc
*** 784,789 ****
--- 784,801 ----
descriptor of type ptr_mode. */
DEFTREECODE (FDESC_EXPR, "fdesc_expr", tcc_expression, 2)
+ /* Given a word, a value and a bitfield position and size within
+ the word, produce the value that results if replacing the
+ described parts of word with value.
+ Operand 0 is a tree for the word of integral type;
+ Operand 1 is a tree for the value of integral type;
+ Operand 2 is a tree giving the constant number of bits being
+ referenced which is less or equal to the precision of the value;
+ Operand 3 is a tree giving the constant position of the first referenced
+ bit such that the sum of operands 2 and 3 is less than or equal to the
+ precision of the word. */
+ DEFTREECODE (BIT_FIELD_COMPOSE, "bitfield_compose", tcc_expression, 4)
+
/* Given two real or integer operands of the same type,
returns a complex value of the corresponding complex type. */
DEFTREECODE (COMPLEX_EXPR, "complex_expr", tcc_binary, 2)
Index: trunk/gcc/tree.h
===================================================================
*** trunk.orig/gcc/tree.h 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/tree.h 2011-07-05 14:15:25.000000000 +0200
*************** extern bool is_typedef_decl (tree x);
*** 5140,5145 ****
--- 5140,5146 ----
extern bool typedef_variant_p (tree);
extern bool auto_var_in_fn_p (const_tree, const_tree);
extern tree build_low_bits_mask (tree, unsigned);
+ extern tree build_bit_mask (tree type, unsigned int, unsigned int);
extern tree tree_strip_nop_conversions (tree);
extern tree tree_strip_sign_nop_conversions (tree);
extern tree lhd_gcc_personality (void);
*************** extern tree fold_binary_loc (location_t,
*** 5196,5201 ****
--- 5197,5205 ----
#define fold_ternary(CODE,T1,T2,T3,T4)\
fold_ternary_loc (UNKNOWN_LOCATION, CODE, T1, T2, T3, T4)
extern tree fold_ternary_loc (location_t, enum tree_code, tree, tree, tree, tree);
+ #define fold_quaternary(CODE,T1,T2,T3,T4,T5)\
+ fold_quaternary_loc (UNKNOWN_LOCATION, CODE, T1, T2, T3, T4, T5)
+ extern tree fold_quaternary_loc (location_t, enum tree_code, tree, tree, tree, tree, tree);
#define fold_build1(c,t1,t2)\
fold_build1_stat_loc (UNKNOWN_LOCATION, c, t1, t2 MEM_STAT_INFO)
#define fold_build1_loc(l,c,t1,t2)\
*************** extern tree fold_build2_stat_loc (locati
*** 5214,5219 ****
--- 5218,5229 ----
fold_build3_stat_loc (l, c, t1, t2, t3, t4 MEM_STAT_INFO)
extern tree fold_build3_stat_loc (location_t, enum tree_code, tree, tree, tree,
tree MEM_STAT_DECL);
+ #define fold_build4(c,t1,t2,t3,t4,t5)\
+ fold_build4_stat_loc (UNKNOWN_LOCATION, c, t1, t2, t3, t4, t5 MEM_STAT_INFO)
+ #define fold_build4_loc(l,c,t1,t2,t3,t4,t5)\
+ fold_build4_stat_loc (l, c, t1, t2, t3, t4, t5 MEM_STAT_INFO)
+ extern tree fold_build4_stat_loc (location_t, enum tree_code, tree, tree, tree,
+ tree, tree MEM_STAT_DECL);
extern tree fold_build1_initializer_loc (location_t, enum tree_code, tree, tree);
extern tree fold_build2_initializer_loc (location_t, enum tree_code, tree, tree, tree);
extern tree fold_build3_initializer_loc (location_t, enum tree_code, tree, tree, tree, tree);
Index: trunk/gcc/gimple.c
===================================================================
*** trunk.orig/gcc/gimple.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/gimple.c 2011-07-05 14:15:25.000000000 +0200
*************** get_gimple_rhs_num_ops (enum tree_code c
*** 2623,2629 ****
|| (SYM) == ADDR_EXPR \
|| (SYM) == WITH_SIZE_EXPR \
|| (SYM) == SSA_NAME \
! || (SYM) == VEC_COND_EXPR) ? GIMPLE_SINGLE_RHS \
: GIMPLE_INVALID_RHS),
#define END_OF_BASE_TREE_CODES (unsigned char) GIMPLE_INVALID_RHS,
--- 2623,2630 ----
|| (SYM) == ADDR_EXPR \
|| (SYM) == WITH_SIZE_EXPR \
|| (SYM) == SSA_NAME \
! || (SYM) == VEC_COND_EXPR \
! || (SYM) == BIT_FIELD_COMPOSE) ? GIMPLE_SINGLE_RHS \
: GIMPLE_INVALID_RHS),
#define END_OF_BASE_TREE_CODES (unsigned char) GIMPLE_INVALID_RHS,
Index: trunk/gcc/cfgexpand.c
===================================================================
*** trunk.orig/gcc/cfgexpand.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/cfgexpand.c 2011-07-05 14:15:25.000000000 +0200
*************** expand_debug_expr (tree exp)
*** 3236,3246 ****
case VEC_WIDEN_MULT_LO_EXPR:
return NULL;
! /* Misc codes. */
case ADDR_SPACE_CONVERT_EXPR:
case FIXED_CONVERT_EXPR:
case OBJ_TYPE_REF:
case WITH_SIZE_EXPR:
return NULL;
case DOT_PROD_EXPR:
--- 3236,3247 ----
case VEC_WIDEN_MULT_LO_EXPR:
return NULL;
! /* Misc codes. */
case ADDR_SPACE_CONVERT_EXPR:
case FIXED_CONVERT_EXPR:
case OBJ_TYPE_REF:
case WITH_SIZE_EXPR:
+ case BIT_FIELD_COMPOSE:
return NULL;
case DOT_PROD_EXPR:
Index: trunk/gcc/gimple-fold.c
===================================================================
*** trunk.orig/gcc/gimple-fold.c 2011-07-05 13:39:12.000000000 +0200
--- trunk/gcc/gimple-fold.c 2011-07-05 14:19:14.000000000 +0200
*************** gimple_fold_stmt_to_constant_1 (gimple s
*** 2864,2869 ****
--- 2864,2886 ----
return build_vector (TREE_TYPE (rhs), nreverse (list));
}
+ else if (TREE_CODE (rhs) == BIT_FIELD_COMPOSE)
+ {
+ tree val0 = TREE_OPERAND (rhs, 0);
+ tree val1 = TREE_OPERAND (rhs, 1);
+ if (TREE_CODE (val0) == SSA_NAME)
+ val0 = (*valueize) (val0);
+ if (TREE_CODE (val1) == SSA_NAME)
+ val1 = (*valueize) (val1);
+ if (TREE_CODE (val0) == INTEGER_CST
+ && TREE_CODE (val1) == INTEGER_CST)
+ return fold_quaternary_loc (EXPR_LOCATION (rhs),
+ TREE_CODE (rhs),
+ TREE_TYPE (rhs),
+ val0, val1,
+ TREE_OPERAND (rhs, 2),
+ TREE_OPERAND (rhs, 3));
+ }
if (kind == tcc_reference)
{