Avoid char[] array in tree_def

Jan Hubicka hubicka@ucw.cz
Sat Oct 31 22:35:01 GMT 2020


> On 10/29/20 1:40 PM, Richard Biener wrote:
> > On Thu, 29 Oct 2020, Jakub Jelinek wrote:
> > 
> > > On Thu, Oct 29, 2020 at 05:00:40PM +0100, Jan Hubicka wrote:
> > > > > 
> > > > > That's ugly and will for sure defeat warning / access code
> > > > > when we access this as char[], no?  I mean, we could
> > > > > as well use 'int str[1];' here?
> > > > 
> > > > Well, we always get char pointer via macro that is IMO OK, but I am also
> > > > not very much in love with this.
> > > 
> > > Do we treat signed char [...]; as typeless storage too, or just
> > > what the C++ standard requires (i.e. char, unsigned char and std::byte
> > > where the last one is enum type with unsigned char underlying type)?
> > 
> > All that is covered by is_byte_access_type which includes all
> > character types (including char16_t and wchar it seems) and std::byte.
> 
> Well, that's a bug, apparently from the PR94923 patch (r11-499); previously
> it was char, signed char, and unsigned char.  And even that is too much;
> even C++98 said just char and unsigned char could be used for bytewise
> access.
> 
> When C++17 clarified this with the notion of "provides storage", it applied
> to only unsigned char and std::byte, not even the full set of byte-access
> types.  We still need to allow bytewise access using plain char, but perhaps
> we don't need to treat plain char arrays as typeless.
> 
> Attributes to say that a particular array does or does not provide storage
> for objects of other types do sound useful.

Thanks a lot for explanation!  
I am adding Martin Sebor to CC. Perhaps you can fix the problem with 
std::byte, and signed char to imply typeless storage and ideally also
add an attribute? This seem too deep into C++ FE details for me.

I was thiking a bit more about all accesses to trees getting same alias
set.  This is because we allow type puning through unions.  If our tree
datastructure was a C++ class hiearchy, this would not happen since
accesses to specialized tree node would not be through unions but
through casted pointers.

We could do that with our acessor macros, too.
I tried to turn (NODE)->base in our accesors to ((tree_base *)(node))->base
and this breaks with const_tree pointers. However the following seem to
work and get better TBAA.

I think converting other acessors should be quite easy and make our
predicates more easy to optimize.

What do you think?
Honza

diff --git a/gcc/tree.h b/gcc/tree.h
index 7f0aa5b8d1d..cd8146808f7 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -235,13 +235,24 @@ as_internal_fn (combined_fn code)
 
 #define NULL_TREE (tree) NULL
 
+inline tree_base *
+as_a_tree_base (tree t)
+{
+  return (tree_base *)t;
+}
+inline const tree_base *
+as_a_tree_base (const_tree t)
+{
+  return (const tree_base *)t;
+}
+
 /* Define accessors for the fields that all tree nodes have
    (though some fields are not used for all kinds of nodes).  */
 
 /* The tree-code says what kind of node it is.
    Codes are defined in tree.def.  */
-#define TREE_CODE(NODE) ((enum tree_code) (NODE)->base.code)
-#define TREE_SET_CODE(NODE, VALUE) ((NODE)->base.code = (VALUE))
+#define TREE_CODE(NODE) ((enum tree_code) (as_a_tree_base (NODE)->code))
+#define TREE_SET_CODE(NODE, VALUE) (as_a_tree_base (NODE)->code = (VALUE))
 
 /* When checking is enabled, errors will be generated if a tree node
    is accessed incorrectly. The macros die with a fatal error.  */
@@ -642,7 +653,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
    In IDENTIFIER_NODEs, this means that some extern decl for this name
    had its address taken.  That matters for inline functions.
    In a STMT_EXPR, it means we want the result of the enclosed expression.  */
-#define TREE_ADDRESSABLE(NODE) ((NODE)->base.addressable_flag)
+#define TREE_ADDRESSABLE(NODE) (as_a_tree_base (NODE)->addressable_flag)
 
 /* Set on a CALL_EXPR if the call is in a tail position, ie. just before the
    exit of a function.  Calls for which this is true are candidates for tail
@@ -670,7 +681,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 /* In a VAR_DECL, nonzero means allocate static storage.
    In a FUNCTION_DECL, nonzero if function has been defined.
    In a CONSTRUCTOR, nonzero means allocate static storage.  */
-#define TREE_STATIC(NODE) ((NODE)->base.static_flag)
+#define TREE_STATIC(NODE) (as_a_tree_base (NODE)->static_flag)
 
 /* In an ADDR_EXPR, nonzero means do not use a trampoline.  */
 #define TREE_NO_TRAMPOLINE(NODE) (ADDR_EXPR_CHECK (NODE)->base.static_flag)
@@ -701,7 +712,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
    warnings concerning the decl should be suppressed.  This is used at
    least for used-before-set warnings, and it set after one warning is
    emitted.  */
-#define TREE_NO_WARNING(NODE) ((NODE)->base.nowarning_flag)
+#define TREE_NO_WARNING(NODE) (as_a_tree_base (NODE)->nowarning_flag)
 
 /* Nonzero if we should warn about the change in empty class parameter
    passing ABI in this TU.  */
@@ -744,7 +755,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
    In an IDENTIFIER_NODE, nonzero means an external declaration
    accessible from outside this translation unit was previously seen
    for this name in an inner scope.  */
-#define TREE_PUBLIC(NODE) ((NODE)->base.public_flag)
+#define TREE_PUBLIC(NODE) (as_a_tree_base (NODE)->public_flag)
 
 /* In a _TYPE, indicates whether TYPE_CACHED_VALUES contains a vector
    of cached values, or is something else.  */
@@ -766,7 +777,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
    reference to a volatile variable.  In a ..._DECL, this is set only if the
    declaration said `volatile'.  This will never be set for a constant.  */
 #define TREE_SIDE_EFFECTS(NODE) \
-  (NON_TYPE_CHECK (NODE)->base.side_effects_flag)
+  (as_a_tree_base (NON_TYPE_CHECK (NODE))->side_effects_flag)
 
 /* In a LABEL_DECL, nonzero means this label had its address taken
    and therefore can never be deleted and is a jump target for
@@ -796,7 +807,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
    because eventually we may make that a different bit.
 
    If this bit is set in an expression, so is TREE_SIDE_EFFECTS.  */
-#define TREE_THIS_VOLATILE(NODE) ((NODE)->base.volatile_flag)
+#define TREE_THIS_VOLATILE(NODE) (as_a_tree_base (NODE)->volatile_flag)
 
 /* Nonzero means this node will not trap.  In an INDIRECT_REF, means
    accessing the memory pointed to won't generate a trap.  However,
@@ -877,20 +888,20 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
    In a BLOCK node, nonzero if reorder_blocks has already seen this block.
    In an SSA_NAME node, nonzero if the SSA_NAME occurs in an abnormal
    PHI node.  */
-#define TREE_ASM_WRITTEN(NODE) ((NODE)->base.asm_written_flag)
+#define TREE_ASM_WRITTEN(NODE) (as_a_tree_base (NODE)->asm_written_flag)
 
 /* Nonzero in a _DECL if the name is used in its scope.
    Nonzero in an expr node means inhibit warning if value is unused.
    In IDENTIFIER_NODEs, this means that some extern decl for this name
    was used.
    In a BLOCK, this means that the block contains variables that are used.  */
-#define TREE_USED(NODE) ((NODE)->base.used_flag)
+#define TREE_USED(NODE) (as_a_tree_base (NODE)->used_flag)
 
 /* In a FUNCTION_DECL, nonzero means a call to the function cannot
    throw an exception.  In a CALL_EXPR, nonzero means the call cannot
    throw.  We can't easily check the node type here as the C++
    frontend also uses this flag (for AGGR_INIT_EXPR).  */
-#define TREE_NOTHROW(NODE) ((NODE)->base.nothrow_flag)
+#define TREE_NOTHROW(NODE) (as_a_tree_base (NODE)->nothrow_flag)
 
 /* In a CALL_EXPR, means that it's safe to use the target of the call
    expansion as the return slot for a call that returns in memory.  */
@@ -939,9 +950,9 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
   (CALL_EXPR_CHECK (NODE)->base.protected_flag)
 
 /* Used in classes in C++.  */
-#define TREE_PRIVATE(NODE) ((NODE)->base.private_flag)
+#define TREE_PRIVATE(NODE) (as_a_tree_base (NODE)->private_flag)
 /* Used in classes in C++. */
-#define TREE_PROTECTED(NODE) ((NODE)->base.protected_flag)
+#define TREE_PROTECTED(NODE) (as_a_tree_base (NODE)->protected_flag)
 
 /* True if reference type NODE is a C++ rvalue reference.  */
 #define TYPE_REF_IS_RVALUE(NODE) \
@@ -950,7 +961,7 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
 /* Nonzero in a _DECL if the use of the name is defined as a
    deprecated feature by __attribute__((deprecated)).  */
 #define TREE_DEPRECATED(NODE) \
-  ((NODE)->base.deprecated_flag)
+  (as_a_tree_base (NODE)->deprecated_flag)
 
 /* Nonzero indicates an IDENTIFIER_NODE that names an anonymous
    aggregate, (as created by anon_aggr_name_format).  */
@@ -2170,7 +2181,7 @@ extern tree vector_element_bits_tree (const_tree);
 
 /* Used to keep track of visited nodes in tree traversals.  This is set to
    0 by copy_node and make_node.  */
-#define TREE_VISITED(NODE) ((NODE)->base.visited)
+#define TREE_VISITED(NODE) (as_a_tree_base (NODE)->visited)
 
 /* If set in an ARRAY_TYPE, indicates a string type (for languages
    that distinguish string from array of char).


More information about the Gcc-patches mailing list