commit 7f0e3685900b527b64b81c3b44af36fd9e99bea3 Author: Brian Sobulefsky Date: Tue Feb 9 16:25:43 2021 -0800 Address the bug found in test file gcc/testsuite/gcc.dg/analyzer/casts-1.c, as recorded by the XFAIL, and some simpler and more complex versions found during the investigation of it. PR analyzer/98797 was opened for these bugs. The simplest manifestation of this bug can be replicated with: char arr[] = {'A', 'B', 'C', 'D'}; char *pt = ar; __analyzer_eval(*(pt + 0) == 'A'); __analyzer_eval(*(pt + 1) == 'B'); //etc The result of the eval call is "UNKNOWN" because the analyzer is unable to determine the value at the pointer. Note that array access (such as arr[0]) is correctly handled. The code responsible for this is in file region-model-manager.cc, function region_model_manager::maybe_fold_sub_svalue. The relevant section is commented /* Handle getting individual chars from STRING_CST */. This section only had a case for an element_region. A case needed to be added for an offset_region. Additionally, when the offset was 0, such as in *pt or *(pt + 0), the function region_model_manager::get_offset_region was failing to make the needed offset region at all. This was due to the test for a constant 0 pointer that was then returning get_cast_region. A special case is needed for when PARENT is of type array_type whose type matches TYPE. In this case, get_offset_region is allowed to continue to normal conclusion. The original bug noted in gcc/testsuite/gcc.dg/analyzer/casts-1.c was for the case: struct s1 { char a; char b; char c; char d; }; struct s2 { char arr[4]; }; struct s2 x = {{'A', 'B', 'C', 'D'}}; struct s1 *p = (struct s1 *)&x; __analyzer_eval (p->a == 'A'); //etc This requires a case added to region_model_manager::maybe_fold_sub_svalue in the individual characters from string constant section for a field region. Finally, the prior only works for the case where struct s2 was a single field struct. The more general case is: struct s3 { char arr1[2]; char arr2[2]; }; struct s3 x = {{'A', 'B'}, {'C', 'D'}}; Here the array will never be found in the get_any_binding routines. A new routine is added to class binding_cluster that checks if the region being searched is a field_region of a cast_region, and if so, tries to find a field of the original region that contains the region under examination. This new function is named binding_cluster::get_parent_cast_binding. It is called from get_binding_recursive. gcc/analyzer/ChangeLog: PR analyzer/98797 * region-model-manager.cc (region_model_manager::get_offset_region): Add check for a PARENT array whose type matches TYPE, and have this skip returning get_cast_region and rather conclude the function normally. * region-model-manager.cc (region_model_manager::maybe_fold_sub_svalue): Update the get character from string_cst section to include cases for an offset_region and a field_region. * store.cc (binding_cluster::get_binding_recursive): Add a call to new function get_parent_cast_binding with conditional return of the return value (if non NULL). * store.cc (binding_cluster::get_parent_cast_binding): New function. * store.h (class binding_cluster): Add declaration for new member function get_parent_class_binding. * region.cc (get_field_at_bit_offset): Change to external linkage so it can be used outside the file. * analyzer.h (get_field_at_bit_offset): Add forward declaration. * analyzer.h (maybe_convert_bit_to_byte_offset): Add forward declaration of new function. * analyzer.cc (maybe_convert_bit_to_byte_offset): New function. gcc/testsuite/ChangeLog: PR analyzer/98797 * gcc.dg/analyzer/casts-1.c: Update file to no longer expect failures and add test cases for additional bugs solved. diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc index df8d881f3cd..60888958e51 100644 --- a/gcc/analyzer/analyzer.cc +++ b/gcc/analyzer/analyzer.cc @@ -60,6 +60,13 @@ get_stmt_location (const gimple *stmt, function *fun) return stmt->location; } +extern tree maybe_convert_bit_to_byte_offset (HOST_WIDE_INT offset_bits) +{ + if (offset_bits % BITS_PER_UNIT) + return NULL_TREE; + return build_int_cst (integer_type_node, offset_bits / BITS_PER_UNIT); +} + } // namespace ana /* Helper function for checkers. Is the CALL to the given function name, diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index f50ac662516..2c1e4562184 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -198,6 +198,11 @@ public: virtual logger *get_logger () const = 0; }; +extern tree +get_field_at_bit_offset (tree record_type, bit_offset_t bit_offset); + +extern tree maybe_convert_bit_to_byte_offset (HOST_WIDE_INT offset_bits); + } // namespace ana extern bool is_special_named_call_p (const gcall *call, const char *funcname, diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index dfd2413e914..bf0b5966e1b 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -601,16 +601,46 @@ region_model_manager::maybe_fold_sub_svalue (tree type, /* Handle getting individual chars from a STRING_CST. */ if (tree cst = parent_svalue->maybe_get_constant ()) - if (TREE_CODE (cst) == STRING_CST) - if (const element_region *element_reg - = subregion->dyn_cast_element_region ()) + { + bit_offset_t subreg_bit_size; + if (TREE_CODE (cst) == STRING_CST + && subregion->get_bit_size (&subreg_bit_size) + && subreg_bit_size.to_shwi () == sizeof(char) * BITS_PER_UNIT) { - const svalue *idx_sval = element_reg->get_index (); - if (tree cst_idx = idx_sval->maybe_get_constant ()) - if (const svalue *char_sval - = maybe_get_char_from_string_cst (cst, cst_idx)) - return get_or_create_cast (type, char_sval); + if (const element_region *element_reg + = subregion->dyn_cast_element_region ()) + { + const svalue *idx_sval = element_reg->get_index (); + if (tree cst_idx = idx_sval->maybe_get_constant ()) + if (const svalue *char_sval + = maybe_get_char_from_string_cst (cst, cst_idx)) + return get_or_create_cast (type, char_sval); + } + else if (const offset_region *offset_reg + = subregion->dyn_cast_offset_region ()) + { + const svalue *offset_sval = offset_reg->get_byte_offset (); + if (tree cst_offset = offset_sval->maybe_get_constant ()) + if (const svalue *char_sval + = maybe_get_char_from_string_cst (cst, cst_offset)) + return get_or_create_cast (type, char_sval); + } + else if (const field_region *field_reg + = subregion->dyn_cast_field_region ()) + { + if (!(field_reg->get_offset ().symbolic_p ())) + { + HOST_WIDE_INT field_offset_bits + = int_bit_position (field_reg->get_field ()); + if (tree cst_offset + = maybe_convert_bit_to_byte_offset (field_offset_bits)) + if (const svalue *char_sval + = maybe_get_char_from_string_cst (cst, cst_offset)) + return get_or_create_cast (type, char_sval); + } + } } + } /* SUB(INIT(r)).FIELD -> INIT(r.FIELD) i.e. @@ -867,8 +897,25 @@ region_model_manager::get_offset_region (const region *parent, { /* If BYTE_OFFSET is zero, return PARENT. */ if (tree cst_offset = byte_offset->maybe_get_constant ()) - if (zerop (cst_offset)) - return get_cast_region (parent, type); + { + if (zerop (cst_offset)) + { + /* Special case: PARENT type is array_type whose type matches TYPE + In this case, we want to access array element 0 no differently + than any other element. We also make sure byte offset has an + integer constant */ + if (parent->get_type () + && TREE_CODE(parent->get_type ()) == ARRAY_TYPE + && TREE_TYPE(parent->get_type ()) == type) + { + tree offset_int + = build_int_cst (integer_type_node, 0); + byte_offset = get_or_create_constant_svalue (offset_int); + } + else + return get_cast_region (parent, type); + } + } /* Fold OFFSET_REGION(OFFSET_REGION(REG, X), Y) to OFFSET_REGION(REG, (X + Y)). */ diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 6db1fc91afd..39ffd7518ef 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -224,7 +224,7 @@ region::get_bit_size (bit_size_t *out) const /* Get the field within RECORD_TYPE at BIT_OFFSET. */ -static tree +tree get_field_at_bit_offset (tree record_type, bit_offset_t bit_offset) { gcc_assert (TREE_CODE (record_type) == RECORD_TYPE); diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index abdb336da91..2990c61105b 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -996,14 +996,117 @@ binding_cluster::get_binding_recursive (store_manager *mgr, return sval; if (reg != m_base_region) if (const region *parent_reg = reg->get_parent_region ()) - if (const svalue *parent_sval - = get_binding_recursive (mgr, parent_reg, kind)) + { + if (const svalue *parent_sval + = get_binding_recursive (mgr, parent_reg, kind)) + { + /* Extract child svalue from parent svalue. */ + region_model_manager *rmm_mgr = mgr->get_svalue_manager (); + return rmm_mgr->get_or_create_sub_svalue (reg->get_type (), + parent_sval, reg); + } + else if (const svalue *parent_cast_field_sval + = get_parent_cast_binding (mgr, reg, kind)) + { + /* PARENT_REG is a cast_region and we found a covering binding + in the original_region */ + return parent_cast_field_sval; + } + } + return NULL; +} + +/* Get a binding for access to a field of a cast_region. + Note the original motivation for this was access of the form: + + struct s3 x = {{'A', 'B'}; {'C', 'D'}}; + struct s2 *p = (struct s2) &x; + __analyzer_eval (p->a == 'A'); + + et al. for the other fields (see analyzer/casts-1.c in the testsuite). + Access using get_binding_recursive fails because it is fundamentally + unidirectional (from the field_region p->a up the chain of parents). This + routine can probably be expanded, and even further broken out, as other + cases are discovered and understood. */ +const svalue * +binding_cluster::get_parent_cast_binding (store_manager *mgr, + const region *reg, + enum binding_kind kind) const +{ + + const region *parent_reg = reg->get_parent_region (); + + if(!parent_reg) + return NULL; + + const cast_region *parent_cast = parent_reg->dyn_cast_cast_region (); + const field_region *field_reg = reg->dyn_cast_field_region (); + + /* Subroutine only for a field_region whose parent is a cast_region */ + if (!(parent_cast && field_reg)) + return NULL; + + const region *original_reg = parent_cast->get_original_region (); + region_offset reg_offset = field_reg->get_offset (); + bit_size_t reg_bit_size; + tree original_tree = original_reg->get_type (); + + /* Generic guard for missing type, non record type (should not really happen), + symbolic regions, and unknown bit size */ + if (original_tree + && TREE_CODE(original_tree) == RECORD_TYPE + && !reg_offset.symbolic_p() + && field_reg->get_bit_size(®_bit_size)) + { + + bit_offset_t reg_bit_offset = reg_offset.get_bit_offset (); + HOST_WIDE_INT start_offset = reg_bit_offset.get_val ()[0]; + HOST_WIDE_INT end_offset = start_offset + reg_bit_size.get_val ()[0] - 1; + tree covering_field = NULL_TREE; + + covering_field = get_field_at_bit_offset (original_tree, start_offset); + + if (covering_field + && DECL_SIZE(covering_field) /* tree-core.h says may be null */ + && int_bit_position (covering_field) <= start_offset + && end_offset <= int_bit_position (covering_field) + + wi::to_offset (DECL_SIZE(covering_field)).to_shwi () -1) { - /* Extract child svalue from parent svalue. */ + /* We found a field that entirely covers REG. The following code + should probably be generalized to more cases, as of now it will + basically handle the case where a char array has been initialized + into the original struct type. Further work might be to handle when + a field to a struct is itself a struct, which is likely recursive.*/ + region_model_manager *rmm_mgr = mgr->get_svalue_manager (); - return rmm_mgr->get_or_create_sub_svalue (reg->get_type (), - parent_sval, reg); + const region *covering_field_reg = rmm_mgr + ->get_field_region (original_reg, covering_field); + + if (const svalue *parent_sval = get_binding_recursive (mgr, + covering_field_reg, kind)) + { + + HOST_WIDE_INT covering_field_offset + = int_bit_position (covering_field); + + tree cst_offset + = maybe_convert_bit_to_byte_offset (start_offset + - covering_field_offset); + if (TREE_CODE(covering_field_reg->get_type()) == ARRAY_TYPE + && cst_offset) + { + const svalue *arr_index = rmm_mgr + ->get_or_create_constant_svalue (cst_offset); + const region *elmt_reg = rmm_mgr + ->get_element_region (covering_field_reg, + TREE_TYPE (covering_field_reg->get_type ()), + arr_index); + return rmm_mgr->get_or_create_sub_svalue ( + elmt_reg->get_type (), parent_sval, elmt_reg); + } + } } + } return NULL; } diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index 2bcef6c398a..9c6838ab3cb 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -416,6 +416,9 @@ public: const svalue *get_binding_recursive (store_manager *mgr, const region *reg, enum binding_kind kind) const; + const svalue *get_parent_cast_binding (store_manager *mgr, + const region *reg, + enum binding_kind kind) const; const svalue *get_any_binding (store_manager *mgr, const region *reg) const; const svalue *maybe_get_compound_binding (store_manager *mgr, diff --git a/gcc/testsuite/gcc.dg/analyzer/casts-1.c b/gcc/testsuite/gcc.dg/analyzer/casts-1.c index 15cd85f77cf..be6043405bc 100644 --- a/gcc/testsuite/gcc.dg/analyzer/casts-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/casts-1.c @@ -38,12 +38,46 @@ void test_2 () __analyzer_eval (x.arr[2] == 'C'); /* { dg-warning "TRUE" } */ __analyzer_eval (x.arr[3] == 'D'); /* { dg-warning "TRUE" } */ struct s1 *p = (struct s1 *)&x; - __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */ - /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */ - __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */ - /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */ - __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */ - /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */ - __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */ - /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */ + __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */ + + __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */ + + __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */ + + __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */ + +} + +struct s3 +{ + char arr1[2]; + char arr2[2]; +}; + +void test_3 () +{ + struct s3 x = {{'A', 'B'}, {'C', 'D'}}; + __analyzer_eval (x.arr1[0] == 'A'); /* { dg-warning "TRUE" } */ + __analyzer_eval (x.arr1[1] == 'B'); /* { dg-warning "TRUE" } */ + __analyzer_eval (x.arr2[0] == 'C'); /* { dg-warning "TRUE" } */ + __analyzer_eval (x.arr2[1] == 'D'); /* { dg-warning "TRUE" } */ + struct s1 *p = (struct s1 *)&x; + __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */ + __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */ + __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */ + __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */ +} + +void test_4 () +{ + char ar[] = {'A', 'B', 'C', 'D'}; + char *pt = ar; + __analyzer_eval (ar[0]== 'A'); /* { dg-warning "TRUE" } */ + __analyzer_eval (ar[1]== 'B'); /* { dg-warning "TRUE" } */ + __analyzer_eval (ar[2]== 'C'); /* { dg-warning "TRUE" } */ + __analyzer_eval (ar[3]== 'D'); /* { dg-warning "TRUE" } */ + __analyzer_eval (*(pt + 0) == 'A'); /* { dg-warning "TRUE" } */ + __analyzer_eval (*(pt + 1) == 'B'); /* { dg-warning "TRUE" } */ + __analyzer_eval (*(pt + 2) == 'C'); /* { dg-warning "TRUE" } */ + __analyzer_eval (*(pt + 3) == 'D'); /* { dg-warning "TRUE" } */ }