GCC successfully folds some simple calls to strlen with constant arguments but not others, both with and without optimization. The results with optimization are somewhat better but still not as good as they could be. Besides opening up other optimization opportunities, improving folding (even without optimization) will help detect more coding bugs (such as buffer overflows). The following test case shows that GCC folds global constant character arrays and constant pointers to string literals but it fails to do the same for static local const arrays or pointers or global aggregates. With optimization, it still fails to fold const aggregates and ends up calling strlen. $ cat xyz.c && /build/gcc-trunk-svn/gcc/xgcc -B /build/gcc-trunk-svn/gcc -O0 -S -Wall -Wextra -Wpedantic -fdump-tree-optimized=/dev/stdout strlen.c extern int vsprintf (char*, const char*, __builtin_va_list); void g (char *d, __builtin_va_list va) { const char f[] = "%i"; vsprintf (d, f, va); } void h (char *d, const char *f, __builtin_va_list va) { vsprintf (d, f, va); } ;; Function global_array (global_array, funcdef_no=0, decl_uid=1756, cgraph_uid=0, symbol_order=3) global_array () { <bb 2>: <bb 3>: return; } ;; Function global_pointer (global_pointer, funcdef_no=1, decl_uid=1759, cgraph_uid=1, symbol_order=4) global_pointer () { const char * s.0_1; long unsigned int _2; <bb 2>: s.0_1 = "abc"; _2 = 3; if (_2 != 3) goto <bb 3>; else goto <bb 4>; <bb 3>: __builtin_abort (); <bb 4>: return; } ;; Function local_array (local_array, funcdef_no=2, decl_uid=1762, cgraph_uid=2, symbol_order=5) local_array () { static const char a[4] = "abc"; long unsigned int _1; <bb 2>: _1 = __builtin_strlen (&a); if (_1 != 3) goto <bb 3>; else goto <bb 4>; <bb 3>: __builtin_abort (); <bb 4>: return; } ;; Function local_pointer (local_pointer, funcdef_no=3, decl_uid=1766, cgraph_uid=3, symbol_order=6) local_pointer () { const char * const s; long unsigned int _1; <bb 2>: s_2 = "abc"; _1 = __builtin_strlen (s_2); if (_1 != 3) goto <bb 3>; else goto <bb 4>; <bb 3>: __builtin_abort (); <bb 4>: return; } ;; Function global_struct (global_struct, funcdef_no=4, decl_uid=1770, cgraph_uid=4, symbol_order=7) global_struct () { long unsigned int _1; <bb 2>: _1 = __builtin_strlen (&x.a); if (_1 != 3) goto <bb 3>; else goto <bb 4>; <bb 3>: __builtin_abort (); <bb 4>: return; } ;; Function local_struct (local_struct, funcdef_no=5, decl_uid=1773, cgraph_uid=5, symbol_order=8) local_struct () { static const struct X x = {.a="abc"}; long unsigned int _1; <bb 2>: _1 = __builtin_strlen (&x.a); if (_1 != 3) goto <bb 3>; else goto <bb 4>; <bb 3>: __builtin_abort (); <bb 4>: return; }
Contents of strlen.c?
Sorry, wrong test case. Here's the right one: const char a[] = "abc"; const char* const s = "abc"; void global_array (void) { if (__builtin_strlen (a) != 3) __builtin_abort (); } void local_array (void) { static const char a[] = "abc"; if (__builtin_strlen (a) != 3) __builtin_abort (); } void global_pointer (void) { if (__builtin_strlen (s) != 3) __builtin_abort (); } void local_pointer (void) { static const char* const s = "abc"; if (__builtin_strlen (s) != 3) __builtin_abort (); } struct X { char a [4]; }; const struct X x = { "abc" }; void global_struct (void) { if (__builtin_strlen (x.a) != 3) __builtin_abort (); } void local_struct (void) { static const struct X x = { "abc" }; if (__builtin_strlen (x.a) != 3) __builtin_abort (); }
Ok, I'm less interested in -O0 but even with -O1 we fail for global_struct and local_struct. We can improve things by doing Index: gcc/expr.c =================================================================== --- gcc/expr.c (revision 239778) +++ gcc/expr.c (working copy) @@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. #include "tree-chkp.h" #include "rtl-chkp.h" #include "ccmp.h" +#include "tree-dfa.h" /* If this is nonzero, we do not bother generating VOLATILE @@ -11098,51 +11081,14 @@ string_constant (tree arg, tree *ptr_off if (TREE_CODE (arg) == ADDR_EXPR) { - if (TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST) - { - *ptr_offset = size_zero_node; - return TREE_OPERAND (arg, 0); - } - else if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL) - { - array = TREE_OPERAND (arg, 0); - offset = size_zero_node; - } - else if (TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF) - { - array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0); - offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1); - if (TREE_CODE (array) != STRING_CST - && TREE_CODE (array) != VAR_DECL) - return 0; - - /* Check if the array has a nonzero lower bound. */ - lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0)); - if (!integer_zerop (lower_bound)) - { - /* If the offset and base aren't both constants, return 0. */ - if (TREE_CODE (lower_bound) != INTEGER_CST) - return 0; - if (TREE_CODE (offset) != INTEGER_CST) - return 0; - /* Adjust offset by the lower bound. */ - offset = size_diffop (fold_convert (sizetype, offset), - fold_convert (sizetype, lower_bound)); - } - } - else if (TREE_CODE (TREE_OPERAND (arg, 0)) == MEM_REF) - { - array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0); - offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1); - if (TREE_CODE (array) != ADDR_EXPR) - return 0; - array = TREE_OPERAND (array, 0); - if (TREE_CODE (array) != STRING_CST - && TREE_CODE (array) != VAR_DECL) - return 0; - } - else + HOST_WIDE_INT off; + array = get_addr_base_and_unit_offset (TREE_OPERAND (arg, 0), &off); + if (! array + || (TREE_CODE (array) != VAR_DECL + && TREE_CODE (array) != CONST_DECL + && TREE_CODE (array) != STRING_CST)) return 0; + *ptr_offset = size_int (off); } else if (TREE_CODE (arg) == PLUS_EXPR || TREE_CODE (arg) == POINTER_PLUS_EXPR) { but then we only handle STRING_CST ctors and not {.a="abc"} we do have some generic fold-a-ctor-reference-at-offset thing (gimple-fold.c:fold_*ctor_reference) but that hasn't the get-me-the-ctor-element-at-offset part factored out. Or finding the ctor element that the offset is within (we can then adjust offset). So a bit of refactoring is missing here.
Working on a patch.
Patch: https://gcc.gnu.org/ml/gcc-patches/2018-07/msg00279.html
Author: msebor Date: Mon Jul 9 20:33:48 2018 New Revision: 262522 URL: https://gcc.gnu.org/viewcvs?rev=262522&root=gcc&view=rev Log: PR middle-end/77357 - strlen of constant strings not folded gcc/ChangeLog: PR middle-end/77357 PR middle-end/86428 * builtins.c (c_strlen): Avoid out-of-bounds warnings when accessing implicitly initialized array elements. * expr.c (string_constant): Handle string initializers of character arrays within aggregates. * gimple-fold.c (fold_array_ctor_reference): Add argument. Store element offset. As a special case, handle zero size. (fold_nonarray_ctor_reference): Same. (fold_ctor_reference): Add argument. Store subobject offset. * gimple-fold.h (fold_ctor_reference): Add argument. gcc/testsuite/ChangeLog: PR middle-end/77357 * gcc.dg/strlenopt-49.c: New test. * gcc.dg/strlenopt-50.c: New test. * gcc.dg/strlenopt-51.c: New test. * gcc.dg/strlenopt-52.c: New test. Added: trunk/gcc/testsuite/gcc.dg/strlenopt-49.c trunk/gcc/testsuite/gcc.dg/strlenopt-50.c trunk/gcc/testsuite/gcc.dg/strlenopt-51.c trunk/gcc/testsuite/gcc.dg/strlenopt-52.c Modified: trunk/gcc/ChangeLog trunk/gcc/builtins.c trunk/gcc/expr.c trunk/gcc/fold-const.c trunk/gcc/fold-const.h trunk/gcc/gimple-fold.c trunk/gcc/gimple-fold.h trunk/gcc/testsuite/ChangeLog trunk/gcc/testsuite/gcc.c-torture/execute/builtins/strlen-3.c
Committed in r262522.
(In reply to Martin Sebor from comment #7) > Committed in r262522. Unfortunately this commit apparently causes GCC to generate incorrect code when compiling Emacs. See Bug#86528.