Bug 77357 - strlen of constant strings not folded
Summary: strlen of constant strings not folded
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: middle-end (show other bugs)
Version: 7.0
: P3 enhancement
Target Milestone: ---
Assignee: Martin Sebor
URL:
Keywords: missed-optimization, patch
Depends on:
Blocks: strlen
  Show dependency treegraph
 
Reported: 2016-08-23 22:47 UTC by Martin Sebor
Modified: 2018-07-17 03:39 UTC (History)
3 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2016-08-24 00:00:00


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Martin Sebor 2016-08-23 22:47:53 UTC
GCC successfully folds some simple calls to strlen with constant arguments but not others, both with and without optimization.  The results with optimization are somewhat better but still not as good as they could be.  Besides opening up other optimization opportunities, improving folding (even without optimization) will help detect more coding bugs (such as buffer overflows).

The following test case shows that GCC folds global constant character arrays and constant pointers to string literals but it fails to do the same for static local const arrays or pointers or global aggregates.

With optimization, it still fails to fold const aggregates and ends up calling strlen.

$ cat xyz.c && /build/gcc-trunk-svn/gcc/xgcc -B /build/gcc-trunk-svn/gcc -O0 -S -Wall -Wextra -Wpedantic -fdump-tree-optimized=/dev/stdout strlen.c
extern int vsprintf (char*, const char*, __builtin_va_list);

void g (char *d, __builtin_va_list va)
{
  const char f[] = "%i";
  vsprintf (d, f, va);
}

void h (char *d, const char *f, __builtin_va_list va)
{
  vsprintf (d, f, va);
}


;; Function global_array (global_array, funcdef_no=0, decl_uid=1756, cgraph_uid=0, symbol_order=3)

global_array ()
{
  <bb 2>:

  <bb 3>:
  return;

}



;; Function global_pointer (global_pointer, funcdef_no=1, decl_uid=1759, cgraph_uid=1, symbol_order=4)

global_pointer ()
{
  const char * s.0_1;
  long unsigned int _2;

  <bb 2>:
  s.0_1 = "abc";
  _2 = 3;
  if (_2 != 3)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_abort ();

  <bb 4>:
  return;

}



;; Function local_array (local_array, funcdef_no=2, decl_uid=1762, cgraph_uid=2, symbol_order=5)

local_array ()
{
  static const char a[4] = "abc";
  long unsigned int _1;

  <bb 2>:
  _1 = __builtin_strlen (&a);
  if (_1 != 3)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_abort ();

  <bb 4>:
  return;

}



;; Function local_pointer (local_pointer, funcdef_no=3, decl_uid=1766, cgraph_uid=3, symbol_order=6)

local_pointer ()
{
  const char * const s;
  long unsigned int _1;

  <bb 2>:
  s_2 = "abc";
  _1 = __builtin_strlen (s_2);
  if (_1 != 3)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_abort ();

  <bb 4>:
  return;

}



;; Function global_struct (global_struct, funcdef_no=4, decl_uid=1770, cgraph_uid=4, symbol_order=7)

global_struct ()
{
  long unsigned int _1;

  <bb 2>:
  _1 = __builtin_strlen (&x.a);
  if (_1 != 3)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_abort ();

  <bb 4>:
  return;

}



;; Function local_struct (local_struct, funcdef_no=5, decl_uid=1773, cgraph_uid=5, symbol_order=8)

local_struct ()
{
  static const struct X x = {.a="abc"};
  long unsigned int _1;

  <bb 2>:
  _1 = __builtin_strlen (&x.a);
  if (_1 != 3)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  __builtin_abort ();

  <bb 4>:
  return;

}
Comment 1 Richard Biener 2016-08-24 07:49:13 UTC
Contents of strlen.c?
Comment 2 Martin Sebor 2016-08-24 14:22:36 UTC
Sorry, wrong test case.  Here's the right one:

const char a[] = "abc";
const char* const s = "abc";

void global_array (void)
{
  if (__builtin_strlen (a) != 3)
    __builtin_abort ();
}

void local_array (void)
{
  static const char a[] = "abc";
  if (__builtin_strlen (a) != 3)
    __builtin_abort ();
}

void global_pointer (void)
{
  if (__builtin_strlen (s) != 3)
    __builtin_abort ();
}

void local_pointer (void)
{
  static const char* const s = "abc";
  if (__builtin_strlen (s) != 3)
    __builtin_abort ();
}


struct X { char a [4]; };
const struct X x = { "abc" };

void global_struct (void)
{
  if (__builtin_strlen (x.a) != 3)
    __builtin_abort ();
}

void local_struct (void)
{
  static const struct X x =  { "abc" };
  if (__builtin_strlen (x.a) != 3)
    __builtin_abort ();
}
Comment 3 Richard Biener 2016-08-26 12:53:29 UTC
Ok, I'm less interested in -O0 but even with -O1 we fail for global_struct
and local_struct.  We can improve things by doing

Index: gcc/expr.c
===================================================================
--- gcc/expr.c  (revision 239778)
+++ gcc/expr.c  (working copy)
@@ -60,6 +60,7 @@ along with GCC; see the file COPYING3.
 #include "tree-chkp.h"
 #include "rtl-chkp.h"
 #include "ccmp.h"
+#include "tree-dfa.h"
 
 
 /* If this is nonzero, we do not bother generating VOLATILE
@@ -11098,51 +11081,14 @@ string_constant (tree arg, tree *ptr_off
 
   if (TREE_CODE (arg) == ADDR_EXPR)
     {
-      if (TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST)
-       {
-         *ptr_offset = size_zero_node;
-         return TREE_OPERAND (arg, 0);
-       }
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == VAR_DECL)
-       {
-         array = TREE_OPERAND (arg, 0);
-         offset = size_zero_node;
-       }
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == ARRAY_REF)
-       {
-         array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-         offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
-         if (TREE_CODE (array) != STRING_CST
-             && TREE_CODE (array) != VAR_DECL)
-           return 0;
-
-         /* Check if the array has a nonzero lower bound.  */
-         lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0));
-         if (!integer_zerop (lower_bound))
-           {
-             /* If the offset and base aren't both constants, return 0.  */
-             if (TREE_CODE (lower_bound) != INTEGER_CST)
-               return 0;
-             if (TREE_CODE (offset) != INTEGER_CST)
-               return 0;
-             /* Adjust offset by the lower bound.  */
-             offset = size_diffop (fold_convert (sizetype, offset),
-                                   fold_convert (sizetype, lower_bound));
-           }
-       }
-      else if (TREE_CODE (TREE_OPERAND (arg, 0)) == MEM_REF)
-       {
-         array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0);
-         offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1);
-         if (TREE_CODE (array) != ADDR_EXPR)
-           return 0;
-         array = TREE_OPERAND (array, 0);
-         if (TREE_CODE (array) != STRING_CST
-             && TREE_CODE (array) != VAR_DECL)
-           return 0;
-       }
-      else
+      HOST_WIDE_INT off;
+      array = get_addr_base_and_unit_offset (TREE_OPERAND (arg, 0), &off);
+      if (! array
+         || (TREE_CODE (array) != VAR_DECL
+             && TREE_CODE (array) != CONST_DECL
+             && TREE_CODE (array) != STRING_CST))
        return 0;
+      *ptr_offset = size_int (off);
     }
   else if (TREE_CODE (arg) == PLUS_EXPR || TREE_CODE (arg) == POINTER_PLUS_EXPR)
     {


but then we only handle STRING_CST ctors and not {.a="abc"} we do have
some generic fold-a-ctor-reference-at-offset thing (gimple-fold.c:fold_*ctor_reference) but that hasn't the get-me-the-ctor-element-at-offset part factored out.  Or finding the ctor element that the offset
is within (we can then adjust offset).

So a bit of refactoring is missing here.
Comment 4 Martin Sebor 2018-07-03 02:32:21 UTC
Working on a patch.
Comment 5 Martin Sebor 2018-07-05 23:56:02 UTC
Patch: https://gcc.gnu.org/ml/gcc-patches/2018-07/msg00279.html
Comment 6 Martin Sebor 2018-07-09 20:34:20 UTC
Author: msebor
Date: Mon Jul  9 20:33:48 2018
New Revision: 262522

URL: https://gcc.gnu.org/viewcvs?rev=262522&root=gcc&view=rev
Log:
PR middle-end/77357 - strlen of constant strings not folded

gcc/ChangeLog:

	PR middle-end/77357
	PR middle-end/86428
	* builtins.c (c_strlen): Avoid out-of-bounds warnings when
	accessing implicitly initialized array elements.
	* expr.c (string_constant): Handle string initializers of
	character arrays within aggregates.
	* gimple-fold.c (fold_array_ctor_reference): Add argument.
	Store element offset.  As a special case, handle zero size.
	(fold_nonarray_ctor_reference): Same.
	(fold_ctor_reference): Add argument.  Store subobject offset.
	* gimple-fold.h (fold_ctor_reference): Add argument.

gcc/testsuite/ChangeLog:

	PR middle-end/77357
	* gcc.dg/strlenopt-49.c: New test.
	* gcc.dg/strlenopt-50.c: New test.
	* gcc.dg/strlenopt-51.c: New test.
	* gcc.dg/strlenopt-52.c: New test.

Added:
    trunk/gcc/testsuite/gcc.dg/strlenopt-49.c
    trunk/gcc/testsuite/gcc.dg/strlenopt-50.c
    trunk/gcc/testsuite/gcc.dg/strlenopt-51.c
    trunk/gcc/testsuite/gcc.dg/strlenopt-52.c
Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/builtins.c
    trunk/gcc/expr.c
    trunk/gcc/fold-const.c
    trunk/gcc/fold-const.h
    trunk/gcc/gimple-fold.c
    trunk/gcc/gimple-fold.h
    trunk/gcc/testsuite/ChangeLog
    trunk/gcc/testsuite/gcc.c-torture/execute/builtins/strlen-3.c
Comment 7 Martin Sebor 2018-07-09 20:35:33 UTC
Committed in r262522.
Comment 8 Paul Eggert 2018-07-16 04:28:14 UTC
(In reply to Martin Sebor from comment #7)
> Committed in r262522.

Unfortunately this commit apparently causes GCC to generate incorrect code when compiling Emacs. See Bug#86528.